diff --git a/qpms/ewald.h b/qpms/ewald.h index 5c86775..8eda80c 100644 --- a/qpms/ewald.h +++ b/qpms/ewald.h @@ -253,7 +253,7 @@ void ewald3_2_sigma_long_Delta(complex double *target, double *target_err, int m * For production, use ewald3_2_sigma_long_Delta() instead. */ void ewald3_2_sigma_long_Delta_recurrent(complex double *target, double *target_err, int maxn, complex double x, - int xbranch, complex double z); + int xbranch, complex double z, _Bool bigimz); /// The Delta_n factor from [Kambe II], Appendix 3, used in 2D-in-3D long range sum. /** This function always uses Taylor expansion in \a z. diff --git a/qpms/ewaldsf.c b/qpms/ewaldsf.c index 8f50ee2..c2ac92d 100644 --- a/qpms/ewaldsf.c +++ b/qpms/ewaldsf.c @@ -15,10 +15,16 @@ #include #include "tiny_inlines.h" +// Some magic constants + #ifndef COMPLEXPART_REL_ZERO_LIMIT #define COMPLEXPART_REL_ZERO_LIMIT 1e-14 #endif +#ifndef DELTA_RECURRENT_EXPOVERFLOW_LIMIT +#define DELTA_RECURRENT_EXPOVERFLOW_LIMIT 10. +#endif + gsl_error_handler_t IgnoreUnderflowsGSLErrorHandler; void IgnoreUnderflowsGSLErrorHandler (const char * reason, @@ -296,23 +302,54 @@ static inline complex double csqrt_branch(complex double x, int xbranch) { return csqrt(x) * min1pow(xbranch); } + // The Delta_n factor from [Kambe II], Appendix 3 // \f[ \Delta_n = \int_n^\infty t^{-1/2 - n} \exp(-t + z^2/(4t))\ud t \f] +/* If |Im z| is big, Faddeeva_z might cause double overflow. In such case, + * use bigimz = true to use a slightly different formula to initialise + * the first two elements. + * + * The actual choice is done outside this function in order to enable + * testing/comparison of the results. + */ void ewald3_2_sigma_long_Delta_recurrent(complex double *target, double *err, - int maxn, complex double x, int xbranch, complex double z) { + int maxn, complex double x, int xbranch, complex double z, bool bigimz) { complex double expfac = cexp(-x + 0.25 * z*z / x); complex double sqrtx = csqrt_branch(x, xbranch); // TODO check carefully, which branch is needed - // These are used in the first two recurrences - complex double w_plus = Faddeeva_w(+z/(2*sqrtx) + I*sqrtx, 0); - complex double w_minus = Faddeeva_w(-z/(2*sqrtx) + I*sqrtx, 0); + double w_plus_abs = NAN, w_minus_abs = NAN; // Used only if err != NULL QPMS_ASSERT(maxn >= 0); - if (maxn >= 0) - target[0] = 0.5 * M_SQRTPI * expfac * (w_minus + w_plus); - if (maxn >= 1) - target[1] = I / z * M_SQRTPI * expfac * (w_minus - w_plus); + // These are used to fill the first two elements for recurrence + if(!bigimz) { + complex double w_plus = Faddeeva_w(+z/(2*sqrtx) + I*sqrtx, 0); + complex double w_minus = Faddeeva_w(-z/(2*sqrtx) + I*sqrtx, 0); + if (maxn >= 0) + target[0] = 0.5 * M_SQRTPI * expfac * (w_minus + w_plus); + if (maxn >= 1) + target[1] = I / z * M_SQRTPI * expfac * (w_minus - w_plus); + if(err) { w_plus_abs = cabs(w_plus); w_minus_abs = cabs(w_minus); } + } else { + /* A different strategy to avoid double overflow using the formula + * w(y) = exp(-y*y) * erfc(-I*y): + * Labeling + * ž_± = ±z/(2*sqrtx) + I * sqrtx + * and expfac = exp(ž**2), where ž**2 = -x + (z*z/4/x), + * we have + * expfac * w(ž_±) = exp(ž**2 - ž_±**2) erfc(-I * ž_±) + * = exp(∓ I * z) erfc(-I * ž_±) + */ + complex double w_plus_n = cexp(- I * z) * Faddeeva_erfc(-I * (+z/(2*sqrtx) + I*sqrtx), 0); + complex double w_minus_n = cexp(+ I * z) * Faddeeva_erfc(-I * (-z/(2*sqrtx) + I*sqrtx), 0); + if (maxn >= 0) + target[0] = 0.5 * M_SQRTPI * (w_minus_n + w_plus_n); + if (maxn >= 1) + target[1] = I / z * M_SQRTPI * (w_minus_n - w_plus_n); + } for(int n = 1; n < maxn; ++n) { // The rest via recurrence // TODO The cpow(x, 0.5 - n) might perhaps better be replaced with a recurrently computed variant target[n+1] = -(4 / (z*z)) * (-(0.5 - n) * target[n] + target[n-1] - sqrtx * cpow(x, -n) * expfac); + if(isnan(creal(target[n+1])) || isnan(cimag(target[n+1]))) { + QPMS_WARN("Encountered NaN."); + } } if (err) { // The error estimates for library math functions are based on @@ -322,7 +359,8 @@ void ewald3_2_sigma_long_Delta_recurrent(complex double *target, double *err, // http://ab-initio.mit.edu/wiki/index.php/Faddeeva_Package // "we find that the accuracy is typically at at least 13 significant digits in both the real and imaginary parts" // FIXME the error estimate seems might be off by several orders of magnitude (try parameters x = -3, z = 0.5, maxn=20) - double w_plus_abs = cabs(w_plus), w_minus_abs = cabs(w_minus), expfac_abs = cabs(expfac); + // FIXME the error estimate does not take into account the alternative recurrence init. formula (bigimz) + double expfac_abs = cabs(expfac); double w_plus_err = w_plus_abs * 1e-13, w_minus_err = w_minus_abs * 1e-13; // LPTODO argument error contrib. double expfac_err = expfac_abs * (4 * DBL_EPSILON); // LPTODO add argument error contrib. double z_abs = cabs(z); @@ -407,6 +445,7 @@ void ewald3_2_sigma_long_Delta(complex double *target, double *err, if (absz < 2.) // TODO take into account also the other parameters ewald3_2_sigma_long_Delta_series(target, err, maxn, x, xbranch, z); else - ewald3_2_sigma_long_Delta_recurrent(target, err, maxn, x, xbranch, z); + ewald3_2_sigma_long_Delta_recurrent(target, err, maxn, x, xbranch, z, + fabs(cimag(z)) > DELTA_RECURRENT_EXPOVERFLOW_LIMIT); }