diff --git a/notes/ewald.lyx b/notes/ewald.lyx index d36af4e..0b32a2e 100644 --- a/notes/ewald.lyx +++ b/notes/ewald.lyx @@ -3814,11 +3814,11 @@ key "linton_lattice_2010" , therefore \begin_inset Formula -\begin{eqnarray*} -\sigma_{n}^{m(1)} & = & -\frac{i^{n+1}}{k^{n+1}\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\eta^{2j}\left(\frac{k\gamma_{\mu}}{2\eta}\right)^{2j}\Gamma\left(-j,\frac{k^{2}\gamma_{\mu}^{2}}{2\eta^{2}}\right)\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\\ - & = & -\frac{i^{n+1}}{k^{n+1}\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\left(\frac{k\gamma_{\mu}}{2}\right)^{2j}\underbrace{\Gamma\left(-j,\frac{k^{2}\gamma_{\mu}^{2}}{2\eta^{2}}\right)}_{\Gamma_{j,\mu}}\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\\ - & = & -\frac{i^{n+1}}{k\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}n!\left(\tilde{\beta}_{\mu}/k\right)^{n-2j}\Gamma_{j,\mu}}{j!2^{2j}\left(n-2j\right)!}\left(\gamma_{\mu}\right)^{2j}. -\end{eqnarray*} +\begin{eqnarray} +\sigma_{n}^{m(1)} & = & -\frac{i^{n+1}}{k^{n+1}\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\eta^{2j}\left(\frac{k\gamma_{\mu}}{2\eta}\right)^{2j}\Gamma\left(-j,\frac{k^{2}\gamma_{\mu}^{2}}{2\eta^{2}}\right)\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\nonumber \\ + & = & -\frac{i^{n+1}}{k^{n+1}\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\left(\frac{k\gamma_{\mu}}{2}\right)^{2j}\underbrace{\Gamma\left(-j,\frac{k^{2}\gamma_{\mu}^{2}}{2\eta^{2}}\right)}_{\Gamma_{j,\mu}}\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\nonumber \\ + & = & -\frac{i^{n+1}}{k\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}n!\left(\tilde{\beta}_{\mu}/k\right)^{n-2j}\Gamma_{j,\mu}}{j!2^{2j}\left(n-2j\right)!}\left(\gamma_{\mu}\right)^{2j}.\label{eq:1D_z_LRsum} +\end{eqnarray} \end_inset diff --git a/qpms/ewald.c b/qpms/ewald.c index 3ff8fbe..385b330 100644 --- a/qpms/ewald.c +++ b/qpms/ewald.c @@ -9,6 +9,7 @@ #include #include #include +#include // parameters for the quadrature of integral in (4.6) #ifndef INTEGRATION_WORKSPACE_LIMIT @@ -71,6 +72,7 @@ qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax /*, co c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *)); //if (c->s1_jMaxes == NULL) return NULL; + // ----- the "xy-plane constants" ------ //determine sizes size_t s1_constfacs_sz = 0; for (qpms_y_t y = 0; y < c->nelem_sc; ++y) { @@ -113,6 +115,34 @@ qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax /*, co c->s1_constfacs[y] = NULL; } + // ------ the "z-axis constants" ----- + // determine sizes + size_t s1_constfacs_1Dz_sz; + { + const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax + s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax + : isq(sz_n_lMax); + } + c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double)); + + size_t s1_constfacs_1Dz_sz_cumsum = 0; + for (qpms_l_t n = 0; n <= lMax; ++n) { + c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum; + for (qpms_l_t j = 0; j <= n/2; ++j) { + switch(type) { + case EWALD32_CONSTANTS_AGNOSTIC: + c->s1_constfacs[n][j] = -ipow(n+1) * min1pow(j) * factorial(n) + / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j)); + break; + default: + abort(); // wrong type argument or not implemented + } + } + s1_constfacs_1Dz_sz_cumsum += 1 + n / 2; + } + assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum); + + c->legendre_csphase = csphase; c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double)); // N.B. here I use the GSL_SF_LEGENRE_NONE, in order to be consistent with translations.c @@ -131,6 +161,7 @@ void qpms_ewald32_constants_free(qpms_ewald32_constants_t *c) { free(c->legendre0); free(c->s1_constfacs); free(c->s1_constfacs_base); + free(c->s1_constfacs_1Dz_base); free(c->s1_jMaxes); free(c); } @@ -344,6 +375,7 @@ int ewald3_21_xy_sigma_long ( complex double *target, // must be c->nelem_sc long double *err, const qpms_ewald32_constants_t *c, + const double eta, const double k, const double unitcell_volume /* with the corresponding lattice dimensionality */, const LatticeDimensionality latdim, PGenSph *pgen_K, const bool pgen_generates_shifted_points @@ -370,10 +402,10 @@ int ewald3_21_xy_sigma_long ( memset(err, 0, nelem_sc * sizeof(double)); } - const double commonfac = 1/(k*k*unitcell_area); // used in the very end (CFC) + const double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC) assert(commonfac > 0); - PGenSingleReturnData pgen_retdata; + PGenSphReturnData pgen_retdata; #ifndef NDEBUG double rbeta_pq_prev; #endif @@ -476,7 +508,8 @@ int ewald3_1_z_sigma_long ( complex double *target, // must be c->nelem_sc long double *err, const qpms_ewald32_constants_t *c, - const double unitcell_volume /* length in this case */, + const double eta, const double k, + const double unitcell_volume /* length (periodicity) in this case */, const LatticeDimensionality latdim, PGenSph *pgen_K, const bool pgen_generates_shifted_points /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift, @@ -505,7 +538,13 @@ int ewald3_1_z_sigma_long ( memset(err, 0, nelem_sc * sizeof(double)); } - PGenSingleReturnData pgen_retdata; + const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC) + assert(commonfac > 0); + + // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum). + qpms_csf_result Gamma_pq[lMax/2+1]; + + PGenSphReturnData pgen_retdata; // CHOOSE POINT BEGIN while ((pgen_retdata = PGenSph_next(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP assert(pgen_retdata.flags & PGEN_AT_Z); @@ -519,13 +558,14 @@ int ewald3_1_z_sigma_long ( pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!! beta_mu_z = K_z + beta_z; } + double rbeta_mu = fabs(beta_mu_z); // CHOOSE POINT END const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!! // R-DEPENDENT BEGIN - gamma_pq = lilgamma(rbeta_pq/k); - z = csq(gamma_pq*k/(2*eta)); // Když o tom tak přemýšlím, tak tohle je vlastně vždy reálné + complex double gamma_pq = lilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ... + const complex double z = csq(gamma_pq*k/(2*eta));// ... so the square (this) is in fact real. for(qpms_l_t j = 0; j <= lMax/2; ++j) { int retval = complex_gamma_inc_e(0.5-j, z, Gamma_pq+j); // we take the other branch, cf. [Linton, p. 642 in the middle]: FIXME instead use the C11 CMPLX macros and fill in -O*I part to z in the line above @@ -533,26 +573,29 @@ int ewald3_1_z_sigma_long ( Gamma_pq[j].val = conj(Gamma_pq[j].val); //FIXME as noted above if(!(retval==0 || retval==GSL_EUNDRFLW)) abort(); } - if (latdim & LAT1D) - factor1d = k * M_SQRT1_2 * .5 * gamma_pq; // R-DEPENDENT END - // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array - // and just fetched for each n, m pair - for(qpms_l_t n = 0; n <= lMax; ++n) - for(qpms_m_t m = -n; m <= n; ++m) { - if((m+n) % 2 != 0) // odd coefficients are zero. - continue; - const qpms_y_t y = qpms_mn2y_sc(m, n); - const complex double e_imalpha_pq = cexp(I*m*arg_pq); + // and just fetched for each n + for(qpms_l_t n = 0; n <= lMax; ++n) { + const qpms_y_t y = qpms_mn2y_sc(0, n); complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c); double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors? - assert((n-abs(m))/2 == c->s1_jMaxes[y]); - for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop - * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] bellow (GGG) after error computation - * c->s1_constfacs[y][j]; + for(qpms_l_t j = 0; j <= n/2; ++j) { + complex double summand = pow(rbeta_mu/k, n-2*j) + * ((beta_mu_z > 0) ? // TODO this can go outsize the j-loop + c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] : + (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n)) + ) + // * min1pow_m_neg(m) // not needed as m == 0 + * cpow(gamma_pq, 2*j) // * Gamma_pq[j] bellow (GGG) after error computation + * c->s1_constfacs_1Dz[n][j]; + /* s1_consstfacs_1Dz[n][j] = + * + * -I**(n+1) (-1)**j * n! + * -------------------------- + * j! * 2**(2*j) * (n - 2*j)! + */ + if(err) { // FIXME include also other errors than Gamma_pq's relative error kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand)); @@ -560,13 +603,10 @@ int ewald3_1_z_sigma_long ( summand *= Gamma_pq[j].val; // GGG ckahanadd(&jsum, &jsum_c, summand); } - jsum *= phasefac * factor1d; // PFC + jsum *= phasefac; // PFC ckahanadd(target + y, target_c + y, jsum); if(err) kahanadd(err + y, err_c + y, jsum_err); } -#ifndef NDEBUG - rbeta_pq_prev = rbeta_pq; -#endif } // END POINT LOOP free(err_c); diff --git a/qpms/ewald.h b/qpms/ewald.h index 0656157..d356984 100644 --- a/qpms/ewald.h +++ b/qpms/ewald.h @@ -23,6 +23,7 @@ * according to the spherical-harmonic-normalisation-independent formulation in my notes * notes/ewald.lyx. * Both parts of lattice sums are then calculated with the P_n^|m| e^imf substituted in place + * // (N.B. or P_n^|m| e^imf (-1)^m for negative m) * of Y_n^m (this is quite a weird normalisation especially for negative |m|, but it is consistent * with the current implementation of the translation coefficients in translations.c; * in the long run, it might make more sense to replace it everywhere with normalised @@ -40,8 +41,30 @@ typedef struct { qpms_y_t nelem_sc; qpms_l_t *s1_jMaxes; complex double **s1_constfacs; // indices [y][j] where j is same as in [1, (4.5)] - // TODO probably normalisation and equatorial legendre polynomials should be included, too + /* These are the actual numbers now: (in the EWALD32_CONSTANTS_AGNOSTIC version) + * for m + n EVEN: + * + * s1_constfacs[y(m,n)][j] = + * + * -2 * I**(n+1) * sqrt(π) * ((n-m)/2)! * ((n+m)/2)! * (-1)**j + * ----------------------------------------------------------- + * j! * ((n-m)/2 - j)! * ((n+m)/2 + j)! + * + * for m + n ODD: + * + * s1_constfacs[y(m,n)][j] = 0 + */ complex double *s1_constfacs_base; // internal pointer holding the memory for the constants + // similarly for the 1D z-axis aligned case; now the indices are [n][j] (as m == 0) + complex double **s1_constfacs_1Dz; + /* These are the actual numbers now: + * s1_consstfacs_1Dz[n][j] = + * + * -I**(n+1) (-1)**j * n! + * -------------------------- + * j! * 2**(2*j) * (n - 2*j)! + */ + complex double *s1_constfacs_1Dz_base; double *legendre0; /* now with GSL_SF_LEGENDRE_NONE normalisation, because this is what is * what the multipliers from translations.c count with. @@ -83,6 +106,10 @@ int cx_gamma_inc_series_e(double a, complex z, qpms_csf_result * result); // if x is (almost) real, it just uses gsl_sf_gamma_inc_e int complex_gamma_inc_e(double a, complex double x, qpms_csf_result *result); +// Exponential integral for complex second argument +// If x is (almost) positive real, it just uses gsl_sf_expint_En_e +int complex_expint_n_e(int n, complex double x, qpms_csf_result *result); + // hypergeometric 2F2, used to calculate some errors int hyperg_2F2_series(const double a, const double b, const double c, const double d, diff --git a/qpms/ewaldsf.c b/qpms/ewaldsf.c index 20879e5..539bf02 100644 --- a/qpms/ewaldsf.c +++ b/qpms/ewaldsf.c @@ -1,5 +1,6 @@ #include "ewald.h" #include +#include #include #include // Maybe I should rather use DBL_EPSILON instead of GSL_DBL_EPSILON. #include "kahansum.h" @@ -99,6 +100,24 @@ int complex_gamma_inc_e(double a, complex double x, qpms_csf_result *result) { return cx_gamma_inc_series_e(a, x, result); } +// Exponential integral for complex argument; !UNTESTED! and probably not needed, as I expressed everything in terms of inc. gammas anyways. +int complex_expint_n_e(int n, complex double x, qpms_csf_result *result) { + if (creal(x) >= 0 && + (0 == fabs(cimag(x)) || // x is real positive; just use the real fun + fabs(cimag(x)) < fabs(creal(x)) * COMPLEXPART_REL_ZERO_LIMIT)) { + gsl_sf_result real_expint_result; + int retval = gsl_sf_expint_En_e(n, creal(x), &real_expint_result); + result->val = real_expint_result.val; + result->err = real_expint_result.err; + return retval; + } else { + int retval = complex_gamma_inc_e(-n+1, x, result); + complex double f = cpow(x, 2*n-2); + retval.val *= f; + retval.err *= cabs(f); + return retval; + } +} // inspired by GSL's hyperg_2F1_series int hyperg_2F2_series(const double a, const double b, const double c, const double d, diff --git a/qpms/tiny_inlines.h b/qpms/tiny_inlines.h index a499b2e..b07c271 100644 --- a/qpms/tiny_inlines.h +++ b/qpms/tiny_inlines.h @@ -13,6 +13,17 @@ static inline int min1pow_m_neg(int m) { return (m < 0) ? min1pow(m) : 1; } + +#if 0 +#ifdef __GSL_SF_LEGENDRE_H__ +static inline complex double +spharm_eval(gsl_sf_legendre_t P_normconv, int P_csphase, qpms_l_t l, qpms_m_t m, double P_n_abs_m, complex double exp_imf) { + + return; +} +#endif +#endif + // this has shitty precision: // static inline complex double ipow(int x) { return cpow(I, x); } @@ -32,4 +43,6 @@ static inline complex double ipow(int x) { } } +static inline int isq(int x) {return x * x;} + #endif // TINY_INLINES_H diff --git a/qpms/translations.c b/qpms/translations.c index 80470e2..816bbd6 100644 --- a/qpms/translations.c +++ b/qpms/translations.c @@ -516,7 +516,6 @@ static inline size_t qpms_trans_calculator_index_yyu(const qpms_trans_calculator #define SQ(x) ((x)*(x)) -static inline int isq(int x) { return x * x; } static inline double fsq(double x) {return x * x; } static void qpms_trans_calculator_multipliers_A_general(