Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested

Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
This commit is contained in:
Marek Nečada 2018-11-24 12:51:57 +00:00
parent ccc409ba34
commit 0c55595d08
6 changed files with 131 additions and 33 deletions

View File

@ -3814,11 +3814,11 @@ key "linton_lattice_2010"
, therefore
\begin_inset Formula
\begin{eqnarray*}
\sigma_{n}^{m(1)} & = & -\frac{i^{n+1}}{k^{n+1}\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\eta^{2j}\left(\frac{k\gamma_{\mu}}{2\eta}\right)^{2j}\Gamma\left(-j,\frac{k^{2}\gamma_{\mu}^{2}}{2\eta^{2}}\right)\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\\
& = & -\frac{i^{n+1}}{k^{n+1}\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\left(\frac{k\gamma_{\mu}}{2}\right)^{2j}\underbrace{\Gamma\left(-j,\frac{k^{2}\gamma_{\mu}^{2}}{2\eta^{2}}\right)}_{\Gamma_{j,\mu}}\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\\
& = & -\frac{i^{n+1}}{k\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}n!\left(\tilde{\beta}_{\mu}/k\right)^{n-2j}\Gamma_{j,\mu}}{j!2^{2j}\left(n-2j\right)!}\left(\gamma_{\mu}\right)^{2j}.
\end{eqnarray*}
\begin{eqnarray}
\sigma_{n}^{m(1)} & = & -\frac{i^{n+1}}{k^{n+1}\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\eta^{2j}\left(\frac{k\gamma_{\mu}}{2\eta}\right)^{2j}\Gamma\left(-j,\frac{k^{2}\gamma_{\mu}^{2}}{2\eta^{2}}\right)\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\nonumber \\
& = & -\frac{i^{n+1}}{k^{n+1}\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\left(\frac{k\gamma_{\mu}}{2}\right)^{2j}\underbrace{\Gamma\left(-j,\frac{k^{2}\gamma_{\mu}^{2}}{2\eta^{2}}\right)}_{\Gamma_{j,\mu}}\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\nonumber \\
& = & -\frac{i^{n+1}}{k\mathscr{A}}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}n!\left(\tilde{\beta}_{\mu}/k\right)^{n-2j}\Gamma_{j,\mu}}{j!2^{2j}\left(n-2j\right)!}\left(\gamma_{\mu}\right)^{2j}.\label{eq:1D_z_LRsum}
\end{eqnarray}
\end_inset
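For cross-reference with the implementation below: with $m=0$, the $j$-dependent numerical factor in the last line of (eq:1D_z_LRsum), leading $-i^{n+1}$ included, is exactly the constant precomputed in ewald.c as s1_constfacs_1Dz[n][j],
\begin{equation*}
\texttt{s1\_constfacs\_1Dz[n][j]}=\frac{-i^{n+1}\left(-1\right)^{j}\,n!}{j!\,2^{2j}\left(n-2j\right)!},\qquad 0\le j\le\left[n/2\right].
\end{equation*}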

View File

@ -9,6 +9,7 @@
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>
// parameters for the quadrature of integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
@ -71,6 +72,7 @@ qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax /*, co
c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
//if (c->s1_jMaxes == NULL) return NULL;
// ----- the "xy-plane constants" ------
//determine sizes
size_t s1_constfacs_sz = 0;
for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
@ -113,6 +115,34 @@ qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax /*, co
c->s1_constfacs[y] = NULL;
}
// ------ the "z-axis constants" -----
// determine sizes
size_t s1_constfacs_1Dz_sz;
{
const qpms_l_t sz_n_lMax = lMax/2 + 1; // number of different j's for n = lMax
s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
: isq(sz_n_lMax);
}
c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
c->s1_constfacs_1Dz = malloc((lMax + 1) * sizeof(complex double *)); // per-n pointer table, filled in the loop below
size_t s1_constfacs_1Dz_sz_cumsum = 0;
for (qpms_l_t n = 0; n <= lMax; ++n) {
c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
for (qpms_l_t j = 0; j <= n/2; ++j) {
switch(type) {
case EWALD32_CONSTANTS_AGNOSTIC:
c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
/ (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
break;
default:
abort(); // wrong type argument or not implemented
}
}
s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
}
assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);
c->legendre_csphase = csphase;
c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
// N.B. here I use GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
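/* Illustrative sketch, not part of the commit: a standalone check that the
 * closed-form s1_constfacs_1Dz_sz above equals the cumulative sum built in the
 * loop (there are n/2 + 1 coefficients j = 0..n/2 for each n <= lMax;
 * e.g. for lMax = 3: 1 + 1 + 2 + 2 = 6 = isq(2) + 2). */
#include <assert.h>
#include <stddef.h>
static int isq(int x) { return x * x; } // local copy of the helper from tiny_inlines.h
int main(void) {
  for (int lMax = 0; lMax <= 16; ++lMax) {
    size_t cumsum = 0;
    for (int n = 0; n <= lMax; ++n)
      cumsum += 1 + n / 2;
    const int sz_n_lMax = lMax / 2 + 1; // number of different j's for n = lMax
    const size_t sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax : isq(sz_n_lMax);
    assert(sz == cumsum);
  }
  return 0;
}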
@ -131,6 +161,7 @@ void qpms_ewald32_constants_free(qpms_ewald32_constants_t *c) {
free(c->legendre0);
free(c->s1_constfacs);
free(c->s1_constfacs_base);
free(c->s1_constfacs_1Dz);
free(c->s1_constfacs_1Dz_base);
free(c->s1_jMaxes);
free(c);
}
@ -344,6 +375,7 @@ int ewald3_21_xy_sigma_long (
complex double *target, // must be c->nelem_sc long
double *err,
const qpms_ewald32_constants_t *c,
const double eta, const double k,
const double unitcell_volume /* with the corresponding lattice dimensionality */,
const LatticeDimensionality latdim,
PGenSph *pgen_K, const bool pgen_generates_shifted_points
@ -370,10 +402,10 @@ int ewald3_21_xy_sigma_long (
memset(err, 0, nelem_sc * sizeof(double));
}
const double commonfac = 1/(k*k*unitcell_area); // used in the very end (CFC)
const double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
assert(commonfac > 0);
PGenSingleReturnData pgen_retdata;
PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
double rbeta_pq_prev;
#endif
@ -476,7 +508,8 @@ int ewald3_1_z_sigma_long (
complex double *target, // must be c->nelem_sc long
double *err,
const qpms_ewald32_constants_t *c,
const double unitcell_volume /* length in this case */,
const double eta, const double k,
const double unitcell_volume /* length (periodicity) in this case */,
const LatticeDimensionality latdim,
PGenSph *pgen_K, const bool pgen_generates_shifted_points
/* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
@ -505,7 +538,13 @@ int ewald3_1_z_sigma_long (
memset(err, 0, nelem_sc * sizeof(double));
}
PGenSingleReturnData pgen_retdata;
const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
assert(commonfac > 0);
// space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, eq. (eq:1D_z_LRsum)).
qpms_csf_result Gamma_pq[lMax/2+1];
PGenSphReturnData pgen_retdata;
// CHOOSE POINT BEGIN
while ((pgen_retdata = PGenSph_next(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
assert(pgen_retdata.flags & PGEN_AT_Z);
@ -519,13 +558,14 @@ int ewald3_1_z_sigma_long (
pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
beta_mu_z = K_z + beta_z;
}
double rbeta_mu = fabs(beta_mu_z);
// CHOOSE POINT END
const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
// R-DEPENDENT BEGIN
gamma_pq = lilgamma(rbeta_pq/k);
z = csq(gamma_pq*k/(2*eta)); // Now that I think about it, this is actually always real
complex double gamma_pq = lilgamma(rbeta_mu/k); // For real beta and k this is real or pure imaginary ...
const complex double z = csq(gamma_pq*k/(2*eta));// ... so the square (this) is in fact real.
for(qpms_l_t j = 0; j <= lMax/2; ++j) {
int retval = complex_gamma_inc_e(0.5-j, z, Gamma_pq+j);
// we take the other branch, cf. [Linton, p. 642 in the middle]: FIXME instead use the C11 CMPLX macros and fill in the -0*I part to z in the line above
@ -533,26 +573,29 @@ int ewald3_1_z_sigma_long (
Gamma_pq[j].val = conj(Gamma_pq[j].val); //FIXME as noted above
if(!(retval==0 || retval==GSL_EUNDRFLW)) abort();
}
if (latdim & LAT1D)
factor1d = k * M_SQRT1_2 * .5 * gamma_pq;
// R-DEPENDENT END
// TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
// and just fetched for each n, m pair
for(qpms_l_t n = 0; n <= lMax; ++n)
for(qpms_m_t m = -n; m <= n; ++m) {
if((m+n) % 2 != 0) // odd coefficients are zero.
continue;
const qpms_y_t y = qpms_mn2y_sc(m, n);
const complex double e_imalpha_pq = cexp(I*m*arg_pq);
// and just fetched for each n
for(qpms_l_t n = 0; n <= lMax; ++n) {
const qpms_y_t y = qpms_mn2y_sc(0, n);
complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
assert((n-abs(m))/2 == c->s1_jMaxes[y]);
for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
complex double summand = pow(rbeta_pq/k, n-2*j)
* e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
* cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] below (GGG) after error computation
* c->s1_constfacs[y][j];
for(qpms_l_t j = 0; j <= n/2; ++j) {
complex double summand = pow(rbeta_mu/k, n-2*j)
* ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
(c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
)
// * min1pow_m_neg(m) // not needed as m == 0
* cpow(gamma_pq, 2*j) // * Gamma_pq[j] below (GGG) after error computation
* c->s1_constfacs_1Dz[n][j];
/* s1_constfacs_1Dz[n][j] =
*
* -I**(n+1) (-1)**j * n!
* --------------------------
* j! * 2**(2*j) * (n - 2*j)!
*/
if(err) {
// FIXME include also other errors than Gamma_pq's relative error
kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
@ -560,13 +603,10 @@ int ewald3_1_z_sigma_long (
summand *= Gamma_pq[j].val; // GGG
ckahanadd(&jsum, &jsum_c, summand);
}
jsum *= phasefac * factor1d; // PFC
jsum *= phasefac; // PFC
ckahanadd(target + y, target_c + y, jsum);
if(err) kahanadd(err + y, err_c + y, jsum_err);
}
#ifndef NDEBUG
rbeta_pq_prev = rbeta_pq;
#endif
} // END POINT LOOP
free(err_c);

View File

@ -23,6 +23,7 @@
* according to the spherical-harmonic-normalisation-independent formulation in my notes
* notes/ewald.lyx.
* Both parts of lattice sums are then calculated with the P_n^|m| e^imf substituted in place
* // (N.B. or P_n^|m| e^imf (-1)^m for negative m)
* of Y_n^m (this is quite a weird normalisation especially for negative m, but it is consistent
* with the current implementation of the translation coefficients in translations.c;
* in the long run, it might make more sense to replace it everywhere with normalised
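/* A minimal sketch, not from the repository, of the convention described above:
 * evaluate P_n^|m|(cos θ) e^{imφ} (times (-1)^m for negative m) with the
 * GSL_SF_LEGENDRE_NONE normalisation, as used here in place of Y_n^m.
 * The helper name weird_ynm and its signature are hypothetical. */
#include <complex.h>
#include <stdlib.h>
#include <gsl/gsl_sf_legendre.h>

static complex double weird_ynm(int n, int m, double costheta, double phi,
                                double csphase /* +1.0 or -1.0 */) {
  double leg[gsl_sf_legendre_array_n(n)]; // C99 VLA sized for degree n
  gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE, n, costheta, csphase, leg);
  complex double res = leg[gsl_sf_legendre_array_index(n, abs(m))] * cexp(I * m * phi);
  if (m < 0 && (m % 2)) res = -res; // the extra (-1)^m factor for negative m
  return res;
}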
@ -40,8 +41,30 @@ typedef struct {
qpms_y_t nelem_sc;
qpms_l_t *s1_jMaxes;
complex double **s1_constfacs; // indices [y][j] where j is same as in [1, (4.5)]
// TODO probably normalisation and equatorial legendre polynomials should be included, too
/* These are the actual numbers now: (in the EWALD32_CONSTANTS_AGNOSTIC version)
* for m + n EVEN:
*
* s1_constfacs[y(m,n)][j] =
*
* -2 * I**(n+1) * sqrt(π) * ((n-m)/2)! * ((n+m)/2)! * (-1)**j
* -----------------------------------------------------------
* j! * ((n-m)/2 - j)! * ((n+m)/2 + j)!
*
* for m + n ODD:
*
* s1_constfacs[y(m,n)][j] = 0
*/
complex double *s1_constfacs_base; // internal pointer holding the memory for the constants
// similarly for the 1D z-axis aligned case; now the indices are [n][j] (as m == 0)
complex double **s1_constfacs_1Dz;
/* These are the actual numbers now:
* s1_constfacs_1Dz[n][j] =
*
* -I**(n+1) (-1)**j * n!
* --------------------------
* j! * 2**(2*j) * (n - 2*j)!
*/
complex double *s1_constfacs_1Dz_base;
double *legendre0; /* now with GSL_SF_LEGENDRE_NONE normalisation, because this is
* what the multipliers from translations.c count with.
@ -83,6 +106,10 @@ int cx_gamma_inc_series_e(double a, complex z, qpms_csf_result * result);
// if x is (almost) real, it just uses gsl_sf_gamma_inc_e
int complex_gamma_inc_e(double a, complex double x, qpms_csf_result *result);
// Exponential integral for complex second argument
// If x is (almost) positive real, it just uses gsl_sf_expint_En_e
int complex_expint_n_e(int n, complex double x, qpms_csf_result *result);
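/* Hypothetical usage sketch, not part of the commit; it assumes qpms_csf_result
 * has the members val (complex double) and err (double) used in ewald.c, and
 * that a nonzero return value is a GSL error code. */
static inline int example_expint_E2(complex double z, complex double *out) {
  qpms_csf_result res;
  const int retval = complex_expint_n_e(2, z, &res); // evaluates E_2(z)
  *out = res.val; // res.err holds the propagated error estimate
  return retval;
}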
// hypergeometric 2F2, used to calculate some errors
int hyperg_2F2_series(const double a, const double b, const double c, const double d,

View File

@ -1,5 +1,6 @@
#include "ewald.h"
#include <gsl/gsl_sf_gamma.h>
#include <gsl/gsl_sf_expint.h>
#include <gsl/gsl_sf_result.h>
#include <gsl/gsl_machine.h> // Maybe I should rather use DBL_EPSILON instead of GSL_DBL_EPSILON.
#include "kahansum.h"
@ -99,6 +100,24 @@ int complex_gamma_inc_e(double a, complex double x, qpms_csf_result *result) {
return cx_gamma_inc_series_e(a, x, result);
}
// Exponential integral for complex second argument; !UNTESTED! and probably not needed, as I expressed everything in terms of inc. gammas anyway.
int complex_expint_n_e(int n, complex double x, qpms_csf_result *result) {
if (creal(x) >= 0 &&
(0 == fabs(cimag(x)) || // x is real positive; just use the real fun
fabs(cimag(x)) < fabs(creal(x)) * COMPLEXPART_REL_ZERO_LIMIT)) {
gsl_sf_result real_expint_result;
int retval = gsl_sf_expint_En_e(n, creal(x), &real_expint_result);
result->val = real_expint_result.val;
result->err = real_expint_result.err;
return retval;
} else {
int retval = complex_gamma_inc_e(-n+1, x, result);
complex double f = cpow(x, n-1); // E_n(x) = x**(n-1) * Gamma(1-n, x), cf. DLMF 8.19.1
result->val *= f;
result->err *= cabs(f);
return retval;
}
}
// inspired by GSL's hyperg_2F1_series
int hyperg_2F2_series(const double a, const double b, const double c, const double d,

View File

@ -13,6 +13,17 @@ static inline int min1pow_m_neg(int m) {
return (m < 0) ? min1pow(m) : 1;
}
#if 0
#ifdef __GSL_SF_LEGENDRE_H__
static inline complex double
spharm_eval(gsl_sf_legendre_t P_normconv, int P_csphase, qpms_l_t l, qpms_m_t m, double P_n_abs_m, complex double exp_imf) {
return;
}
#endif
#endif
// this has shitty precision:
// static inline complex double ipow(int x) { return cpow(I, x); }
@ -32,4 +43,6 @@ static inline complex double ipow(int x) {
}
}
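/* Sketch, not part of the commit: the kind of exact i**x evaluation that the
 * (elided) switch-based ipow above performs, avoiding cpow's rounding errors.
 * The name ipow_sketch is hypothetical. */
static inline complex double ipow_sketch(int x) {
  switch (((x % 4) + 4) % 4) { // reduce to a nonnegative remainder mod 4
    case 0: return 1;
    case 1: return I;
    case 2: return -1;
    default: return -I; // case 3
  }
}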
static inline int isq(int x) {return x * x;}
#endif // TINY_INLINES_H

View File

@ -516,7 +516,6 @@ static inline size_t qpms_trans_calculator_index_yyu(const qpms_trans_calculator
#define SQ(x) ((x)*(x))
static inline int isq(int x) { return x * x; }
static inline double fsq(double x) {return x * x; }
static void qpms_trans_calculator_multipliers_A_general(