From b90bf2875b4dcdc3fad911d924f6ca973d4939d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ne=C4=8Dada?= <marek@necada.org>
Date: Thu, 22 Nov 2018 16:49:37 +0200
Subject: [PATCH] =?UTF-8?q?Pr=C3=A1ce=20na=201D=20ewaldovi;=20du=20dom.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
---
 notes/ewald.lyx |  81 +++++++++++++++++++++++++++++---
 qpms/ewald.c    | 121 ++++++++++++++++++++++++++++++++++++++++++++++--
 qpms/lattices.h |   7 +++
 3 files changed, 199 insertions(+), 10 deletions(-)

diff --git a/notes/ewald.lyx b/notes/ewald.lyx
index 143ffc7..7690b31 100644
--- a/notes/ewald.lyx
+++ b/notes/ewald.lyx
@@ -242,6 +242,11 @@ theorems-starred
 \end_inset
 
 
+\begin_inset FormulaMacro
+\newcommand{\expint}{\mathrm{E}}
+\end_inset
+
+
 \end_layout
 
 \begin_layout Title
@@ -628,7 +633,7 @@ reference "eq:W definition"
 
 \end_inset
 
- and 
+ and legendre
 \begin_inset CommandInset ref
 LatexCommand eqref
 reference "eq:W sum in reciprocal space"
@@ -3720,6 +3725,70 @@ reference "eq:Ewald in 3D origin part"
  can be used directly without modifications.
 \end_layout
 
+\begin_layout Standard
+Another possibility is to consider the chain to be aligned along the 
+\begin_inset Formula $z$
+\end_inset
+
+-axis and to apply the formula 
+\begin_inset CommandInset citation
+LatexCommand cite
+after "(4.64)"
+key "linton_lattice_2010"
+
+\end_inset
+
+ instead.
+ Let us rewrite it again in the spherical-harmonic-normalisation-agnostic
+ way (N.B.
+ the relations 
+\begin_inset CommandInset citation
+LatexCommand cite
+after "(4.10)"
+key "linton_lattice_2010"
+
+\end_inset
+
+ 
+\begin_inset Formula $\sigma_{n}^{m}=\left(-1\right)^{m}\hat{\tau}_{n}^{m}$
+\end_inset
+
+, 
+\begin_inset CommandInset citation
+LatexCommand cite
+after "(A.5)"
+key "linton_lattice_2010"
+
+\end_inset
+
+ 
+\begin_inset Formula $P_{n}^{m}\left(\pm1\right)=\left(\pm1\right)^{n}\delta_{m0}$
+\end_inset
+
+ and 
+\begin_inset CommandInset citation
+LatexCommand cite
+after "(A.8)"
+key "linton_lattice_2010"
+
+\end_inset
+
+ 
+\begin_inset Formula $Y_{n}^{m}\left(\theta,\phi\right)=\left(-1\right)^{m}\sqrt{\frac{\left(2n+1\right)\left(n-m\right)!}{4\pi\left(n+m\right)!}}P_{n}^{m}\left(\cos\theta\right)e^{im\phi}$
+\end_inset
+
+)
+\begin_inset Formula 
+\begin{eqnarray*}
+\sigma_{n}^{m} & = & -\frac{i^{n+1}}{k^{n+1}a}\delta_{m0}\sqrt{\frac{2n+1}{4\pi}}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\eta^{2j}\expint_{j+1}\left(\frac{k^{2}\gamma^{\mu}}{4\eta^{2}}\right)\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}\\
+ & = & -\frac{i^{n+1}}{k^{n+1}a}Y_{n}^{m}\left(\hat{\vect z}\sgn\tilde{\beta}_{\mu}\right)\delta_{m0}\left(\sgn\tilde{\beta}_{\mu}\right)^{-n}\sum_{\mu\in\ints}\sum_{j=0}^{\left[n/2\right]}\frac{\left(-1\right)^{j}}{j!}\eta^{2j}\expint_{j+1}\left(\frac{k^{2}\gamma^{\mu}}{4\eta^{2}}\right)\frac{n!\tilde{\beta}_{\mu}^{n-2j}}{\left(n-2j\right)!}
+\end{eqnarray*}
+
+\end_inset
+
+
+\end_layout
+
 \begin_layout Standard
 \begin_inset Note Note
 status open
@@ -3914,7 +3983,7 @@ where the spherical Hankel transform
  2)
 \begin_inset Formula 
 \[
-\bsht lg(k)\equiv\int_{0}^{\infty}\ud r\, r^{2}g(r)j_{l}\left(kr\right).
+\bsht lg(k)\equiv\int_{0}^{\infty}\ud r\,r^{2}g(r)j_{l}\left(kr\right).
 \]
 
 \end_inset
@@ -3924,7 +3993,7 @@ Using this convention, the inverse spherical Hankel transform is given by
  3)
 \begin_inset Formula 
 \[
-g(r)=\frac{2}{\pi}\int_{0}^{\infty}\ud k\, k^{2}\bsht lg(k)j_{l}(k),
+g(r)=\frac{2}{\pi}\int_{0}^{\infty}\ud k\,k^{2}\bsht lg(k)j_{l}(k),
 \]
 
 \end_inset
@@ -3937,7 +4006,7 @@ so it is not unitary.
 An unitary convention would look like this:
 \begin_inset Formula 
 \begin{equation}
-\usht lg(k)\equiv\sqrt{\frac{2}{\pi}}\int_{0}^{\infty}\ud r\, r^{2}g(r)j_{l}\left(kr\right).\label{eq:unitary 3d Hankel tf definition}
+\usht lg(k)\equiv\sqrt{\frac{2}{\pi}}\int_{0}^{\infty}\ud r\,r^{2}g(r)j_{l}\left(kr\right).\label{eq:unitary 3d Hankel tf definition}
 \end{equation}
 
 \end_inset
@@ -3991,8 +4060,8 @@ where the Hankel transform of order
  is defined as
 \begin_inset Formula 
 \begin{eqnarray}
-\pht mg\left(k\right) & = & \int_{0}^{\infty}\ud r\, g(r)J_{m}(kr)r\label{eq:unitary 2d Hankel tf definition}\\
- & = & \left(-1\right)^{m}\int_{0}^{\infty}\ud r\, g(r)J_{-m}(kr)r
+\pht mg\left(k\right) & = & \int_{0}^{\infty}\ud r\,g(r)J_{m}(kr)r\label{eq:unitary 2d Hankel tf definition}\\
+ & = & \left(-1\right)^{m}\int_{0}^{\infty}\ud r\,g(r)J_{-m}(kr)r
 \end{eqnarray}
 
 \end_inset
diff --git a/qpms/ewald.c b/qpms/ewald.c
index 02e7432..3ff8fbe 100644
--- a/qpms/ewald.c
+++ b/qpms/ewald.c
@@ -355,8 +355,9 @@ int ewald3_21_xy_sigma_long (
     const cart3_t beta,
     const cart3_t particle_shift
     )
-
 {
+  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
+  assert((latdim & LAT1D) || (latdim & LAT2D));
   const qpms_y_t nelem_sc = c->nelem_sc;
   const qpms_l_t lMax = c->lMax;
   
@@ -379,6 +380,7 @@ int ewald3_21_xy_sigma_long (
   // recycleable values if rbeta_pq stays the same:
   complex double gamma_pq;
   complex double z;
+  double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
   // space for Gamma_pq[j]'s
   qpms_csf_result Gamma_pq[lMax/2+1];
 
@@ -408,6 +410,7 @@ int ewald3_21_xy_sigma_long (
     const bool new_rbeta_pq = (!pgen_generates_shifted_points) || (pgen_retdata.flags & PGEN_NEWR);
     if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);
 
+
     // R-DEPENDENT BEGIN
     if (new_rbeta_pq) {
       gamma_pq = lilgamma(rbeta_pq/k);
@@ -417,9 +420,10 @@ int ewald3_21_xy_sigma_long (
         // we take the other branch, cf. [Linton, p. 642 in the middle]: FIXME instead use the C11 CMPLX macros and fill in -O*I part to z in the line above
         if(creal(z) < 0) 
           Gamma_pq[j].val = conj(Gamma_pq[j].val); //FIXME as noted above
-        if(!(retval==0 ||retval==GSL_EUNDRFLW)) abort();
+        if(!(retval==0 || retval==GSL_EUNDRFLW)) abort();
       }
-      // --------------- ZDE JSEM SKONČIL. TODO NEZAPOMEŇ TAKY POŘEŠIT PŘÍPAD 1D VS 2D
+      if (latdim & LAT1D)
+        factor1d = k * M_SQRT1_2 * .5 * gamma_pq;
     }
     // R-DEPENDENT END
     
@@ -446,7 +450,7 @@ int ewald3_21_xy_sigma_long (
           summand *= Gamma_pq[j].val; // GGG
           ckahanadd(&jsum, &jsum_c, summand);
         }
-        jsum *= phasefac; // PFC
+        jsum *= phasefac * factor1d; // PFC
         ckahanadd(target + y, target_c + y, jsum);
         if(err) kahanadd(err + y, err_c + y, jsum_err);
       }
@@ -466,6 +470,115 @@ int ewald3_21_xy_sigma_long (
 }
 
 
+// 1D chain along the z-axis; not many optimisations here as the same
+// shifted beta radius could be recycled only once anyways
+int ewald3_1_z_sigma_long (
+    complex double *target, // must be c->nelem_sc long
+    double *err,
+    const qpms_ewald32_constants_t *c,
+    const double unitcell_volume /* length in this case */,
+    const LatticeDimensionality latdim,
+    PGenSph *pgen_K, const bool pgen_generates_shifted_points
+    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
+     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
+     * and adds beta to the generated points before calculations.
+     * If true, it assumes that they are already shifted.
+     */,
+    const cart3_t beta,
+    const cart3_t particle_shift
+    )
+{
+  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
+  assert(beta.x == 0 && beta.y == 0);
+  assert(particle_shift.x == 0 && particle_shift.y == 0);
+  const double beta_z = beta.z;
+  const double particle_shift_z = particle_shift_z;
+  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_l_t lMax = c->lMax;
+  
+  // Manual init of the ewald summation targets
+  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
+  memset(target, 0, nelem_sc * sizeof(complex double));
+  double *err_c = NULL;
+  if (err) {
+    err_c = calloc(nelem_sc, sizeof(double));
+    memset(err, 0, nelem_sc * sizeof(double));
+  }
+
+  PGenSingleReturnData pgen_retdata;
+  // CHOOSE POINT BEGIN
+  while ((pgen_retdata = PGenSph_next(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
+    assert(pgen_retdata.flags & PGEN_AT_Z);
+    double K_z, beta_mu_z;
+    if (pgen_generates_shifted_points) {
+      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
+        pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
+      K_z = beta_mu_z - beta_z;
+    } else { // as in the old _points_and_shift functions
+      K_z = ((pgen_retdata.point_sph.theta == 0) ?
+          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
+      beta_mu_z = K_z + beta_z;
+    }
+  // CHOOSE POINT END
+
+    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
+
+    // R-DEPENDENT BEGIN
+    gamma_pq = lilgamma(rbeta_pq/k);
+    z = csq(gamma_pq*k/(2*eta)); // Když o tom tak přemýšlím, tak tohle je vlastně vždy reálné
+    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
+      int retval = complex_gamma_inc_e(0.5-j, z, Gamma_pq+j);
+      // we take the other branch, cf. [Linton, p. 642 in the middle]: FIXME instead use the C11 CMPLX macros and fill in -O*I part to z in the line above
+      if(creal(z) < 0) 
+        Gamma_pq[j].val = conj(Gamma_pq[j].val); //FIXME as noted above
+      if(!(retval==0 || retval==GSL_EUNDRFLW)) abort();
+    }
+    if (latdim & LAT1D)
+      factor1d = k * M_SQRT1_2 * .5 * gamma_pq;
+    // R-DEPENDENT END
+    
+    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
+    // and just fetched for each n, m pair
+    for(qpms_l_t n = 0; n <= lMax; ++n)
+      for(qpms_m_t m = -n; m <= n; ++m) {
+        if((m+n) % 2 != 0) // odd coefficients are zero.
+          continue;
+        const qpms_y_t y = qpms_mn2y_sc(m, n);
+        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
+        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
+        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
+        assert((n-abs(m))/2 == c->s1_jMaxes[y]);
+        for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
+          complex double summand = pow(rbeta_pq/k, n-2*j) 
+            * e_imalpha_pq  * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
+            * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] bellow (GGG) after error computation
+            * c->s1_constfacs[y][j];
+          if(err) {
+            // FIXME include also other errors than Gamma_pq's relative error
+             kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
+          }
+          summand *= Gamma_pq[j].val; // GGG
+          ckahanadd(&jsum, &jsum_c, summand);
+        }
+        jsum *= phasefac * factor1d; // PFC
+        ckahanadd(target + y, target_c + y, jsum);
+        if(err) kahanadd(err + y, err_c + y, jsum_err);
+      }
+#ifndef NDEBUG
+    rbeta_pq_prev = rbeta_pq;
+#endif
+  } // END POINT LOOP
+ 
+  free(err_c);
+  free(target_c);
+  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
+    target[y] *= commonfac;
+  if(err)
+    for(qpms_y_t y = 0; y < nelem_sc; ++y)
+      err[y] *= commonfac;
+  return 0;
+}
+
 
 struct sigma2_integrand_params {
   int n;
diff --git a/qpms/lattices.h b/qpms/lattices.h
index ddced1c..eddf5ed 100644
--- a/qpms/lattices.h
+++ b/qpms/lattices.h
@@ -21,10 +21,17 @@ typedef enum LatticeDimensionality {
 	LAT_2D_IN_3D = 34,
 	LAT_3D_IN_3D = 40,
 	// special coordinate arrangements (indicating possible optimisations)
+	LAT_ZONLY = 64,
+	LAT_XYONLY = 128,
 	LAT_1D_IN_3D_ZONLY = 97, // LAT1D | SPACE3D | 64
 	LAT_2D_IN_3D_XYONLY = 162 // LAT2D | SPACE3D | 128
 } LatticeDimensionality;
 
+inline static bool LatticeDimensionality_checkflags(
+		LatticeDimensionality a, LatticeDimensionality flags_a_has_to_contain) {
+	return ((a & flags_a_has_to_contain) == flags_a_has_to_contain);
+}
+
 #include <math.h>
 #include <stdbool.h>
 #include <assert.h>