From 858e99798196770c944ca8a212d3468c4c18470f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ne=C4=8Dada?= <marek@necada.org>
Date: Wed, 5 Sep 2018 09:07:03 +0300
Subject: [PATCH] Ewald short-range part etc.

Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
---
 notes/ewald.lyx |  3 +-
 qpms/ewald.c    | 76 ++++++++++++++++++++++++++++++++++++++++---------
 qpms/ewald.h    | 31 ++++++++++++++++----
 3 files changed, 90 insertions(+), 20 deletions(-)

diff --git a/notes/ewald.lyx b/notes/ewald.lyx
index 5a3647a..43e0c5d 100644
--- a/notes/ewald.lyx
+++ b/notes/ewald.lyx
@@ -3105,7 +3105,8 @@ Y_{n}^{m}\left(\frac{\pi}{2},\phi\right) & = & \left(-1\right)^{m}\sqrt{\frac{\l
 \begin_inset Formula 
 \begin{eqnarray*}
 \sigma_{n}^{m(1)} & = & -\frac{i^{n+1}}{2k^{2}\mathscr{A}}\left(-1\right)^{\left(n+m\right)/2}\sqrt{\left(2n+1\right)\left(n-m\right)!\left(n+m\right)!}\sum_{\vect K_{pq}\in\Lambda^{*}}^{'}\sum_{j=0}^{\left[\left(n-\left|m\right|/2\right)\right]}\frac{\left(-1\right)^{j}\left(\beta_{pq}/2k\right)^{n-2j}e^{im\phi_{\vect{\beta}_{pq}}}\Gamma_{j,pq}}{j!\left(\frac{1}{2}\left(n-m\right)-j\right)!\left(\frac{1}{2}\left(n+m\right)-j\right)!}\left(\frac{\gamma_{pq}}{2}\right)^{2j-1}\\
- & = & -\frac{i^{n+1}}{2k^{2}\mathscr{A}}\sqrt{\pi}2^{n+1}\left(\left(n-m\right)/2\right)!\left(\left(n+m\right)/2\right)!Y_{n}^{m}\left(0,\phi_{\vect{\beta}_{pq}}\right)\sum_{\vect K_{pq}\in\Lambda^{*}}^{'}\sum_{j=0}^{\left[\left(n-\left|m\right|/2\right)\right]}\frac{\left(-1\right)^{j}\left(\beta_{pq}/2k\right)^{n-2j}\Gamma_{j,pq}}{j!\left(\frac{1}{2}\left(n-m\right)-j\right)!\left(\frac{1}{2}\left(n+m\right)-j\right)!}\left(\frac{\gamma_{pq}}{2}\right)^{2j-1},
+ & = & -\frac{i^{n+1}}{2k^{2}\mathscr{A}}\sqrt{\pi}2^{n+1}\left(\left(n-m\right)/2\right)!\left(\left(n+m\right)/2\right)!Y_{n}^{m}\left(0,\phi_{\vect{\beta}_{pq}}\right)\sum_{\vect K_{pq}\in\Lambda^{*}}^{'}\sum_{j=0}^{\left[\left(n-\left|m\right|/2\right)\right]}\frac{\left(-1\right)^{j}\left(\beta_{pq}/2k\right)^{n-2j}\Gamma_{j,pq}}{j!\left(\frac{1}{2}\left(n-m\right)-j\right)!\left(\frac{1}{2}\left(n+m\right)-j\right)!}\left(\frac{\gamma_{pq}}{2}\right)^{2j-1}\\
+ & = & -\frac{i^{n+1}}{k\mathscr{A}}\sqrt{\pi}2\left(\left(n-m\right)/2\right)!\left(\left(n+m\right)/2\right)!Y_{n}^{m}\left(0,\phi_{\vect{\beta}_{pq}}\right)\sum_{\vect K_{pq}\in\Lambda^{*}}^{'}\sum_{j=0}^{\left[\left(n-\left|m\right|/2\right)\right]}\frac{\left(-1\right)^{j}\left(\beta_{pq}/k\right)^{n-2j}\Gamma_{j,pq}}{j!\left(\frac{1}{2}\left(n-m\right)-j\right)!\left(\frac{1}{2}\left(n+m\right)-j\right)!}\left(\gamma_{pq}\right)^{2j-1},
 \end{eqnarray*}
 
 \end_inset
diff --git a/qpms/ewald.c b/qpms/ewald.c
index 70d48c3..0637ce0 100644
--- a/qpms/ewald.c
+++ b/qpms/ewald.c
@@ -8,7 +8,6 @@
 #include <gsl/gsl_integration.h>
 #include <gsl/gsl_errno.h>
 
-
 // parameters for the quadrature of integral in (4.6)
 #ifndef INTEGRATION_WORKSPACE_LIMIT
 #define INTEGRATION_WORKSPACE_LIMIT 30000
@@ -22,6 +21,11 @@
 #define INTEGRATION_EPSREL 1e-13
 #endif
 
+#ifndef M_SQRTPI
+#define M_SQRTPI 1.7724538509055160272981674833411452
+#endif
+
+
 // sloppy implementation of factorial
 static inline double factorial(const int n) {
   assert(n >= 0);
@@ -40,7 +44,21 @@ static inline double factorial(const int n) {
 static inline complex double csq(complex double x) { return x * x; }
 static inline double sq(double x) { return x * x; }
 
-qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax)
+
+typedef enum {
+  EWALD32_CONSTANTS_ORIG, // As in [1, (4,5)], NOT USED right now.
+  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
+                              * convention – the $e^{im\alpha_pq}$ term in [1,(4.5)] being
+                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$ 
+                              * spherical harmonic. See notes/ewald.lyx.
+                              */
+
+} ewald32_constants_option;
+
+static const ewald32_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
+
+qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax /*, const ewald32_constants_option type */,
+    const int csphase)
 {
   qpms_ewald32_constants_t *c = malloc(sizeof(qpms_ewald32_constants_t));
   //if (c == NULL) return NULL; // Do I really want to do this?
@@ -69,21 +87,40 @@ qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax)
       c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
       // and here comes the actual calculation
       for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
-        c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2) 
-          * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
-          * min1pow(j) * pow(0.5, n-2*j)
-          / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j))
-          * pow(0.5, 2*j-1);
+        switch(type) {
+          case EWALD32_CONSTANTS_ORIG: // NOT USED
+            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2) 
+              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
+              * min1pow(j) * pow(0.5, n-2*j)
+              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j))
+              * pow(0.5, 2*j-1);
+            break;
+          case EWALD32_CONSTANTS_AGNOSTIC:
+            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI   
+              * factorial((n-m)/2) * factorial((n+m)/2)
+              * min1pow(j) 
+              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
+            break;
+          default:
+            abort();
+        }
       }
       s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
     }
     else
       c->s1_constfacs[y] = NULL;
   }
+
+  c->legendre0_csphase = csphase;
+  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax), sizeof(double));
+  if(GSL_SUCCESS != gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE, lMax, 0, c->legendre0))
+    abort();
+
   return c;
 }
 
 void qpms_ewald32_constants_free(qpms_ewald32_constants_t *c) {
+  free(c->legendre0);
   free(c->s1_constfacs);
   free(c->s1_constfacs_base);
   free(c->s1_jMaxes);
@@ -149,7 +186,8 @@ int ewald32_sigma_long_shiftedpoints (
         double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
         for(qpms_l_t j = 0; j < (n-abs(m))/2; ++j) {
           complex double summand = pow(rbeta_pq/k, n-2*j) 
-            * e_imalpha_pq * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] bellow (GGG) after error computation
+            * e_imalpha_pq  * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] // This line can actually go outside j-loop
+            * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] bellow (GGG) after error computation
             * c->s1_constfacs[y][j];
           if(err) {
             // FIXME include also other errors than Gamma_pq's relative error
@@ -244,21 +282,31 @@ int ewald32_sigma_short_shiftedpoints(
     double Rpq_shifted_arg = atan2(Rpq_shifted.x, Rpq_shifted.y); // POINT-DEPENDENT
     complex double e_beta_Rpq = cexp(I*cart2_dot(beta, Rpq_shifted)); // POINT-DEPENDENT
     
+    imaginary double prefacn = - I * pow(2./k, n+1) * M_2_SQRTPI / 2; // TODO put outside the R-loop and multiply in the end
     for(qpms_l_t n = 1; n <= lMax; ++n) {
-      imaginary double prefacn = - I * pow(2./k, n+1) * M_2_SQRTPI / 2;
       double R_pq_pown = pow(r_pq_shifted, n);
       // TODO the integral here
       double intres, interr;
       int retval = ewald32_sr_integral(r_pq_shifted, k, n, eta,
           &intres, &interr, workspace);
       if (retval) abort();
-
-      //!!!!!!!!!!!!!!!!!!!!!! ZDE JSEM SKONČIL !!!!!!!!!!!!!!!!!!!!!!
-      // potřeba pořešit legendreovy funkce
-
+      for (qpms_m_t m = -n; m <= n; ++m){
+        if((m+n) % 2 != 0) // odd coefficients are zero.
+          continue; // nothing needed, already done by memset
+        complex double e_imf = cexp(I*m*Rpq_shifted_arg);
+        double leg = c->legendre0[gsl_sf_legendre_array_index(n, m)];
+        qpms_y_t y = qpms_mn2y(m,n);
+        if(err)
+          kahanadd(err + y, err_c + y, fabs(leg * (prefacn / I) * R_pq_pown
+              * interr)); // TODO include also other errors
+        ckahanadd(target + y, target_c + y,
+            prefacn * R_pq_pown * leg * intres * e_beta_Rpq * e_imf);
+      }
+    }
+  }
 
   gsl_integration_workspace_free(workspace);
-  free(err_c);
+  if(err) free(err_c);
   free(target_c);
   return 0;
 }
diff --git a/qpms/ewald.h b/qpms/ewald.h
index 2e3a32a..8497e92 100644
--- a/qpms/ewald.h
+++ b/qpms/ewald.h
@@ -12,6 +12,18 @@
  *     Journal of Computational Physics 228 (2009) 1815–1829
  */
 
+/*
+ * N.B.!!! currently, the long-range parts are calculated not according to [1,(4.5)], but rather
+ * according to the spherical-harmonic-normalisation-independent formulation in my notes
+ * notes/ewald.lyx.
+ * Both parts of lattice sums are then calculated with the P_n^|m| e^imf substituted in place
+ * of Y_n^m (this is quite a weird normalisation especially for negative |m|, but it is consistent
+ * with the current implementation of the translation coefficients in translations.c;
+ * in the long run, it might make more sense to replace it everywhere with normalised
+ * Legendre polynomials).
+ */
+
+
 /* Object holding the constant factors from [1, (4.5)] */ 
 typedef struct { 
 	qpms_l_t lMax;
@@ -20,9 +32,18 @@ typedef struct {
 	complex double **s1_constfacs; // indices [y][j] where j is same as in [1, (4.5)]
 	// TODO probably normalisation and equatorial legendre polynomials should be included, too
 	complex double *s1_constfacs_base; // internal pointer holding the memory for the constants
+
+	double *legendre0; /* now with GSL_SF_LEGENDRE_NONE normalisation, because this is what is
+			    * what the multipliers from translations.c count with.
+			    */
+	// gsl_sf_legendre_t legendre0_type;
+	int legendre0_csphase;       /* 1 or -1; csphase of the Legendre polynomials saved in legendre0.
+					This is because I dont't actually consider this fixed in
+					translations.c */
+
 } qpms_ewald32_constants_t;
 
-qpms_ewald32_constants_t *qpms_ewald32_constants_init(qpms_l_t lMax);
+qpms_ewald32_constants_t *qpms_ewald32_constants_init(qpms_l_t lMax, int csphase);
 void qpms_ewald32_constants_free(qpms_ewald32_constants_t *);
 
 
@@ -69,7 +90,7 @@ int ewald32_sigma_long_shiftedpoints_e (
 		point2d beta,
 		point2d particle_shift
 );
-int ewald32_sigma_long_points_and_shift (
+int ewald32_sigma_long_points_and_shift (//NI
 		complex double *target_sigmalr_y, // must be c->nelem long
 		double *target_sigmalr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c,
@@ -78,7 +99,7 @@ int ewald32_sigma_long_points_and_shift (
 		point2d beta,
 		point2d particle_shift
 		);
-int ewald32_sigma_long_shiftedpoints_rordered(
+int ewald32_sigma_long_shiftedpoints_rordered(//NI
 		complex double *target_sigmalr_y, // must be c->nelem long
 		double *target_sigmalr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c,
@@ -95,7 +116,7 @@ int ewald32_sigma_short_shiftedpoints(
 		size_t npoints, const point2d *Rpoints_plus_particle_shift,
 		point2d particle_shift           // used only in the very end to multiply it by the phase
 		);
-int ewald32_sigma_short_points_and_shift(
+int ewald32_sigma_short_points_and_shift(//NI
 		complex double *target_sigmasr_y, // must be c->nelem long
 		double *target_sigmasr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c, // N.B. not too useful here
@@ -103,7 +124,7 @@ int ewald32_sigma_short_points_and_shift(
 		size_t npoints, const point2d *Rpoints, 
 		point2d particle_shift
 		);
-int ewald32_sigma_short_points_rordered(
+int ewald32_sigma_short_points_rordered(//NI
 		complex double *target_sigmasr_y, // must be c->nelem long
 		double *target_sigmasr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c, // N.B. not too useful here