From f66010792584d7f1d122a522bef0621dfbdaae27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ne=C4=8Dada?= <marek@necada.org>
Date: Tue, 18 Sep 2018 10:37:07 +0000
Subject: [PATCH] =?UTF-8?q?Ewald=20summation=20=E2=80=93=20stupid=20implem?=
 =?UTF-8?q?entation=20of=20alternative=20lattice=20sums?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Former-commit-id: de2830e9e5b65fd679fcf5fd1e8ec3e13dba116a
---
 qpms/ewald.c        | 156 +++++++++++++++++++++
 qpms/ewald.h        |   5 +-
 tests/ewaldshift2.c | 329 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 488 insertions(+), 2 deletions(-)
 create mode 100644 tests/ewaldshift2.c

diff --git a/qpms/ewald.c b/qpms/ewald.c
index 5ee6f3d..e723a9c 100644
--- a/qpms/ewald.c
+++ b/qpms/ewald.c
@@ -239,6 +239,96 @@ int ewald32_sigma_long_shiftedpoints (
   return 0;
 }
 
+int ewald32_sigma_long_points_and_shift (
+    complex double *target, // must be c->nelem_sc long
+    double *err,
+    const qpms_ewald32_constants_t *c,
+    const double eta, const double k, const double unitcell_area,
+    const size_t npoints, const point2d *Kpoints,
+    const point2d beta, // not needed
+    const point2d particle_shift // target - src
+    ) 
+{
+  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_l_t lMax = c->lMax;
+  
+  // Manual init of the ewald summation targets
+  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
+  memset(target, 0, nelem_sc * sizeof(complex double));
+  double *err_c = NULL;
+  if (err) {
+    err_c = calloc(nelem_sc, sizeof(double));
+    memset(err, 0, nelem_sc * sizeof(double));
+  }
+
+  const double commonfac = 1/(k*k*unitcell_area); // used in the very end (CFC)
+  assert(commonfac > 0);
+
+  // space for Gamma_pq[j]'s
+  qpms_csf_result Gamma_pq[lMax/2+1];
+
+  // CHOOSE POINT BEGIN
+  for (size_t i = 0; i < npoints; ++i) { // BEGIN POINT LOOP
+    // Only these following two lines differ from ewald32_sigma_long_points_and_shift()!!! 
+    const point2d K_pq = Kpoints[i]; 
+    const point2d beta_pq = {K_pq.x + beta.x, K_pq.y + beta.y};
+    const double rbeta_pq = cart2norm(beta_pq);
+  // CHOOSE POINT END
+
+    const complex double phasefac = cexp(I*cart2_dot(K_pq,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
+    const double arg_pq = atan2(beta_pq.y, beta_pq.x); // POINT-DEPENDENT
+
+    // R-DEPENDENT BEGIN
+    const complex double gamma_pq = lilgamma(rbeta_pq/k);
+    const complex double z = csq(gamma_pq*k/(2*eta)); // Když o tom tak přemýšlím, tak tohle je vlastně vždy reálné
+    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
+      int retval = complex_gamma_inc_e(0.5-j, z, Gamma_pq+j);
+      // we take the other branch, cf. [Linton, p. 642 in the middle]: FIXME instead use the C11 CMPLX macros and fill in -O*I part to z in the line above
+      if(creal(z) < 0) 
+        Gamma_pq[j].val = conj(Gamma_pq[j].val); //FIXME as noted above
+      if(!(retval==0 ||retval==GSL_EUNDRFLW)) abort();
+    }
+    // R-DEPENDENT END
+    
+    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
+    // and just fetched for each n, m pair
+    for(qpms_l_t n = 0; n <= lMax; ++n)
+      for(qpms_m_t m = -n; m <= n; ++m) {
+        if((m+n) % 2 != 0) // odd coefficients are zero.
+          continue;
+        const qpms_y_t y = qpms_mn2y_sc(m, n);
+        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
+        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
+        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
+        assert((n-abs(m))/2 == c->s1_jMaxes[y]);
+        for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
+          complex double summand = pow(rbeta_pq/k, n-2*j) 
+            * e_imalpha_pq  * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
+            * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] bellow (GGG) after error computation
+            * c->s1_constfacs[y][j];
+          if(err) {
+            // FIXME include also other errors than Gamma_pq's relative error
+             kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
+          }
+          summand *= Gamma_pq[j].val; // GGG
+          ckahanadd(&jsum, &jsum_c, summand);
+        }
+        jsum *= phasefac; // PFC
+        ckahanadd(target + y, target_c + y, jsum);
+        if(err) kahanadd(err + y, err_c + y, jsum_err);
+      }
+  } // END POINT LOOP
+ 
+  free(err_c);
+  free(target_c);
+  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
+    target[y] *= commonfac;
+  if(err)
+    for(qpms_y_t y = 0; y < nelem_sc; ++y)
+      err[y] *= commonfac;
+  return 0;
+}
+
 
 struct sigma2_integrand_params {
   int n;
@@ -339,6 +429,72 @@ int ewald32_sigma_short_shiftedpoints(
   return 0;
 }
 
+int ewald32_sigma_short_points_and_shift(
+    complex double *target, // must be c->nelem_sc long
+    double *err,
+    const qpms_ewald32_constants_t *c, // N.B. not too useful here
+    const double eta, const double k,
+    const size_t npoints, const point2d *Rpoints,
+    const point2d beta,
+    const point2d particle_shift           // used only in the very end to multiply it by the phase
+    )
+{
+  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_l_t lMax = c->lMax;
+  gsl_integration_workspace *workspace = 
+    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);
+  
+  // Manual init of the ewald summation targets
+  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
+  memset(target, 0, nelem_sc * sizeof(complex double));
+  double *err_c = NULL;
+  if (err) {
+    err_c = calloc(nelem_sc, sizeof(double));
+    memset(err, 0, nelem_sc * sizeof(double));
+  }
+  
+
+// CHOOSE POINT BEGIN
+  for (size_t i = 0; i < npoints; ++i) { // BEGIN POINT LOOP
+    //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
+    const point2d Rpq_shifted = cart2_add(Rpoints[i], cart2_scale(-1,particle_shift)); // CHECKSIGN!!!!
+    const double r_pq_shifted = cart2norm(Rpq_shifted);
+// CHOOSE POINT END
+
+    const double Rpq_shifted_arg = atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
+    const complex double e_beta_Rpq = cexp(I*cart2_dot(beta, Rpq_shifted)); // POINT-DEPENDENT
+    
+    for(qpms_l_t n = 0; n <= lMax; ++n) {
+      const double complex prefacn = - I * pow(2./k, n+1) * M_2_SQRTPI / 2; // TODO put outside the R-loop and multiply in the end
+      const double R_pq_pown = pow(r_pq_shifted, n);
+      // TODO the integral here
+      double intres, interr;
+      int retval = ewald32_sr_integral(r_pq_shifted, k, n, eta,
+          &intres, &interr, workspace);
+      if (retval) abort();
+      for (qpms_m_t m = -n; m <= n; ++m){
+        if((m+n) % 2 != 0) // odd coefficients are zero.
+          continue; // nothing needed, already done by memset
+        const complex double e_imf = cexp(I*m*Rpq_shifted_arg);
+        const double leg = c->legendre0[gsl_sf_legendre_array_index(n, abs(m))];
+        const qpms_y_t y = qpms_mn2y_sc(m,n);
+        if(err)
+          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
+              * interr)); // TODO include also other errors
+        ckahanadd(target + y, target_c + y,
+            prefacn * R_pq_pown * leg * intres * e_beta_Rpq * e_imf * min1pow_m_neg(m));
+      }
+
+    }
+  }
+
+  gsl_integration_workspace_free(workspace);
+  if(err) free(err_c);
+  free(target_c);
+  return 0;
+}
+
+
  
 #if 0
 
diff --git a/qpms/ewald.h b/qpms/ewald.h
index 6883870..d790aea 100644
--- a/qpms/ewald.h
+++ b/qpms/ewald.h
@@ -106,7 +106,7 @@ int ewald32_sigma_long_shiftedpoints (
 		point2d beta,
 		point2d particle_shift
 );
-int ewald32_sigma_long_points_and_shift (//NI
+int ewald32_sigma_long_points_and_shift (
 		complex double *target_sigmalr_y, // must be c->nelem_sc long
 		double *target_sigmalr_y_err, // must be c->nelem_sc long or NULL
 		const qpms_ewald32_constants_t *c,
@@ -133,12 +133,13 @@ int ewald32_sigma_short_shiftedpoints(
 		point2d beta,
 		point2d particle_shift           // used only in the very end to multiply it by the phase
 		);
-int ewald32_sigma_short_points_and_shift(//NI
+int ewald32_sigma_short_points_and_shift(
 		complex double *target_sigmasr_y, // must be c->nelem_sc long
 		double *target_sigmasr_y_err, // must be c->nelem_sc long or NULL
 		const qpms_ewald32_constants_t *c, // N.B. not too useful here
 		double eta, double k,
 		size_t npoints, const point2d *Rpoints, 
+		point2d beta,
 		point2d particle_shift
 		);
 int ewald32_sigma_short_points_rordered(//NI
diff --git a/tests/ewaldshift2.c b/tests/ewaldshift2.c
new file mode 100644
index 0000000..72cc404
--- /dev/null
+++ b/tests/ewaldshift2.c
@@ -0,0 +1,329 @@
+// c99 -ggdb -Wall -I ../ ewaldshift2.c ../qpms/ewald.c ../qpms/ewaldsf.c  ../qpms/lattices2d.c -lgsl -lm -lblas
+
+// implementation of the [LT(4.16)] test
+#include <math.h>
+#define M_SQRTPI 1.7724538509055160272981674833411452 
+#include <qpms/ewald.h>
+#include <qpms/tiny_inlines.h>
+#include <qpms/indexing.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <float.h>
+#include <gsl/gsl_sf_legendre.h>
+typedef struct ewaldtest_triang_params {
+  qpms_l_t lMax;
+  point2d beta; 
+  point2d particle_shift;
+  double k; 
+  double a;
+  double eta;
+  double maxR;
+  double maxK;
+  double csphase;
+  TriangularLatticeOrientation orientation;
+} ewaldtest_triang_params;
+
+typedef struct ewaldtest_triang_results {
+  ewaldtest_triang_params p;
+  complex double *sigmas_short,
+          *sigmas_long,
+          sigma0,
+          *sigmas_total;
+  double *err_sigmas_short,
+    *err_sigmas_long,
+    err_sigma0,
+    *err_sigmas_total;
+  complex double *regsigmas_416;
+} ewaldtest_triang_results;
+
+
+ewaldtest_triang_params paramslist[] = {
+// lMax, beta, shift, k, a, eta, maxR, maxK, csphase, orientation
+/*
+ { 2, {2.7, 1}, {0.5,0.1325}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {2.7, 1}, {0.5,0.1325}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {2.7, 1}, {0.5,0.1325}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {2.7, 1}, {0.5,0.1325}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {1.1, 1}, {0.5,0.1325}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.1325}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.1325}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.1325}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {1.1, 1}, {0.5,0.}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+*/
+  { 2, {1.1, 1}, {0.5,0.1325}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.1325}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.1325}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 1}, {0.5,0.1325}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {1.1, 2.1}, {0.5,0.1325}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 2.1}, {0.5,0.1325}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 2.1}, {0.5,0.1325}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1, 2.1}, {0.5,0.1325}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+/*
+  { 2, {0, 3.1}, {0.5,0}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 3.1}, {0.5,0}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 3.1}, {0.5,0}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 3.1}, {0.5,0}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {0, 1.1}, {0.5,0}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 1.1}, {0.5,0}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 1.1}, {0.5,0}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 1.1}, {0.5,0}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {3.1,0}, {0,0.5}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1,0}, {0,0.5}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1,0}, {0,0.5}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1,0}, {0,0.5}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {1.1,0}, {0,0.5}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1,0}, {0,0.5}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1,0}, {0,0.5}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1,0}, {0,0.5}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {3.1,0}, {0,0.5}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1,0}, {0,0.5}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1,0}, {0,0.5}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1,0}, {0,0.5}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+*/
+  { 2, {3.1*0.5,-3.1*0.8}, {0.8,0.5}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1*0.5,-3.1*0.8}, {0.8,0.5}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1*0.5,-3.1*0.8}, {0.8,0.5}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1*0.5,-3.1*0.8}, {0.8,0.5}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {1.1*0.5,-1.1*0.8}, {0.8,0.5}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1*0.5,-1.1*0.8}, {0.8,0.5}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1*0.5,-1.1*0.8}, {0.8,0.5}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1*0.5,-1.1*0.8}, {0.8,0.5}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  // Poloviční posun oproti přodchozímu
+  { 2, {3.1*0.5,-3.1*0.8}, {0.4,0.25}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1*0.5,-3.1*0.8}, {0.4,0.25}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1*0.5,-3.1*0.8}, {0.4,0.25}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {3.1*0.5,-3.1*0.8}, {0.4,0.25}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 2, {1.1*0.5,-1.1*0.8}, {0.4,0.25}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1*0.5,-1.1*0.8}, {0.4,0.25}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1*0.5,-1.1*0.8}, {0.4,0.25}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {1.1*0.5,-1.1*0.8}, {0.4,0.25}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  // miniposun
+  { 3, {3.1*0.5,-3.1*0.8}, {0.004,0.0025}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {3.1*0.5,-3.1*0.8}, {0.004,0.0025}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {3.1*0.5,-3.1*0.8}, {0.004,0.0025}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {3.1*0.5,-3.1*0.8}, {0.004,0.0025}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 3, {1.1*0.5,-1.1*0.8}, {0.004,0.0025}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {1.1*0.5,-1.1*0.8}, {0.004,0.0025}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {1.1*0.5,-1.1*0.8}, {0.004,0.0025}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {1.1*0.5,-1.1*0.8}, {0.004,0.0025}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 3, {3.1*0.5,-3.1*0.8}, {-0.004,-0.0025}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {3.1*0.5,-3.1*0.8}, {-0.004,-0.0025}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {3.1*0.5,-3.1*0.8}, {-0.004,-0.0025}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {3.1*0.5,-3.1*0.8}, {-0.004,-0.0025}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+  { 3, {1.1*0.5,-1.1*0.8}, {-0.004,-0.0025}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {1.1*0.5,-1.1*0.8}, {-0.004,-0.0025}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {1.1*0.5,-1.1*0.8}, {-0.004,-0.0025}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 3, {1.1*0.5,-1.1*0.8}, {-0.004,-0.0025}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+
+
+
+
+/*
+  { 2, {0, 3.1}, {0,0.5}, 2.3, 0.97, 0.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 3.1}, {0,0.5}, 2.3, 0.97, 1.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 3.1}, {0,0.5}, 2.3, 0.97, 2.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+  { 2, {0, 3.1}, {0,0.5}, 2.3, 0.97, 3.5, 20, 160, 1., TRIANGULAR_VERTICAL},
+*/
+
+
+
+
+// end:
+//  { 0,  {0, 0}, 0, 0, 0, 0, 0, 0, 0}
+};
+
+void ewaldtest_triang_results_free(ewaldtest_triang_results *r) {
+  free(r->sigmas_short);
+  free(r->sigmas_long);
+  free(r->sigmas_total);
+  free(r->err_sigmas_long);
+  free(r->err_sigmas_total);
+  free(r->err_sigmas_short);
+  free(r->regsigmas_416);
+  free(r);
+}
+
+
+void dump_points2d_rordered(const points2d_rordered_t *ps, char *filename) {
+  FILE *f = fopen(filename, "w");
+  for (size_t i = 0; i < ps->nrs; ++i) {
+    fprintf(f, "# r = %.16g\n", ps->rs[i]);
+    for (ptrdiff_t j = ps->r_offsets[i]; j < ps->r_offsets[i+1]; ++j)
+      fprintf(f, "%.16g %.16g\n", ps->base[j].x, ps->base[j].y);
+  }
+  fclose(f);
+}
+
+
+static inline double san(double x) {
+  return fabs(x) < 1e-13 ? 0 : x;
+}
+
+ewaldtest_triang_results *ewaldtest_triang(const ewaldtest_triang_params p);
+
+int main() {
+  gsl_set_error_handler(IgnoreUnderflowsGSLErrorHandler);
+  for (size_t i = 0; i < sizeof(paramslist)/sizeof(ewaldtest_triang_params); ++i) {
+    ewaldtest_triang_params p = paramslist[i];
+    ewaldtest_triang_results *r = ewaldtest_triang(p);
+    // TODO print per-test header here
+    printf("===============================\n");
+    printf("a = %g, K = %g, Kmax = %g, Rmax = %g, lMax = %d, eta = %g, k = %g, beta = (%g,%g), ps = (%g,%g), csphase = %g\n",
+        p.a, 4*M_PI/sqrt(3)/p.a, p.maxK, p.maxR, p.lMax, p.eta, p.k, p.beta.x, p.beta.y, p.particle_shift.x, p.particle_shift.y, p.csphase);
+    printf("sigma0: %.16g%+.16gj\n", creal(r->sigma0), cimag(r->sigma0));    
+    for (qpms_l_t n = 0; n <= p.lMax; ++n) {
+      for (qpms_m_t m = -n; m <= n; ++m){
+        if ((m+n)%2) continue;
+        qpms_y_t y = qpms_mn2y_sc(m,n);
+        qpms_y_t y_conj = qpms_mn2y_sc(-m,n);
+        // y n m sigma_total (err), regsigmas_416 regsigmas_415_recon
+        printf("%zd %d %d: T:%.16g%+.16gj(%.3g) L:%.16g%+.16gj(%.3g) S:%.16g%+.16gj(%.3g) \n"
+            //"| predict %.16g%+.16gj \n| actual  %.16g%+.16gj\n"
+            ,
+            y, n, m, creal(san(r->sigmas_total[y])), san(cimag(r->sigmas_total[y])),
+            r->err_sigmas_total[y],
+            san(creal(r->sigmas_long[y])), san(cimag(r->sigmas_long[y])),
+            r->err_sigmas_long[y],
+            san(creal(r->sigmas_short[y])), san(cimag(r->sigmas_short[y])),
+            r->err_sigmas_short[y]
+            //san(creal(r->regsigmas_416[y])), san(cimag(r->regsigmas_416[y])),
+            //san(creal(r->sigmas_total[y]) + creal(r->sigmas_total[y_conj])),
+            //san(cimag(r->sigmas_total[y]) - cimag(r->sigmas_total[y_conj]))
+        );
+      }
+    }
+    ewaldtest_triang_results_free(r);
+  }
+  return 0;
+}
+
+
+int ewaldtest_counter = 0;
+
+
+ewaldtest_triang_results *ewaldtest_triang(const ewaldtest_triang_params p) {
+  const double a = p.a; //const double a = p.h * sqrt(3);
+ 
+  const double A = sqrt(3) * a * a / 2.; // unit cell size
+  const double K_len = 4*M_PI/a/sqrt(3); // reciprocal vector length
+  
+  
+  ewaldtest_triang_results *results = malloc(sizeof(ewaldtest_triang_results));
+  results->p = p;
+
+  triangular_lattice_gen_t *Rlg = triangular_lattice_gen_init(a, p.orientation, true, 0); // N.B. orig is included 
+  triangular_lattice_gen_extend_to_r(Rlg, p.maxR + a);
+  triangular_lattice_gen_t *Klg = triangular_lattice_gen_init(K_len, reverseTriangularLatticeOrientation(p.orientation), true, 0);
+  triangular_lattice_gen_extend_to_r(Klg, p.maxK + K_len);
+
+  point2d *Rpoints = Rlg->ps.base; 
+  point2d  *Kpoints = Klg->ps.base;
+  size_t nR = Rlg->ps.r_offsets[Rlg->ps.nrs],
+         nK = Klg->ps.r_offsets[Klg->ps.nrs];
+  
+  point2d particle_shift = p.particle_shift;
+  point2d minus_ps = {-particle_shift.x, -particle_shift.y};
+  point2d Rpoints_plus_shift[nR];
+  for(size_t i = 0; i < nR; ++i){
+    Rpoints_plus_shift[i].x = Rpoints[i].x - particle_shift.x;
+    Rpoints_plus_shift[i].y = Rpoints[i].y - particle_shift.y;
+  }
+
+  qpms_y_t nelem_sc = qpms_lMax2nelem_sc(p.lMax);
+
+  results->sigmas_short = malloc(sizeof(complex double)*nelem_sc);
+  results->sigmas_long = malloc(sizeof(complex double)*nelem_sc);
+  results->sigmas_total = malloc(sizeof(complex double)*nelem_sc);
+  results->err_sigmas_short = malloc(sizeof(double)*nelem_sc);
+  results->err_sigmas_long = malloc(sizeof(double)*nelem_sc);
+  results->err_sigmas_total = malloc(sizeof(double)*nelem_sc);
+
+  qpms_ewald32_constants_t *c = qpms_ewald32_constants_init(p.lMax, p.csphase);
+
+  points2d_rordered_t *Kpoints_plus_beta = points2d_rordered_shift(&(Klg->ps), p.beta,
+      8*DBL_EPSILON, 8*DBL_EPSILON);
+
+  char filename[BUFSIZ];
+  sprintf(filename, "betalattice_%d.out", ewaldtest_counter);
+  dump_points2d_rordered(Kpoints_plus_beta, filename);
+
+
+  if (0!=ewald32_sigma_long_points_and_shift(results->sigmas_long,
+        results->err_sigmas_long, c, p.eta, p.k, A,
+        nK, Kpoints,
+        p.beta, 
+        particle_shift /*minus_ps*/ )) 
+    abort();
+  if (0!=ewald32_sigma_short_points_and_shift(
+        results->sigmas_short, results->err_sigmas_short, c,
+        p.eta, p.k,
+        nR, Rpoints, p.beta, particle_shift)) 
+    abort();
+  if (0!=ewald32_sigma0(&(results->sigma0), &(results->err_sigma0), c, p.eta, p.k))
+    abort();
+  for(qpms_y_t y = 0; y < nelem_sc; ++y) {
+    results->sigmas_total[y] = results->sigmas_short[y] + results->sigmas_long[y];
+    results->err_sigmas_total[y] = results->err_sigmas_short[y] + results->err_sigmas_long[y];
+  }
+  results->sigmas_total[0] += results->sigma0;
+  results->err_sigmas_total[0] += results->err_sigma0;
+
+  // Now calculate the reference values [LT(4.16)]
+  results->regsigmas_416 = calloc(nelem_sc, sizeof(complex double));
+  results->regsigmas_416[0] = -2 * c->legendre0[gsl_sf_legendre_array_index(0,0)];
+  
+  {
+    double legendres[gsl_sf_legendre_array_n(p.lMax)];
+    points2d_rordered_t sel = 
+      points2d_rordered_annulus(Kpoints_plus_beta, 0, true, p.k, false);
+    if (0 != sel.nrs) 
+    { 
+      point2d *beta_pq_lessthan_k = sel.base + sel.r_offsets[0];
+      size_t beta_pq_lessthan_k_count = sel.r_offsets[sel.nrs] - sel.r_offsets[0];
+      for(size_t i = 0; i < beta_pq_lessthan_k_count; ++i) {
+        point2d beta_pq = beta_pq_lessthan_k[i];
+        double rbeta_pq = cart2norm(beta_pq);
+        double arg_pq = atan2(beta_pq.y, beta_pq.x);
+        double denom = sqrt(p.k*p.k - rbeta_pq*rbeta_pq);
+        if( gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
+              p.lMax, denom/p.k, p.csphase, legendres) != 0)
+          abort();
+        for (qpms_y_t y = 0; y < nelem_sc; ++y) {
+          qpms_l_t n; qpms_m_t m; 
+          qpms_y2mn_sc_p(y, &m, &n);
+          if ((m+n)%2 != 0)
+            continue;
+          complex double eimf = cexp(I*m*arg_pq);
+          results->regsigmas_416[y] +=
+            4*M_PI*ipow(n)/p.k/A 
+            * eimf * legendres[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m)
+            / denom;
+        }
+      }
+    }
+  }
+
+  points2d_rordered_free(Kpoints_plus_beta);
+  qpms_ewald32_constants_free(c);
+  triangular_lattice_gen_free(Klg);
+  triangular_lattice_gen_free(Rlg);
+  ++ewaldtest_counter;
+  return results;
+}