diff --git a/qpms/ewald.c b/qpms/ewald.c
index 2063ef2..7c63a85 100644
--- a/qpms/ewald.c
+++ b/qpms/ewald.c
@@ -1,7 +1,9 @@
 #include "ewald.h"
 #include <stdlib.h>
 #include "indexing.h"
+#include "kahansum.h"
 #include <assert.h>
+#include <string.h>
 #include "tiny_inlines.h"
 
 // sloppy implementation of factorial
@@ -19,6 +21,7 @@ static inline double factorial(const int n) {
     return tgamma(n + 1); // hope it's precise and that overflow does not happen
 }
 
+static inline complex double csq(complex double x) { return x * x; }
 
 
 qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax)
@@ -41,7 +44,7 @@ qpms_ewald32_constants_t *qpms_ewald32_constants_init(const qpms_l_t lMax)
       c->s1_jMaxes[y] = -1;
   }
 
-  c->s1_constfacs[0];
+  c->s1_constfacs[0]; //WTF???
   c->s1_constfacs_base = malloc(c->nelem * sizeof(complex double));
   size_t s1_constfacs_sz_cumsum = 0;
   for (qpms_y_t y = 0; y < c->nelem; ++y) {
@@ -71,3 +74,128 @@ void qpms_ewald32_constants_free(qpms_ewald32_constants_t *c) {
   free(c);
 }
 
+
+
+int ewald32_sigma_long_shiftedpoints (
+    complex double *target, // must be c->nelem long
+    double *err,
+    const qpms_ewald32_constants_t *c,
+    const double eta, const double k, const double unitcell_area,
+    const size_t npoints, const point2d *Kpoints_plus_beta,
+    const point2d particle_shift // target - src
+    ) 
+{
+  const qpms_y_t nelem = c->nelem;
+  const qpms_l_t lMax = c->lMax;
+  
+  // Manual init of the ewald summation targets
+  complex double *target_c = calloc(nelem, sizeof(complex double));
+  memset(target, 0, nelem * sizeof(complex double));
+  double *err_c = NULL;
+  if (err) {
+    err_c = calloc(nelem, sizeof(double));
+    memset(err, 0, nelem * sizeof(double));
+  }
+
+  const double commonfac = 1/(k*k*unitcell_area); // used in the very end (CFC)
+  assert(commonfac > 0);
+
+  // space for Gamma_pq[j]'s
+  qpms_csf_result Gamma_pq[lMax/2];
+
+  // CHOOSE POINT BEGIN
+  for (size_t i = 0; i < npoints; ++i) { // BEGIN POINT LOOP
+    point2d beta_pq = Kpoints_plus_beta[i];
+    double rbeta_pq = cart2norm(beta_pq);
+  // CHOOSE POINT END
+
+    complex double phasefac = cexp(I*cart2_dot(beta_pq,particle_shift)); // POINT-DEPENDENT (PFC)
+    double arg_pq = atan2(beta_pq.y, beta_pq.x); // POINT-DEPENDENT
+
+    // R-DEPENDENT BEGIN
+    complex double gamma_pq = lilgamma(rbeta_pq/k);
+    complex double z = csq(gamma_pq*k/(2*eta)); // Když o tom tak přemýšlím, tak tohle je vlastně vždy reálné
+    for(qpms_l_t j = 0; j < lMax/2; ++j) {
+      int retval = complex_gamma_inc_e(0.5-j, z, Gamma_pq+j);
+      if(retval) abort();
+    }
+    // R-DEPENDENT END
+    
+    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
+    // and just fetched for each n, m pair
+    for(qpms_l_t n = 0; n <= lMax; ++n)
+      for(qpms_m_t m = -n; m <= n; ++m) {
+        qpms_y_t y = qpms_mn2y(m, n);
+        complex double e_imalpha_pq = cexp(I*m*arg_pq);
+        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
+        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
+        for(qpms_l_t j = 0; j < (n-abs(m))/2; ++j) {
+          complex double summand = pow(rbeta_pq/k, n-2*j) 
+            * e_imalpha_pq * cpow(gamma_pq, 2*j-1) // * Gamma_pq[j] bellow (GGG) after error computation
+            * c->s1_constfacs[y][j];
+          if(err) {
+            // FIXME include also other errors than Gamma_pq's relative error
+             kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
+          }
+          summand *= Gamma_pq[j].val; // GGG
+          ckahanadd(&jsum, &jsum_c, summand);
+        }
+        jsum *= phasefac;
+        ckahanadd(target + y, target_c + y, jsum);
+        if(err) kahanadd(err + y, err_c + y, jsum_err);
+      }
+  } // END POINT LOOP
+ 
+  free(err_c);
+  free(target_c);
+  for(qpms_y_t y = 0; y < nelem; ++y) // CFC common factor from above
+    target[y] *= commonfac;
+  if(err)
+    for(qpms_y_t y = 0; y < nelem; ++y)
+      err[y] *= commonfac;
+  return 0;
+}
+  
+
+#if 0
+
+
+int ewald32_sigma_long_points_and_shift (
+    complex double *target_sigmalr_y, // must be c->nelem long
+    const qpms_ewald32_constants_t *c,
+    double eta, double k, double unitcell_area,
+    size_t npoints, const point2d *Kpoints,
+    point2d beta,
+    point2d particle_shift
+    );
+int ewald32_sigma_long_shiftedpoints_rordered(
+    complex double *target_sigmalr_y, // must be c->nelem long
+    const qpms_ewald32_constants_t *c,
+    double eta, double k, double unitcell_area,
+    const points2d_rordered_t *Kpoints_plus_beta_rordered,
+    point2d particle_shift
+    );
+
+int ewald32_sigma_short_shiftedpoints(
+    complex double *target_sigmasr_y, // must be c->nelem long
+    const qpms_ewald32_constants_t *c, // N.B. not too useful here
+    double eta, double k,
+    size_t npoints, const point2d *Rpoints_plus_particle_shift,
+    point2d particle_shift           // used only in the very end to multiply it by the phase
+    );
+int ewald32_sigma_short_points_and_shift(
+    complex double *target_sigmasr_y, // must be c->nelem long
+    const qpms_ewald32_constants_t *c, // N.B. not too useful here
+    double eta, double k,
+    size_t npoints, const point2d *Rpoints,
+    point2d particle_shift
+    );
+int ewald32_sigma_short_points_rordered(
+    complex double *target_sigmasr_y, // must be c->nelem long
+    const qpms_ewald32_constants_t *c, // N.B. not too useful here
+    double eta, double k,
+    const points2d_rordered_t *Rpoints_plus_particle_shift_rordered,
+    point2d particle_shift    // used only in the very end to multiply it by the phase
+    );
+
+#endif
diff --git a/qpms/ewald.h b/qpms/ewald.h
index 9987afd..29d2856 100644
--- a/qpms/ewald.h
+++ b/qpms/ewald.h
@@ -55,8 +55,9 @@ int complex_gamma_inc_e(double a, complex double x, qpms_csf_result *result);
 // TODO make "compressed versions" where the (m+n)-odd terms (which are zero)
 // are not included.
 
-int ewald32_sigma_long_shiftedpoints ( 
+int ewald32_sigma_long_shiftedpoints_e ( 
 		complex double *target_sigmalr_y, // must be c->nelem long
+		double *target_sigmalr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c,
 		double eta, double k, double unitcell_area,
 		size_t npoints, const point2d *Kpoints_plus_beta,
@@ -64,6 +65,7 @@ int ewald32_sigma_long_shiftedpoints (
 );
 int ewald32_sigma_long_points_and_shift (
 		complex double *target_sigmalr_y, // must be c->nelem long
+		double *target_sigmalr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c,
 		double eta, double k, double unitcell_area,
 		size_t npoints, const point2d *Kpoints,
@@ -72,6 +74,7 @@ int ewald32_sigma_long_points_and_shift (
 		);
 int ewald32_sigma_long_shiftedpoints_rordered(
 		complex double *target_sigmalr_y, // must be c->nelem long
+		double *target_sigmalr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c,
 		double eta, double k, double unitcell_area,
 		const points2d_rordered_t *Kpoints_plus_beta_rordered,
@@ -80,6 +83,7 @@ int ewald32_sigma_long_shiftedpoints_rordered(
 
 int ewald32_sigma_short_shiftedpoints(
 		complex double *target_sigmasr_y, // must be c->nelem long
+		double *target_sigmasr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c, // N.B. not too useful here
 		double eta, double k,
 		size_t npoints, const point2d *Rpoints_plus_particle_shift,
@@ -87,6 +91,7 @@ int ewald32_sigma_short_shiftedpoints(
 		);
 int ewald32_sigma_short_points_and_shift(
 		complex double *target_sigmasr_y, // must be c->nelem long
+		double *target_sigmasr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c, // N.B. not too useful here
 		double eta, double k,
 		size_t npoints, const point2d *Rpoints, 
@@ -94,12 +99,12 @@ int ewald32_sigma_short_points_and_shift(
 		);
 int ewald32_sigma_short_points_rordered(
 		complex double *target_sigmasr_y, // must be c->nelem long
+		double *target_sigmasr_y_err, // must be c->nelem long or NULL
 		const qpms_ewald32_constants_t *c, // N.B. not too useful here
 		double eta, double k,
 		const points2d_rordered_t *Rpoints_plus_particle_shift_rordered,
 		point2d particle_shift    // used only in the very end to multiply it by the phase
 		);
-;
 
 
 
diff --git a/qpms/tiny_inlines.h b/qpms/tiny_inlines.h
index 70a563a..0ac7848 100644
--- a/qpms/tiny_inlines.h
+++ b/qpms/tiny_inlines.h
@@ -1,5 +1,6 @@
 #ifndef TINY_INLINES_H
 #define TINY_INLINES_H
+#include <stdlib.h>
 
 static inline int min1pow(int pow) { return (pow % 2) ? -1 : 1; }
 
@@ -17,6 +18,8 @@ static inline complex double ipow(int x) {
       return -1;
     case 3:
       return -I;
+    default:
+      abort();
   }
 }