qpms/qpms/ewald.c

#include "ewald.h"
#include <stdlib.h>
#include "indexing.h"
#include "kahansum.h"
#include <assert.h>
#include <string.h>
#include <complex.h>
#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>

// parameters for the quadrature of integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif

#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif

#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#ifndef M_SQRTPI
#define M_SQRTPI 1.7724538509055160272981674833411452L
#endif


// sloppy implementation of factorial
// We prefer to avoid tgamma/lgamma, as their errors are about 4+ bits
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below. (Therefore the assert above)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else 
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}

// sloppy implementation of double factorial n!!
static inline double double_factorial(int n) {
  assert(n >= 0);
  if (n <= 25) {
    double fac = 1;
    while (n > 0) {
      fac *= n;
      n -= 2;
    }
    return fac;
  } else {
    if (n % 2) { // odd, (2*k - 1)!! = 2**k * Γ(k + 0.5) / sqrt(п)
      const int k = n / 2 + 1;
      return pow(2, k) * tgamma(k + 0.5) / M_SQRTPI;
    } else { // even, n!! = 2**(n/2) * (n/2)!
      const int k = n/2;
      return pow(2, k) * factorial(k);
    }
  }
}

// sloppy implementation of (n/2)! = Γ(n/2 + 1)
// It is _slightly_ more precise than direct call of tgamma for small odd n
static inline double factorial_of_half(const int n2) {
  assert(n2 >= 0);
  if (n2 % 2 == 0) return factorial(n2/2);
  else {
    if (n2 <= 50) { // odd, use (k - 0.5)! =  Γ(k + 0.5) = 2**(-k) (2*k - 1)!! sqrt(п) for small n2
      const int k = n2 / 2 + 1;
      double fac2 = 1;
      for(int j = 2*k - 1; j > 0; j -= 2)
        fac2 *= j;
      return fac2 * pow(2, -k) * M_SQRTPI;
    }
    else return tgamma(1. + 0.5*n2);
  }
}

static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }

/// Metadata describing the normalisation conventions used in ewald32_constants_t.
typedef enum {
  EWALD32_CONSTANTS_ORIG, // As in [1, (4,5)], NOT USED right now.
  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
                              * convention – the $e^{im\alpha_pq}$ term in [1,(4.5)] being
                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$ 
                              * spherical harmonic. See notes/ewald.lyx.
                              */

} ewald3_constants_option;

// TODO perhaps rewrite everything as agnostic.
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;

qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;

  // ----- the "xy-plane constants" ------
  //determine sizes
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) 
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n-abs(m))/2);
    else
      c->s1_jMaxes[y] = -1;
  }

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
        switch(type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2) 
              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
              * min1pow(j) * pow(0.5, n-2*j)
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j))
              * pow(0.5, 2*j-1);
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI  
              * factorial((n-m)/2) * factorial((n+m)/2)
              * min1pow(j) 
              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
            break;
          default:
            QPMS_INVALID_ENUM(type);
        }
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }

  // -----  New generation 2D-in-3D constants ------
  // TODO it is not necessary to treat +|m| and -|m| cases separately
  // N.B. currently, this is only valid for EWALD32_CONSTANTS_AGNOSTIC (NOT CHECKED!)
  c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));
  //determine sizes
  size_t S1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
      for(qpms_l_t s = j + (L_M - j) % 2; 
          (s <= 2 * j) && (s <= L_M);
          s += 2) {
        ++S1_constfacs_sz;
      }
    }
  }

  c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
  size_t S1_constfacs_sz_cumsum = 0; // second count
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI  
              * factorial((n-m)/2) * factorial((n+m)/2);
    c->S1_constfacs[y] = c->S1_constfacs_base + S1_constfacs_sz_cumsum;
    size_t coeffs_per_y = 0;
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
      for(qpms_l_t s = j + (L_M - j) % 2; 
          (s <= 2 * j) && (s <= L_M);
          s += 2) {
        c->S1_constfacs_base[S1_constfacs_sz_cumsum] = 
             yfactor * min1pow(j) 
             / factorial(2*j-s) / factorial(s - j) 
             / factorial_of_half(n - m - s) / factorial_of_half(n + m - s);
        ++S1_constfacs_sz_cumsum;
      }
    }
    S1_constfacs_sz_cumsum += coeffs_per_y;
  }
  QPMS_ASSERT(S1_constfacs_sz_cumsum = S1_constfacs_sz);
  c->S1_constfacs[c->nelem_sc] = c->S1_constfacs_base + S1_constfacs_sz; // For easier limit checks

  // ------ the "z-axis constants" -----
  // determine sizes
  size_t s1_constfacs_1Dz_sz;
  {
    const qpms_l_t sz_n_lMax = lMax/2 + 1;  // number of different j's for n = lMax
    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
                                     : isq(sz_n_lMax);
  }
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax+1)*sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n/2; ++j) {
      switch(type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
          break;
        default:
          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);


  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use the GSL_SF_LEGENRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. gsl) takes about 64kB extra memory
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));
  return c;
}

void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->S1_constfacs);
  free(c->S1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}


int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k/(2*eta));
  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z, 
     // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
     QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
  if(err) 
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
  return 0;
}

// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // lol branch cut in cpow
    return 0;
  else
    return cpow(x, z);
}

int ewald3_21_xy_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;
  
  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /* 
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON to speed up the calculation. 
   * This obviously can currently work only for "suitable PGen"s that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where w is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * during run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
  if (k_is_real)
    assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double rbeta_pq_prev;
#endif
  // recycleable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z; // I?*κ*γ*particle_shift.z
  complex double x; // κ*γ/(2*η)
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  complex double Gamma_pq[lMax/2+1];
  double Gamma_pq_err[lMax/2+1];
  // cpow() is expensive, so we want to save and reuse these too:
  complex double minus_k_shiftz_pow[2*lMax + 1]; // __[i] = cpow(-k * particle_shift.z, i)
  { 
    long complex double x = 1;
    for(qpms_l_t i = 0; i <= 2*lMax; ++i) { 
      minus_k_shiftz_pow[i] = x;
      x *= -k * particle_shift.z;
    }
  }
  complex double rbeta_pq_div_k_pow[lMax + 1]; // __[i] = cpow_0lim_zi(rbeta_pq/k, i)
  complex double gamma_pq_powm1[2*lMax + 1]; // __[i] = cpow(gamma_pq, i-1)

  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }

    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused

  // CHOOSE POINT END

    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || (pgen_retdata.flags & !PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);


    // R-DEPENDENT BEGIN
    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {
    if (new_rbeta_pq) {
      gamma_pq = clilgamma(rbeta_pq/k);
      { // fill gamma_pq_powm1[] and rbeta_pq_div_k_pow[]
        long complex double x = 1./gamma_pq;
        for(qpms_l_t i = 0; i <= 2*lMax; ++i) {
          gamma_pq_powm1[i] = x;
          x *= gamma_pq;
        }
        for(qpms_l_t i = 0; i <= lMax; ++i) // not fastest, but foolproof
          rbeta_pq_div_k_pow[i] = cpow_0lim_zi(rbeta_pq / k, i);
      }

      complex double x = gamma_pq*k/(2*eta);
      complex double x2 = x*x;

      if(particle_shift.z == 0) {
        for(qpms_l_t j = 0; j <= lMax/2; ++j) {
          qpms_csf_result Gam;
          int retval = complex_gamma_inc_e(0.5-j, x2,
            // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
            QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
          QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
          Gamma_pq[j] = Gam.val;
          if(err) Gamma_pq_err[j] = Gam.err;
        } 
        if (latdim & LAT1D)
          factor1d =  M_SQRT1_2 * .5 * k * gamma_pq;
      } else if (latdim & LAT2D) {
        QPMS_UNTESTED;
        // TODO check the branches/phases!
        complex double z = I * k * gamma_pq * particle_shift.z;
        ewald3_2_sigma_long_Delta(Gamma_pq, err ?  Gamma_pq_err : NULL, lMax/2, x, 0 /* FIXME */, z);
      } else {
        QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
      }
    }
    // R-DEPENDENT END
    
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n, m pair
    for(qpms_l_t n = 0; n <= lMax; ++n)
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
          continue;
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        size_t constidx = 0; // constants offset
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
          complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
          double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
        if (particle_shift.z == 0) { // TODO remove when the general case is stable and tested
          assert((n-abs(m))/2 == c->s1_jMaxes[y]);
          for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
            complex double summand = rbeta_pq_div_k_pow[n-2*j]
              * e_imalpha_pq  * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
              * gamma_pq_powm1[2*j]// * Gamma_pq[j] bellow (GGG) after error computation
              * c->s1_constfacs[y][j];
            if(err) {
              // FIXME include also other errors than Gamma_pq's relative error
               kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
            }
            summand *= Gamma_pq[j]; // GGG
            ckahanadd(&jsum, &jsum_c, summand);
          }
          jsum *= phasefac * factor1d; // PFC
          ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
          kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
          if(err) kahanadd(err + y, err_c + y, jsum_err);
        } else { // particle_shift.z != 0
          const qpms_l_t L_M = n - abs(m);
          for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
            complex double ssum, ssum_c; ckahaninit(&ssum, &ssum_c);
            // TODO errors of ssum
            // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
            for(qpms_l_t s = j + (L_M - j) % 2;
                (s <= 2 * j) && (s <= L_M);
                s += 2) {
              complex double ssummand = c->S1_constfacs[y][constidx]
                * minus_k_shiftz_pow[2*j - s] * rbeta_pq_div_k_pow[n - s];
              ckahanadd(&ssum, &ssum_c, ssummand);
              ++constidx;
            }
            const complex double jfactor = e_imalpha_pq * Gamma_pq[j] * gamma_pq_powm1[2*j];
            if (err) { // FIXME include also other sources of error than Gamma_pq's relative error
              double jfactor_err = Gamma_pq_err[j] * pow(cabs(gamma_pq), 2*j - 1);
              kahanadd(&jsum_err, &jsum_err_c, jfactor_err * ssum);
            }
            complex double jsummand = jfactor * ssum;
            ckahanadd(&jsum, &jsum_c, jsummand);
          }
          jsum *= phasefac; // factor1d not here, off-axis sums not implemented/allowed.
          ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
          kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
          if(err) kahanadd(err + y, err_c + y, jsum_err);
        }
      }
#ifndef NDEBUG
    rbeta_pq_prev = rbeta_pq;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if(lsum < DBL_EPSILON * cursum_min)
        goto ewald3_21_xy_sigma_long_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
#endif
 
  free(err_c);
  free(target_c);
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}


// 1D chain along the z-axis; not many optimisations here as the same
// shifted beta radius could be recycled only once anyways
int ewald3_1_z_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift_z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  
  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);

  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum).
  qpms_csf_result Gamma_pq[lMax/2+1];

  PGenSphReturnData pgen_retdata;
  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
        pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);
  // CHOOSE POINT END

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!

    // R-DEPENDENT BEGIN
    complex double gamma_pq = clilgamma(rbeta_mu/k);  // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq*k/(2*eta));// ... so the square (this) is in fact real.
    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
      int retval = complex_gamma_inc_e(0.5-j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
    }
    // R-DEPENDENT END
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
        const qpms_y_t y = qpms_mn2y_sc(0, n);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
        for(qpms_l_t j = 0; j <= n/2; ++j) {
          complex double summand = pow(rbeta_mu/k, n-2*j) 
            * ((beta_mu_z > 0) ? // TODO this can go outsize the j-loop
                c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
                (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
              )
            // * min1pow_m_neg(m) // not needed as m == 0
            * cpow(gamma_pq, 2*j) // * Gamma_pq[j] bellow (GGG) after error computation
            * c->s1_constfacs_1Dz[n][j];
              /* s1_consstfacs_1Dz[n][j] =
               *
               *     -I**(n+1) (-1)**j * n!
               *   --------------------------
               *   j! * 2**(2*j) * (n - 2*j)!
               */   

          if(err) {
            // FIXME include also other errors than Gamma_pq's relative error
             kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
          }
          summand *= Gamma_pq[j].val; // GGG
          ckahanadd(&jsum, &jsum_c, summand);
        }
        jsum *= phasefac; // PFC
        ckahanadd(target + y, target_c + y, jsum);
        if(err) kahanadd(err + y, err_c + y, jsum_err);
      }
  } // END POINT LOOP
 
  free(err_c);
  free(target_c);
  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
    for(qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}

int ewald3_sigma_long (
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
} 

struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
}

static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;


  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0;  // i.e. p.R = 1

  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;

  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS, 
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}

// a version allowing complex k

struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO ther might be some space for optimisation if needed, as now we calculate everything twice
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}
static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
}

static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;


  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0;  // i.e. p.R = 1

  gsl_function F;
  F.params = &p;
  double result_real, result_imag, err_real, err_imag;
  
  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS, 
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);
  
  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS, 
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I*result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));

  double normfac = pow(R0, -2*p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}

int ewald3_sigma_short( 
                complex double *target, // must be c->nelem_sc long
                double *err, // must be c->nelem_sc long or NULL
                const qpms_ewald3_constants_t *c,
                const double eta, const complex double k /* TODO COMPLEX */,
                const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
                PGen *pgen_R, const bool pgen_generates_shifted_points
                /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
                 * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
                 * and adds particle_shift to the generated points before calculations.
                 * If true, it assumes that they are already shifted (if calculating interaction between
                 * different particles in the unit cell).
                 */,
                const cart3_t beta,
                const cart3_t particle_shift
                )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace = 
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the legendre polynomials (used only in the general case)
  
  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /* 
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
  
  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double r_pq_shifted_prev;
#endif
  // recyclable variables if r_pq_shifted stays the same:
  double intres[lMax+1], interr[lMax+1];
  complex double cintres[lMax+1];

// CHOOSE POINT BEGIN
// TODO check whether _next_sph is the optimal coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
// CHOOSE POINT END
    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
    sph_t Rpq_shifted_sph;
    if (pgen_generates_shifted_points) {
      Rpq_shifted_sph = pgen_retdata.point_sph;
      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
    } else { // as in the old _points_and_shift functions
      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
      const sph_t bravais_point_sph = pgen_retdata.point_sph;
      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
    }

    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
    const double r_pq_shifted = Rpq_shifted_sph.r;

    // if the radius is the same as in previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || (pgen_retdata.flags & !PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);

    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY)  == LAT_1D_IN_3D_ZONLY)  speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double * legendre_array;

    switch(speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags &PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
        legendre_array = legendre_buf;
        break;
    }
    
    for(qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
               &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
               cintres+n, interr + n, workspace);
        QPMS_ENSURE_SUCCESS(retval);
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m){
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { //2D lattice inside the xy plane
          if((m+n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-non-zero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I*m*Rpq_shifted_arg);
        }

        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];

        const qpms_y_t y = qpms_mn2y_sc(m,n);
        if(err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
              * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }

    }
#ifndef NDEBUG
    r_pq_shifted_prev = r_pq_shifted;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if(lsum < DBL_EPSILON * cursum_min)
        goto ewald3_sigma_short_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  }
  //END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
#endif

  gsl_integration_workspace_free(workspace);
  if(err) free(err_c);
  free(target_c);
  return 0;
}
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								#include "ewald.h"
 								#include <stdlib.h>
 								#include "indexing.h"
-												Ewald summation LR part, stupid.


Former-commit-id: 9795ad00b41e83c30feb027853a13959d8b506ea
											
										
										
											2018-08-27 09:45:11 +03:00
+								#include "kahansum.h"
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								#include <assert.h>
-												Ewald summation LR part, stupid.


Former-commit-id: 9795ad00b41e83c30feb027853a13959d8b506ea
											
										
										
											2018-08-27 09:45:11 +03:00
+								#include <string.h>
-												ewald.c compiles


Former-commit-id: 7bcd240f3d9b5aba233550e4242736e700f96c35
											
										
										
											2018-09-05 10:58:13 +03:00
+								#include <complex.h>
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								#include "tiny_inlines.h"
-												Complex wave number for ewald sum (failing) + unit test.


Former-commit-id: a6c5e566028602ecd1047bc0b44677a4eeae3046
											
										
										
											2019-08-19 15:10:39 +03:00
+								#include "qpms_error.h"
-												SR ewald sum in progress


Former-commit-id: 927486c4e73f5d60dd1f3c94ff5b4878506a5e17
											
										
										
											2018-08-27 14:39:29 +03:00
+								#include <gsl/gsl_integration.h>
 								#include <gsl/gsl_errno.h>
-												ewald.c compiles


Former-commit-id: 7bcd240f3d9b5aba233550e4242736e700f96c35
											
										
										
											2018-09-05 10:58:13 +03:00
+								#include <gsl/gsl_sf_legendre.h>
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								#include <gsl/gsl_sf_expint.h>
-												SR ewald sum in progress


Former-commit-id: 927486c4e73f5d60dd1f3c94ff5b4878506a5e17
											
										
										
											2018-08-27 14:39:29 +03:00
 								// parameters for the quadrature of integral in (4.6)
 								#ifndef INTEGRATION_WORKSPACE_LIMIT
 								#define INTEGRATION_WORKSPACE_LIMIT 30000
 								#endif
 								#ifndef INTEGRATION_EPSABS
-												More test params etc. (still wrong results)


Former-commit-id: 589f4a643ac318f384554b83318355289fdf827d
											
										
										
											2018-09-05 22:45:28 +03:00
+								#define INTEGRATION_EPSABS 1e-13
-												SR ewald sum in progress


Former-commit-id: 927486c4e73f5d60dd1f3c94ff5b4878506a5e17
											
										
										
											2018-08-27 14:39:29 +03:00
+								#endif
 								#ifndef INTEGRATION_EPSREL
 								#define INTEGRATION_EPSREL 1e-13
 								#endif
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
+								#ifndef M_SQRTPI
-												WIP Ewald 2D in 3D general z != 0 constant factors.


Former-commit-id: 787689f357bd8670948ba8ce7d8dc1205ca77d0f
											
										
										
											2020-05-29 15:55:52 +03:00
+								#define M_SQRTPI 1.7724538509055160272981674833411452L
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
+								#endif
-												Sigma0 ready, it's time for writing some tests.


Former-commit-id: 956254dcd4c337374d945465ff228fc40595fc3e
											
										
										
											2018-09-05 14:09:22 +03:00
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								// sloppy implementation of factorial
-												WIP Ewald 2D in 3D general z != 0 constant factors.


Former-commit-id: 787689f357bd8670948ba8ce7d8dc1205ca77d0f
											
										
										
											2020-05-29 15:55:52 +03:00
+								// We prefer to avoid tgamma/lgamma, as their errors are about 4+ bits
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								static inline double factorial(const int n) {
 								  assert(n >= 0);
 								  if (n < 0)
 								    return 0; // should not happen in the functions below. (Therefore the assert above)
 								  else if (n <= 20) {
 								    double fac = 1;
 								    for (int i = 1; i <= n; ++i)
 								      fac *= i;
 								    return fac;
 								  }
 								  else
 								    return tgamma(n + 1); // hope it's precise and that overflow does not happen
 								}
-												WIP Ewald 2D in 3D general z != 0 constant factors.


Former-commit-id: 787689f357bd8670948ba8ce7d8dc1205ca77d0f
											
										
										
											2020-05-29 15:55:52 +03:00
+								// sloppy implementation of double factorial n!!
 								static inline double double_factorial(int n) {
 								  assert(n >= 0);
 								  if (n <= 25) {
 								    double fac = 1;
 								    while (n > 0) {
 								      fac *= n;
 								      n -= 2;
 								    }
 								    return fac;
 								  } else {
 								    if (n % 2) { // odd, (2*k - 1)!! = 2**k * Γ(k + 0.5) / sqrt(п)
 								      const int k = n / 2 + 1;
 								      return pow(2, k) * tgamma(k + 0.5) / M_SQRTPI;
 								    } else { // even, n!! = 2**(n/2) * (n/2)!
 								      const int k = n/2;
 								      return pow(2, k) * factorial(k);
 								    }
 								  }
 								}
 								// sloppy implementation of (n/2)! = Γ(n/2 + 1)
 								// It is _slightly_ more precise than direct call of tgamma for small odd n
 								static inline double factorial_of_half(const int n2) {
 								  assert(n2 >= 0);
 								  if (n2 % 2 == 0) return factorial(n2/2);
 								  else {
 								    if (n2 <= 50) { // odd, use (k - 0.5)! =  Γ(k + 0.5) = 2**(-k) (2*k - 1)!! sqrt(п) for small n2
 								      const int k = n2 / 2 + 1;
 								      double fac2 = 1;
 								      for(int j = 2*k - 1; j > 0; j -= 2)
 								        fac2 *= j;
 								      return fac2 * pow(2, -k) * M_SQRTPI;
 								    }
 								    else return tgamma(1. + 0.5*n2);
 								  }
 								}
-												Ewald summation LR part, stupid.


Former-commit-id: 9795ad00b41e83c30feb027853a13959d8b506ea
											
										
										
											2018-08-27 09:45:11 +03:00
+								static inline complex double csq(complex double x) { return x * x; }
-												SR ewald sum in progress


Former-commit-id: 927486c4e73f5d60dd1f3c94ff5b4878506a5e17
											
										
										
											2018-08-27 14:39:29 +03:00
+								static inline double sq(double x) { return x * x; }
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
-												Doxygen, TODO list.


Former-commit-id: 72381c6157319efecf6c62f98851df6194e2a972
											
										
										
											2019-06-27 16:42:35 +03:00
+								/// Metadata describing the normalisation conventions used in ewald32_constants_t.
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
+								typedef enum {
 								  EWALD32_CONSTANTS_ORIG, // As in [1, (4,5)], NOT USED right now.
 								  EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
 								                              * convention – the $e^{im\alpha_pq}$ term in [1,(4.5)] being
 								                              * replaced by the respective $Y_n^m(\pi/2,\alpha)$
 								                              * spherical harmonic. See notes/ewald.lyx.
 								                              */
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								} ewald3_constants_option;
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								// TODO perhaps rewrite everything as agnostic.
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC;
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
+								    const int csphase)
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								{
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								  //if (c == NULL) return NULL; // Do I really want to do this?
 								  c->lMax = lMax;
-												Scalar m,n indexing to allow n = 0.


Former-commit-id: 4b6f2f3611a00e2019b54bfe73c407baafdd8355
											
										
										
											2018-09-05 15:39:02 +03:00
+								  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
 								  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
 								  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								  //if (c->s1_jMaxes == NULL) return NULL;
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								  // ----- the "xy-plane constants" ------
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								  //determine sizes
 								  size_t s1_constfacs_sz = 0;
-												Scalar m,n indexing to allow n = 0.


Former-commit-id: 4b6f2f3611a00e2019b54bfe73c407baafdd8355
											
										
										
											2018-09-05 15:39:02 +03:00
+								  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
 								    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								    if ((m + n) % 2 == 0)
 								      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n-abs(m))/2);
 								    else
 								      c->s1_jMaxes[y] = -1;
 								  }
-												[ewald.c] Fix malloc size


Former-commit-id: a2da5e820d4835e1cf91ab308a63f0aedd431789
											
										
										
											2018-09-07 15:39:52 +03:00
+								  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								  size_t s1_constfacs_sz_cumsum = 0;
-												Scalar m,n indexing to allow n = 0.


Former-commit-id: 4b6f2f3611a00e2019b54bfe73c407baafdd8355
											
										
										
											2018-09-05 15:39:02 +03:00
+								  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
 								    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								    if ((m + n) % 2 == 0) {
 								      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
 								      // and here comes the actual calculation
 								      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j){
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
+								        switch(type) {
 								          case EWALD32_CONSTANTS_ORIG: // NOT USED
 								            c->s1_constfacs[y][j] = -0.5 * ipow(n+1) * min1pow((n+m)/2)
 								              * sqrt((2*n + 1) * factorial(n-m) * factorial(n+m))
 								              * min1pow(j) * pow(0.5, n-2*j)
 								              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j))
 								              * pow(0.5, 2*j-1);
 								            break;
 								          case EWALD32_CONSTANTS_AGNOSTIC:
-												[ewald.c] Fix malloc size


Former-commit-id: a2da5e820d4835e1cf91ab308a63f0aedd431789
											
										
										
											2018-09-07 15:39:52 +03:00
+								            c->s1_constfacs[y][j] = -2 * ipow(n+1) * M_SQRTPI
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
+								              * factorial((n-m)/2) * factorial((n+m)/2)
 								              * min1pow(j)
 								              / (factorial(j) * factorial((n-m)/2-j) * factorial((n+m)/2-j));
 								            break;
 								          default:
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								            QPMS_INVALID_ENUM(type);
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
+								        }
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								      }
 								      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
 								    }
 								    else
 								      c->s1_constfacs[y] = NULL;
 								  }
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
-												Constant factors in general (off-plane) Ewald 2D-in-3D sum


Former-commit-id: a8224c69682b765a36988ee62e399d97cd979f2c
											
										
										
											2020-05-30 22:39:20 +03:00
+								  // -----  New generation 2D-in-3D constants ------
 								  // TODO it is not necessary to treat +|m| and -|m| cases separately
 								  // N.B. currently, this is only valid for EWALD32_CONSTANTS_AGNOSTIC (NOT CHECKED!)
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								  c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));
-												Constant factors in general (off-plane) Ewald 2D-in-3D sum


Former-commit-id: a8224c69682b765a36988ee62e399d97cd979f2c
											
										
										
											2020-05-30 22:39:20 +03:00
+								  //determine sizes
 								  size_t S1_constfacs_sz = 0;
 								  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
 								    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
 								    const qpms_l_t L_M = n - abs(m);
 								    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
 								      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
 								      for(qpms_l_t s = j + (L_M - j) % 2;
 								          (s <= 2 * j) && (s <= L_M);
 								          s += 2) {
 								        ++S1_constfacs_sz;
 								      }
 								    }
 								  }
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
-												Constant factors in general (off-plane) Ewald 2D-in-3D sum


Former-commit-id: a8224c69682b765a36988ee62e399d97cd979f2c
											
										
										
											2020-05-30 22:39:20 +03:00
+								  c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
 								  size_t S1_constfacs_sz_cumsum = 0; // second count
 								  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
 								    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
 								    const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI
 								              * factorial((n-m)/2) * factorial((n+m)/2);
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								    c->S1_constfacs[y] = c->S1_constfacs_base + S1_constfacs_sz_cumsum;
-												Constant factors in general (off-plane) Ewald 2D-in-3D sum


Former-commit-id: a8224c69682b765a36988ee62e399d97cd979f2c
											
										
										
											2020-05-30 22:39:20 +03:00
+								    size_t coeffs_per_y = 0;
 								    const qpms_l_t L_M = n - abs(m);
 								    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
 								      // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
 								      for(qpms_l_t s = j + (L_M - j) % 2;
 								          (s <= 2 * j) && (s <= L_M);
 								          s += 2) {
 								        c->S1_constfacs_base[S1_constfacs_sz_cumsum] =
 								             yfactor * min1pow(j)
 								             / factorial(2*j-s) / factorial(s - j)
 								             / factorial_of_half(n - m - s) / factorial_of_half(n + m - s);
 								        ++S1_constfacs_sz_cumsum;
 								      }
 								    }
 								    S1_constfacs_sz_cumsum += coeffs_per_y;
 								  }
 								  QPMS_ASSERT(S1_constfacs_sz_cumsum = S1_constfacs_sz);
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								  c->S1_constfacs[c->nelem_sc] = c->S1_constfacs_base + S1_constfacs_sz; // For easier limit checks
-												Constant factors in general (off-plane) Ewald 2D-in-3D sum


Former-commit-id: a8224c69682b765a36988ee62e399d97cd979f2c
											
										
										
											2020-05-30 22:39:20 +03:00
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								  // ------ the "z-axis constants" -----
 								  // determine sizes
 								  size_t s1_constfacs_1Dz_sz;
 								  {
 								    const qpms_l_t sz_n_lMax = lMax/2 + 1;  // number of different j's for n = lMax
 								    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
 								                                     : isq(sz_n_lMax);
 								  }
 								  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								  c->s1_constfacs_1Dz = malloc((lMax+1)*sizeof(complex double *));
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
 								  size_t s1_constfacs_1Dz_sz_cumsum = 0;
 								  for (qpms_l_t n = 0; n <= lMax; ++n) {
 								    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
 								    for (qpms_l_t j = 0; j <= n/2; ++j) {
 								      switch(type) {
 								        case EWALD32_CONSTANTS_AGNOSTIC:
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								          c->s1_constfacs_1Dz[n][j] = -ipow(n+1) * min1pow(j) * factorial(n)
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								            / (factorial(j) * pow(2, 2*j) * factorial(n - 2*j));
 								          break;
 								        default:
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								          QPMS_INVALID_ENUM(type); // wrong type argument or not implemented
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								      }
 								    }
 								    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
 								  }
 								  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);
-												General 3D Ewald short-range part now compiles.
Unchecked, untested.


Former-commit-id: 40c10a0eef6575cd29e95be5a0908e28c24ded1d
											
										
										
											2018-11-21 21:41:20 +02:00
+								  c->legendre_csphase = csphase;
-												ewald.c compiles


Former-commit-id: 7bcd240f3d9b5aba233550e4242736e700f96c35
											
										
										
											2018-09-05 10:58:13 +03:00
+								  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
 								  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
-												ewald.c compiles


Former-commit-id: 7bcd240f3d9b5aba233550e4242736e700f96c35
											
										
										
											2018-09-05 10:58:13 +03:00
+								  // N.B. here I use the GSL_SF_LEGENRE_NONE, in order to be consistent with translations.c
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
-												ewald.c compiles


Former-commit-id: 7bcd240f3d9b5aba233550e4242736e700f96c35
											
										
										
											2018-09-05 10:58:13 +03:00
+								  // Moreover, using this approach (i.e. gsl) takes about 64kB extra memory
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0));
 								  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1));
 								  QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1));
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								  return c;
 								}
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
-												Ewald short-range part etc.


Former-commit-id: 181d2da97f80dfcbe942d27819d831895a2df263
											
										
										
											2018-09-05 09:07:03 +03:00
+								  free(c->legendre0);
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								  free(c->legendre_plus1);
 								  free(c->legendre_minus1);
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								  free(c->s1_constfacs);
 								  free(c->s1_constfacs_base);
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								  free(c->S1_constfacs);
 								  free(c->S1_constfacs_base);
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								  free(c->s1_constfacs_1Dz_base);
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								  free(c->s1_constfacs_1Dz);
-												Lattice sum constants fators


Former-commit-id: 0337b5e459ad57ca81c4c903f1bc4446ec7e5566
											
										
										
											2018-08-21 18:13:42 +03:00
+								  free(c->s1_jMaxes);
 								  free(c);
 								}
-												Ewald summation LR part, stupid.


Former-commit-id: 9795ad00b41e83c30feb027853a13959d8b506ea
											
										
										
											2018-08-27 09:45:11 +03:00
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								int ewald3_sigma0(complex double *result, double *err,
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								    const qpms_ewald3_constants_t *c,
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								    const double eta, const complex double k)
-												Sigma0 ready, it's time for writing some tests.


Former-commit-id: 956254dcd4c337374d945465ff228fc40595fc3e
											
										
										
											2018-09-05 14:09:22 +03:00
+								{
 								  qpms_csf_result gam;
-												Remove the branch cut at negative real axis in Ewald sum.

Move the cut to the negative imaginary axis instead.

Also update comments.


Former-commit-id: a70e6a4db0ac33836d672d9ee2356ea19a899a30
											
										
										
											2019-10-09 13:20:20 +03:00
+								  complex double z = -csq(k/(2*eta));
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								  QPMS_ENSURE_SUCCESS(complex_gamma_inc_e(-0.5, z,
-												Remove the branch cut at negative real axis in Ewald sum.

Move the cut to the negative imaginary axis instead.

Also update comments.


Former-commit-id: a70e6a4db0ac33836d672d9ee2356ea19a899a30
											
										
										
											2019-10-09 13:20:20 +03:00
+								     // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								     QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam));
-												Sigma0 ready, it's time for writing some tests.


Former-commit-id: 956254dcd4c337374d945465ff228fc40595fc3e
											
										
										
											2018-09-05 14:09:22 +03:00
+								  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI;
 								  if(err)
-												Fix ewalds.c test output;  n.b. the current long-range parts.


Former-commit-id: f51675842d53a2ba0c6c2821180c11fe60445257
											
										
										
											2018-09-06 15:00:38 +03:00
+								    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0,0)] / 2 / M_SQRTPI);
-												Sigma0 ready, it's time for writing some tests.


Former-commit-id: 956254dcd4c337374d945465ff228fc40595fc3e
											
										
										
											2018-09-05 14:09:22 +03:00
+								  return 0;
 								}
-												Handle k->0 in Ewald sum.


Former-commit-id: fb61b297ead4828541b763b2ecdb0c81de1d35f1
											
										
										
											2019-09-16 12:37:32 +03:00
+								// Wrapper over cpow to correctly handle the k->0 behaviour
 								static inline complex double cpow_0lim_zi(complex double x, long z)
 								{
 								  if (x == 0 && z == 0)
 								    return 1;
 								  else if (x == 0 && z > 0) // lol branch cut in cpow
 								    return 0;
 								  else
 								    return cpow(x, z);
 								}
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								int ewald3_21_xy_sigma_long (
 								    complex double *target, // must be c->nelem_sc long
 								    double *err,
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								    const qpms_ewald3_constants_t *c,
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								    const double eta, const complex double k,
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								    const double unitcell_volume /* with the corresponding lattice dimensionality */,
 								    const LatticeDimensionality latdim,
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								    PGen *pgen_K, const bool pgen_generates_shifted_points
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
 								     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
 								     * and adds beta to the generated points before calculations.
 								     * If true, it assumes that they are already shifted.
 								     */,
 								    const cart3_t beta,
 								    const cart3_t particle_shift
 								    )
 								{
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								  const bool k_is_real = (cimag(k) == 0);
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
 								  assert((latdim & LAT1D) || (latdim & LAT2D));
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								  const qpms_y_t nelem_sc = c->nelem_sc;
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								  assert(nelem_sc > 0);
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								  const qpms_l_t lMax = c->lMax;
 								  // Manual init of the ewald summation targets
 								  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
 								  memset(target, 0, nelem_sc * sizeof(complex double));
 								  double *err_c = NULL;
 								  if (err) {
 								    err_c = calloc(nelem_sc, sizeof(double));
 								    memset(err, 0, nelem_sc * sizeof(double));
 								  }
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								#ifdef EWALD_AUTO_CUTOFF
 								  /*
 								   * Experimental: stop the evaluation if the relative contribution of the "last layer"
 								   * is below DBL_EPSILON to speed up the calculation.
 								   * This obviously can currently work only for "suitable PGen"s that generate points in
 								   * layers with increasing distance from the origin.
 								   * Currently, the "layers" are chunks of 12 * lw + 1 points, where w is an
 								   * increasing integer index (starting from 1).
 								   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
 								   * during run time instead of using this EWALD_AUTO_CUTOFF macro.
 								   */
 								  size_t lw = 1; // "Layer" index.
 								  size_t li = 0; // Counter of points inside the current "layer".
 								  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
 								  kahaninit(&lsum, &lsum_c);
 								#endif
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								  const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
 								  if (k_is_real)
 								    assert(creal(commonfac) > 0);
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								  PGenSphReturnData pgen_retdata;
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								#ifndef NDEBUG
 								  double rbeta_pq_prev;
 								#endif
 								  // recycleable values if rbeta_pq stays the same:
 								  complex double gamma_pq;
-												WIP 2D-in-3D Ewald sum for z != 0

Not tested; error estimates not yet implemented.


Former-commit-id: d6886f64eb8b7e137abf6f187f8cd75f21a5f591
											
										
										
											2020-05-19 16:00:08 +03:00
+								  complex double z; // I?*κ*γ*particle_shift.z
 								  complex double x; // κ*γ/(2*η)
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								  // space for Gamma_pq[j]'s
-												Implementation of the Δ_n factor as a series; error estimates.

The error estimate for the recurrence approach is buggy.


Former-commit-id: f5183eaacf6a592461f07f72e04d346a42f9fca6
											
										
										
											2020-05-27 15:52:13 +03:00
+								  complex double Gamma_pq[lMax/2+1];
 								  double Gamma_pq_err[lMax/2+1];
-												Ewald sum optimisation

Avoiding repeated cpow() calls yields more than 5x speedup
in the off-plane case.

											
										
										
											2020-07-03 11:46:34 +03:00
+								  // cpow() is expensive, so we want to save and reuse these too:
 								  complex double minus_k_shiftz_pow[2*lMax + 1]; // __[i] = cpow(-k * particle_shift.z, i)
 								  {
 								    long complex double x = 1;
 								    for(qpms_l_t i = 0; i <= 2*lMax; ++i) {
 								      minus_k_shiftz_pow[i] = x;
 								      x *= -k * particle_shift.z;
 								    }
 								  }
 								  complex double rbeta_pq_div_k_pow[lMax + 1]; // __[i] = cpow_0lim_zi(rbeta_pq/k, i)
 								  complex double gamma_pq_powm1[2*lMax + 1]; // __[i] = cpow(gamma_pq, i-1)
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
 								  // CHOOSE POINT BEGIN
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								  // TODO maybe PGen_next_sph is not the best coordinate system choice here
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								    cart3_t K_pq_cart;
 								    sph_t beta_pq_sph;
 								    if (pgen_generates_shifted_points) {
 								      beta_pq_sph = pgen_retdata.point_sph;
 								      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
 								      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
 								    } else { // as in the old _points_and_shift functions
 								      const sph_t K_pq_sph = pgen_retdata.point_sph;
 								      K_pq_cart = sph2cart(K_pq_sph);
 								      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
 								      beta_pq_sph = cart2sph(beta_pq_cart);
 								    }
 								    const double rbeta_pq = beta_pq_sph.r;
 								    const double arg_pq = beta_pq_sph.phi;
-												ewald translations legacy code removal


Former-commit-id: da74041f5105d6b3a3f9fe3ddaa7306e973bd440
											
										
										
											2019-08-19 10:55:45 +03:00
+								    //const double beta_pq_theta = beta_pq_sph.theta; //unused
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
 								  // CHOOSE POINT END
 								    const complex double phasefac = cexp(I*cart3_dot(K_pq_cart,particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
-												Replace PGEN_NEWR flag with PGEN_OLD_R with the opposite meaning.


Former-commit-id: 5fef7e81addc980a7202b55fd52152981abe5cba
											
										
										
											2019-06-27 12:13:25 +03:00
+								    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || (pgen_retdata.flags & !PGEN_OLD_R);
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								    // R-DEPENDENT BEGIN
-												WIP 2D-in-3D Ewald sum for z != 0

Not tested; error estimates not yet implemented.


Former-commit-id: d6886f64eb8b7e137abf6f187f8cd75f21a5f591
											
										
										
											2020-05-19 16:00:08 +03:00
+								    //void ewald3_2_sigma_long_Delta(complex double *target, int maxn, complex double x, complex double z) {
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								    if (new_rbeta_pq) {
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								      gamma_pq = clilgamma(rbeta_pq/k);
-												Ewald sum optimisation

Avoiding repeated cpow() calls yields more than 5x speedup
in the off-plane case.

											
										
										
											2020-07-03 11:46:34 +03:00
+								      { // fill gamma_pq_powm1[] and rbeta_pq_div_k_pow[]
 								        long complex double x = 1./gamma_pq;
 								        for(qpms_l_t i = 0; i <= 2*lMax; ++i) {
 								          gamma_pq_powm1[i] = x;
 								          x *= gamma_pq;
 								        }
 								        for(qpms_l_t i = 0; i <= lMax; ++i) // not fastest, but foolproof
 								          rbeta_pq_div_k_pow[i] = cpow_0lim_zi(rbeta_pq / k, i);
 								      }
-												WIP 2D-in-3D Ewald sum for z != 0

Not tested; error estimates not yet implemented.


Former-commit-id: d6886f64eb8b7e137abf6f187f8cd75f21a5f591
											
										
										
											2020-05-19 16:00:08 +03:00
+								      complex double x = gamma_pq*k/(2*eta);
 								      complex double x2 = x*x;
-												Ewald sum optimisation

Avoiding repeated cpow() calls yields more than 5x speedup
in the off-plane case.

											
										
										
											2020-07-03 11:46:34 +03:00
-												WIP 2D-in-3D Ewald sum for z != 0

Not tested; error estimates not yet implemented.


Former-commit-id: d6886f64eb8b7e137abf6f187f8cd75f21a5f591
											
										
										
											2020-05-19 16:00:08 +03:00
+								      if(particle_shift.z == 0) {
 								        for(qpms_l_t j = 0; j <= lMax/2; ++j) {
-												Implementation of the Δ_n factor as a series; error estimates.

The error estimate for the recurrence approach is buggy.


Former-commit-id: f5183eaacf6a592461f07f72e04d346a42f9fca6
											
										
										
											2020-05-27 15:52:13 +03:00
+								          qpms_csf_result Gam;
-												WIP 2D-in-3D Ewald sum for z != 0

Not tested; error estimates not yet implemented.


Former-commit-id: d6886f64eb8b7e137abf6f187f8cd75f21a5f591
											
										
										
											2020-05-19 16:00:08 +03:00
+								          int retval = complex_gamma_inc_e(0.5-j, x2,
 								            // we take the branch which is principal for the Re x < 0, Im x < 0 quadrant, cf. [Linton, p. 642 in the middle]
-												Implementation of the Δ_n factor as a series; error estimates.

The error estimate for the recurrence approach is buggy.


Former-commit-id: f5183eaacf6a592461f07f72e04d346a42f9fca6
											
										
										
											2020-05-27 15:52:13 +03:00
+								            QPMS_LIKELY(creal(x2) < 0) && !signbit(cimag(x2)) ? -1 : 0, &Gam);
-												WIP 2D-in-3D Ewald sum for z != 0

Not tested; error estimates not yet implemented.


Former-commit-id: d6886f64eb8b7e137abf6f187f8cd75f21a5f591
											
										
										
											2020-05-19 16:00:08 +03:00
+								          QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
-												Implementation of the Δ_n factor as a series; error estimates.

The error estimate for the recurrence approach is buggy.


Former-commit-id: f5183eaacf6a592461f07f72e04d346a42f9fca6
											
										
										
											2020-05-27 15:52:13 +03:00
+								          Gamma_pq[j] = Gam.val;
 								          if(err) Gamma_pq_err[j] = Gam.err;
-												WIP 2D-in-3D Ewald sum for z != 0

Not tested; error estimates not yet implemented.


Former-commit-id: d6886f64eb8b7e137abf6f187f8cd75f21a5f591
											
										
										
											2020-05-19 16:00:08 +03:00
+								        }
 								        if (latdim & LAT1D)
 								          factor1d =  M_SQRT1_2 * .5 * k * gamma_pq;
 								      } else if (latdim & LAT2D) {
 								        QPMS_UNTESTED;
 								        // TODO check the branches/phases!
 								        complex double z = I * k * gamma_pq * particle_shift.z;
-												Branch selection for Δ_n in Ewald sum


Former-commit-id: 6c3d6e6aa9010bb66975f65397b8a061fac1b5ef
											
										
										
											2020-05-28 13:13:08 +03:00
+								        ewald3_2_sigma_long_Delta(Gamma_pq, err ?  Gamma_pq_err : NULL, lMax/2, x, 0 /* FIXME */, z);
-												WIP 2D-in-3D Ewald sum for z != 0

Not tested; error estimates not yet implemented.


Former-commit-id: d6886f64eb8b7e137abf6f187f8cd75f21a5f591
											
										
										
											2020-05-19 16:00:08 +03:00
+								      } else {
 								        QPMS_NOT_IMPLEMENTED("1D lattices in 3D space outside of the line not implemented");
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								      }
 								    }
 								    // R-DEPENDENT END
 								    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
 								    // and just fetched for each n, m pair
 								    for(qpms_l_t n = 0; n <= lMax; ++n)
 								      for(qpms_m_t m = -n; m <= n; ++m) {
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								        if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								          continue;
 								        const qpms_y_t y = qpms_mn2y_sc(m, n);
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								        size_t constidx = 0; // constants offset
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								          complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
 								          double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
 								        if (particle_shift.z == 0) { // TODO remove when the general case is stable and tested
 								          assert((n-abs(m))/2 == c->s1_jMaxes[y]);
 								          for(qpms_l_t j = 0; j <= c->s1_jMaxes[y]/*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
-												Ewald sum optimisation

Avoiding repeated cpow() calls yields more than 5x speedup
in the off-plane case.

											
										
										
											2020-07-03 11:46:34 +03:00
+								            complex double summand = rbeta_pq_div_k_pow[n-2*j]
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								              * e_imalpha_pq  * c->legendre0[gsl_sf_legendre_array_index(n,abs(m))] * min1pow_m_neg(m) // This line can actually go outside j-loop
-												Ewald sum optimisation

Avoiding repeated cpow() calls yields more than 5x speedup
in the off-plane case.

											
										
										
											2020-07-03 11:46:34 +03:00
+								              * gamma_pq_powm1[2*j]// * Gamma_pq[j] bellow (GGG) after error computation
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								              * c->s1_constfacs[y][j];
 								            if(err) {
 								              // FIXME include also other errors than Gamma_pq's relative error
 								               kahanadd(&jsum_err, &jsum_err_c, Gamma_pq_err[j] * cabs(summand));
 								            }
 								            summand *= Gamma_pq[j]; // GGG
 								            ckahanadd(&jsum, &jsum_c, summand);
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								          }
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								          jsum *= phasefac * factor1d; // PFC
 								          ckahanadd(target + y, target_c + y, jsum);
 								#ifdef EWALD_AUTO_CUTOFF
 								          kahanadd(&lsum, &lsum_c, cabs(jsum));
 								#endif
 								          if(err) kahanadd(err + y, err_c + y, jsum_err);
 								        } else { // particle_shift.z != 0
 								          const qpms_l_t L_M = n - abs(m);
 								          for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
 								            complex double ssum, ssum_c; ckahaninit(&ssum, &ssum_c);
 								            // TODO errors of ssum
 								            // inner sum: j <= s <= min(2*j, n - |m|), s has the same parity as n - |m|
 								            for(qpms_l_t s = j + (L_M - j) % 2;
 								                (s <= 2 * j) && (s <= L_M);
 								                s += 2) {
 								              complex double ssummand = c->S1_constfacs[y][constidx]
-												Ewald sum optimisation

Avoiding repeated cpow() calls yields more than 5x speedup
in the off-plane case.

											
										
										
											2020-07-03 11:46:34 +03:00
+								                * minus_k_shiftz_pow[2*j - s] * rbeta_pq_div_k_pow[n - s];
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								              ckahanadd(&ssum, &ssum_c, ssummand);
 								              ++constidx;
 								            }
-												Ewald sum optimisation

Avoiding repeated cpow() calls yields more than 5x speedup
in the off-plane case.

											
										
										
											2020-07-03 11:46:34 +03:00
+								            const complex double jfactor = e_imalpha_pq * Gamma_pq[j] * gamma_pq_powm1[2*j];
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								            if (err) { // FIXME include also other sources of error than Gamma_pq's relative error
 								              double jfactor_err = Gamma_pq_err[j] * pow(cabs(gamma_pq), 2*j - 1);
 								              kahanadd(&jsum_err, &jsum_err_c, jfactor_err * ssum);
 								            }
 								            complex double jsummand = jfactor * ssum;
 								            ckahanadd(&jsum, &jsum_c, jsummand);
 								          }
 								          jsum *= phasefac; // factor1d not here, off-axis sums not implemented/allowed.
 								          ckahanadd(target + y, target_c + y, jsum);
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								#ifdef EWALD_AUTO_CUTOFF
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								          kahanadd(&lsum, &lsum_c, cabs(jsum));
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								#endif
-												Evaluate scattered electric fields in 2d-periodic system.


Former-commit-id: 36386215f9d330a3047cb9a294ccc1de55f2121f
											
										
										
											2020-05-31 14:13:56 +03:00
+								          if(err) kahanadd(err + y, err_c + y, jsum_err);
 								        }
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								      }
 								#ifndef NDEBUG
 								    rbeta_pq_prev = rbeta_pq;
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								#endif
 								#ifdef EWALD_AUTO_CUTOFF
 								    ++li;
 								    double cursum_min = INFINITY;
 								    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
 								      const double a = cabs(target[y]);
 								      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
 								        cursum_min = MIN(cursum_min, a);
 								    }
 								    if (li >= 12 * lw + 1) {
 								      if(lsum < DBL_EPSILON * cursum_min)
 								        goto ewald3_21_xy_sigma_long_end_point_loop;
 								      kahaninit(&lsum, &lsum_c);
 								      li = 0;
 								      ++lw;
 								    }
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
+								#endif
 								  } // END POINT LOOP
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								#ifdef EWALD_AUTO_CUTOFF
 								ewald3_21_xy_sigma_long_end_point_loop:
 								#endif
-												Duspát


Former-commit-id: 03503b9541198b86495636cef73271f5cce1d759
											
										
										
											2018-11-22 02:02:31 +02:00
 								  free(err_c);
 								  free(target_c);
 								  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
 								    target[y] *= commonfac;
 								  if(err)
 								    for(qpms_y_t y = 0; y < nelem_sc; ++y)
 								      err[y] *= commonfac;
 								  return 0;
 								}
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								// 1D chain along the z-axis; not many optimisations here as the same
 								// shifted beta radius could be recycled only once anyways
 								int ewald3_1_z_sigma_long (
 								    complex double *target, // must be c->nelem_sc long
 								    double *err,
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								    const qpms_ewald3_constants_t *c,
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								    const double eta, const complex double k,
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								    const double unitcell_volume /* length (periodicity) in this case */,
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								    const LatticeDimensionality latdim,
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								    PGen *pgen_K, const bool pgen_generates_shifted_points
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
 								     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
 								     * and adds beta to the generated points before calculations.
 								     * If true, it assumes that they are already shifted.
 								     */,
 								    const cart3_t beta,
 								    const cart3_t particle_shift
 								    )
 								{
 								  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
 								  assert(beta.x == 0 && beta.y == 0);
 								  assert(particle_shift.x == 0 && particle_shift.y == 0);
 								  const double beta_z = beta.z;
 								  const double particle_shift_z = particle_shift_z;
 								  const qpms_y_t nelem_sc = c->nelem_sc;
 								  const qpms_l_t lMax = c->lMax;
 								  // Manual init of the ewald summation targets
 								  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
 								  memset(target, 0, nelem_sc * sizeof(complex double));
 								  double *err_c = NULL;
 								  if (err) {
 								    err_c = calloc(nelem_sc, sizeof(double));
 								    memset(err, 0, nelem_sc * sizeof(double));
 								  }
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								  const double commonfac = 1/(k*unitcell_volume); // multiplied in the very end (CFC)
 								  assert(commonfac > 0);
 								  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum).
 								  qpms_csf_result Gamma_pq[lMax/2+1];
 								  PGenSphReturnData pgen_retdata;
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								  // CHOOSE POINT BEGIN
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								  // TODO FIXME USE PGen_next_z
 								  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								    assert(pgen_retdata.flags & PGEN_AT_Z);
 								    double K_z, beta_mu_z;
 								    if (pgen_generates_shifted_points) {
 								      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
 								        pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
 								      K_z = beta_mu_z - beta_z;
 								    } else { // as in the old _points_and_shift functions
 								      K_z = ((pgen_retdata.point_sph.theta == 0) ?
 								          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
 								      beta_mu_z = K_z + beta_z;
 								    }
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								    double rbeta_mu = fabs(beta_mu_z);
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								  // CHOOSE POINT END
 								    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
 								    // R-DEPENDENT BEGIN
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								    complex double gamma_pq = clilgamma(rbeta_mu/k);  // For real beta and k this is real or pure imaginary ...
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								    const complex double z = csq(gamma_pq*k/(2*eta));// ... so the square (this) is in fact real.
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								    for(qpms_l_t j = 0; j <= lMax/2; ++j) {
-												Remove the branch cut at negative real axis in Ewald sum.

Move the cut to the negative imaginary axis instead.

Also update comments.


Former-commit-id: a70e6a4db0ac33836d672d9ee2356ea19a899a30
											
										
										
											2019-10-09 13:20:20 +03:00
+								      int retval = complex_gamma_inc_e(0.5-j, z,
 								          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
 								          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq+j);
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								      QPMS_ENSURE_SUCCESS_OR(retval, GSL_EUNDRFLW);
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								    }
 								    // R-DEPENDENT END
 								    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								    // and just fetched for each n
 								    for(qpms_l_t n = 0; n <= lMax; ++n) {
 								        const qpms_y_t y = qpms_mn2y_sc(0, n);
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
 								        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								        for(qpms_l_t j = 0; j <= n/2; ++j) {
 								          complex double summand = pow(rbeta_mu/k, n-2*j)
 								            * ((beta_mu_z > 0) ? // TODO this can go outsize the j-loop
 								                c->legendre_plus1[gsl_sf_legendre_array_index(n,0)] :
 								                (c->legendre_minus1[gsl_sf_legendre_array_index(n,0)] * min1pow(n))
 								              )
 								            // * min1pow_m_neg(m) // not needed as m == 0
 								            * cpow(gamma_pq, 2*j) // * Gamma_pq[j] bellow (GGG) after error computation
 								            * c->s1_constfacs_1Dz[n][j];
 								              /* s1_consstfacs_1Dz[n][j] =
 								               *
 								               *     -I**(n+1) (-1)**j * n!
 								               *   --------------------------
 								               *   j! * 2**(2*j) * (n - 2*j)!
 								               */
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								          if(err) {
 								            // FIXME include also other errors than Gamma_pq's relative error
 								             kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
 								          }
 								          summand *= Gamma_pq[j].val; // GGG
 								          ckahanadd(&jsum, &jsum_c, summand);
 								        }
-												Implement LR part of 1D in 3D Ewald sum along z-axis; compiles, untested


Former-commit-id: 2a9b972dc012401c08758ee768667dfd3a3882c4
											
										
										
											2018-11-24 14:51:57 +02:00
+								        jsum *= phasefac; // PFC
-												Práce na 1D ewaldovi; du dom.


Former-commit-id: dbecec91884dd005a2001c71d4bbe50de74fc8d0
											
										
										
											2018-11-22 16:49:37 +02:00
+								        ckahanadd(target + y, target_c + y, jsum);
 								        if(err) kahanadd(err + y, err_c + y, jsum_err);
 								      }
 								  } // END POINT LOOP
 								  free(err_c);
 								  free(target_c);
 								  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
 								    target[y] *= commonfac;
 								  if(err)
 								    for(qpms_y_t y = 0; y < nelem_sc; ++y)
 								      err[y] *= commonfac;
 								  return 0;
 								}
-												Wrap function ewald3_sigma_long


Former-commit-id: b36f1095d792b9bfd54d28fa9d0da5de0da0ba3f
											
										
										
											2018-11-25 01:10:32 +02:00
+								int ewald3_sigma_long (
 								    complex double *target, // must be c->nelem_sc long
 								    double *err,
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								    const qpms_ewald3_constants_t *c,
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								    const double eta, const complex double k,
-												Wrap function ewald3_sigma_long


Former-commit-id: b36f1095d792b9bfd54d28fa9d0da5de0da0ba3f
											
										
										
											2018-11-25 01:10:32 +02:00
+								    const double unitcell_volume /* with the corresponding lattice dimensionality */,
 								    const LatticeDimensionality latdim,
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								    PGen *pgen_K, const bool pgen_generates_shifted_points
-												Wrap function ewald3_sigma_long


Former-commit-id: b36f1095d792b9bfd54d28fa9d0da5de0da0ba3f
											
										
										
											2018-11-25 01:10:32 +02:00
+								    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
 								     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
 								     * and adds beta to the generated points before calculations.
 								     * If true, it assumes that they are already shifted.
 								     */,
 								    const cart3_t beta,
 								    const cart3_t particle_shift
 								    )
 								{
 								  assert(latdim & SPACE3D);
 								  if (latdim & LAT_XYONLY)
 								    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
 								        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
 								  else if (latdim & LAT_ZONLY)
 								    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
 								        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
 								  // TODO 3D case and general 2D case
-												Complex wave number for ewald sum (failing) + unit test.


Former-commit-id: a6c5e566028602ecd1047bc0b44677a4eeae3046
											
										
										
											2019-08-19 15:10:39 +03:00
+								  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
-												Wrap function ewald3_sigma_long


Former-commit-id: b36f1095d792b9bfd54d28fa9d0da5de0da0ba3f
											
										
										
											2018-11-25 01:10:32 +02:00
+								}
-												SR ewald sum in progress


Former-commit-id: 927486c4e73f5d60dd1f3c94ff5b4878506a5e17
											
										
										
											2018-08-27 14:39:29 +03:00
 								struct sigma2_integrand_params {
 								  int n;
 								  double k, R;
 								};
 								static double sigma2_integrand(double ksi, void *params) {
 								  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
 								  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2*p->n);
 								}
 								static int ewald32_sr_integral(double r, double k, int n, double eta,
 								    double *result, double *err, gsl_integration_workspace *workspace)
 								{
 								  struct sigma2_integrand_params p;
 								  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
 								  p.n = n;
 								  eta *= R0;
 								  p.k = k * R0;
 								  p.R = r / R0;  // i.e. p.R = 1
 								  gsl_function F;
 								  F.function = sigma2_integrand;
 								  F.params = &p;
 								  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
 								      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
 								      workspace, result, err);
 								  double normfac = pow(R0, -2*p.n - 1);
 								  *result *= normfac;
 								  *err *= normfac;
 								  return retval;
 								}
-												Some preparation for complex k


Former-commit-id: 62f3bc88de27f43ba82199a2fe221ba60b199e0d
											
										
										
											2018-12-21 16:50:53 +02:00
+								// a version allowing complex k
 								struct sigma2_integrand_params_ck {
 								  int n;
 								  complex double k;
 								  double R;
 								};
 								// TODO ther might be some space for optimisation if needed, as now we calculate everything twice
 								// including the whole complex exponential (throwing the imaginary/real part away)
 								static double sigma2_integrand_ck_real(double ksi, void *params) {
 								  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
 								  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
 								}
 								static double sigma2_integrand_ck_imag(double ksi, void *params) {
 								  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
 								  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2*p->n);
 								}
 								static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
 								    complex double *result, double *err, gsl_integration_workspace *workspace)
 								{
 								  struct sigma2_integrand_params_ck p;
 								  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
 								  p.n = n;
 								  eta *= R0;
 								  p.k = k * R0;
 								  p.R = r / R0;  // i.e. p.R = 1
 								  gsl_function F;
 								  F.params = &p;
 								  double result_real, result_imag, err_real, err_imag;
 								  F.function = sigma2_integrand_ck_real;
 								  // TODO check return values
 								  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
 								      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
 								      workspace, &result_real, &err_real);
 								  F.function = sigma2_integrand_ck_imag;
 								  // TODO check return values
 								  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
 								      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
 								      workspace, &result_imag, &err_imag);
 								  *result = result_real + I*result_imag;
 								  *err = sqrt(sq(err_real) + sq(err_imag));
 								  double normfac = pow(R0, -2*p.n - 1);
 								  *result *= normfac;
 								  *err *= normfac;
 								  return retval;
 								}
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								int ewald3_sigma_short(
-												General 3D Ewald short-range part now compiles.
Unchecked, untested.


Former-commit-id: 40c10a0eef6575cd29e95be5a0908e28c24ded1d
											
										
										
											2018-11-21 21:41:20 +02:00
+								                complex double *target, // must be c->nelem_sc long
 								                double *err, // must be c->nelem_sc long or NULL
-												Rename ewald3_constants, move legacy code


Former-commit-id: e83dcfa532f7b8d7345103752aca924a56ad7138
											
										
										
											2018-12-21 21:31:07 +02:00
+								                const qpms_ewald3_constants_t *c,
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								                const double eta, const complex double k /* TODO COMPLEX */,
-												General 3D Ewald short-range part now compiles.
Unchecked, untested.


Former-commit-id: 40c10a0eef6575cd29e95be5a0908e28c24ded1d
											
										
										
											2018-11-21 21:41:20 +02:00
+								                const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								                PGen *pgen_R, const bool pgen_generates_shifted_points
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								                /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
 								                 * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
 								                 * and adds particle_shift to the generated points before calculations.
 								                 * If true, it assumes that they are already shifted (if calculating interaction between
 								                 * different particles in the unit cell).
 								                 */,
 								                const cart3_t beta,
 								                const cart3_t particle_shift
 								                )
 								{
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
 								  const double kreal = creal(k);
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								  const qpms_y_t nelem_sc = c->nelem_sc;
 								  const qpms_l_t lMax = c->lMax;
 								  gsl_integration_workspace *workspace =
 								    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);
 								  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the legendre polynomials (used only in the general case)
 								  // Manual init of the ewald summation targets
 								  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
 								  memset(target, 0, nelem_sc * sizeof(complex double));
 								  double *err_c = NULL;
 								  if (err) {
 								    err_c = calloc(nelem_sc, sizeof(double));
 								    memset(err, 0, nelem_sc * sizeof(double));
 								  }
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								#ifdef EWALD_AUTO_CUTOFF
 								  /*
 								   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
 								   */
 								  size_t lw = 1; // "Layer" index.
 								  size_t li = 0; // Counter of points inside the current "layer".
 								  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
 								  kahaninit(&lsum, &lsum_c);
 								#endif
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								  PGenSphReturnData pgen_retdata;
 								#ifndef NDEBUG
 								  double r_pq_shifted_prev;
 								#endif
 								  // recyclable variables if r_pq_shifted stays the same:
 								  double intres[lMax+1], interr[lMax+1];
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								  complex double cintres[lMax+1];
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
 								// CHOOSE POINT BEGIN
-												ewald.c fix new init memory management.


Former-commit-id: 582e34027bad4bd1ee017886d3dc2120a8c0bf3c
											
										
										
											2018-12-11 13:09:38 +02:00
+								// TODO check whether _next_sph is the optimal coordinate system choice here
 								  while ((pgen_retdata = PGen_next_sph(pgen_R)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								// CHOOSE POINT END
 								    cart3_t Rpq_shifted_cart; // I will need both sph and cart representations anyway...
 								    sph_t Rpq_shifted_sph;
 								    if (pgen_generates_shifted_points) {
-												General 3D Ewald short-range part now compiles.
Unchecked, untested.


Former-commit-id: 40c10a0eef6575cd29e95be5a0908e28c24ded1d
											
										
										
											2018-11-21 21:41:20 +02:00
+								      Rpq_shifted_sph = pgen_retdata.point_sph;
 								      Rpq_shifted_cart = sph2cart(Rpq_shifted_sph);
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								    } else { // as in the old _points_and_shift functions
 								      //const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
 								      const sph_t bravais_point_sph = pgen_retdata.point_sph;
 								      const cart3_t bravais_point_cart = sph2cart(bravais_point_sph);
 								      Rpq_shifted_cart = cart3_add(bravais_point_cart, cart3_scale(-1, particle_shift)); // CHECKSIGN!!!
 								      Rpq_shifted_sph = cart2sph(Rpq_shifted_cart);
 								    }
 								    // TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
 								    const double Rpq_shifted_arg = Rpq_shifted_sph.phi; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
 								    const double Rpq_shifted_theta = Rpq_shifted_sph.theta; // POINT-DEPENDENT
 								    const double r_pq_shifted = Rpq_shifted_sph.r;
 								    // if the radius is the same as in previous cycle, most of the calculations can be recycled
-												Replace PGEN_NEWR flag with PGEN_OLD_R with the opposite meaning.


Former-commit-id: 5fef7e81addc980a7202b55fd52152981abe5cba
											
										
										
											2019-06-27 12:13:25 +03:00
+								    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || (pgen_retdata.flags & !PGEN_OLD_R);
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);
 								    const complex double e_beta_Rpq = cexp(I*cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT
 								    LatticeDimensionality speedup_regime = 0;
 								    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
 								    if ((latdim & LAT_1D_IN_3D_ZONLY)  == LAT_1D_IN_3D_ZONLY)  speedup_regime = LAT_1D_IN_3D_ZONLY;
 								    const double * legendre_array;
 								    switch(speedup_regime) {
 								      // speedup checks for special geometries and Legendre polynomials
 								      case LAT_1D_IN_3D_ZONLY:
 								        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
 								        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
 								        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
 								        break;
 								      case LAT_2D_IN_3D_XYONLY:
 								        assert((pgen_retdata.flags &PGEN_AT_XY) == PGEN_AT_XY);
-												Test of the new api – results disagree with the old one.


Former-commit-id: ed09750246bca9a71e810745f443612ea0b989e8
											
										
										
											2018-12-11 13:36:12 +02:00
+								        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
 								        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								        legendre_array = c->legendre0;
 								        break;
 								      default:
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								        QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf));
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								        legendre_array = legendre_buf;
 								        break;
 								    }
 								    for(qpms_l_t n = 0; n <= lMax; ++n) {
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								      const double complex prefacn = - I * (k_is_real ? pow(2./creal(k),n+1) : cpow(2./k, n+1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
 								      if (new_r_pq_shifted) {
-												Complex k support for new ewald sums.

Basic tests give good results (only up to rounding error differences
compared to the previous versions), the time increase is almost negligible.


Former-commit-id: 0907517ca94f130a0d8b140ba7b525653e24090f
											
										
										
											2018-12-21 21:05:52 +02:00
+								        int retval;
 								        if (k_is_real) {
 								          double intres_real;
 								          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
 								               &intres_real, interr + n, workspace);
 								          cintres[n] = intres_real;
 								        } else
 								          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
 								               cintres+n, interr + n, workspace);
-												Cleanup: eliminate bare abort()s

Former-commit-id: bb1e4ada19e6bcbf87d6ea1fce0897c4478fb045
											
										
										
											2020-03-23 23:04:26 +02:00
+								        QPMS_ENSURE_SUCCESS(retval);
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								      } // otherwise recycle the integrals
 								      for (qpms_m_t m = -n; m <= n; ++m){
 								        complex double e_imf;
 								        // SPEEDUPS for some special geometries
 								        if(speedup_regime == LAT_2D_IN_3D_XYONLY) { //2D lattice inside the xy plane
 								          if((m+n) % 2 != 0) // odd coefficients are zero.
 								            continue; // nothing needed, already done by memset
 								          e_imf = cexp(I*m*Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
-												General 3D Ewald short-range part now compiles.
Unchecked, untested.


Former-commit-id: 40c10a0eef6575cd29e95be5a0908e28c24ded1d
											
										
										
											2018-11-21 21:41:20 +02:00
+								        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								          if (m != 0) // m-non-zero coefficients are zero
 								            continue; // nothing needed, already done by memset
 								          e_imf = 1;
 								        } else { // general 1D/2D/3D lattice in 3D space
 								          e_imf = cexp(I*m*Rpq_shifted_arg);
 								        }
 								        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];
 								        const qpms_y_t y = qpms_mn2y_sc(m,n);
 								        if(err)
 								          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
-												General 3D Ewald short-range part now compiles.
Unchecked, untested.


Former-commit-id: 40c10a0eef6575cd29e95be5a0908e28c24ded1d
											
										
										
											2018-11-21 21:41:20 +02:00
+								              * interr[n])); // TODO include also other errors
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
 								        ckahanadd(target + y, target_c + y, thesummand);
 								#ifdef EWALD_AUTO_CUTOFF
 								        kahanadd(&lsum, &lsum_c, cabs(thesummand));
 								#endif
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								      }
 								    }
 								#ifndef NDEBUG
 								    r_pq_shifted_prev = r_pq_shifted;
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								#endif
 								#ifdef EWALD_AUTO_CUTOFF
 								    ++li;
 								    double cursum_min = INFINITY;
 								    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
 								      const double a = cabs(target[y]);
 								      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
 								        cursum_min = MIN(cursum_min, a);
 								    }
 								    if (li >= 12 * lw + 1) {
 								      if(lsum < DBL_EPSILON * cursum_min)
 								        goto ewald3_sigma_short_end_point_loop;
 								      kahaninit(&lsum, &lsum_c);
 								      li = 0;
 								      ++lw;
 								    }
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
+								#endif
 								  }
-												Experimental speedup by earlier cutoff for Ewald summation.

TODO check whether PGen is properly destroyed


Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
											
										
										
											2019-09-30 14:37:28 +03:00
+								  //END POINT LOOP
 								#ifdef EWALD_AUTO_CUTOFF
 								ewald3_sigma_short_end_point_loop:
 								#endif
-												Half-implemented the general short-range part of ewald sum in 3D,
unchecked, untested, dudom.


Former-commit-id: b3551114b8c7136e378feb6dc78cb575a5bfc162
											
										
										
											2018-11-21 19:49:07 +02:00
 								  gsl_integration_workspace_free(workspace);
 								  if(err) free(err_c);
 								  free(target_c);
 								  return 0;
 								}