General 2D vector translation coefficient app.

Results seem consistent with the prior triangular lattice code Former-commit-id: 99e2aec5d0662c46c2aaa5e4496033ddbb506042
2018-12-12 11:40:18 +02:00 · 2018-12-12 11:40:18 +02:00 · 665ad09dbb
parent 70af55649e
commit 665ad09dbb
3 changed files with 362 additions and 37 deletions
--- a/qpms/apps/2dlattice_ewald.c
+++ b/qpms/apps/2dlattice_ewald.c
@ -0,0 +1,154 @@
 // c99 -o ew_gen_kin -Wall -I ../.. -O2 -ggdb -DQPMS_VECTORS_NICE_TRANSFORMATIONS -DLATTICESUMS32 2dlattice_ewald.c ../translations.c ../ewald.c ../ewaldsf.c ../gaunt.c ../lattices2d.c ../latticegens.c -lgsl -lm -lblas 
 #include <stdio.h>
 #include <math.h>
 #include <string.h>
 #include <qpms/translations.h>
 #include <qpms/lattices.h>
 #include <gsl/gsl_const_mksa.h>
 // Command line order:
 // outfile b1.x b1.y b2.x b2.y lMax scuffomega refindex npart part0.x part0.y [part1.x part1.y [...]]
 //
 // Standard input (per line):
 // k.x k.y
 //
 // Output data format (line):
 //
 #define MAXKCOUNT 200 // 200 // serves as klist default buffer size 
 //#define KMINCOEFF 0.783 //0.9783 // 0.783 // not used if KSTDIN defined
 //#define KMAXCOEFF 1.217 //1.0217 // 1.217 // not used if KSTDIN defined
 #define KLAYERS 20
 #define RLAYERS 20
 const double s3 = 1.732050807568877293527446341505872366942805253810380628055;
 // IMPORTANT: lattice properties here
 const qpms_y_t lMax = 3;
 const double REFINDEX = 1.52;
 const double LATTICE_H = 576e-9;
 static const double SCUFF_OMEGAUNIT = 3e14;
 static const double hbar = GSL_CONST_MKSA_PLANCKS_CONSTANT_HBAR;
 static const double eV = GSL_CONST_MKSA_ELECTRON_CHARGE;
 static const double c0 = GSL_CONST_MKSA_SPEED_OF_LIGHT;
 int main (int argc, char **argv) {
  //const double LATTICE_A = s3*LATTICE_H;
  //const double INVLATTICE_A = 4*M_PI / s3 / LATTICE_A;
  if (argc < 12) abort();
  char *outfile = argv[1];
  char *errfile = NULL; // Filename for the error estimate output; NOT USED
  cart2_t b1 = {strtod(argv[2], NULL), strtod(argv[3], NULL)},
          b2 = {strtod(argv[4], NULL), strtod(argv[5], NULL)};
  const qpms_l_t lMax = strtol(argv[6], NULL, 10); assert(lMax>0);
  const double scuffomega = strtod(argv[7], NULL);
  const double refindex = strtod(argv[8], NULL);
  const int npart = strtol(argv[9], NULL, 10);  assert(npart>0);
  assert(argc >= 2*npart + 10);
  assert(npart > 0);
  cart2_t part_positions[npart];
  for(int p = 0; p < npart; ++p) {
    part_positions[p].x = strtod(argv[10+2*p], NULL);
    part_positions[p].y = strtod(argv[10+2*p+1], NULL);
  }
 //#ifdef KSTDIN
  size_t kcount = 0;
  size_t klist_capacity = MAXKCOUNT;
  cart2_t *klist = malloc(sizeof(cart2_t) * klist_capacity);
  while (scanf("%lf %lf", &(klist[kcount].x), &(klist[kcount].y)) == 2) {
    ++kcount;
    if(kcount >= klist_capacity) {
      klist_capacity *= 2;
      klist = realloc(klist, sizeof(cart2_t) * klist_capacity);
      if (klist == NULL) abort();
    }
  }
 //#else
 #if 0
  cart2_t klist[MAXKCOUNT];
  int kcount = MAXKCOUNT;
  for (int i = 0; i <  kcount; ++i) { // TODO this should depend on orientation...
    klist[i].x = 0;
    klist[i].y = (4.* M_PI / 3. / LATTICE_A) * (KMINCOEFF + (KMAXCOEFF-KMINCOEFF)/kcount*i);
  }
 #endif
  const double unitcell_area = l2d_unitcell_area(b1, b2);
  l2d_reduceBasis(b1, b2, &b1, &b2);
  // TODO more clever way of determining the cutoff
  const double a = sqrt(unitcell_area); // N.B. different meaning than before
  const double maxR = 25 * a;
  const double maxK = 25 * 2*M_PI/a;
  qpms_trans_calculator *c = qpms_trans_calculator_init(lMax, QPMS_NORMALISATION_POWER_CS); // vai POWER_CS?
  FILE *out = fopen(outfile, "w");
  FILE *err = NULL;
  if (errfile)
    err = fopen(errfile, "w");
  {
    const double omega = scuffomega * SCUFF_OMEGAUNIT;
    const double EeV = omega * hbar / eV;
    const double k0_vac = omega / c0;
    const double k0_eff  = k0_vac * refindex;
    const double eta = 5.224/a; // FIXME quite arbitrary, but this one should work
    // indices : destpart (A/B-particle), srcpart (A/B-particle), coeff type (A/B- type), desty, srcy
    complex double W[npart][npart][2][c->nelem][c->nelem];
    double Werr[npart][npart][npart][c->nelem][c->nelem];
    for (size_t ki = 0; ki < kcount; ++ki) {
      cart2_t beta = klist[ki];
      memset(W, 0, sizeof(W));
      if(err)
        memset(Werr, 0, sizeof(Werr));
      const ptrdiff_t deststride = &(W[0][0][0][1][0]) - &(W[0][0][0][0][0]);
      const ptrdiff_t srcstride = &(W[0][0][0][0][1]) - &(W[0][0][0][0][0]);
      assert (srcstride == 1 && deststride == c->nelem);
      for (size_t ps = 0; ps < npart; ++ps) for (size_t pd = 0; pd < npart; ++pd) 
        // TODO optimize (calculate only once for each particle shift; especially if pd == ps)
        qpms_trans_calculator_get_AB_arrays_e32(c,
            &(W[pd][ps][0][0][0]), err ? &(Werr[pd][ps][0][0][0]) : NULL, // Adest, Aerr,
            &(W[pd][ps][1][0][0]), err ? &(Werr[pd][ps][1][0][0]) : NULL, // Bdest, Berr,
            deststride, srcstride,
            eta, k0_eff, b1, b2,
            beta,
            cart2_substract(part_positions[pd], part_positions[ps]),  // CHECKSIGN
            maxR, maxK 
            );
        // TODO CHECK B<-A vs. A<-B relation
      fprintf(out, "%.16g\t%.16g\t%.16g\t%.16g\t%.16g\t",
          scuffomega, EeV, k0_eff, beta.x, beta.y);
      if(err) fprintf(err, "%.16g\t%.16g\t%16g\t%.16g\t%.16g\t",
          scuffomega, EeV, k0_eff, beta.x, beta.y);
      size_t totalelems = sizeof(W) / sizeof(complex double);
      for (size_t i = 0; i < totalelems; ++i) {
        complex double w = ((complex double *)W)[i];
        fprintf(out, "%.16g\t%.16g\t", creal(w), cimag(w));
        if (err) 
          fprintf(err, "%.3g\t", ((double *)Werr)[i]);
      }
      fputc('\n', out);
      if(err) fputc('\n', err);
    }
  }
  fclose(out);
  if(err) fclose(err);
 //#ifdef KSTDIN
  free(klist);
 //#endif
  qpms_trans_calculator_free(c);
 }
--- a/qpms/translations.c
+++ b/qpms/translations.c
@ -1154,6 +1154,125 @@ int qpms_trans_calculator_get_AB_arrays(const qpms_trans_calculator *c,
 }
 #ifdef LATTICESUMS31
 int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_trans_calculator *c,
    complex double * const Adest, double * const Aerr,
    complex double * const Bdest, double * const Berr,
    const ptrdiff_t deststride, const ptrdiff_t srcstride,
    /* qpms_bessel_t J*/ // assume QPMS_HANKEL_PLUS
    const double eta, const double k, const double unitcell_area,
    const size_t nRpoints, const cart2_t *Rpoints, // n.b. can't contain 0; TODO automatic recognition and skip
    const size_t nKpoints, const cart2_t *Kpoints,
    const double beta,//DIFF21
    const double particle_shift//DIFF21
    )
 {
  const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e32c->lMax);
  //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
  const bool doerr = Aerr || Berr;
  const bool do_sigma0 = (particle_shift == 0)//DIFF21((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
  complex double *sigmas_short = malloc(sizeof(complex double)*nelem2_sc);
  complex double *sigmas_long = malloc(sizeof(complex double)*nelem2_sc);
  complex double *sigmas_total = malloc(sizeof(complex double)*nelem2_sc);
  double *serr_short, *serr_long, *serr_total;
  if(doerr) {
    serr_short = malloc(sizeof(double)*nelem2_sc);
    serr_long = malloc(sizeof(double)*nelem2_sc);
    serr_total = malloc(sizeof(double)*nelem2_sc);
  } else serr_short = serr_long = serr_total = NULL;
  int retval;
  retval = ewald31z_sigma_long_points_and_shift(sigmas_long, serr_long, //DIFF21
      c->e32c, eta, k, unitcell_area, nKpoints, Kpoints, beta, particle_shift);
  if (retval) abort();
  retval = ewald31z_sigma_short_points_and_shift(sigmas_short, serr_short, //DIFF21
      c->e32c, eta, k, nRpoints, Rpoints, beta, particle_shift);
  if (retval) abort();
  for(qpms_y_t y = 0; y < nelem2_sc; ++y)
    sigmas_total[y] = sigmas_short[y] + sigmas_long[y];
  if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
    serr_total[y]  = serr_short[y] + serr_long[y];
  complex double sigma0 = 0; double sigma0_err = 0;
  if (do_sigma0) {
    retval = ewald31z_sigma0(&sigma0, &sigma0_err, c->e32c, eta, k);//DIFF21
    if(retval) abort();
    const qpms_l_t y = qpms_mn2y_sc(0,0);
    sigmas_total[y] += sigma0;
    if(doerr) serr_total[y] += sigma0_err;
  }
  switch(qpms_normalisation_t_normonly(c->normalisation)) {
    case QPMS_NORMALISATION_TAYLOR:
    case QPMS_NORMALISATION_POWER:
    case QPMS_NORMALISATION_NONE:
      {
        ptrdiff_t desti = 0, srci = 0;
        for (qpms_l_t n = 1; n <= c->lMax; ++n) for (qpms_m_t m = -n; m <= n; ++m) {
          for (qpms_l_t nu = 1; nu <= c->lMax; ++nu) for (qpms_m_t mu = -nu; mu <= nu; ++mu){
            const size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
            const size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1;
            complex double Asum, Asumc; ckahaninit(&Asum, &Asumc);
            double Asumerr, Asumerrc; if(Aerr) kahaninit(&Asumerr, &Asumerrc);
            const qpms_m_t mu_m  = mu - m;
            // TODO skip if ... (N.B. skip will be different for 31z and 32)
            for(qpms_l_t q = 0; q <= qmax; ++q) {
              const qpms_l_t p = n + nu - 2*q;
              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p); 
              const complex double multiplier = c->A_multipliers[i][q];
              complex double sigma = sigmas_total[y_sc];
              ckahanadd(&Asum, &Asumc, multiplier * sigma);
              if (Aerr) kahanadd(&Asumerr, &Asumerrc, multiplier * serr_total[y_sc]);
            }
            *(Adest + deststride * desti + srcstride * srci) = Asum;
            if (Aerr) *(Aerr + deststride * desti + srcstride * srci) = Asumerr;
            // TODO skip if ...
            complex double Bsum, Bsumc; ckahaninit(&Bsum, &Bsumc);
            double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
             for(qpms_l_t q = 0; q <= qmax; ++q) {
              const qpms_l_t p_ = n + nu - 2*q + 1;
              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_); 
              const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
              complex double sigma = sigmas_total[y_sc];
              ckahanadd(&Bsum, &Bsumc, multiplier * sigma);
              if (Berr) kahanadd(&Bsumerr, &Bsumerrc, multiplier * serr_total[y_sc]);
            }
            *(Bdest + deststride * desti + srcstride * srci) = Bsum;
            if (Berr) *(Berr + deststride * desti + srcstride * srci) = Bsumerr;
            ++srci;
          }
          ++desti;
          srci = 0;
        }
      }
      break;
    default:
      abort();
  }
  free(sigmas_short);
  free(sigmas_long);
  free(sigmas_total);
  if(doerr) {
    free(serr_short);
    free(serr_long);
    free(serr_total);
  }
  return 0;
 }
 #endif LATTICESUMS_31
 #ifdef LATTICESUMS32
 int qpms_trans_calculator_get_AB_arrays_e32_both_points_and_shift(const qpms_trans_calculator *c,
@ -1271,48 +1390,28 @@ int qpms_trans_calculator_get_AB_arrays_e32_both_points_and_shift(const qpms_tra
  return 0;
 }
 #if 0
 int qpms_trans_calculator_e32_long_points_and_shift(const qpms_trans_calculator *c,
                complex double *Adest_long, double *Aerr_long,
                complex double *Bdest_long, double *Berr_long,
                double eta, double k, double unitcell_area,
                size_t npoints, const cart2_t *Kpoints,
                cart2_t beta,
                cart2_t particle_shift
                )
 {
 }
-int qpms_trans_calculator_e32_short_points_and_shift(const qpms_trans_calculator *c,
+// N.B. alternative point generation strategy toggled by macro GEN_RSHIFTEDPOINTS
-                complex double *Adest_short, double *Aerr_short,
+// and GEN_KSHIFTEDPOINTS.
-                complex double *Bdest_short, double *Berr_short,
+// The results should be the same. The performance can slightly differ (especially
-                double eta, double k,
+// if some optimizations in the point generators are implemented.)
-                size_t npoints, const cart2_t *Rpoints,
+int qpms_trans_calculator_get_AB_arrays_e32(const qpms_trans_calculator *c,
                cart2_t beta,
                cart2_t particle_shift
                };
 #endif // 0
 #endif // LATTICESUMS32
 #ifdef LATTICESUMS31
 int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_trans_calculator *c,
    complex double * const Adest, double * const Aerr,
    complex double * const Bdest, double * const Berr,
    const ptrdiff_t deststride, const ptrdiff_t srcstride,
    /* qpms_bessel_t J*/ // assume QPMS_HANKEL_PLUS
-    const double eta, const double k, const double unitcell_area,
+    const double eta, const double k,
-    const size_t nRpoints, const cart2_t *Rpoints, // n.b. can't contain 0; TODO automatic recognition and skip
+    const cart2_t b1, const cart2_t b2,
-    const size_t nKpoints, const cart2_t *Kpoints,
+    const cart2_t beta,
-    const double beta,//DIFF21
+    const cart2_t particle_shift,
-    const double particle_shift//DIFF21
+    double maxR, double maxK
    )
 {
  const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e32c->lMax);
  //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
  const bool doerr = Aerr || Berr;
-  const bool do_sigma0 = (particle_shift == 0)//DIFF21((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
+  const bool do_sigma0 = ((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
  complex double *sigmas_short = malloc(sizeof(complex double)*nelem2_sc);
  complex double *sigmas_long = malloc(sizeof(complex double)*nelem2_sc);
@ -1324,13 +1423,48 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
    serr_total = malloc(sizeof(double)*nelem2_sc);
  } else serr_short = serr_long = serr_total = NULL;
  const double unitcell_area = l2d_unitcell_area(b1, b2);
  cart2_t rb1, rb2; // reciprocal basis
  if (QPMS_SUCCESS != l2d_reciprocalBasis2pi(b1, b2, &rb1, &rb2)) abort();
  PGen Rgen = PGen_xyWeb_new(b1, b2, BASIS_RTOL, 
 #ifdef GEN_RSHIFTEDPOINTS
      cart2_scale(-1 /*CHECKSIGN*/, particle_shift),
 #else
      CART2_ZERO,
 #endif
      0, !do_sigma0, maxR, false);
  PGen Kgen = PGen_xyWeb_new(rb1, rb2, BASIS_RTOL,
 #ifdef GEN_KSHIFTEDPOINTS
      beta,
 #else
     CART2_ZERO,
 #endif
     0, true, maxK, false);
  int retval;
-  retval = ewald31z_sigma_long_points_and_shift(sigmas_long, serr_long, //DIFF21
+  //retval = ewald32_sigma_long_points_and_shift(sigmas_long, serr_long,
-      c->e32c, eta, k, unitcell_area, nKpoints, Kpoints, beta, particle_shift);
+  //    c->e32c, eta, k, unitcell_area, nKpoints, Kpoints, beta, particle_shift);
  retval = ewald3_sigma_long(sigmas_long, serr_long, c->e32c, eta, k, 
      unitcell_area, LAT_2D_IN_3D_XYONLY, &Kgen,
 #ifdef GEN_KSHIFTEDPOINTS
      true,
 #else
      false,
 #endif
      cart22cart3xy(beta), cart22cart3xy(particle_shift));
  if (retval) abort();
-  retval = ewald31z_sigma_short_points_and_shift(sigmas_short, serr_short, //DIFF21
+  //retval = ewald32_sigma_short_points_and_shift(sigmas_short, serr_short,
-      c->e32c, eta, k, nRpoints, Rpoints, beta, particle_shift);
+  //    c->e32c, eta, k, nRpoints, Rpoints, beta, particle_shift);
  retval = ewald3_sigma_short(sigmas_short, serr_short, c->e32c, eta, k,
      LAT_2D_IN_3D_XYONLY, &Rgen, 
 #ifdef GEN_RSHIFTEDPOINTS
      true,
 #else
      false,
 #endif
      cart22cart3xy(beta), cart22cart3xy(particle_shift));
  if (retval) abort();
  for(qpms_y_t y = 0; y < nelem2_sc; ++y)
@ -1340,7 +1474,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
  complex double sigma0 = 0; double sigma0_err = 0;
  if (do_sigma0) {
-    retval = ewald31z_sigma0(&sigma0, &sigma0_err, c->e32c, eta, k);//DIFF21
+    retval = ewald32_sigma0(&sigma0, &sigma0_err, c->e32c, eta, k);
    if(retval) abort();
    const qpms_l_t y = qpms_mn2y_sc(0,0);
    sigmas_total[y] += sigma0;
@ -1361,7 +1495,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
            double Asumerr, Asumerrc; if(Aerr) kahaninit(&Asumerr, &Asumerrc);
            const qpms_m_t mu_m  = mu - m;
-            // TODO skip if ... (N.B. skip will be different for 31z and 32)
+            // TODO skip if ...
            for(qpms_l_t q = 0; q <= qmax; ++q) {
              const qpms_l_t p = n + nu - 2*q;
              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p); 
@ -1410,6 +1544,31 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
  }
  return 0;
 }
 #if 0
 int qpms_trans_calculator_e32_long_points_and_shift(const qpms_trans_calculator *c,
                complex double *Adest_long, double *Aerr_long,
                complex double *Bdest_long, double *Berr_long,
                double eta, double k, double unitcell_area,
                size_t npoints, const cart2_t *Kpoints,
                cart2_t beta,
                cart2_t particle_shift
                )
 {
 }
 int qpms_trans_calculator_e32_short_points_and_shift(const qpms_trans_calculator *c,
                complex double *Adest_short, double *Aerr_short,
                complex double *Bdest_short, double *Berr_short,
                double eta, double k,
                size_t npoints, const cart2_t *Rpoints,
                cart2_t beta,
                cart2_t particle_shift
                };
 #endif // 0
 #endif // LATTICESUMS32
 #ifdef LATTICESUMS_OLD
 int qpms_trans_calculator_get_shortrange_AB_arrays_buf(const qpms_trans_calculator *c,
--- a/qpms/translations.h
+++ b/qpms/translations.h
@ -191,6 +191,18 @@ int qpms_trans_calculator_get_AB_arrays_e32_both_points_and_shift(const qpms_tra
 		const cart2_t beta,
 		const cart2_t particle_shift
 		);
 int qpms_trans_calculator_get_AB_arrays_e32(const qpms_trans_calculator *c,
 		complex double *Adest, double *Aerr,
 		complex double *Bdest, double *Berr,
 		const ptrdiff_t deststride, const ptrdiff_t srcstride,
 		const double eta, const double k,
 		cart2_t b1, cart2_t b2,
 		const cart2_t beta,
 		const cart2_t particle_shift,
 		double maxR, double maxK
 		);
 #endif //LATTICESUMS32
 #ifdef LATTICESUMS31