Indexing documentation, pedantic use of qpms_y_sc_t

2022-03-25 21:51:17 +02:00 · 2022-03-25 21:51:17 +02:00 · 10d3beca1c
parent f9081c5922
commit 10d3beca1c
4 changed files with 73 additions and 36 deletions
--- a/qpms/ewald.c
+++ b/qpms/ewald.c
@ -136,7 +136,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
-  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
+  for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
@ -172,7 +172,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
  c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));
  //determine sizes
  size_t S1_constfacs_sz = 0;
-  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
+  for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const qpms_l_t L_M = n - abs(m);
    for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
@ -187,7 +187,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons

  c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
  size_t S1_constfacs_sz_cumsum = 0; // second count
-  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
+  for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI  
              * factorial((n-m)/2) * factorial((n+m)/2);
@ -314,7 +314,7 @@ int ewald3_21_xy_sigma_long (
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
-  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_y_sc_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;
  
@ -446,7 +446,7 @@ int ewald3_21_xy_sigma_long (
      for(qpms_m_t m = -n; m <= n; ++m) {
        if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
          continue;
-        const qpms_y_t y = qpms_mn2y_sc(m, n);
+        const qpms_y_sc_t y = qpms_mn2y_sc(m, n);
        size_t constidx = 0; // constants offset
        const complex double e_imalpha_pq = cexp(I*m*arg_pq);
          complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
@ -509,7 +509,7 @@ int ewald3_21_xy_sigma_long (
 #ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
-    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
+    for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
        cursum_min = MIN(cursum_min, a);
@ -529,10 +529,10 @@ ewald3_21_xy_sigma_long_end_point_loop:
 
  free(err_c);
  free(target_c);
-  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
+  for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
-    for(qpms_y_t y = 0; y < nelem_sc; ++y)
+    for(qpms_y_sc_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
 }
@ -562,7 +562,7 @@ int ewald3_1_z_sigma_long (
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift_z;
-  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_y_sc_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  
  // Manual init of the ewald summation targets
@ -613,7 +613,7 @@ int ewald3_1_z_sigma_long (
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n
    for(qpms_l_t n = 0; n <= lMax; ++n) {
-        const qpms_y_t y = qpms_mn2y_sc(0, n);
+        const qpms_y_sc_t y = qpms_mn2y_sc(0, n);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
        for(qpms_l_t j = 0; j <= n/2; ++j) {
@ -647,10 +647,10 @@ int ewald3_1_z_sigma_long (
 
  free(err_c);
  free(target_c);
-  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
+  for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if(err)
-    for(qpms_y_t y = 0; y < nelem_sc; ++y)
+    for(qpms_y_sc_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
 }
@ -793,7 +793,7 @@ int ewald3_sigma_short(
 {
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
-  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_y_sc_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace = 
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);
@ -930,7 +930,7 @@ int ewald3_sigma_short(
 #ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
-    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
+    for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
        cursum_min = MIN(cursum_min, a);
--- a/qpms/ewald.h
+++ b/qpms/ewald.h
@ -47,7 +47,7 @@ gsl_error_handler_t IgnoreUnderflowsGSLErrorHandler;
 */
 typedef struct qpms_ewald3_constants_t { 
 	qpms_l_t lMax;
-	qpms_y_t nelem_sc;
+	qpms_y_sc_t nelem_sc;
 	/// The values of maximum \a j's in the long-range part summation, `[(l-|m|/2)]`.
 	qpms_l_t *s1_jMaxes;
 	/// The constant factors for the long range part of a 2D Ewald sum.
--- a/qpms/indexing.h
+++ b/qpms/indexing.h
@ -1,5 +1,28 @@
 /*! \file indexing.h
 * \brief Various index conversion functions.
+ *
+ * Index conventions in QPMS
+ * -------------------------
+ *
+ * Depending on circumstances (e.g. whether scalar or vector spherical wavefunctions
+ * are being used), QPMS uses two mappings between the multipole degree/order index 
+ * pair (\a l, \a m), due to historical reasons ocassionally also noted as (\a n, \a m),
+ * and a flat non-negative integer index.
+ * 
+ * Both mappings map the (\a l, \a m) pair in lexicographic order (with 
+ * the basic constraint \f$ |m| \le l \f$), the difference is that the "scalar"
+ * mapping starts from \a l = 0, whereas the "vector" mapping starts from \a l = 1, 
+ * so that for the zeroth elements we have
+ *  * (0, 0) ↔ 0, (1, -1) ↔ 1 in the "scalar" mapping,
+ *  * (1, -1) ↔ 0 in the "vector" mapping.
+ *
+ * The vector SWF flat index is typically denoted by `y` and its type is \ref qpms_y_t, 
+ * whereas  * the scalar SWF flat index is typically denoted by `y_sc` and its 
+ * type is \ref qpms_y_sc_t.
+ *
+ * Moreover, there is another mapping that encodes also the electric/magnetic/scalar
+ * WSWF type index, see \ref qpms_uvswfi_t.
+ *
 */
 #ifndef QPMS_INDEXING_H
 #define QPMS_INDEXING_H
@ -8,50 +31,64 @@
 #include <math.h>
 #include "optim.h"

-
-
-static inline qpms_y_t qpms_mn2y(qpms_m_t m, qpms_l_t n) {
+/// Mapping from multipole degree, order to flat index (degree ≥ 1)
+static inline qpms_y_t qpms_mn2y(
+    qpms_m_t m, ///< multipole order
+    qpms_l_t n ///< multipole degree (a.k.a. \a l); must be greater than 0
+    ) {
        return n * (n + 1) + m - 1;
 }

+/// Mapping from flat index to multipole degree (degree ≥ 1)
 static inline qpms_lm_t qpms_y2n(qpms_y_t y) {
        //return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want
        return sqrt(y+1);
 }

+/// Mapping from flat index, multipole degree (≥ 1) to multipole order
 static inline qpms_m_t qpms_yn2m(qpms_y_t y, qpms_l_t n) {
        return y-qpms_mn2y(0,n);
 }

+/// Mapping from flat index to multipole degree, order (degree ≥ 1)
 static inline void qpms_y2mn_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){
        *m=qpms_yn2m(y,*n=qpms_y2n(y));
 }

+/// Number of spherical multipoles with `lmax` ≥ degree ≥ 1
 static inline qpms_y_t qpms_lMax2nelem(qpms_l_t lmax){
 	return lmax * ((qpms_y_t)lmax + 2);
 }

 // Scalar versions: they have a place for the 0, 0 term in the beginning

-static inline qpms_y_t qpms_mn2y_sc(qpms_m_t m, qpms_l_t n) {
+/// Mapping from multipole degree, order to flat index (degree ≥ 0)
+static inline qpms_y_sc_t qpms_mn2y_sc(
+    qpms_m_t m, ///< multipole order
+    qpms_l_t n ///< multipole degree (a.k.a. \a l); muste be greater or equal than 0
+    ) {
        return n * (n + 1) + m;
 }

-static inline qpms_lm_t qpms_y2n_sc(qpms_y_t y) {
+/// Mapping from flat index to multipole degree (degree ≥ 0)
+static inline qpms_lm_t qpms_y2n_sc(qpms_y_sc_t y) {
        //return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want
        return sqrt(y);
 }

-static inline qpms_m_t qpms_yn2m_sc(qpms_y_t y, qpms_l_t n) {
+/// Mapping from flat index, multipole degree (≥ 0) to multipole order
+static inline qpms_m_t qpms_yn2m_sc(qpms_y_sc_t y, qpms_l_t n) {
        return y-qpms_mn2y_sc(0,n);
 }

-static inline void qpms_y2mn_sc_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){
+/// Mapping from flat index to multipole degree, order (degree ≥ 0)
+static inline void qpms_y2mn_sc_p(qpms_y_sc_t y, qpms_m_t *m, qpms_l_t *n){
        *m=qpms_yn2m_sc(y,*n=qpms_y2n_sc(y));
 }

-static inline qpms_y_t qpms_lMax2nelem_sc(qpms_l_t lmax){
-	return lmax * ((qpms_y_t)lmax + 2) + 1;
+/// Number of spherical multipoles with `lmax` ≥ degree ≥ 0
+static inline qpms_y_sc_t qpms_lMax2nelem_sc(qpms_l_t lmax){
+	return lmax * ((qpms_y_sc_t)lmax + 2) + 1;
 }

 // TODO maybe enable crashing / validity control by macro definitions...
--- a/qpms/translations.c
+++ b/qpms/translations.c
@ -825,7 +825,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
    )
 {

-  const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
+  const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
  //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
  const bool doerr = Aerr || Berr;
  const bool do_sigma0 = (particle_shift == 0)//DIFF21((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
@ -846,15 +846,15 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
  QPMS_ENSURE_SUCCESS(ewald31z_sigma_short_points_and_shift(sigmas_short, serr_short, //DIFF21
      c->e3c, eta, k, nRpoints, Rpoints, beta, particle_shift));

-  for(qpms_y_t y = 0; y < nelem2_sc; ++y)
+  for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
    sigmas_total[y] = sigmas_short[y] + sigmas_long[y];
-  if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
+  if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
    serr_total[y]  = serr_short[y] + serr_long[y];

  complex double sigma0 = 0; double sigma0_err = 0;
  if (do_sigma0) {
    QPMS_ENSURE_SUCCESS(ewald31z_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k));
-    const qpms_l_t y = qpms_mn2y_sc(0,0);
+    const qpms_y_sc_t y = qpms_mn2y_sc(0,0);
    sigmas_total[y] += sigma0;
    if(doerr) serr_total[y] += sigma0_err;
  }
@ -872,7 +872,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
            // TODO skip if ... (N.B. skip will be different for 31z and 32)
            for(qpms_l_t q = 0; q <= qmax; ++q) {
              const qpms_l_t p = n + nu - 2*q;
-              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p); 
+              const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p); 
              const complex double multiplier = c->A_multipliers[i][q];
              complex double sigma = sigmas_total[y_sc];
              ckahanadd(&Asum, &Asumc, multiplier * sigma);
@ -887,7 +887,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
            double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
             for(qpms_l_t q = 0; q <= qmax; ++q) {
              const qpms_l_t p_ = n + nu - 2*q + 1;
-              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_); 
+              const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_); 
              const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
              complex double sigma = sigmas_total[y_sc];
              ckahanadd(&Bsum, &Bsumc, multiplier * sigma);
@ -937,7 +937,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
    )
 {

-  const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
+  const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
  //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
  const bool doerr = Aerr || Berr;
  const bool do_sigma0 = ((particle_shift.x == 0) && (particle_shift.y == 0) && (particle_shift.z == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
@ -1006,17 +1006,17 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
      PGen_destroy(&Rgen);
  }

-  for(qpms_y_t y = 0; y < nelem2_sc; ++y)
+  for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
    sigmas_total[y] = ((parts & QPMS_EWALD_SHORT_RANGE) ? sigmas_short[y] : 0)
                    + ((parts & QPMS_EWALD_LONG_RANGE)  ? sigmas_long[y] : 0);
-  if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
+  if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
    serr_total[y]  = ((parts & QPMS_EWALD_SHORT_RANGE) ? serr_short[y] : 0)
                    + ((parts & QPMS_EWALD_LONG_RANGE)  ? serr_long[y] : 0);

  complex double sigma0 = 0; double sigma0_err = 0;
  if (do_sigma0 && (parts & QPMS_EWALD_0TERM)) {
    QPMS_ENSURE_SUCCESS(ewald3_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k));
-    const qpms_l_t y = qpms_mn2y_sc(0,0);
+    const qpms_y_sc_t y = qpms_mn2y_sc(0,0);
    sigmas_total[y] += sigma0;
    if(doerr) serr_total[y] += sigma0_err;
  }
@ -1034,7 +1034,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
            // TODO skip if ...
            for(qpms_l_t q = 0; q <= qmax; ++q) {
              const qpms_l_t p = n + nu - 2*q;
-              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p); 
+              const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p); 
              const complex double multiplier = c->A_multipliers[i][q];
              complex double sigma = sigmas_total[y_sc];
              ckahanadd(&Asum, &Asumc, multiplier * sigma);
@ -1049,7 +1049,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
            double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
             for(qpms_l_t q = 0; q <= qmax; ++q) {
              const qpms_l_t p_ = n + nu - 2*q + 1;
-              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_); 
+              const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_); 
              const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
              complex double sigma = sigmas_total[y_sc];
              ckahanadd(&Bsum, &Bsumc, multiplier * sigma);