From bac3c3276d00038f3453b8d3163e6e5ab723b5a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ne=C4=8Dada?= <marek@necada.org>
Date: Fri, 25 Mar 2022 21:51:17 +0200
Subject: [PATCH] Indexing documentation, pedantic use of qpms_y_sc_t

---
 qpms/ewald.c        | 28 +++++++++++------------
 qpms/ewald.h        |  2 +-
 qpms/indexing.h     | 55 +++++++++++++++++++++++++++++++++++++--------
 qpms/translations.c | 24 ++++++++++----------
 4 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/qpms/ewald.c b/qpms/ewald.c
index 7fb7df8..8c727a1 100644
--- a/qpms/ewald.c
+++ b/qpms/ewald.c
@@ -136,7 +136,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
 
   c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
   size_t s1_constfacs_sz_cumsum = 0;
-  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
+  for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
     qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
     if ((m + n) % 2 == 0) {
       c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
@@ -172,7 +172,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
   c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));
   //determine sizes
   size_t S1_constfacs_sz = 0;
-  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
+  for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
     qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
     const qpms_l_t L_M = n - abs(m);
     for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
@@ -187,7 +187,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
 
   c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
   size_t S1_constfacs_sz_cumsum = 0; // second count
-  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
+  for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
     qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
     const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI  
               * factorial((n-m)/2) * factorial((n+m)/2);
@@ -314,7 +314,7 @@ int ewald3_21_xy_sigma_long (
   const bool k_is_real = (cimag(k) == 0);
   assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
   assert((latdim & LAT1D) || (latdim & LAT2D));
-  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_y_sc_t nelem_sc = c->nelem_sc;
   assert(nelem_sc > 0);
   const qpms_l_t lMax = c->lMax;
   
@@ -446,7 +446,7 @@ int ewald3_21_xy_sigma_long (
       for(qpms_m_t m = -n; m <= n; ++m) {
         if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
           continue;
-        const qpms_y_t y = qpms_mn2y_sc(m, n);
+        const qpms_y_sc_t y = qpms_mn2y_sc(m, n);
         size_t constidx = 0; // constants offset
         const complex double e_imalpha_pq = cexp(I*m*arg_pq);
           complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
@@ -509,7 +509,7 @@ int ewald3_21_xy_sigma_long (
 #ifdef EWALD_AUTO_CUTOFF
     ++li;
     double cursum_min = INFINITY;
-    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
+    for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) {
       const double a = cabs(target[y]);
       if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
         cursum_min = MIN(cursum_min, a);
@@ -529,10 +529,10 @@ ewald3_21_xy_sigma_long_end_point_loop:
  
   free(err_c);
   free(target_c);
-  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
+  for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
     target[y] *= commonfac;
   if(err)
-    for(qpms_y_t y = 0; y < nelem_sc; ++y)
+    for(qpms_y_sc_t y = 0; y < nelem_sc; ++y)
       err[y] *= commonfac;
   return 0;
 }
@@ -562,7 +562,7 @@ int ewald3_1_z_sigma_long (
   assert(particle_shift.x == 0 && particle_shift.y == 0);
   const double beta_z = beta.z;
   const double particle_shift_z = particle_shift_z;
-  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_y_sc_t nelem_sc = c->nelem_sc;
   const qpms_l_t lMax = c->lMax;
   
   // Manual init of the ewald summation targets
@@ -613,7 +613,7 @@ int ewald3_1_z_sigma_long (
     // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
     // and just fetched for each n
     for(qpms_l_t n = 0; n <= lMax; ++n) {
-        const qpms_y_t y = qpms_mn2y_sc(0, n);
+        const qpms_y_sc_t y = qpms_mn2y_sc(0, n);
         complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
         double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
         for(qpms_l_t j = 0; j <= n/2; ++j) {
@@ -647,10 +647,10 @@ int ewald3_1_z_sigma_long (
  
   free(err_c);
   free(target_c);
-  for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
+  for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
     target[y] *= commonfac;
   if(err)
-    for(qpms_y_t y = 0; y < nelem_sc; ++y)
+    for(qpms_y_sc_t y = 0; y < nelem_sc; ++y)
       err[y] *= commonfac;
   return 0;
 }
@@ -793,7 +793,7 @@ int ewald3_sigma_short(
 {
   const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
   const double kreal = creal(k);
-  const qpms_y_t nelem_sc = c->nelem_sc;
+  const qpms_y_sc_t nelem_sc = c->nelem_sc;
   const qpms_l_t lMax = c->lMax;
   gsl_integration_workspace *workspace = 
     gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);
@@ -930,7 +930,7 @@ int ewald3_sigma_short(
 #ifdef EWALD_AUTO_CUTOFF
     ++li;
     double cursum_min = INFINITY;
-    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
+    for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) {
       const double a = cabs(target[y]);
       if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
         cursum_min = MIN(cursum_min, a);
diff --git a/qpms/ewald.h b/qpms/ewald.h
index 96121c8..8881586 100644
--- a/qpms/ewald.h
+++ b/qpms/ewald.h
@@ -47,7 +47,7 @@ gsl_error_handler_t IgnoreUnderflowsGSLErrorHandler;
  */
 typedef struct qpms_ewald3_constants_t { 
 	qpms_l_t lMax;
-	qpms_y_t nelem_sc;
+	qpms_y_sc_t nelem_sc;
 	/// The values of maximum \a j's in the long-range part summation, `[(l-|m|/2)]`.
 	qpms_l_t *s1_jMaxes;
 	/// The constant factors for the long range part of a 2D Ewald sum.
diff --git a/qpms/indexing.h b/qpms/indexing.h
index 59e90eb..aab7478 100644
--- a/qpms/indexing.h
+++ b/qpms/indexing.h
@@ -1,5 +1,28 @@
 /*! \file indexing.h
  * \brief Various index conversion functions.
+ *
+ * Index conventions in QPMS
+ * -------------------------
+ *
+ * Depending on circumstances (e.g. whether scalar or vector spherical wavefunctions
+ * are being used), QPMS uses two mappings between the multipole degree/order index 
+ * pair (\a l, \a m), due to historical reasons ocassionally also noted as (\a n, \a m),
+ * and a flat non-negative integer index.
+ * 
+ * Both mappings map the (\a l, \a m) pair in lexicographic order (with 
+ * the basic constraint \f$ |m| \le l \f$), the difference is that the "scalar"
+ * mapping starts from \a l = 0, whereas the "vector" mapping starts from \a l = 1, 
+ * so that for the zeroth elements we have
+ *  * (0, 0) ↔ 0, (1, -1) ↔ 1 in the "scalar" mapping,
+ *  * (1, -1) ↔ 0 in the "vector" mapping.
+ *
+ * The vector SWF flat index is typically denoted by `y` and its type is \ref qpms_y_t, 
+ * whereas  * the scalar SWF flat index is typically denoted by `y_sc` and its 
+ * type is \ref qpms_y_sc_t.
+ *
+ * Moreover, there is another mapping that encodes also the electric/magnetic/scalar
+ * WSWF type index, see \ref qpms_uvswfi_t.
+ *
  */
 #ifndef QPMS_INDEXING_H
 #define QPMS_INDEXING_H
@@ -8,50 +31,64 @@
 #include <math.h>
 #include "optim.h"
 
-
-
-static inline qpms_y_t qpms_mn2y(qpms_m_t m, qpms_l_t n) {
+/// Mapping from multipole degree, order to flat index (degree ≥ 1)
+static inline qpms_y_t qpms_mn2y(
+    qpms_m_t m, ///< multipole order
+    qpms_l_t n ///< multipole degree (a.k.a. \a l); must be greater than 0
+    ) {
         return n * (n + 1) + m - 1;
 }
 
+/// Mapping from flat index to multipole degree (degree ≥ 1)
 static inline qpms_lm_t qpms_y2n(qpms_y_t y) {
         //return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want
         return sqrt(y+1);
 }
 
+/// Mapping from flat index, multipole degree (≥ 1) to multipole order
 static inline qpms_m_t qpms_yn2m(qpms_y_t y, qpms_l_t n) {
         return y-qpms_mn2y(0,n);
 }
 
+/// Mapping from flat index to multipole degree, order (degree ≥ 1)
 static inline void qpms_y2mn_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){
         *m=qpms_yn2m(y,*n=qpms_y2n(y));
 }
 
+/// Number of spherical multipoles with `lmax` ≥ degree ≥ 1
 static inline qpms_y_t qpms_lMax2nelem(qpms_l_t lmax){
 	return lmax * ((qpms_y_t)lmax + 2);
 }
 
 // Scalar versions: they have a place for the 0, 0 term in the beginning
 
-static inline qpms_y_t qpms_mn2y_sc(qpms_m_t m, qpms_l_t n) {
+/// Mapping from multipole degree, order to flat index (degree ≥ 0)
+static inline qpms_y_sc_t qpms_mn2y_sc(
+    qpms_m_t m, ///< multipole order
+    qpms_l_t n ///< multipole degree (a.k.a. \a l); muste be greater or equal than 0
+    ) {
         return n * (n + 1) + m;
 }
 
-static inline qpms_lm_t qpms_y2n_sc(qpms_y_t y) {
+/// Mapping from flat index to multipole degree (degree ≥ 0)
+static inline qpms_lm_t qpms_y2n_sc(qpms_y_sc_t y) {
         //return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want
         return sqrt(y);
 }
 
-static inline qpms_m_t qpms_yn2m_sc(qpms_y_t y, qpms_l_t n) {
+/// Mapping from flat index, multipole degree (≥ 0) to multipole order
+static inline qpms_m_t qpms_yn2m_sc(qpms_y_sc_t y, qpms_l_t n) {
         return y-qpms_mn2y_sc(0,n);
 }
 
-static inline void qpms_y2mn_sc_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){
+/// Mapping from flat index to multipole degree, order (degree ≥ 0)
+static inline void qpms_y2mn_sc_p(qpms_y_sc_t y, qpms_m_t *m, qpms_l_t *n){
         *m=qpms_yn2m_sc(y,*n=qpms_y2n_sc(y));
 }
 
-static inline qpms_y_t qpms_lMax2nelem_sc(qpms_l_t lmax){
-	return lmax * ((qpms_y_t)lmax + 2) + 1;
+/// Number of spherical multipoles with `lmax` ≥ degree ≥ 0
+static inline qpms_y_sc_t qpms_lMax2nelem_sc(qpms_l_t lmax){
+	return lmax * ((qpms_y_sc_t)lmax + 2) + 1;
 }
 
 // TODO maybe enable crashing / validity control by macro definitions...
diff --git a/qpms/translations.c b/qpms/translations.c
index 118bc28..3ae667f 100644
--- a/qpms/translations.c
+++ b/qpms/translations.c
@@ -825,7 +825,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
     )
 {
 
-  const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
+  const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
   //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
   const bool doerr = Aerr || Berr;
   const bool do_sigma0 = (particle_shift == 0)//DIFF21((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
@@ -846,15 +846,15 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
   QPMS_ENSURE_SUCCESS(ewald31z_sigma_short_points_and_shift(sigmas_short, serr_short, //DIFF21
       c->e3c, eta, k, nRpoints, Rpoints, beta, particle_shift));
 
-  for(qpms_y_t y = 0; y < nelem2_sc; ++y)
+  for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
     sigmas_total[y] = sigmas_short[y] + sigmas_long[y];
-  if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
+  if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
     serr_total[y]  = serr_short[y] + serr_long[y];
 
   complex double sigma0 = 0; double sigma0_err = 0;
   if (do_sigma0) {
     QPMS_ENSURE_SUCCESS(ewald31z_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k));
-    const qpms_l_t y = qpms_mn2y_sc(0,0);
+    const qpms_y_sc_t y = qpms_mn2y_sc(0,0);
     sigmas_total[y] += sigma0;
     if(doerr) serr_total[y] += sigma0_err;
   }
@@ -872,7 +872,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
             // TODO skip if ... (N.B. skip will be different for 31z and 32)
             for(qpms_l_t q = 0; q <= qmax; ++q) {
               const qpms_l_t p = n + nu - 2*q;
-              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p); 
+              const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p); 
               const complex double multiplier = c->A_multipliers[i][q];
               complex double sigma = sigmas_total[y_sc];
               ckahanadd(&Asum, &Asumc, multiplier * sigma);
@@ -887,7 +887,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
             double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
              for(qpms_l_t q = 0; q <= qmax; ++q) {
               const qpms_l_t p_ = n + nu - 2*q + 1;
-              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_); 
+              const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_); 
               const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
               complex double sigma = sigmas_total[y_sc];
               ckahanadd(&Bsum, &Bsumc, multiplier * sigma);
@@ -937,7 +937,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
     )
 {
 
-  const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
+  const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
   //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
   const bool doerr = Aerr || Berr;
   const bool do_sigma0 = ((particle_shift.x == 0) && (particle_shift.y == 0) && (particle_shift.z == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
@@ -1006,17 +1006,17 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
       PGen_destroy(&Rgen);
   }
 
-  for(qpms_y_t y = 0; y < nelem2_sc; ++y)
+  for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
     sigmas_total[y] = ((parts & QPMS_EWALD_SHORT_RANGE) ? sigmas_short[y] : 0)
                     + ((parts & QPMS_EWALD_LONG_RANGE)  ? sigmas_long[y] : 0);
-  if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
+  if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
     serr_total[y]  = ((parts & QPMS_EWALD_SHORT_RANGE) ? serr_short[y] : 0)
                     + ((parts & QPMS_EWALD_LONG_RANGE)  ? serr_long[y] : 0);
 
   complex double sigma0 = 0; double sigma0_err = 0;
   if (do_sigma0 && (parts & QPMS_EWALD_0TERM)) {
     QPMS_ENSURE_SUCCESS(ewald3_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k));
-    const qpms_l_t y = qpms_mn2y_sc(0,0);
+    const qpms_y_sc_t y = qpms_mn2y_sc(0,0);
     sigmas_total[y] += sigma0;
     if(doerr) serr_total[y] += sigma0_err;
   }
@@ -1034,7 +1034,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
             // TODO skip if ...
             for(qpms_l_t q = 0; q <= qmax; ++q) {
               const qpms_l_t p = n + nu - 2*q;
-              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p); 
+              const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p); 
               const complex double multiplier = c->A_multipliers[i][q];
               complex double sigma = sigmas_total[y_sc];
               ckahanadd(&Asum, &Asumc, multiplier * sigma);
@@ -1049,7 +1049,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
             double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
              for(qpms_l_t q = 0; q <= qmax; ++q) {
               const qpms_l_t p_ = n + nu - 2*q + 1;
-              const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_); 
+              const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_); 
               const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
               complex double sigma = sigmas_total[y_sc];
               ckahanadd(&Bsum, &Bsumc, multiplier * sigma);