From bac3c3276d00038f3453b8d3163e6e5ab723b5a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ne=C4=8Dada?= Date: Fri, 25 Mar 2022 21:51:17 +0200 Subject: [PATCH] Indexing documentation, pedantic use of qpms_y_sc_t --- qpms/ewald.c | 28 +++++++++++------------ qpms/ewald.h | 2 +- qpms/indexing.h | 55 +++++++++++++++++++++++++++++++++++++-------- qpms/translations.c | 24 ++++++++++---------- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/qpms/ewald.c b/qpms/ewald.c index 7fb7df8..8c727a1 100644 --- a/qpms/ewald.c +++ b/qpms/ewald.c @@ -136,7 +136,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double)); size_t s1_constfacs_sz_cumsum = 0; - for (qpms_y_t y = 0; y < c->nelem_sc; ++y) { + for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) { qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n); if ((m + n) % 2 == 0) { c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum; @@ -172,7 +172,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *)); //determine sizes size_t S1_constfacs_sz = 0; - for (qpms_y_t y = 0; y < c->nelem_sc; ++y) { + for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) { qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n); const qpms_l_t L_M = n - abs(m); for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum @@ -187,7 +187,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double)); size_t S1_constfacs_sz_cumsum = 0; // second count - for (qpms_y_t y = 0; y < c->nelem_sc; ++y) { + for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) { qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n); const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI * factorial((n-m)/2) * factorial((n+m)/2); @@ -314,7 +314,7 @@ int ewald3_21_xy_sigma_long ( const bool k_is_real = (cimag(k) == 0); assert((latdim & LAT_XYONLY) && (latdim & SPACE3D)); assert((latdim & LAT1D) || (latdim & LAT2D)); - const qpms_y_t nelem_sc = c->nelem_sc; + const qpms_y_sc_t nelem_sc = c->nelem_sc; assert(nelem_sc > 0); const qpms_l_t lMax = c->lMax; @@ -446,7 +446,7 @@ int ewald3_21_xy_sigma_long ( for(qpms_m_t m = -n; m <= n; ++m) { if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero. continue; - const qpms_y_t y = qpms_mn2y_sc(m, n); + const qpms_y_sc_t y = qpms_mn2y_sc(m, n); size_t constidx = 0; // constants offset const complex double e_imalpha_pq = cexp(I*m*arg_pq); complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c); @@ -509,7 +509,7 @@ int ewald3_21_xy_sigma_long ( #ifdef EWALD_AUTO_CUTOFF ++li; double cursum_min = INFINITY; - for (qpms_y_t y = 0; y < nelem_sc; ++y) { + for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) { const double a = cabs(target[y]); if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis cursum_min = MIN(cursum_min, a); @@ -529,10 +529,10 @@ ewald3_21_xy_sigma_long_end_point_loop: free(err_c); free(target_c); - for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above + for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above target[y] *= commonfac; if(err) - for(qpms_y_t y = 0; y < nelem_sc; ++y) + for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) err[y] *= commonfac; return 0; } @@ -562,7 +562,7 @@ int ewald3_1_z_sigma_long ( assert(particle_shift.x == 0 && particle_shift.y == 0); const double beta_z = beta.z; const double particle_shift_z = particle_shift_z; - const qpms_y_t nelem_sc = c->nelem_sc; + const qpms_y_sc_t nelem_sc = c->nelem_sc; const qpms_l_t lMax = c->lMax; // Manual init of the ewald summation targets @@ -613,7 +613,7 @@ int ewald3_1_z_sigma_long ( // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array // and just fetched for each n for(qpms_l_t n = 0; n <= lMax; ++n) { - const qpms_y_t y = qpms_mn2y_sc(0, n); + const qpms_y_sc_t y = qpms_mn2y_sc(0, n); complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c); double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors? for(qpms_l_t j = 0; j <= n/2; ++j) { @@ -647,10 +647,10 @@ int ewald3_1_z_sigma_long ( free(err_c); free(target_c); - for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above + for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above target[y] *= commonfac; if(err) - for(qpms_y_t y = 0; y < nelem_sc; ++y) + for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) err[y] *= commonfac; return 0; } @@ -793,7 +793,7 @@ int ewald3_sigma_short( { const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops const double kreal = creal(k); - const qpms_y_t nelem_sc = c->nelem_sc; + const qpms_y_sc_t nelem_sc = c->nelem_sc; const qpms_l_t lMax = c->lMax; gsl_integration_workspace *workspace = gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT); @@ -930,7 +930,7 @@ int ewald3_sigma_short( #ifdef EWALD_AUTO_CUTOFF ++li; double cursum_min = INFINITY; - for (qpms_y_t y = 0; y < nelem_sc; ++y) { + for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) { const double a = cabs(target[y]); if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis cursum_min = MIN(cursum_min, a); diff --git a/qpms/ewald.h b/qpms/ewald.h index 96121c8..8881586 100644 --- a/qpms/ewald.h +++ b/qpms/ewald.h @@ -47,7 +47,7 @@ gsl_error_handler_t IgnoreUnderflowsGSLErrorHandler; */ typedef struct qpms_ewald3_constants_t { qpms_l_t lMax; - qpms_y_t nelem_sc; + qpms_y_sc_t nelem_sc; /// The values of maximum \a j's in the long-range part summation, `[(l-|m|/2)]`. qpms_l_t *s1_jMaxes; /// The constant factors for the long range part of a 2D Ewald sum. diff --git a/qpms/indexing.h b/qpms/indexing.h index 59e90eb..aab7478 100644 --- a/qpms/indexing.h +++ b/qpms/indexing.h @@ -1,5 +1,28 @@ /*! \file indexing.h * \brief Various index conversion functions. + * + * Index conventions in QPMS + * ------------------------- + * + * Depending on circumstances (e.g. whether scalar or vector spherical wavefunctions + * are being used), QPMS uses two mappings between the multipole degree/order index + * pair (\a l, \a m), due to historical reasons ocassionally also noted as (\a n, \a m), + * and a flat non-negative integer index. + * + * Both mappings map the (\a l, \a m) pair in lexicographic order (with + * the basic constraint \f$ |m| \le l \f$), the difference is that the "scalar" + * mapping starts from \a l = 0, whereas the "vector" mapping starts from \a l = 1, + * so that for the zeroth elements we have + * * (0, 0) ↔ 0, (1, -1) ↔ 1 in the "scalar" mapping, + * * (1, -1) ↔ 0 in the "vector" mapping. + * + * The vector SWF flat index is typically denoted by `y` and its type is \ref qpms_y_t, + * whereas * the scalar SWF flat index is typically denoted by `y_sc` and its + * type is \ref qpms_y_sc_t. + * + * Moreover, there is another mapping that encodes also the electric/magnetic/scalar + * WSWF type index, see \ref qpms_uvswfi_t. + * */ #ifndef QPMS_INDEXING_H #define QPMS_INDEXING_H @@ -8,50 +31,64 @@ #include #include "optim.h" - - -static inline qpms_y_t qpms_mn2y(qpms_m_t m, qpms_l_t n) { +/// Mapping from multipole degree, order to flat index (degree ≥ 1) +static inline qpms_y_t qpms_mn2y( + qpms_m_t m, ///< multipole order + qpms_l_t n ///< multipole degree (a.k.a. \a l); must be greater than 0 + ) { return n * (n + 1) + m - 1; } +/// Mapping from flat index to multipole degree (degree ≥ 1) static inline qpms_lm_t qpms_y2n(qpms_y_t y) { //return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want return sqrt(y+1); } +/// Mapping from flat index, multipole degree (≥ 1) to multipole order static inline qpms_m_t qpms_yn2m(qpms_y_t y, qpms_l_t n) { return y-qpms_mn2y(0,n); } +/// Mapping from flat index to multipole degree, order (degree ≥ 1) static inline void qpms_y2mn_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){ *m=qpms_yn2m(y,*n=qpms_y2n(y)); } +/// Number of spherical multipoles with `lmax` ≥ degree ≥ 1 static inline qpms_y_t qpms_lMax2nelem(qpms_l_t lmax){ return lmax * ((qpms_y_t)lmax + 2); } // Scalar versions: they have a place for the 0, 0 term in the beginning -static inline qpms_y_t qpms_mn2y_sc(qpms_m_t m, qpms_l_t n) { +/// Mapping from multipole degree, order to flat index (degree ≥ 0) +static inline qpms_y_sc_t qpms_mn2y_sc( + qpms_m_t m, ///< multipole order + qpms_l_t n ///< multipole degree (a.k.a. \a l); muste be greater or equal than 0 + ) { return n * (n + 1) + m; } -static inline qpms_lm_t qpms_y2n_sc(qpms_y_t y) { +/// Mapping from flat index to multipole degree (degree ≥ 0) +static inline qpms_lm_t qpms_y2n_sc(qpms_y_sc_t y) { //return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want return sqrt(y); } -static inline qpms_m_t qpms_yn2m_sc(qpms_y_t y, qpms_l_t n) { +/// Mapping from flat index, multipole degree (≥ 0) to multipole order +static inline qpms_m_t qpms_yn2m_sc(qpms_y_sc_t y, qpms_l_t n) { return y-qpms_mn2y_sc(0,n); } -static inline void qpms_y2mn_sc_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){ +/// Mapping from flat index to multipole degree, order (degree ≥ 0) +static inline void qpms_y2mn_sc_p(qpms_y_sc_t y, qpms_m_t *m, qpms_l_t *n){ *m=qpms_yn2m_sc(y,*n=qpms_y2n_sc(y)); } -static inline qpms_y_t qpms_lMax2nelem_sc(qpms_l_t lmax){ - return lmax * ((qpms_y_t)lmax + 2) + 1; +/// Number of spherical multipoles with `lmax` ≥ degree ≥ 0 +static inline qpms_y_sc_t qpms_lMax2nelem_sc(qpms_l_t lmax){ + return lmax * ((qpms_y_sc_t)lmax + 2) + 1; } // TODO maybe enable crashing / validity control by macro definitions... diff --git a/qpms/translations.c b/qpms/translations.c index 118bc28..3ae667f 100644 --- a/qpms/translations.c +++ b/qpms/translations.c @@ -825,7 +825,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr ) { - const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax); + const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax); //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax); const bool doerr = Aerr || Berr; const bool do_sigma0 = (particle_shift == 0)//DIFF21((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector @@ -846,15 +846,15 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr QPMS_ENSURE_SUCCESS(ewald31z_sigma_short_points_and_shift(sigmas_short, serr_short, //DIFF21 c->e3c, eta, k, nRpoints, Rpoints, beta, particle_shift)); - for(qpms_y_t y = 0; y < nelem2_sc; ++y) + for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y) sigmas_total[y] = sigmas_short[y] + sigmas_long[y]; - if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y) + if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y) serr_total[y] = serr_short[y] + serr_long[y]; complex double sigma0 = 0; double sigma0_err = 0; if (do_sigma0) { QPMS_ENSURE_SUCCESS(ewald31z_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k)); - const qpms_l_t y = qpms_mn2y_sc(0,0); + const qpms_y_sc_t y = qpms_mn2y_sc(0,0); sigmas_total[y] += sigma0; if(doerr) serr_total[y] += sigma0_err; } @@ -872,7 +872,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr // TODO skip if ... (N.B. skip will be different for 31z and 32) for(qpms_l_t q = 0; q <= qmax; ++q) { const qpms_l_t p = n + nu - 2*q; - const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p); + const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p); const complex double multiplier = c->A_multipliers[i][q]; complex double sigma = sigmas_total[y_sc]; ckahanadd(&Asum, &Asumc, multiplier * sigma); @@ -887,7 +887,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc); for(qpms_l_t q = 0; q <= qmax; ++q) { const qpms_l_t p_ = n + nu - 2*q + 1; - const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_); + const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_); const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET]; complex double sigma = sigmas_total[y_sc]; ckahanadd(&Bsum, &Bsumc, multiplier * sigma); @@ -937,7 +937,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c, ) { - const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax); + const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax); //const qpms_y_t nelem = qpms_lMax2nelem(c->lMax); const bool doerr = Aerr || Berr; const bool do_sigma0 = ((particle_shift.x == 0) && (particle_shift.y == 0) && (particle_shift.z == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector @@ -1006,17 +1006,17 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c, PGen_destroy(&Rgen); } - for(qpms_y_t y = 0; y < nelem2_sc; ++y) + for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y) sigmas_total[y] = ((parts & QPMS_EWALD_SHORT_RANGE) ? sigmas_short[y] : 0) + ((parts & QPMS_EWALD_LONG_RANGE) ? sigmas_long[y] : 0); - if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y) + if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y) serr_total[y] = ((parts & QPMS_EWALD_SHORT_RANGE) ? serr_short[y] : 0) + ((parts & QPMS_EWALD_LONG_RANGE) ? serr_long[y] : 0); complex double sigma0 = 0; double sigma0_err = 0; if (do_sigma0 && (parts & QPMS_EWALD_0TERM)) { QPMS_ENSURE_SUCCESS(ewald3_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k)); - const qpms_l_t y = qpms_mn2y_sc(0,0); + const qpms_y_sc_t y = qpms_mn2y_sc(0,0); sigmas_total[y] += sigma0; if(doerr) serr_total[y] += sigma0_err; } @@ -1034,7 +1034,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c, // TODO skip if ... for(qpms_l_t q = 0; q <= qmax; ++q) { const qpms_l_t p = n + nu - 2*q; - const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p); + const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p); const complex double multiplier = c->A_multipliers[i][q]; complex double sigma = sigmas_total[y_sc]; ckahanadd(&Asum, &Asumc, multiplier * sigma); @@ -1049,7 +1049,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c, double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc); for(qpms_l_t q = 0; q <= qmax; ++q) { const qpms_l_t p_ = n + nu - 2*q + 1; - const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_); + const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_); const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET]; complex double sigma = sigmas_total[y_sc]; ckahanadd(&Bsum, &Bsumc, multiplier * sigma);