Indexing documentation, pedantic use of qpms_y_sc_t

This commit is contained in:
Marek Nečada 2022-03-25 21:51:17 +02:00
parent 7e318fc6cd
commit bac3c3276d
4 changed files with 73 additions and 36 deletions

View File

@ -136,7 +136,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
size_t s1_constfacs_sz_cumsum = 0;
for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
if ((m + n) % 2 == 0) {
c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
@ -172,7 +172,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));
//determine sizes
size_t S1_constfacs_sz = 0;
for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
const qpms_l_t L_M = n - abs(m);
for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
@ -187,7 +187,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
size_t S1_constfacs_sz_cumsum = 0; // second count
for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI
* factorial((n-m)/2) * factorial((n+m)/2);
@ -314,7 +314,7 @@ int ewald3_21_xy_sigma_long (
const bool k_is_real = (cimag(k) == 0);
assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
assert((latdim & LAT1D) || (latdim & LAT2D));
const qpms_y_t nelem_sc = c->nelem_sc;
const qpms_y_sc_t nelem_sc = c->nelem_sc;
assert(nelem_sc > 0);
const qpms_l_t lMax = c->lMax;
@ -446,7 +446,7 @@ int ewald3_21_xy_sigma_long (
for(qpms_m_t m = -n; m <= n; ++m) {
if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
continue;
const qpms_y_t y = qpms_mn2y_sc(m, n);
const qpms_y_sc_t y = qpms_mn2y_sc(m, n);
size_t constidx = 0; // constants offset
const complex double e_imalpha_pq = cexp(I*m*arg_pq);
complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
@ -509,7 +509,7 @@ int ewald3_21_xy_sigma_long (
#ifdef EWALD_AUTO_CUTOFF
++li;
double cursum_min = INFINITY;
for (qpms_y_t y = 0; y < nelem_sc; ++y) {
for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) {
const double a = cabs(target[y]);
if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
cursum_min = MIN(cursum_min, a);
@ -529,10 +529,10 @@ ewald3_21_xy_sigma_long_end_point_loop:
free(err_c);
free(target_c);
for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
target[y] *= commonfac;
if(err)
for(qpms_y_t y = 0; y < nelem_sc; ++y)
for(qpms_y_sc_t y = 0; y < nelem_sc; ++y)
err[y] *= commonfac;
return 0;
}
@ -562,7 +562,7 @@ int ewald3_1_z_sigma_long (
assert(particle_shift.x == 0 && particle_shift.y == 0);
const double beta_z = beta.z;
const double particle_shift_z = particle_shift_z;
const qpms_y_t nelem_sc = c->nelem_sc;
const qpms_y_sc_t nelem_sc = c->nelem_sc;
const qpms_l_t lMax = c->lMax;
// Manual init of the ewald summation targets
@ -613,7 +613,7 @@ int ewald3_1_z_sigma_long (
// TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
// and just fetched for each n
for(qpms_l_t n = 0; n <= lMax; ++n) {
const qpms_y_t y = qpms_mn2y_sc(0, n);
const qpms_y_sc_t y = qpms_mn2y_sc(0, n);
complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
for(qpms_l_t j = 0; j <= n/2; ++j) {
@ -647,10 +647,10 @@ int ewald3_1_z_sigma_long (
free(err_c);
free(target_c);
for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
target[y] *= commonfac;
if(err)
for(qpms_y_t y = 0; y < nelem_sc; ++y)
for(qpms_y_sc_t y = 0; y < nelem_sc; ++y)
err[y] *= commonfac;
return 0;
}
@ -793,7 +793,7 @@ int ewald3_sigma_short(
{
const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
const double kreal = creal(k);
const qpms_y_t nelem_sc = c->nelem_sc;
const qpms_y_sc_t nelem_sc = c->nelem_sc;
const qpms_l_t lMax = c->lMax;
gsl_integration_workspace *workspace =
gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);
@ -930,7 +930,7 @@ int ewald3_sigma_short(
#ifdef EWALD_AUTO_CUTOFF
++li;
double cursum_min = INFINITY;
for (qpms_y_t y = 0; y < nelem_sc; ++y) {
for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) {
const double a = cabs(target[y]);
if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
cursum_min = MIN(cursum_min, a);

View File

@ -47,7 +47,7 @@ gsl_error_handler_t IgnoreUnderflowsGSLErrorHandler;
*/
typedef struct qpms_ewald3_constants_t {
qpms_l_t lMax;
qpms_y_t nelem_sc;
qpms_y_sc_t nelem_sc;
/// The values of maximum \a j's in the long-range part summation, `[(l-|m|/2)]`.
qpms_l_t *s1_jMaxes;
/// The constant factors for the long range part of a 2D Ewald sum.

View File

@ -1,5 +1,28 @@
/*! \file indexing.h
* \brief Various index conversion functions.
*
* Index conventions in QPMS
* -------------------------
*
* Depending on circumstances (e.g. whether scalar or vector spherical wavefunctions
* are being used), QPMS uses two mappings between the multipole degree/order index
* pair (\a l, \a m), due to historical reasons ocassionally also noted as (\a n, \a m),
* and a flat non-negative integer index.
*
* Both mappings map the (\a l, \a m) pair in lexicographic order (with
* the basic constraint \f$ |m| \le l \f$), the difference is that the "scalar"
* mapping starts from \a l = 0, whereas the "vector" mapping starts from \a l = 1,
* so that for the zeroth elements we have
* * (0, 0) 0, (1, -1) 1 in the "scalar" mapping,
* * (1, -1) 0 in the "vector" mapping.
*
* The vector SWF flat index is typically denoted by `y` and its type is \ref qpms_y_t,
* whereas * the scalar SWF flat index is typically denoted by `y_sc` and its
* type is \ref qpms_y_sc_t.
*
* Moreover, there is another mapping that encodes also the electric/magnetic/scalar
* WSWF type index, see \ref qpms_uvswfi_t.
*
*/
#ifndef QPMS_INDEXING_H
#define QPMS_INDEXING_H
@ -8,50 +31,64 @@
#include <math.h>
#include "optim.h"
static inline qpms_y_t qpms_mn2y(qpms_m_t m, qpms_l_t n) {
/// Mapping from multipole degree, order to flat index (degree ≥ 1)
static inline qpms_y_t qpms_mn2y(
qpms_m_t m, ///< multipole order
qpms_l_t n ///< multipole degree (a.k.a. \a l); must be greater than 0
) {
return n * (n + 1) + m - 1;
}
/// Mapping from flat index to multipole degree (degree ≥ 1)
static inline qpms_lm_t qpms_y2n(qpms_y_t y) {
//return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want
return sqrt(y+1);
}
/// Mapping from flat index, multipole degree (≥ 1) to multipole order
static inline qpms_m_t qpms_yn2m(qpms_y_t y, qpms_l_t n) {
return y-qpms_mn2y(0,n);
}
/// Mapping from flat index to multipole degree, order (degree ≥ 1)
static inline void qpms_y2mn_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){
*m=qpms_yn2m(y,*n=qpms_y2n(y));
}
/// Number of spherical multipoles with `lmax` ≥ degree ≥ 1
static inline qpms_y_t qpms_lMax2nelem(qpms_l_t lmax){
return lmax * ((qpms_y_t)lmax + 2);
}
// Scalar versions: they have a place for the 0, 0 term in the beginning
static inline qpms_y_t qpms_mn2y_sc(qpms_m_t m, qpms_l_t n) {
/// Mapping from multipole degree, order to flat index (degree ≥ 0)
static inline qpms_y_sc_t qpms_mn2y_sc(
qpms_m_t m, ///< multipole order
qpms_l_t n ///< multipole degree (a.k.a. \a l); muste be greater or equal than 0
) {
return n * (n + 1) + m;
}
static inline qpms_lm_t qpms_y2n_sc(qpms_y_t y) {
/// Mapping from flat index to multipole degree (degree ≥ 0)
static inline qpms_lm_t qpms_y2n_sc(qpms_y_sc_t y) {
//return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want
return sqrt(y);
}
static inline qpms_m_t qpms_yn2m_sc(qpms_y_t y, qpms_l_t n) {
/// Mapping from flat index, multipole degree (≥ 0) to multipole order
static inline qpms_m_t qpms_yn2m_sc(qpms_y_sc_t y, qpms_l_t n) {
return y-qpms_mn2y_sc(0,n);
}
static inline void qpms_y2mn_sc_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){
/// Mapping from flat index to multipole degree, order (degree ≥ 0)
static inline void qpms_y2mn_sc_p(qpms_y_sc_t y, qpms_m_t *m, qpms_l_t *n){
*m=qpms_yn2m_sc(y,*n=qpms_y2n_sc(y));
}
static inline qpms_y_t qpms_lMax2nelem_sc(qpms_l_t lmax){
return lmax * ((qpms_y_t)lmax + 2) + 1;
/// Number of spherical multipoles with `lmax` ≥ degree ≥ 0
static inline qpms_y_sc_t qpms_lMax2nelem_sc(qpms_l_t lmax){
return lmax * ((qpms_y_sc_t)lmax + 2) + 1;
}
// TODO maybe enable crashing / validity control by macro definitions...

View File

@ -825,7 +825,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
)
{
const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
//const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
const bool doerr = Aerr || Berr;
const bool do_sigma0 = (particle_shift == 0)//DIFF21((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
@ -846,15 +846,15 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
QPMS_ENSURE_SUCCESS(ewald31z_sigma_short_points_and_shift(sigmas_short, serr_short, //DIFF21
c->e3c, eta, k, nRpoints, Rpoints, beta, particle_shift));
for(qpms_y_t y = 0; y < nelem2_sc; ++y)
for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
sigmas_total[y] = sigmas_short[y] + sigmas_long[y];
if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
serr_total[y] = serr_short[y] + serr_long[y];
complex double sigma0 = 0; double sigma0_err = 0;
if (do_sigma0) {
QPMS_ENSURE_SUCCESS(ewald31z_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k));
const qpms_l_t y = qpms_mn2y_sc(0,0);
const qpms_y_sc_t y = qpms_mn2y_sc(0,0);
sigmas_total[y] += sigma0;
if(doerr) serr_total[y] += sigma0_err;
}
@ -872,7 +872,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
// TODO skip if ... (N.B. skip will be different for 31z and 32)
for(qpms_l_t q = 0; q <= qmax; ++q) {
const qpms_l_t p = n + nu - 2*q;
const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p);
const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p);
const complex double multiplier = c->A_multipliers[i][q];
complex double sigma = sigmas_total[y_sc];
ckahanadd(&Asum, &Asumc, multiplier * sigma);
@ -887,7 +887,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
for(qpms_l_t q = 0; q <= qmax; ++q) {
const qpms_l_t p_ = n + nu - 2*q + 1;
const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_);
const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_);
const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
complex double sigma = sigmas_total[y_sc];
ckahanadd(&Bsum, &Bsumc, multiplier * sigma);
@ -937,7 +937,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
)
{
const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
//const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
const bool doerr = Aerr || Berr;
const bool do_sigma0 = ((particle_shift.x == 0) && (particle_shift.y == 0) && (particle_shift.z == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
@ -1006,17 +1006,17 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
PGen_destroy(&Rgen);
}
for(qpms_y_t y = 0; y < nelem2_sc; ++y)
for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
sigmas_total[y] = ((parts & QPMS_EWALD_SHORT_RANGE) ? sigmas_short[y] : 0)
+ ((parts & QPMS_EWALD_LONG_RANGE) ? sigmas_long[y] : 0);
if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
serr_total[y] = ((parts & QPMS_EWALD_SHORT_RANGE) ? serr_short[y] : 0)
+ ((parts & QPMS_EWALD_LONG_RANGE) ? serr_long[y] : 0);
complex double sigma0 = 0; double sigma0_err = 0;
if (do_sigma0 && (parts & QPMS_EWALD_0TERM)) {
QPMS_ENSURE_SUCCESS(ewald3_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k));
const qpms_l_t y = qpms_mn2y_sc(0,0);
const qpms_y_sc_t y = qpms_mn2y_sc(0,0);
sigmas_total[y] += sigma0;
if(doerr) serr_total[y] += sigma0_err;
}
@ -1034,7 +1034,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
// TODO skip if ...
for(qpms_l_t q = 0; q <= qmax; ++q) {
const qpms_l_t p = n + nu - 2*q;
const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p);
const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p);
const complex double multiplier = c->A_multipliers[i][q];
complex double sigma = sigmas_total[y_sc];
ckahanadd(&Asum, &Asumc, multiplier * sigma);
@ -1049,7 +1049,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
for(qpms_l_t q = 0; q <= qmax; ++q) {
const qpms_l_t p_ = n + nu - 2*q + 1;
const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_);
const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_);
const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
complex double sigma = sigmas_total[y_sc];
ckahanadd(&Bsum, &Bsumc, multiplier * sigma);