Indexing documentation, pedantic use of qpms_y_sc_t
This commit is contained in:
parent
f9081c5922
commit
10d3beca1c
28
qpms/ewald.c
28
qpms/ewald.c
|
@ -136,7 +136,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
|
|||
|
||||
c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
|
||||
size_t s1_constfacs_sz_cumsum = 0;
|
||||
for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
|
||||
for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
|
||||
qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
|
||||
if ((m + n) % 2 == 0) {
|
||||
c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
|
||||
|
@ -172,7 +172,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
|
|||
c->S1_constfacs = malloc((1+c->nelem_sc) * sizeof(complex double *));
|
||||
//determine sizes
|
||||
size_t S1_constfacs_sz = 0;
|
||||
for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
|
||||
for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
|
||||
qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
|
||||
const qpms_l_t L_M = n - abs(m);
|
||||
for(qpms_l_t j = 0; j <= L_M; ++j) { // outer sum
|
||||
|
@ -187,7 +187,7 @@ qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, cons
|
|||
|
||||
c->S1_constfacs_base = malloc(S1_constfacs_sz * sizeof(complex double));
|
||||
size_t S1_constfacs_sz_cumsum = 0; // second count
|
||||
for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
|
||||
for (qpms_y_sc_t y = 0; y < c->nelem_sc; ++y) {
|
||||
qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
|
||||
const complex double yfactor = -2 * ipow(n+1) * M_SQRTPI
|
||||
* factorial((n-m)/2) * factorial((n+m)/2);
|
||||
|
@ -314,7 +314,7 @@ int ewald3_21_xy_sigma_long (
|
|||
const bool k_is_real = (cimag(k) == 0);
|
||||
assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
|
||||
assert((latdim & LAT1D) || (latdim & LAT2D));
|
||||
const qpms_y_t nelem_sc = c->nelem_sc;
|
||||
const qpms_y_sc_t nelem_sc = c->nelem_sc;
|
||||
assert(nelem_sc > 0);
|
||||
const qpms_l_t lMax = c->lMax;
|
||||
|
||||
|
@ -446,7 +446,7 @@ int ewald3_21_xy_sigma_long (
|
|||
for(qpms_m_t m = -n; m <= n; ++m) {
|
||||
if((particle_shift.z == 0) && ((m+n) % 2 != 0)) // odd coefficients are zero.
|
||||
continue;
|
||||
const qpms_y_t y = qpms_mn2y_sc(m, n);
|
||||
const qpms_y_sc_t y = qpms_mn2y_sc(m, n);
|
||||
size_t constidx = 0; // constants offset
|
||||
const complex double e_imalpha_pq = cexp(I*m*arg_pq);
|
||||
complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
|
||||
|
@ -509,7 +509,7 @@ int ewald3_21_xy_sigma_long (
|
|||
#ifdef EWALD_AUTO_CUTOFF
|
||||
++li;
|
||||
double cursum_min = INFINITY;
|
||||
for (qpms_y_t y = 0; y < nelem_sc; ++y) {
|
||||
for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) {
|
||||
const double a = cabs(target[y]);
|
||||
if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
|
||||
cursum_min = MIN(cursum_min, a);
|
||||
|
@ -529,10 +529,10 @@ ewald3_21_xy_sigma_long_end_point_loop:
|
|||
|
||||
free(err_c);
|
||||
free(target_c);
|
||||
for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
|
||||
for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
|
||||
target[y] *= commonfac;
|
||||
if(err)
|
||||
for(qpms_y_t y = 0; y < nelem_sc; ++y)
|
||||
for(qpms_y_sc_t y = 0; y < nelem_sc; ++y)
|
||||
err[y] *= commonfac;
|
||||
return 0;
|
||||
}
|
||||
|
@ -562,7 +562,7 @@ int ewald3_1_z_sigma_long (
|
|||
assert(particle_shift.x == 0 && particle_shift.y == 0);
|
||||
const double beta_z = beta.z;
|
||||
const double particle_shift_z = particle_shift_z;
|
||||
const qpms_y_t nelem_sc = c->nelem_sc;
|
||||
const qpms_y_sc_t nelem_sc = c->nelem_sc;
|
||||
const qpms_l_t lMax = c->lMax;
|
||||
|
||||
// Manual init of the ewald summation targets
|
||||
|
@ -613,7 +613,7 @@ int ewald3_1_z_sigma_long (
|
|||
// TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
|
||||
// and just fetched for each n
|
||||
for(qpms_l_t n = 0; n <= lMax; ++n) {
|
||||
const qpms_y_t y = qpms_mn2y_sc(0, n);
|
||||
const qpms_y_sc_t y = qpms_mn2y_sc(0, n);
|
||||
complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
|
||||
double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
|
||||
for(qpms_l_t j = 0; j <= n/2; ++j) {
|
||||
|
@ -647,10 +647,10 @@ int ewald3_1_z_sigma_long (
|
|||
|
||||
free(err_c);
|
||||
free(target_c);
|
||||
for(qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
|
||||
for(qpms_y_sc_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
|
||||
target[y] *= commonfac;
|
||||
if(err)
|
||||
for(qpms_y_t y = 0; y < nelem_sc; ++y)
|
||||
for(qpms_y_sc_t y = 0; y < nelem_sc; ++y)
|
||||
err[y] *= commonfac;
|
||||
return 0;
|
||||
}
|
||||
|
@ -793,7 +793,7 @@ int ewald3_sigma_short(
|
|||
{
|
||||
const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
|
||||
const double kreal = creal(k);
|
||||
const qpms_y_t nelem_sc = c->nelem_sc;
|
||||
const qpms_y_sc_t nelem_sc = c->nelem_sc;
|
||||
const qpms_l_t lMax = c->lMax;
|
||||
gsl_integration_workspace *workspace =
|
||||
gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);
|
||||
|
@ -930,7 +930,7 @@ int ewald3_sigma_short(
|
|||
#ifdef EWALD_AUTO_CUTOFF
|
||||
++li;
|
||||
double cursum_min = INFINITY;
|
||||
for (qpms_y_t y = 0; y < nelem_sc; ++y) {
|
||||
for (qpms_y_sc_t y = 0; y < nelem_sc; ++y) {
|
||||
const double a = cabs(target[y]);
|
||||
if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
|
||||
cursum_min = MIN(cursum_min, a);
|
||||
|
|
|
@ -47,7 +47,7 @@ gsl_error_handler_t IgnoreUnderflowsGSLErrorHandler;
|
|||
*/
|
||||
typedef struct qpms_ewald3_constants_t {
|
||||
qpms_l_t lMax;
|
||||
qpms_y_t nelem_sc;
|
||||
qpms_y_sc_t nelem_sc;
|
||||
/// The values of maximum \a j's in the long-range part summation, `[(l-|m|/2)]`.
|
||||
qpms_l_t *s1_jMaxes;
|
||||
/// The constant factors for the long range part of a 2D Ewald sum.
|
||||
|
|
|
@ -1,5 +1,28 @@
|
|||
/*! \file indexing.h
|
||||
* \brief Various index conversion functions.
|
||||
*
|
||||
* Index conventions in QPMS
|
||||
* -------------------------
|
||||
*
|
||||
* Depending on circumstances (e.g. whether scalar or vector spherical wavefunctions
|
||||
* are being used), QPMS uses two mappings between the multipole degree/order index
|
||||
* pair (\a l, \a m), due to historical reasons ocassionally also noted as (\a n, \a m),
|
||||
* and a flat non-negative integer index.
|
||||
*
|
||||
* Both mappings map the (\a l, \a m) pair in lexicographic order (with
|
||||
* the basic constraint \f$ |m| \le l \f$), the difference is that the "scalar"
|
||||
* mapping starts from \a l = 0, whereas the "vector" mapping starts from \a l = 1,
|
||||
* so that for the zeroth elements we have
|
||||
* * (0, 0) ↔ 0, (1, -1) ↔ 1 in the "scalar" mapping,
|
||||
* * (1, -1) ↔ 0 in the "vector" mapping.
|
||||
*
|
||||
* The vector SWF flat index is typically denoted by `y` and its type is \ref qpms_y_t,
|
||||
* whereas * the scalar SWF flat index is typically denoted by `y_sc` and its
|
||||
* type is \ref qpms_y_sc_t.
|
||||
*
|
||||
* Moreover, there is another mapping that encodes also the electric/magnetic/scalar
|
||||
* WSWF type index, see \ref qpms_uvswfi_t.
|
||||
*
|
||||
*/
|
||||
#ifndef QPMS_INDEXING_H
|
||||
#define QPMS_INDEXING_H
|
||||
|
@ -8,50 +31,64 @@
|
|||
#include <math.h>
|
||||
#include "optim.h"
|
||||
|
||||
|
||||
|
||||
static inline qpms_y_t qpms_mn2y(qpms_m_t m, qpms_l_t n) {
|
||||
/// Mapping from multipole degree, order to flat index (degree ≥ 1)
|
||||
static inline qpms_y_t qpms_mn2y(
|
||||
qpms_m_t m, ///< multipole order
|
||||
qpms_l_t n ///< multipole degree (a.k.a. \a l); must be greater than 0
|
||||
) {
|
||||
return n * (n + 1) + m - 1;
|
||||
}
|
||||
|
||||
/// Mapping from flat index to multipole degree (degree ≥ 1)
|
||||
static inline qpms_lm_t qpms_y2n(qpms_y_t y) {
|
||||
//return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want
|
||||
return sqrt(y+1);
|
||||
}
|
||||
|
||||
/// Mapping from flat index, multipole degree (≥ 1) to multipole order
|
||||
static inline qpms_m_t qpms_yn2m(qpms_y_t y, qpms_l_t n) {
|
||||
return y-qpms_mn2y(0,n);
|
||||
}
|
||||
|
||||
/// Mapping from flat index to multipole degree, order (degree ≥ 1)
|
||||
static inline void qpms_y2mn_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){
|
||||
*m=qpms_yn2m(y,*n=qpms_y2n(y));
|
||||
}
|
||||
|
||||
/// Number of spherical multipoles with `lmax` ≥ degree ≥ 1
|
||||
static inline qpms_y_t qpms_lMax2nelem(qpms_l_t lmax){
|
||||
return lmax * ((qpms_y_t)lmax + 2);
|
||||
}
|
||||
|
||||
// Scalar versions: they have a place for the 0, 0 term in the beginning
|
||||
|
||||
static inline qpms_y_t qpms_mn2y_sc(qpms_m_t m, qpms_l_t n) {
|
||||
/// Mapping from multipole degree, order to flat index (degree ≥ 0)
|
||||
static inline qpms_y_sc_t qpms_mn2y_sc(
|
||||
qpms_m_t m, ///< multipole order
|
||||
qpms_l_t n ///< multipole degree (a.k.a. \a l); muste be greater or equal than 0
|
||||
) {
|
||||
return n * (n + 1) + m;
|
||||
}
|
||||
|
||||
static inline qpms_lm_t qpms_y2n_sc(qpms_y_t y) {
|
||||
/// Mapping from flat index to multipole degree (degree ≥ 0)
|
||||
static inline qpms_lm_t qpms_y2n_sc(qpms_y_sc_t y) {
|
||||
//return (sqrt(5+y)-2)/2; // the cast will truncate the fractional part, which is what we want
|
||||
return sqrt(y);
|
||||
}
|
||||
|
||||
static inline qpms_m_t qpms_yn2m_sc(qpms_y_t y, qpms_l_t n) {
|
||||
/// Mapping from flat index, multipole degree (≥ 0) to multipole order
|
||||
static inline qpms_m_t qpms_yn2m_sc(qpms_y_sc_t y, qpms_l_t n) {
|
||||
return y-qpms_mn2y_sc(0,n);
|
||||
}
|
||||
|
||||
static inline void qpms_y2mn_sc_p(qpms_y_t y, qpms_m_t *m, qpms_l_t *n){
|
||||
/// Mapping from flat index to multipole degree, order (degree ≥ 0)
|
||||
static inline void qpms_y2mn_sc_p(qpms_y_sc_t y, qpms_m_t *m, qpms_l_t *n){
|
||||
*m=qpms_yn2m_sc(y,*n=qpms_y2n_sc(y));
|
||||
}
|
||||
|
||||
static inline qpms_y_t qpms_lMax2nelem_sc(qpms_l_t lmax){
|
||||
return lmax * ((qpms_y_t)lmax + 2) + 1;
|
||||
/// Number of spherical multipoles with `lmax` ≥ degree ≥ 0
|
||||
static inline qpms_y_sc_t qpms_lMax2nelem_sc(qpms_l_t lmax){
|
||||
return lmax * ((qpms_y_sc_t)lmax + 2) + 1;
|
||||
}
|
||||
|
||||
// TODO maybe enable crashing / validity control by macro definitions...
|
||||
|
|
|
@ -825,7 +825,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
|
|||
)
|
||||
{
|
||||
|
||||
const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
|
||||
const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
|
||||
//const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
|
||||
const bool doerr = Aerr || Berr;
|
||||
const bool do_sigma0 = (particle_shift == 0)//DIFF21((particle_shift.x == 0) && (particle_shift.y == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
|
||||
|
@ -846,15 +846,15 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
|
|||
QPMS_ENSURE_SUCCESS(ewald31z_sigma_short_points_and_shift(sigmas_short, serr_short, //DIFF21
|
||||
c->e3c, eta, k, nRpoints, Rpoints, beta, particle_shift));
|
||||
|
||||
for(qpms_y_t y = 0; y < nelem2_sc; ++y)
|
||||
for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
|
||||
sigmas_total[y] = sigmas_short[y] + sigmas_long[y];
|
||||
if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
|
||||
if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
|
||||
serr_total[y] = serr_short[y] + serr_long[y];
|
||||
|
||||
complex double sigma0 = 0; double sigma0_err = 0;
|
||||
if (do_sigma0) {
|
||||
QPMS_ENSURE_SUCCESS(ewald31z_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k));
|
||||
const qpms_l_t y = qpms_mn2y_sc(0,0);
|
||||
const qpms_y_sc_t y = qpms_mn2y_sc(0,0);
|
||||
sigmas_total[y] += sigma0;
|
||||
if(doerr) serr_total[y] += sigma0_err;
|
||||
}
|
||||
|
@ -872,7 +872,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
|
|||
// TODO skip if ... (N.B. skip will be different for 31z and 32)
|
||||
for(qpms_l_t q = 0; q <= qmax; ++q) {
|
||||
const qpms_l_t p = n + nu - 2*q;
|
||||
const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p);
|
||||
const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p);
|
||||
const complex double multiplier = c->A_multipliers[i][q];
|
||||
complex double sigma = sigmas_total[y_sc];
|
||||
ckahanadd(&Asum, &Asumc, multiplier * sigma);
|
||||
|
@ -887,7 +887,7 @@ int qpms_trans_calculator_get_AB_arrays_e31z_both_points_and_shift(const qpms_tr
|
|||
double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
|
||||
for(qpms_l_t q = 0; q <= qmax; ++q) {
|
||||
const qpms_l_t p_ = n + nu - 2*q + 1;
|
||||
const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_);
|
||||
const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_);
|
||||
const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
|
||||
complex double sigma = sigmas_total[y_sc];
|
||||
ckahanadd(&Bsum, &Bsumc, multiplier * sigma);
|
||||
|
@ -937,7 +937,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
|
|||
)
|
||||
{
|
||||
|
||||
const qpms_y_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
|
||||
const qpms_y_sc_t nelem2_sc = qpms_lMax2nelem_sc(c->e3c->lMax);
|
||||
//const qpms_y_t nelem = qpms_lMax2nelem(c->lMax);
|
||||
const bool doerr = Aerr || Berr;
|
||||
const bool do_sigma0 = ((particle_shift.x == 0) && (particle_shift.y == 0) && (particle_shift.z == 0)); // FIXME ignoring the case where particle_shift equals to lattice vector
|
||||
|
@ -1006,17 +1006,17 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
|
|||
PGen_destroy(&Rgen);
|
||||
}
|
||||
|
||||
for(qpms_y_t y = 0; y < nelem2_sc; ++y)
|
||||
for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
|
||||
sigmas_total[y] = ((parts & QPMS_EWALD_SHORT_RANGE) ? sigmas_short[y] : 0)
|
||||
+ ((parts & QPMS_EWALD_LONG_RANGE) ? sigmas_long[y] : 0);
|
||||
if (doerr) for(qpms_y_t y = 0; y < nelem2_sc; ++y)
|
||||
if (doerr) for(qpms_y_sc_t y = 0; y < nelem2_sc; ++y)
|
||||
serr_total[y] = ((parts & QPMS_EWALD_SHORT_RANGE) ? serr_short[y] : 0)
|
||||
+ ((parts & QPMS_EWALD_LONG_RANGE) ? serr_long[y] : 0);
|
||||
|
||||
complex double sigma0 = 0; double sigma0_err = 0;
|
||||
if (do_sigma0 && (parts & QPMS_EWALD_0TERM)) {
|
||||
QPMS_ENSURE_SUCCESS(ewald3_sigma0(&sigma0, &sigma0_err, c->e3c, eta, k));
|
||||
const qpms_l_t y = qpms_mn2y_sc(0,0);
|
||||
const qpms_y_sc_t y = qpms_mn2y_sc(0,0);
|
||||
sigmas_total[y] += sigma0;
|
||||
if(doerr) serr_total[y] += sigma0_err;
|
||||
}
|
||||
|
@ -1034,7 +1034,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
|
|||
// TODO skip if ...
|
||||
for(qpms_l_t q = 0; q <= qmax; ++q) {
|
||||
const qpms_l_t p = n + nu - 2*q;
|
||||
const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p);
|
||||
const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p);
|
||||
const complex double multiplier = c->A_multipliers[i][q];
|
||||
complex double sigma = sigmas_total[y_sc];
|
||||
ckahanadd(&Asum, &Asumc, multiplier * sigma);
|
||||
|
@ -1049,7 +1049,7 @@ int qpms_trans_calculator_get_AB_arrays_e32_e(const qpms_trans_calculator *c,
|
|||
double Bsumerr, Bsumerrc; if(Berr) kahaninit(&Bsumerr, &Bsumerrc);
|
||||
for(qpms_l_t q = 0; q <= qmax; ++q) {
|
||||
const qpms_l_t p_ = n + nu - 2*q + 1;
|
||||
const qpms_y_t y_sc = qpms_mn2y_sc(mu_m, p_);
|
||||
const qpms_y_sc_t y_sc = qpms_mn2y_sc(mu_m, p_);
|
||||
const complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
|
||||
complex double sigma = sigmas_total[y_sc];
|
||||
ckahanadd(&Bsum, &Bsumc, multiplier * sigma);
|
||||
|
|
Loading…
Reference in New Issue