Build modeproblem matrix full w cached Bessels

Former-commit-id: 4cfd631317511ee8765f4a98a179f6295e4142c9
This commit is contained in:
Marek Nečada 2020-01-23 20:18:26 +02:00
parent 2f03fc58b4
commit 1f63d2b529
4 changed files with 137 additions and 30 deletions

View File

@ -15,7 +15,7 @@ add_library (qpms SHARED translations.c tmatrices.c vecprint.c vswf.c wigner.c e
ewaldsf.c pointgroups.c latticegens.c ewaldsf.c pointgroups.c latticegens.c
lattices2d.c gaunt.c error.c legendre.c symmetries.c vecprint.c lattices2d.c gaunt.c error.c legendre.c symmetries.c vecprint.c
bessel.c own_zgemm.c parsing.c scatsystem.c materials.c drudeparam_data.c bessel.c own_zgemm.c parsing.c scatsystem.c materials.c drudeparam_data.c
lll.c beyn.c lll.c beyn.c scatsys_translation_booster.c
) )
use_c99() use_c99()
@ -31,7 +31,7 @@ target_include_directories (qpms PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_options(qpms PRIVATE -Wall -Wno-return-type -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label) target_compile_options(qpms PRIVATE -Wall -Wno-return-type -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label)
target_compile_definitions(qpms PRIVATE LATTICESUMS32 QPMS_VECTORS_NICE_TRANSFORMATIONS target_compile_definitions(qpms PRIVATE LATTICESUMS32 QPMS_VECTORS_NICE_TRANSFORMATIONS
EWALD_AUTO_CUTOFF EWALD_AUTO_CUTOFF QPMS_EVALUATE_PARANOID_ASSERTS
) )
install(TARGETS qpms install(TARGETS qpms

View File

@ -1,9 +1,15 @@
// Functionality to speedup translation matrix computations in large finite arrays // Functionality to speedup translation matrix computations in large finite arrays
// by caching Bessel function values etc. // by caching Bessel function values etc.
#include <cblas.h>
#include "scatsystem.h" #include "scatsystem.h"
#include "translations_inlines.h"
#include <assert.h> #include <assert.h>
#include <stdlib.h> #include <stdlib.h>
#include <math.h>
#include <string.h>
#include "vectors.h"
#include "qpms_error.h" #include "qpms_error.h"
#include "qpms_specfunc.h"
#define SQ(x) ((x)*(x)) #define SQ(x) ((x)*(x))
#define MAX(x, y) (((x) > (y)) ? (x) : (y)) #define MAX(x, y) (((x) > (y)) ? (x) : (y))
@ -44,21 +50,16 @@ static inline ppid_t pairarr_len(qpms_ss_pi_t pn_total) {
} }
typedef struct qpms_scatsys_translation_booster { typedef struct qpms_scatsys_translation_booster {
double *r; // Unique distances array, indices are ppid_t double *r; // Unique distances array, indices are ppid_t
qpms_l_t *lMax_r; // lMaxes associated with the r's (use same indices as with r) qpms_l_t *lMax_r; // lMaxes associated with the r's (use same indices as with r)
size_t r_count; // Number of different interparticle distances (length of r[]) size_t r_count; // Number of different interparticle distances (length of r[])
size_t *r_map; // maps pairs to the corresponding distances (index of uoppid_t type) size_t *r_map; // maps pairs to the corresponding distances (index of uoppid_t type)
// Offsets of the Bessel function values: bessel_offsets_r[i] has the cumulative sums // Offsets of the Bessel function values: bessel_offsets_r[i] has the cumulative sums
// of 2*lMax_r[j]+2, j < i. bessel_offsets_r[r_count] is the total length of the // of 2*lMax_r[j]+2, j < i. bessel_offsets_r[r_count] is the total length of the
// Bessel function "cache" // Bessel function "cache"
size_t *bessel_offsets_r; size_t *bessel_offsets_r;
} booster_t; } booster_t;
struct uoppid_r_pair {
double r;
uoppid_t id;
}
/// Sorts an array and throws away duplicit elements. /// Sorts an array and throws away duplicit elements.
/** /**
* The unique elements (according to the compare function) * The unique elements (according to the compare function)
@ -67,9 +68,9 @@ struct uoppid_r_pair {
* \returns Number of kept unique array members * \returns Number of kept unique array members
*/ */
static size_t sort_and_eliminate(void *base, size_t nmemb, size_t size, static size_t sort_and_eliminate(void *base, size_t nmemb, size_t size,
int (*compar)(const void *, const void *)) { int (*compar)(const void *, const void *)) {
if (nmemb = 0) return 0; // corner case if (nmemb == 0) return 0; // corner case
qsort(base, nmemb, size, compar); qsort(base, nmemb, size, compar);
size_t left = 0; size_t left = 0;
for (size_t src = 1; src < nmemb; ++src) { for (size_t src = 1; src < nmemb; ++src) {
assert(left <= src); assert(left <= src);
@ -79,7 +80,7 @@ static size_t sort_and_eliminate(void *base, size_t nmemb, size_t size,
memcpy((char *)base + left*size, (char *)base + src*size, size); memcpy((char *)base + left*size, (char *)base + src*size, size);
} }
} }
return left + 1; return left + 1;
} }
static int cmp_double(const void *aa, const void *bb) { static int cmp_double(const void *aa, const void *bb) {
@ -92,31 +93,31 @@ static int cmp_double(const void *aa, const void *bb) {
} }
booster_t *qpms_scatsys_translation_booster_create( booster_t *qpms_scatsys_translation_booster_create(
const qpms_scatsys_ss *ss) { const qpms_scatsys_t *ss) {
const qpms_ss_pi_t np = ss->p_count; const qpms_ss_pi_t np = ss->p_count;
booster_t *b; booster_t *b;
QPMS_CRASHING_MALLOC(b, sizeof(*b)); QPMS_CRASHING_MALLOC(b, sizeof(*b));
QPMS_CRASHING_MALLOC(b->r, sizeof(double) * uopairarr_len(np)); QPMS_CRASHING_MALLOC(b->r, sizeof(double) * uopairarr_len(np));
for(qpms_ss_pi_t i = 0; i < np; ++i) for(qpms_ss_pi_t i = 0; i < np; ++i)
for(qpms_ss_pi_t j = 0; j < i; ++j) for(qpms_ss_pi_t j = 0; j < i; ++j)
b->r[uopairid(pn, i, j)] = cart3_dist(ss->p[i].pos, ss->p[j].pos); b->r[uopairid(np, i, j)] = cart3_dist(ss->p[i].pos, ss->p[j].pos);
b->r_count = sort_and_eliminate(b->r, uopairrarr_len(np), sizeof(*b->r), cmp_double); b->r_count = sort_and_eliminate(b->r, uopairarr_len(np), sizeof(*b->r), cmp_double);
QPMS_CRASHING_REALLOC(b->r, b->r_count * sizeof(*b->r)); QPMS_CRASHING_REALLOC(b->r, b->r_count * sizeof(*b->r));
QPMS_CRASHING_CALLOC(b->lMax_r, b->r_count, sizeof(*b->lMax_r)); QPMS_CRASHING_CALLOC(b->lMax_r, b->r_count, sizeof(*b->lMax_r));
for(qpms_ss_pi_t i = 0; i < np; ++i) for(qpms_ss_pi_t i = 0; i < np; ++i)
for(qpms_ss_pi_t j = 0; j < i; ++j) { for(qpms_ss_pi_t j = 0; j < i; ++j) {
const uoppid_t pid = uopairid(pn, i, j); const uoppid_t pid = uopairid(np, i, j);
const double r = cart3_dist(ss->p[i].pos, ss->p[j].pos); const double r = cart3_dist(ss->p[i].pos, ss->p[j].pos);
double *rhit = bsearch(&r, b->r, b->r_count, sizeof(*b->r), cmp_double); double *rhit = bsearch(&r, b->r, b->r_count, sizeof(*b->r), cmp_double);
QPMS_ASSERT(rhit != NULL); QPMS_ASSERT(rhit != NULL);
QPMS_ASSERT(rhit >= b->r); QPMS_ASSERT(rhit >= b->r);
const size_t ri = b->r_map[pid] = r_hit - b->r; const size_t ri = b->r_map[pid] = rhit - b->r;
b->lMax_r[ri] = MAX(b->lMax_r[ri], b->lMax_r[ri] = MAX(b->lMax_r[ri],
MAX(qpms_ss_bspec_pi(ss, i)->lMax, qpms_ss_bspec_pi(ss, j)->lMax)); MAX(qpms_ss_bspec_pi(ss, i)->lMax, qpms_ss_bspec_pi(ss, j)->lMax));
} }
QPMS_CRASHING_MALLOC(b->bessel_offset_r, (b->r_count + 1) * sizeof(*b->bessel_offset_r)); QPMS_CRASHING_MALLOC(b->bessel_offsets_r, (b->r_count + 1) * sizeof(*b->bessel_offsets_r));
size_t bessel_offset = 0; size_t bessel_offset = 0;
for(size_t ri = 0; ri < b->r_count; ++ri) { for(size_t ri = 0; ri < b->r_count; ++ri) {
b->bessel_offsets_r[ri] = bessel_offset; b->bessel_offsets_r[ri] = bessel_offset;
@ -134,6 +135,7 @@ static qpms_errno_t qpms_scatsys_translation_booster_eval_bessels(
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // Is there a case for different J? QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // Is there a case for different J?
2*b->lMax_r[ri]+1, k * b->r[ri], 2*b->lMax_r[ri]+1, k * b->r[ri],
target + b->bessel_offsets_r[ri])); target + b->bessel_offsets_r[ri]));
}
return QPMS_SUCCESS; return QPMS_SUCCESS;
} }
@ -143,4 +145,106 @@ typedef struct qpms_scatsysw_translation_booster {
complex double *bessels; complex double *bessels;
} boosterw_t; } boosterw_t;
boosterw_t *qpms_scatsysw_translation_booster_create(
const qpms_scatsys_at_omega_t *ssw) {
QPMS_PARANOID_ASSERT(ssw->ss);
const booster_t *const b = ssw->ss->tbooster;
QPMS_ASSERT(b);
boosterw_t *bw;
QPMS_CRASHING_MALLOC(bw, sizeof(*bw));
// Evaluate bessel functions
QPMS_CRASHING_MALLOC(bw->bessels, b->bessel_offsets_r[b->r_count] * sizeof(*bw->bessels));
for(size_t ri = 0; ri < b->r_count; ++ri) {
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // is there a case for other J?
b->bessel_offsets_r[ri+1]-b->bessel_offsets_r[ri]-1,
b->r[ri] * ssw->wavenumber,
bw->bessels + b->bessel_offsets_r[ri]));
}
bw->b = b;
return bw;
}
void qpms_scatsysw_translation_booster_free(boosterw_t *bw) {
if (bw) {
free (bw->bessels);
}
free (bw);
}
complex double *qpms_scatsysw_build_modeproblem_matrix_full_boosted(
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
complex double *target,
const qpms_scatsys_at_omega_t *ssw)
{
QPMS_ASSERT(ssw->translation_cache && ssw->ss->tbooster);
const qpms_scatsys_t *const ss = ssw->ss;
const booster_t *const b = ss->tbooster;
const boosterw_t *const bw = ssw->translation_cache;
const qpms_trans_calculator *const c = ss->c;
const complex double k = ssw->wavenumber;
const size_t full_len = ss->fecv_size;
if(!target)
QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double));
complex double *tmp;
QPMS_CRASHING_MALLOC(tmp, SQ(ss->max_bspecn) * sizeof(complex double));
memset(target, 0, SQ(full_len) * sizeof(complex double)); //unnecessary?
double legendre_buf[gsl_sf_legendre_array_n(2*c->lMax + 1)]; //VLA, workspace for legendre arrays
const complex double zero = 0, minusone = -1;
{ // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
size_t fullvec_offsetR = 0;
for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) {
const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[piR].tmatrix_id]->spec;
const cart3_t posR = ss->p[piR].pos;
size_t fullvec_offsetC = 0;
// dest particle T-matrix
const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) {
const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
if(piC != piR) { // The diagonal will be dealt with later.
uoppid_t pid = uopairid(ss->p_count, piC, piR);
const cart3_t posC = ss->p[piC].pos;
const sph_t dlj = cart2sph(cart3_substract(posR, posC));
const size_t ri = b->r_map[pid];
QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]);
const qpms_l_t pair_lMax = b->lMax_r[pid];
const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax);
{ // this replaces qpms_trans_calculator_get_trans_array():
// R is dest, C is src
QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm);
QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax);
QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0);
complex double A[pair_nelem][pair_nelem]; // VLA, TODO flatten
complex double B[pair_nelem][pair_nelem]; // VLA, TODO flatten
{ // this replaces qpms_trans_calculator_get_AB_arrays() and ..._buf()
const double costheta = cos(dlj.theta);
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
2*c->lMax+1, costheta, -1, legendre_buf));
const double * const legendres = legendre_buf;
const complex double * const bessels = bw->bessels + b->bessel_offsets_r[ri];
qpms_trans_calculator_get_AB_arrays_precalcbuf(c, pair_lMax, A[0], B[0],
/*deststride*/ pair_nelem, /*srcstride*/ 1, dlj.phi, bessels, legendres);
qpms_trans_array_from_AB(tmp,// tmp is S(piR<-piC)
bspecR, bspecC->n, bspecC, 1, A[0], B[0], pair_lMax);
}
}
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
&minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
tmp/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
target + fullvec_offsetR*full_len + fullvec_offsetC /*c*/,
full_len /*ldc*/);
}
fullvec_offsetC += bspecC->n;
}
fullvec_offsetR += bspecR->n;
}
}
// diagonal part M[pi,pi] = +1
for (size_t i = 0; i < full_len; ++i) target[full_len * i + i] = +1;
free(tmp);
return target;
}

View File

@ -1110,6 +1110,8 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_full(
const qpms_scatsys_at_omega_t *ssw const qpms_scatsys_at_omega_t *ssw
) )
{ {
if (ssw->translation_cache)
return qpms_scatsysw_build_modeproblem_matrix_full_boosted(target, ssw);
const complex double k = ssw->wavenumber; const complex double k = ssw->wavenumber;
const qpms_scatsys_t *ss = ssw->ss; const qpms_scatsys_t *ss = ssw->ss;
const size_t full_len = ss->fecv_size; const size_t full_len = ss->fecv_size;

View File

@ -134,9 +134,6 @@ typedef struct qpms_ss_derived_tmatrix_t {
struct qpms_trans_calculator; struct qpms_trans_calculator;
struct qpms_scatsys_translation_booster; struct qpms_scatsys_translation_booster;
void qpms_scatsys_translation_booster_free(struct qpms_scatsys_translation_booster *);
struct qpms_scatsys_translation_booster *qpms_scatsys_translation_booster_create(
const qpms_scatsys_ss *ss);
typedef struct qpms_scatsys_t { typedef struct qpms_scatsys_t {
struct qpms_epsmu_generator_t medium; ///< Optical properties of the background medium. struct qpms_epsmu_generator_t medium; ///< Optical properties of the background medium.
@ -551,6 +548,10 @@ struct beyn_result_t *qpms_scatsys_finite_find_eigenmodes(
double res_tol ///< (default: `0.0`) TODO DOC. double res_tol ///< (default: `0.0`) TODO DOC.
); );
void qpms_scatsys_translation_booster_free(struct qpms_scatsys_translation_booster *);
struct qpms_scatsys_translation_booster *qpms_scatsys_translation_booster_create(
const qpms_scatsys_t *ss);
#if 0 #if 0
/// Searches for scattering system's eigenmodes using Beyn's algorithm. /// Searches for scattering system's eigenmodes using Beyn's algorithm.
/** /**