diff --git a/qpms/CMakeLists.txt b/qpms/CMakeLists.txt index 66036a8..7d942ad 100644 --- a/qpms/CMakeLists.txt +++ b/qpms/CMakeLists.txt @@ -15,7 +15,7 @@ add_library (qpms SHARED translations.c tmatrices.c vecprint.c vswf.c wigner.c e ewaldsf.c pointgroups.c latticegens.c lattices2d.c gaunt.c error.c legendre.c symmetries.c vecprint.c bessel.c own_zgemm.c parsing.c scatsystem.c materials.c drudeparam_data.c - lll.c beyn.c + lll.c beyn.c scatsys_translation_booster.c ) use_c99() @@ -31,7 +31,7 @@ target_include_directories (qpms PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_compile_options(qpms PRIVATE -Wall -Wno-return-type -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label) target_compile_definitions(qpms PRIVATE LATTICESUMS32 QPMS_VECTORS_NICE_TRANSFORMATIONS - EWALD_AUTO_CUTOFF + EWALD_AUTO_CUTOFF QPMS_EVALUATE_PARANOID_ASSERTS ) install(TARGETS qpms diff --git a/qpms/scatsys_translation_booster.c b/qpms/scatsys_translation_booster.c index 489590b..fce9e7b 100644 --- a/qpms/scatsys_translation_booster.c +++ b/qpms/scatsys_translation_booster.c @@ -1,9 +1,15 @@ // Functionality to speedup translation matrix computations in large finite arrays // by caching Bessel function values etc. +#include #include "scatsystem.h" +#include "translations_inlines.h" #include #include +#include +#include +#include "vectors.h" #include "qpms_error.h" +#include "qpms_specfunc.h" #define SQ(x) ((x)*(x)) #define MAX(x, y) (((x) > (y)) ? (x) : (y)) @@ -44,21 +50,16 @@ static inline ppid_t pairarr_len(qpms_ss_pi_t pn_total) { } typedef struct qpms_scatsys_translation_booster { - double *r; // Unique distances array, indices are ppid_t - qpms_l_t *lMax_r; // lMaxes associated with the r's (use same indices as with r) - size_t r_count; // Number of different interparticle distances (length of r[]) - size_t *r_map; // maps pairs to the corresponding distances (index of uoppid_t type) - // Offsets of the Bessel function values: bessel_offsets_r[i] has the cumulative sums - // of 2*lMax_r[j]+2, j < i. bessel_offsets_r[r_count] is the total length of the - // Bessel function "cache" - size_t *bessel_offsets_r; + double *r; // Unique distances array, indices are ppid_t + qpms_l_t *lMax_r; // lMaxes associated with the r's (use same indices as with r) + size_t r_count; // Number of different interparticle distances (length of r[]) + size_t *r_map; // maps pairs to the corresponding distances (index of uoppid_t type) + // Offsets of the Bessel function values: bessel_offsets_r[i] has the cumulative sums + // of 2*lMax_r[j]+2, j < i. bessel_offsets_r[r_count] is the total length of the + // Bessel function "cache" + size_t *bessel_offsets_r; } booster_t; -struct uoppid_r_pair { - double r; - uoppid_t id; -} - /// Sorts an array and throws away duplicit elements. /** * The unique elements (according to the compare function) @@ -67,9 +68,9 @@ struct uoppid_r_pair { * \returns Number of kept unique array members */ static size_t sort_and_eliminate(void *base, size_t nmemb, size_t size, - int (*compar)(const void *, const void *)) { - if (nmemb = 0) return 0; // corner case - qsort(base, nmemb, size, compar); + int (*compar)(const void *, const void *)) { + if (nmemb == 0) return 0; // corner case + qsort(base, nmemb, size, compar); size_t left = 0; for (size_t src = 1; src < nmemb; ++src) { assert(left <= src); @@ -79,7 +80,7 @@ static size_t sort_and_eliminate(void *base, size_t nmemb, size_t size, memcpy((char *)base + left*size, (char *)base + src*size, size); } } - return left + 1; + return left + 1; } static int cmp_double(const void *aa, const void *bb) { @@ -92,31 +93,31 @@ static int cmp_double(const void *aa, const void *bb) { } booster_t *qpms_scatsys_translation_booster_create( - const qpms_scatsys_ss *ss) { + const qpms_scatsys_t *ss) { const qpms_ss_pi_t np = ss->p_count; booster_t *b; QPMS_CRASHING_MALLOC(b, sizeof(*b)); QPMS_CRASHING_MALLOC(b->r, sizeof(double) * uopairarr_len(np)); for(qpms_ss_pi_t i = 0; i < np; ++i) for(qpms_ss_pi_t j = 0; j < i; ++j) - b->r[uopairid(pn, i, j)] = cart3_dist(ss->p[i].pos, ss->p[j].pos); - b->r_count = sort_and_eliminate(b->r, uopairrarr_len(np), sizeof(*b->r), cmp_double); - + b->r[uopairid(np, i, j)] = cart3_dist(ss->p[i].pos, ss->p[j].pos); + b->r_count = sort_and_eliminate(b->r, uopairarr_len(np), sizeof(*b->r), cmp_double); + QPMS_CRASHING_REALLOC(b->r, b->r_count * sizeof(*b->r)); QPMS_CRASHING_CALLOC(b->lMax_r, b->r_count, sizeof(*b->lMax_r)); for(qpms_ss_pi_t i = 0; i < np; ++i) for(qpms_ss_pi_t j = 0; j < i; ++j) { - const uoppid_t pid = uopairid(pn, i, j); + const uoppid_t pid = uopairid(np, i, j); const double r = cart3_dist(ss->p[i].pos, ss->p[j].pos); double *rhit = bsearch(&r, b->r, b->r_count, sizeof(*b->r), cmp_double); QPMS_ASSERT(rhit != NULL); QPMS_ASSERT(rhit >= b->r); - const size_t ri = b->r_map[pid] = r_hit - b->r; + const size_t ri = b->r_map[pid] = rhit - b->r; b->lMax_r[ri] = MAX(b->lMax_r[ri], MAX(qpms_ss_bspec_pi(ss, i)->lMax, qpms_ss_bspec_pi(ss, j)->lMax)); } - - QPMS_CRASHING_MALLOC(b->bessel_offset_r, (b->r_count + 1) * sizeof(*b->bessel_offset_r)); + + QPMS_CRASHING_MALLOC(b->bessel_offsets_r, (b->r_count + 1) * sizeof(*b->bessel_offsets_r)); size_t bessel_offset = 0; for(size_t ri = 0; ri < b->r_count; ++ri) { b->bessel_offsets_r[ri] = bessel_offset; @@ -134,6 +135,7 @@ static qpms_errno_t qpms_scatsys_translation_booster_eval_bessels( QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // Is there a case for different J? 2*b->lMax_r[ri]+1, k * b->r[ri], target + b->bessel_offsets_r[ri])); + } return QPMS_SUCCESS; } @@ -143,4 +145,106 @@ typedef struct qpms_scatsysw_translation_booster { complex double *bessels; } boosterw_t; +boosterw_t *qpms_scatsysw_translation_booster_create( + const qpms_scatsys_at_omega_t *ssw) { + QPMS_PARANOID_ASSERT(ssw->ss); + const booster_t *const b = ssw->ss->tbooster; + QPMS_ASSERT(b); + boosterw_t *bw; + QPMS_CRASHING_MALLOC(bw, sizeof(*bw)); + // Evaluate bessel functions + QPMS_CRASHING_MALLOC(bw->bessels, b->bessel_offsets_r[b->r_count] * sizeof(*bw->bessels)); + for(size_t ri = 0; ri < b->r_count; ++ri) { + QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // is there a case for other J? + b->bessel_offsets_r[ri+1]-b->bessel_offsets_r[ri]-1, + b->r[ri] * ssw->wavenumber, + bw->bessels + b->bessel_offsets_r[ri])); + } + + bw->b = b; + return bw; +} + +void qpms_scatsysw_translation_booster_free(boosterw_t *bw) { + if (bw) { + free (bw->bessels); + } + free (bw); +} + +complex double *qpms_scatsysw_build_modeproblem_matrix_full_boosted( + /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated. + complex double *target, + const qpms_scatsys_at_omega_t *ssw) +{ + QPMS_ASSERT(ssw->translation_cache && ssw->ss->tbooster); + const qpms_scatsys_t *const ss = ssw->ss; + const booster_t *const b = ss->tbooster; + const boosterw_t *const bw = ssw->translation_cache; + const qpms_trans_calculator *const c = ss->c; + + const complex double k = ssw->wavenumber; + const size_t full_len = ss->fecv_size; + if(!target) + QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double)); + complex double *tmp; + QPMS_CRASHING_MALLOC(tmp, SQ(ss->max_bspecn) * sizeof(complex double)); + memset(target, 0, SQ(full_len) * sizeof(complex double)); //unnecessary? + double legendre_buf[gsl_sf_legendre_array_n(2*c->lMax + 1)]; //VLA, workspace for legendre arrays + const complex double zero = 0, minusone = -1; + { // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC) + size_t fullvec_offsetR = 0; + for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) { + const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[piR].tmatrix_id]->spec; + const cart3_t posR = ss->p[piR].pos; + size_t fullvec_offsetC = 0; + // dest particle T-matrix + const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m; + for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { + const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec; + if(piC != piR) { // The diagonal will be dealt with later. + uoppid_t pid = uopairid(ss->p_count, piC, piR); + const cart3_t posC = ss->p[piC].pos; + const sph_t dlj = cart2sph(cart3_substract(posR, posC)); + const size_t ri = b->r_map[pid]; + QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]); + const qpms_l_t pair_lMax = b->lMax_r[pid]; + const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax); + { // this replaces qpms_trans_calculator_get_trans_array(): + // R is dest, C is src + QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm); + QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax); + QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0); + complex double A[pair_nelem][pair_nelem]; // VLA, TODO flatten + complex double B[pair_nelem][pair_nelem]; // VLA, TODO flatten + { // this replaces qpms_trans_calculator_get_AB_arrays() and ..._buf() + const double costheta = cos(dlj.theta); + QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE, + 2*c->lMax+1, costheta, -1, legendre_buf)); + const double * const legendres = legendre_buf; + const complex double * const bessels = bw->bessels + b->bessel_offsets_r[ri]; + qpms_trans_calculator_get_AB_arrays_precalcbuf(c, pair_lMax, A[0], B[0], + /*deststride*/ pair_nelem, /*srcstride*/ 1, dlj.phi, bessels, legendres); + qpms_trans_array_from_AB(tmp,// tmp is S(piR<-piC) + bspecR, bspecC->n, bspecC, 1, A[0], B[0], pair_lMax); + } + } + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/, + &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/, + tmp/*b*/, bspecC->n/*ldb*/, &zero/*beta*/, + target + fullvec_offsetR*full_len + fullvec_offsetC /*c*/, + full_len /*ldc*/); + } + fullvec_offsetC += bspecC->n; + } + fullvec_offsetR += bspecR->n; + } + } + // diagonal part M[pi,pi] = +1 + for (size_t i = 0; i < full_len; ++i) target[full_len * i + i] = +1; + + free(tmp); + return target; +} diff --git a/qpms/scatsystem.c b/qpms/scatsystem.c index 4a9a326..1495051 100644 --- a/qpms/scatsystem.c +++ b/qpms/scatsystem.c @@ -1110,6 +1110,8 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_full( const qpms_scatsys_at_omega_t *ssw ) { + if (ssw->translation_cache) + return qpms_scatsysw_build_modeproblem_matrix_full_boosted(target, ssw); const complex double k = ssw->wavenumber; const qpms_scatsys_t *ss = ssw->ss; const size_t full_len = ss->fecv_size; diff --git a/qpms/scatsystem.h b/qpms/scatsystem.h index 98e04fa..cb48c96 100644 --- a/qpms/scatsystem.h +++ b/qpms/scatsystem.h @@ -134,9 +134,6 @@ typedef struct qpms_ss_derived_tmatrix_t { struct qpms_trans_calculator; struct qpms_scatsys_translation_booster; -void qpms_scatsys_translation_booster_free(struct qpms_scatsys_translation_booster *); -struct qpms_scatsys_translation_booster *qpms_scatsys_translation_booster_create( - const qpms_scatsys_ss *ss); typedef struct qpms_scatsys_t { struct qpms_epsmu_generator_t medium; ///< Optical properties of the background medium. @@ -551,6 +548,10 @@ struct beyn_result_t *qpms_scatsys_finite_find_eigenmodes( double res_tol ///< (default: `0.0`) TODO DOC. ); +void qpms_scatsys_translation_booster_free(struct qpms_scatsys_translation_booster *); +struct qpms_scatsys_translation_booster *qpms_scatsys_translation_booster_create( + const qpms_scatsys_t *ss); + #if 0 /// Searches for scattering system's eigenmodes using Beyn's algorithm. /**