Parallel modeproblem matrix fixed?

Former-commit-id: 9ad51b186a68689a754ce986d7f8bf2f97ac258f
This commit is contained in:
Marek Nečada 2020-01-28 13:04:05 +02:00
parent 8f4a8c7c7b
commit 96c9e95ea0
2 changed files with 107 additions and 101 deletions

View File

@ -251,15 +251,21 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_full_boosted(
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) {
const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec; const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
if(piC != piR) { // The diagonal will be dealt with later. if(piC != piR) { // The diagonal will be dealt with later.
uoppid_t pid = uopairid(ss->p_count, piC, piR);
const cart3_t posC = ss->p[piC].pos; const cart3_t posC = ss->p[piC].pos;
#if 0
QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
tmp, // tmp is S(piR<-piC)
bspecR, bspecC->n, bspecC, 1,
k, posR, posC, QPMS_HANKEL_PLUS));
#else
{ // this replaces qpms_trans_calculator_get_trans_array():
// R is dest, C is src
const sph_t dlj = cart2sph(cart3_substract(posR, posC)); const sph_t dlj = cart2sph(cart3_substract(posR, posC));
const uoppid_t pid = uopairid(ss->p_count, piC, piR);
const size_t ri = b->r_map[pid]; const size_t ri = b->r_map[pid];
QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]); QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]);
const qpms_l_t pair_lMax = b->lMax_r[ri]; const qpms_l_t pair_lMax = b->lMax_r[ri];
const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax); const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax);
{ // this replaces qpms_trans_calculator_get_trans_array():
// R is dest, C is src
QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm); QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm);
QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax); QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax);
QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0); QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0);
@ -275,6 +281,7 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_full_boosted(
bspecR, bspecC->n, bspecC, 1, A, B, pair_lMax); bspecR, bspecC->n, bspecC, 1, A, B, pair_lMax);
} }
} }
#endif
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/, bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
&minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/, &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
@ -301,15 +308,14 @@ void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boost
const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
*a = arg; *a = arg;
const qpms_scatsys_at_omega_t *ssw = a->ssw; const qpms_scatsys_at_omega_t *ssw = a->ssw;
const complex double k = ssw->wavenumber;
const qpms_scatsys_t *ss = ssw->ss;
const qpms_iri_t iri = a->iri;
const size_t packedlen = ss->saecv_sizes[iri];
QPMS_ASSERT(ssw->translation_cache && ssw->ss->tbooster); QPMS_ASSERT(ssw->translation_cache && ssw->ss->tbooster);
const qpms_scatsys_t * const ss = ssw->ss;
const qpms_trans_calculator *const c = ss->c;
const booster_t *const b = ss->tbooster; const booster_t *const b = ss->tbooster;
const boosterw_t *const bw = ssw->translation_cache; const boosterw_t *const bw = ssw->translation_cache;
const qpms_trans_calculator *const c = ss->c; const complex double k = ssw->wavenumber;
const qpms_iri_t iri = a->iri;
const size_t packedlen = ss->saecv_sizes[iri];
// some of the following workspaces are probably redundant; TODO optimize later. // some of the following workspaces are probably redundant; TODO optimize later.
@ -318,15 +324,15 @@ void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boost
complex double *Sblock, *TSblock; complex double *Sblock, *TSblock;
QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn)); QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn)); QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
// Workspaces for the translation operator A and B matrices
complex double *A, *B;
QPMS_CRASHING_MALLOC(A, SQ(c->nelem) * sizeof(*A));
QPMS_CRASHING_MALLOC(B, SQ(c->nelem) * sizeof(*B));
double legendre_buf[gsl_sf_legendre_array_n(2*c->lMax + 1)]; //VLA, workspace for legendre arrays
// Workspace for the intermediate particle-orbit matrix result // Workspace for the intermediate particle-orbit matrix result
complex double *tmp; complex double *tmp;
QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order); QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
// Workspace for A, B arrays
complex double *A, *B;
QPMS_CRASHING_MALLOC(A, SQ(c->nelem) * sizeof(*A));
QPMS_CRASHING_MALLOC(B, SQ(c->nelem) * sizeof(*B));
double legendre_buf[gsl_sf_legendre_array_n(2*c->lMax + 1)]; //VLA, workspace for legendre arrays
const complex double one = 1, zero = 0, minusone = -1; const complex double one = 1, zero = 0, minusone = -1;
@ -361,7 +367,6 @@ void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boost
const size_t packed_orbit_offsetR = const size_t packed_orbit_offsetR =
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR] ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
+ osnR * otR->irbase_sizes[iri]; + osnR * otR->irbase_sizes[iri];
for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) { for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR]; qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
assert(opiR == ss->p_orbitinfo[piR].p); assert(opiR == ss->p_orbitinfo[piR].p);
@ -395,7 +400,7 @@ void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boost
Sblock, // Sblock is S(piR->piC) Sblock, // Sblock is S(piR->piC)
bspecR, bspecC->n, bspecC, 1, bspecR, bspecC->n, bspecC, 1,
k, posR, posC, QPMS_HANKEL_PLUS)); k, posR, posC, QPMS_HANKEL_PLUS));
#endif #else
{ // this block replaces qpms_trans_calculator_get_trans_array(): { // this block replaces qpms_trans_calculator_get_trans_array():
// R is dest, C is src // R is dest, C is src
const sph_t dlj = cart2sph(cart3_substract(posR, posC)); const sph_t dlj = cart2sph(cart3_substract(posR, posC));
@ -404,7 +409,9 @@ void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boost
QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]); QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]);
const qpms_l_t pair_lMax = b->lMax_r[ri]; const qpms_l_t pair_lMax = b->lMax_r[ri];
const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax); const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax);
QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm);
QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax);
QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0);
{ // this replaces qpms_trans_calculator_get_AB_arrays() and _buf() { // this replaces qpms_trans_calculator_get_AB_arrays() and _buf()
const double costheta = cos(dlj.theta); const double costheta = cos(dlj.theta);
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE, QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
@ -418,6 +425,8 @@ void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boost
bspecR, bspecC->n, bspecC, 1, A, B, pair_lMax); bspecR, bspecC->n, bspecC, 1, A, B, pair_lMax);
} }
#endif
SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans, SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/, bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
&minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/, &minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
@ -449,12 +458,13 @@ void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boost
} }
} }
} }
}
free(tmp);
free(A); free(A);
free(B); free(B);
free(tmp);
free(Sblock); free(Sblock);
free(TSblock); free(TSblock);
return NULL; return NULL;
} }

View File

@ -1647,9 +1647,6 @@ static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thre
} }
} }
} }
} }
free(tmp); free(tmp);
free(Sblock); free(Sblock);
@ -1810,7 +1807,6 @@ complex double *qpms_scatsysw_apply_Tmatrices_full(
} }
ccart3_t qpms_scatsys_eval_E(const qpms_scatsys_t *ss, ccart3_t qpms_scatsys_eval_E(const qpms_scatsys_t *ss,
const complex double *cvf, const cart3_t where, const complex double *cvf, const cart3_t where,
const complex double k) { const complex double k) {