diff --git a/qpms/qpms_c.pyx b/qpms/qpms_c.pyx index 61cb749..84d0f72 100644 --- a/qpms/qpms_c.pyx +++ b/qpms/qpms_c.pyx @@ -1468,12 +1468,15 @@ cdef class ScatteringSystem: qpms_scatsys_build_modeproblem_matrix_full(&target_view[0][0], self.s, k) return target - def modeproblem_matrix_packed(self, double k, qpms_iri_t iri): + def modeproblem_matrix_packed(self, double k, qpms_iri_t iri, version=None): cdef size_t rlen = self.saecv_sizes[iri] cdef np.ndarray[np.complex_t, ndim=2] target = np.empty( (rlen,rlen),dtype=complex, order='C') cdef cdouble[:,::1] target_view = target - qpms_scatsys_build_modeproblem_matrix_irrep_packed(&target_view[0][0], self.s, iri, k) + if (version == 'R'): + qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR(&target_view[0][0], self.s, iri, k) + else: + qpms_scatsys_build_modeproblem_matrix_irrep_packed(&target_view[0][0], self.s, iri, k) return target def translation_matrix_full(self, double k): diff --git a/qpms/qpms_cdefs.pxd b/qpms/qpms_cdefs.pxd index 80b60e9..6f21a18 100644 --- a/qpms/qpms_cdefs.pxd +++ b/qpms/qpms_cdefs.pxd @@ -291,6 +291,8 @@ cdef extern from "scatsystem.h": const qpms_scatsys_t *ss, double k) cdouble *qpms_scatsys_build_modeproblem_matrix_irrep_packed(cdouble *target, const qpms_scatsys_t *ss, qpms_iri_t iri, double k) + cdouble *qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR( + cdouble *target, const qpms_scatsys_t *ss, qpms_iri_t iri, double k) diff --git a/qpms/scatsystem.c b/qpms/scatsystem.c index 8e523f6..4878506 100644 --- a/qpms/scatsystem.c +++ b/qpms/scatsystem.c @@ -14,6 +14,7 @@ #include #include "qpms_error.h" #include "translations.h" +#include #define SQ(x) ((x)*(x)) #define QPMS_SCATSYS_LEN_RTOL 1e-13 @@ -1441,4 +1442,122 @@ complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed( return target_packed; } +complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR( + /// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated. + complex double *target_packed, + const qpms_scatsys_t *ss, qpms_iri_t iri, + double k ///< Wave number to use in the translation matrix. + ) +{ + const size_t packedlen = ss->saecv_sizes[iri]; + if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps? + return target_packed; + if (target_packed == NULL) + target_packed = malloc(SQ(packedlen)*sizeof(complex double)); + if (target_packed == NULL) abort(); + memset(target_packed, 0, SQ(packedlen)*sizeof(complex double)); + // some of the following workspaces are probably redundant; TODO optimize later. + + // workspaces for the uncompressed particle<-particle tranlation matrix block + // and the result of multiplying with a T-matrix + complex double *Sblock, *TSblock; + QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn)); + QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn)); + + // Workspace for the intermediate particle-orbit matrix result + complex double *tmp; + QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order); + + const complex double one = 1, zero = 0; + + for(qpms_ss_pi_t opistartR = 0; opistartR < ss->p_count; + opistartR += ss->orbit_types[ss->p_orbitinfo[opistartR].t].size //orbit_p_countR; might write a while() instead + ) { + const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR]; + const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t; + const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn; + const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR; + const qpms_ss_orbit_pi_t orbit_p_countR = otR->size; + const size_t orbit_packedsizeR = otR->irbase_sizes[iri]; + + if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep + const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n + const qpms_vswf_set_spec_t *bspecR = ss->tm[ss->p[orbitstartpiR].tmatrix_id]->spec; + // This is the orbit-level matrix projecting the whole orbit onto the irrep. + const complex double *omR = otR->irbases + otR->irbase_offsets[iri]; + // Orbit coeff vector's full size: + const size_t orbit_fullsizeR = otR->size * otR->bspecn; + // This is where the orbit starts in the "packed" vector: + const size_t packed_orbit_offsetR = + ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR] + + osnR * otR->irbase_sizes[iri]; + for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) { + qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR]; + assert(opiR == ss->p_orbitinfo[piR].p); + const qpms_ss_oti_t otiR = ss->p_orbitinfo[piR].t; + assert(ss->p_orbitinfo[piR].osn == osnR); + const cart3_t posR = ss->p[piR].pos; + // dest particle T-matrix + const complex double *tmmR = ss->tm[ss->p[piR].tmatrix_id]->m; + for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop + const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t; + const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC; + const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn; + const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p; + // This is where the particle's orbit starts in the "packed" vector: + const size_t packed_orbit_offsetC = + ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC] + + osnC * otC->irbase_sizes[iri]; + const qpms_vswf_set_spec_t *bspecC = ss->tm[ss->p[piC].tmatrix_id]->spec; + // Orbit coeff vector's full size: + const size_t orbit_fullsizeC = otC->size * otC->bspecn; + const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n + const size_t orbit_packedsizeC = otC->irbase_sizes[iri]; + // This is the orbit-level matrix projecting the whole orbit onto the irrep. + const complex double *omC = otC->irbases + otC->irbase_offsets[iri]; + + if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep + if(piC != piR) { // non-diagonal, calculate TS + const cart3_t posC = ss->p[piC].pos; + QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c, + Sblock, // Sblock is S(piR->piC) + bspecR, bspecC->n, bspecC, 1, + k, posR, posC)); + + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/, + &one/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/, + Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/, + TSblock /*c*/, bspecC->n /*ldc*/); + } else { // diagonal, fill with diagonal -1 + for (size_t row = 0; row < bspecR->n; ++row) + for (size_t col = 0; col < bspecC->n; ++col) + TSblock[row * bspecC->n + col] = (row == col)? -1 : 0; + } + + // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC] + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, + particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/, + &one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/, + omC + opiC*particle_fullsizeC /*B*/, + orbit_fullsizeC/*ldB*/, &zero /*beta*/, + tmp /*C*/, orbit_packedsizeC /*LDC*/); + + // target[oiR|piR,oiC|piC] += U[...] tmp[...] + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/, + &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/, + tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/, + target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/, + packedlen /*ldC*/); + } + } + } + } + } + free(tmp); + free(Sblock); + free(TSblock); + return target_packed; +} diff --git a/qpms/scatsystem.h b/qpms/scatsystem.h index 5616767..bf42990 100644 --- a/qpms/scatsystem.h +++ b/qpms/scatsystem.h @@ -486,12 +486,20 @@ complex double *qpms_scatsys_build_modeproblem_matrix_full( const qpms_scatsys_t *ss, double k ///< Wave number to use in the translation matrix. ); +/// Creates the mode problem matrix directly in the irrep-packed form. complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed( /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated. complex double *target, const qpms_scatsys_t *ss, qpms_iri_t iri, double k ///< Wave number to use in the translation matrix. ); +/// Alternative implementation of qpms_scatsys_build_modeproblem_matrix_irrep_packed(). +complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR( + /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated. + complex double *target, + const qpms_scatsys_t *ss, qpms_iri_t iri, + double k ///< Wave number to use in the translation matrix. + ); /// NOT IMPLEMENTED Dumps a qpms_scatsys_t structure to a file. qpms_errno_t qpms_scatsys_dump(qpms_scatsys_t *ss, char *path);