Alternative implementation of irrep packed modeproblem matrix.
Former-commit-id: ed37e98423e03f96f6fb850820bfae451f505f21
This commit is contained in:
parent
96a6fdf7fa
commit
7db7123c91
|
@ -1468,12 +1468,15 @@ cdef class ScatteringSystem:
|
||||||
qpms_scatsys_build_modeproblem_matrix_full(&target_view[0][0], self.s, k)
|
qpms_scatsys_build_modeproblem_matrix_full(&target_view[0][0], self.s, k)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def modeproblem_matrix_packed(self, double k, qpms_iri_t iri):
|
def modeproblem_matrix_packed(self, double k, qpms_iri_t iri, version=None):
|
||||||
cdef size_t rlen = self.saecv_sizes[iri]
|
cdef size_t rlen = self.saecv_sizes[iri]
|
||||||
cdef np.ndarray[np.complex_t, ndim=2] target = np.empty(
|
cdef np.ndarray[np.complex_t, ndim=2] target = np.empty(
|
||||||
(rlen,rlen),dtype=complex, order='C')
|
(rlen,rlen),dtype=complex, order='C')
|
||||||
cdef cdouble[:,::1] target_view = target
|
cdef cdouble[:,::1] target_view = target
|
||||||
qpms_scatsys_build_modeproblem_matrix_irrep_packed(&target_view[0][0], self.s, iri, k)
|
if (version == 'R'):
|
||||||
|
qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR(&target_view[0][0], self.s, iri, k)
|
||||||
|
else:
|
||||||
|
qpms_scatsys_build_modeproblem_matrix_irrep_packed(&target_view[0][0], self.s, iri, k)
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def translation_matrix_full(self, double k):
|
def translation_matrix_full(self, double k):
|
||||||
|
|
|
@ -291,6 +291,8 @@ cdef extern from "scatsystem.h":
|
||||||
const qpms_scatsys_t *ss, double k)
|
const qpms_scatsys_t *ss, double k)
|
||||||
cdouble *qpms_scatsys_build_modeproblem_matrix_irrep_packed(cdouble *target,
|
cdouble *qpms_scatsys_build_modeproblem_matrix_irrep_packed(cdouble *target,
|
||||||
const qpms_scatsys_t *ss, qpms_iri_t iri, double k)
|
const qpms_scatsys_t *ss, qpms_iri_t iri, double k)
|
||||||
|
cdouble *qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR(
|
||||||
|
cdouble *target, const qpms_scatsys_t *ss, qpms_iri_t iri, double k)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "qpms_error.h"
|
#include "qpms_error.h"
|
||||||
#include "translations.h"
|
#include "translations.h"
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
#define SQ(x) ((x)*(x))
|
#define SQ(x) ((x)*(x))
|
||||||
#define QPMS_SCATSYS_LEN_RTOL 1e-13
|
#define QPMS_SCATSYS_LEN_RTOL 1e-13
|
||||||
|
@ -1441,4 +1442,122 @@ complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed(
|
||||||
return target_packed;
|
return target_packed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR(
|
||||||
|
/// Target memory with capacity for ss->saecv_sizes[iri]**2 elements. If NULL, new will be allocated.
|
||||||
|
complex double *target_packed,
|
||||||
|
const qpms_scatsys_t *ss, qpms_iri_t iri,
|
||||||
|
double k ///< Wave number to use in the translation matrix.
|
||||||
|
)
|
||||||
|
{
|
||||||
|
const size_t packedlen = ss->saecv_sizes[iri];
|
||||||
|
if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
|
||||||
|
return target_packed;
|
||||||
|
if (target_packed == NULL)
|
||||||
|
target_packed = malloc(SQ(packedlen)*sizeof(complex double));
|
||||||
|
if (target_packed == NULL) abort();
|
||||||
|
memset(target_packed, 0, SQ(packedlen)*sizeof(complex double));
|
||||||
|
|
||||||
|
// some of the following workspaces are probably redundant; TODO optimize later.
|
||||||
|
|
||||||
|
// workspaces for the uncompressed particle<-particle tranlation matrix block
|
||||||
|
// and the result of multiplying with a T-matrix
|
||||||
|
complex double *Sblock, *TSblock;
|
||||||
|
QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
|
||||||
|
QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
|
||||||
|
|
||||||
|
// Workspace for the intermediate particle-orbit matrix result
|
||||||
|
complex double *tmp;
|
||||||
|
QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
|
||||||
|
|
||||||
|
const complex double one = 1, zero = 0;
|
||||||
|
|
||||||
|
for(qpms_ss_pi_t opistartR = 0; opistartR < ss->p_count;
|
||||||
|
opistartR += ss->orbit_types[ss->p_orbitinfo[opistartR].t].size //orbit_p_countR; might write a while() instead
|
||||||
|
) {
|
||||||
|
const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
|
||||||
|
const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
|
||||||
|
const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
|
||||||
|
const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
|
||||||
|
const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
|
||||||
|
const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
|
||||||
|
|
||||||
|
if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
|
||||||
|
const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
|
||||||
|
const qpms_vswf_set_spec_t *bspecR = ss->tm[ss->p[orbitstartpiR].tmatrix_id]->spec;
|
||||||
|
// This is the orbit-level matrix projecting the whole orbit onto the irrep.
|
||||||
|
const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
|
||||||
|
// Orbit coeff vector's full size:
|
||||||
|
const size_t orbit_fullsizeR = otR->size * otR->bspecn;
|
||||||
|
// This is where the orbit starts in the "packed" vector:
|
||||||
|
const size_t packed_orbit_offsetR =
|
||||||
|
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
|
||||||
|
+ osnR * otR->irbase_sizes[iri];
|
||||||
|
for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
|
||||||
|
qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
|
||||||
|
assert(opiR == ss->p_orbitinfo[piR].p);
|
||||||
|
const qpms_ss_oti_t otiR = ss->p_orbitinfo[piR].t;
|
||||||
|
assert(ss->p_orbitinfo[piR].osn == osnR);
|
||||||
|
const cart3_t posR = ss->p[piR].pos;
|
||||||
|
// dest particle T-matrix
|
||||||
|
const complex double *tmmR = ss->tm[ss->p[piR].tmatrix_id]->m;
|
||||||
|
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
|
||||||
|
const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
|
||||||
|
const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
|
||||||
|
const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
|
||||||
|
const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
|
||||||
|
// This is where the particle's orbit starts in the "packed" vector:
|
||||||
|
const size_t packed_orbit_offsetC =
|
||||||
|
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
|
||||||
|
+ osnC * otC->irbase_sizes[iri];
|
||||||
|
const qpms_vswf_set_spec_t *bspecC = ss->tm[ss->p[piC].tmatrix_id]->spec;
|
||||||
|
// Orbit coeff vector's full size:
|
||||||
|
const size_t orbit_fullsizeC = otC->size * otC->bspecn;
|
||||||
|
const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
|
||||||
|
const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
|
||||||
|
// This is the orbit-level matrix projecting the whole orbit onto the irrep.
|
||||||
|
const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
|
||||||
|
|
||||||
|
if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
|
||||||
|
if(piC != piR) { // non-diagonal, calculate TS
|
||||||
|
const cart3_t posC = ss->p[piC].pos;
|
||||||
|
QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
|
||||||
|
Sblock, // Sblock is S(piR->piC)
|
||||||
|
bspecR, bspecC->n, bspecC, 1,
|
||||||
|
k, posR, posC));
|
||||||
|
|
||||||
|
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
|
||||||
|
bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
|
||||||
|
&one/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
|
||||||
|
Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
|
||||||
|
TSblock /*c*/, bspecC->n /*ldc*/);
|
||||||
|
} else { // diagonal, fill with diagonal -1
|
||||||
|
for (size_t row = 0; row < bspecR->n; ++row)
|
||||||
|
for (size_t col = 0; col < bspecC->n; ++col)
|
||||||
|
TSblock[row * bspecC->n + col] = (row == col)? -1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
|
||||||
|
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans,
|
||||||
|
particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
|
||||||
|
&one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
|
||||||
|
omC + opiC*particle_fullsizeC /*B*/,
|
||||||
|
orbit_fullsizeC/*ldB*/, &zero /*beta*/,
|
||||||
|
tmp /*C*/, orbit_packedsizeC /*LDC*/);
|
||||||
|
|
||||||
|
// target[oiR|piR,oiC|piC] += U[...] tmp[...]
|
||||||
|
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
|
||||||
|
orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
|
||||||
|
&one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
|
||||||
|
tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
|
||||||
|
target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
|
||||||
|
packedlen /*ldC*/);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free(tmp);
|
||||||
|
free(Sblock);
|
||||||
|
free(TSblock);
|
||||||
|
return target_packed;
|
||||||
|
}
|
||||||
|
|
|
@ -486,12 +486,20 @@ complex double *qpms_scatsys_build_modeproblem_matrix_full(
|
||||||
const qpms_scatsys_t *ss,
|
const qpms_scatsys_t *ss,
|
||||||
double k ///< Wave number to use in the translation matrix.
|
double k ///< Wave number to use in the translation matrix.
|
||||||
);
|
);
|
||||||
|
/// Creates the mode problem matrix directly in the irrep-packed form.
|
||||||
complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed(
|
complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed(
|
||||||
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
|
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
|
||||||
complex double *target,
|
complex double *target,
|
||||||
const qpms_scatsys_t *ss, qpms_iri_t iri,
|
const qpms_scatsys_t *ss, qpms_iri_t iri,
|
||||||
double k ///< Wave number to use in the translation matrix.
|
double k ///< Wave number to use in the translation matrix.
|
||||||
);
|
);
|
||||||
|
/// Alternative implementation of qpms_scatsys_build_modeproblem_matrix_irrep_packed().
|
||||||
|
complex double *qpms_scatsys_build_modeproblem_matrix_irrep_packed_orbitorderR(
|
||||||
|
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
|
||||||
|
complex double *target,
|
||||||
|
const qpms_scatsys_t *ss, qpms_iri_t iri,
|
||||||
|
double k ///< Wave number to use in the translation matrix.
|
||||||
|
);
|
||||||
|
|
||||||
/// NOT IMPLEMENTED Dumps a qpms_scatsys_t structure to a file.
|
/// NOT IMPLEMENTED Dumps a qpms_scatsys_t structure to a file.
|
||||||
qpms_errno_t qpms_scatsys_dump(qpms_scatsys_t *ss, char *path);
|
qpms_errno_t qpms_scatsys_dump(qpms_scatsys_t *ss, char *path);
|
||||||
|
|
Loading…
Reference in New Issue