Handling of zero packed orbit sizes.

Former-commit-id: 75ff318b4c91477a22f0fcee30fcb8812de1faba
This commit is contained in:
Marek Nečada 2019-03-10 16:53:01 +00:00
parent f30c5a88d9
commit 297995690a
1 changed files with 82 additions and 72 deletions

View File

@ -807,6 +807,8 @@ complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed,
const complex double *orig_full, const qpms_scatsys_t *ss, const complex double *orig_full, const qpms_scatsys_t *ss,
qpms_iri_t iri){ qpms_iri_t iri){
const size_t packedlen = ss->saecv_sizes[iri]; const size_t packedlen = ss->saecv_sizes[iri];
if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
return target_packed;
const size_t full_len = ss->fecv_size; const size_t full_len = ss->fecv_size;
if (target_packed == NULL) if (target_packed == NULL)
target_packed = malloc(SQ(packedlen)*sizeof(complex double)); target_packed = malloc(SQ(packedlen)*sizeof(complex double));
@ -837,45 +839,47 @@ complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed,
const complex double *omR = otR->irbases + otR->irbase_offsets[iri]; const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
size_t fullvec_offsetC = 0; size_t fullvec_offsetC = 0;
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t; for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC; const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn; const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p; const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
// This is where the particle's orbit starts in the "packed" vector: const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
const size_t packed_orbit_offsetC = // This is where the particle's orbit starts in the "packed" vector:
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC] const size_t packed_orbit_offsetC =
+ osnC * otC->irbase_sizes[iri]; ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
// Orbit coeff vector's full size: + osnC * otC->irbase_sizes[iri];
const size_t orbit_fullsizeC = otC->size * otC->bspecn; // Orbit coeff vector's full size:
const size_t particle_fullsizeC = otC->bspecn; const size_t orbit_fullsizeC = otC->size * otC->bspecn;
const size_t orbit_packedsizeC = otC->irbase_sizes[iri]; const size_t particle_fullsizeC = otC->bspecn;
// This is the orbit-level matrix projecting the whole orbit onto the irrep. const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
const complex double *omC = otC->irbases + otC->irbase_offsets[iri]; // This is the orbit-level matrix projecting the whole orbit onto the irrep.
const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
// tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC] if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, // tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/, cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans,
&one /*alpha*/, orig_full + full_len*fullvec_offsetR + fullvec_offsetC/*A*/, particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
full_len/*ldA*/, &one /*alpha*/, orig_full + full_len*fullvec_offsetR + fullvec_offsetC/*A*/,
omC + opiC*particle_fullsizeC /*B*/, full_len/*ldA*/,
orbit_fullsizeC/*ldB*/, &zero /*beta*/, omC + opiC*particle_fullsizeC /*B*/,
tmp /*C*/, orbit_packedsizeC /*LDC*/); orbit_fullsizeC/*ldB*/, &zero /*beta*/,
tmp /*C*/, orbit_packedsizeC /*LDC*/);
// target[oiR|piR,oiC|piC] += U[...] tmp[...] // target[oiR|piR,oiC|piC] += U[...] tmp[...]
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/, orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
&one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, &one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/,
orbit_fullsizeR/*ldA*/, orbit_fullsizeR/*ldA*/,
tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/, tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/, target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
packedlen /*ldC*/); packedlen /*ldC*/);
}
fullvec_offsetC += otC->bspecn; fullvec_offsetC += otC->bspecn;
}
fullvec_offsetR += otR->bspecn;
} }
fullvec_offsetR += otR->bspecn;
} }
free(tmp); free(tmp);
return target_packed; return target_packed;
} }
@ -892,6 +896,8 @@ complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full,
if (target_full == NULL) abort(); if (target_full == NULL) abort();
if(!add) memset(target_full, 0, SQ(full_len)*sizeof(complex double)); if(!add) memset(target_full, 0, SQ(full_len)*sizeof(complex double));
if(!packedlen) return target_full; // Empty irrep, do nothing.
// Workspace for the intermediate particle-orbit matrix result // Workspace for the intermediate particle-orbit matrix result
complex double *tmp = malloc(sizeof(complex double) * SQ(ss->max_bspecn) complex double *tmp = malloc(sizeof(complex double) * SQ(ss->max_bspecn)
* ss->sym->order); if (!tmp) abort(); * ss->sym->order); if (!tmp) abort();
@ -916,40 +922,42 @@ complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full,
const complex double *omR = otR->irbases + otR->irbase_offsets[iri]; const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
size_t fullvec_offsetC = 0; size_t fullvec_offsetC = 0;
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop if (orbit_packedsizeR) // avoid crash on empty irrep
const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t; for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC; const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn; const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p; const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
// This is where the particle's orbit starts in the "packed" vector: const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
const size_t packed_orbit_offsetC = // This is where the particle's orbit starts in the "packed" vector:
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC] const size_t packed_orbit_offsetC =
+ osnC * otC->irbase_sizes[iri]; ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
// Orbit coeff vector's full size: + osnC * otC->irbase_sizes[iri];
const size_t orbit_fullsizeC = otC->size * otC->bspecn; // Orbit coeff vector's full size:
const size_t particle_fullsizeC = otC->bspecn; const size_t orbit_fullsizeC = otC->size * otC->bspecn;
const size_t orbit_packedsizeC = otC->irbase_sizes[iri]; const size_t particle_fullsizeC = otC->bspecn;
// This is the orbit-level matrix projecting the whole orbit onto the irrep. const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
const complex double *omC = otC->irbases + otC->irbase_offsets[iri]; // This is the orbit-level matrix projecting the whole orbit onto the irrep.
// tmp = P U const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
// tmp[oiR|piR,piC] = ∑_K M[piR,K] U[K,piC] if (orbit_packedsizeC) { // avoid crash on empty irrep
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, // tmp = P U
orbit_packedsizeR /*M*/, particle_fullsizeC /*N*/, orbit_packedsizeC /*K*/, // tmp[oiR|piR,piC] = ∑_K M[piR,K] U[K,piC]
&one /*alpha*/, orig_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC/*A*/, cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
packedlen/*ldA*/, orbit_packedsizeR /*M*/, particle_fullsizeC /*N*/, orbit_packedsizeC /*K*/,
omC + opiC*particle_fullsizeC /*B*/, &one /*alpha*/, orig_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC/*A*/,
orbit_fullsizeC/*ldB*/, &zero /*beta*/, packedlen/*ldA*/,
tmp /*C*/, particle_fullsizeC /*LDC*/); omC + opiC*particle_fullsizeC /*B*/,
orbit_fullsizeC/*ldB*/, &zero /*beta*/,
// target[oiR|piR,oiC|piC] += U*[...] tmp[...] tmp /*C*/, particle_fullsizeC /*LDC*/);
cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans,
particle_fullsizeR /*M*/, particle_fullsizeC /*N*/, orbit_packedsizeR /*K*/,
&one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/,
orbit_fullsizeR/*ldA*/,
tmp /*B*/, particle_fullsizeC /*ldB*/, &one /*beta*/,
target_full + full_len*fullvec_offsetR + fullvec_offsetC /*C*/,
full_len /*ldC*/);
// target[oiR|piR,oiC|piC] += U*[...] tmp[...]
cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans,
particle_fullsizeR /*M*/, particle_fullsizeC /*N*/, orbit_packedsizeR /*K*/,
&one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/,
orbit_fullsizeR/*ldA*/,
tmp /*B*/, particle_fullsizeC /*ldB*/, &one /*beta*/,
target_full + full_len*fullvec_offsetR + fullvec_offsetC /*C*/,
full_len /*ldC*/);
}
fullvec_offsetC += otC->bspecn; fullvec_offsetC += otC->bspecn;
} }
fullvec_offsetR += otR->bspecn; fullvec_offsetR += otR->bspecn;
@ -965,6 +973,7 @@ complex double *qpms_scatsys_irrep_pack_vector(complex double *target_packed,
const complex double *orig_full, const qpms_scatsys_t *ss, const complex double *orig_full, const qpms_scatsys_t *ss,
const qpms_iri_t iri) { const qpms_iri_t iri) {
const size_t packedlen = ss->saecv_sizes[iri]; const size_t packedlen = ss->saecv_sizes[iri];
if (!packedlen) return target_packed; // Empty irrep
if (target_packed == NULL) if (target_packed == NULL)
target_packed = malloc(packedlen*sizeof(complex double)); target_packed = malloc(packedlen*sizeof(complex double));
if (target_packed == NULL) abort(); if (target_packed == NULL) abort();
@ -988,13 +997,12 @@ complex double *qpms_scatsys_irrep_pack_vector(complex double *target_packed,
const size_t orbit_packedsize = ot->irbase_sizes[iri]; const size_t orbit_packedsize = ot->irbase_sizes[iri];
// This is the orbit-level matrix projecting the whole orbit onto the irrep. // This is the orbit-level matrix projecting the whole orbit onto the irrep.
const complex double *om = ot->irbases + ot->irbase_offsets[iri]; const complex double *om = ot->irbases + ot->irbase_offsets[iri];
if (orbit_packedsize) // avoid crash on empty irrep
cblas_zgemv(CblasRowMajor/*order*/, CblasNoTrans/*transA*/, cblas_zgemv(CblasRowMajor/*order*/, CblasNoTrans/*transA*/,
orbit_packedsize/*M*/, particle_fullsize/*N*/, &one/*alpha*/, orbit_packedsize/*M*/, particle_fullsize/*N*/, &one/*alpha*/,
om + opi*particle_fullsize/*A*/, orbit_fullsize/*lda*/, om + opi*particle_fullsize/*A*/, orbit_fullsize/*lda*/,
orig_full+fullvec_offset/*X*/, 1/*incX*/, orig_full+fullvec_offset/*X*/, 1/*incX*/,
&one/*beta*/, target_packed+packed_orbit_offset/*Y*/, 1/*incY*/); &one/*beta*/, target_packed+packed_orbit_offset/*Y*/, 1/*incY*/);
fullvec_offset += ot->bspecn; fullvec_offset += ot->bspecn;
} }
return target_packed; return target_packed;
@ -1013,6 +1021,7 @@ complex double *qpms_scatsys_irrep_unpack_vector(complex double *target_full,
const complex double one = 1; const complex double one = 1;
const size_t packedlen = ss->saecv_sizes[iri]; const size_t packedlen = ss->saecv_sizes[iri];
if (!packedlen) return target_full; // Completely empty irrep
size_t fullvec_offset = 0; size_t fullvec_offset = 0;
for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) { for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) {
@ -1030,7 +1039,8 @@ complex double *qpms_scatsys_irrep_unpack_vector(complex double *target_full,
// This is the orbit-level matrix projecting the whole orbit onto the irrep. // This is the orbit-level matrix projecting the whole orbit onto the irrep.
const complex double *om = ot->irbases + ot->irbase_offsets[iri]; const complex double *om = ot->irbases + ot->irbase_offsets[iri];
cblas_zgemv(CblasRowMajor/*order*/, CblasConjTrans/*transA*/, if (orbit_packedsize) // empty irrep, avoid zgemv crashing.
cblas_zgemv(CblasRowMajor/*order*/, CblasConjTrans/*transA*/,
orbit_packedsize/*M*/, particle_fullsize/*N*/, &one/*alpha*/, om + opi*particle_fullsize/*A*/, orbit_packedsize/*M*/, particle_fullsize/*N*/, &one/*alpha*/, om + opi*particle_fullsize/*A*/,
orbit_fullsize/*lda*/, orig_packed+packed_orbit_offset /*X*/, 1/*incX*/, &one/*beta*/, orbit_fullsize/*lda*/, orig_packed+packed_orbit_offset /*X*/, 1/*incX*/, &one/*beta*/,
target_full+fullvec_offset/*Y*/, 1/*incY*/); target_full+fullvec_offset/*Y*/, 1/*incY*/);