From d11db980f98bab10c470e02b0fff1d781d332a76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ne=C4=8Dada?= Date: Wed, 13 Mar 2019 12:28:40 +0200 Subject: [PATCH] A slow but working reference implementation of matrix (un)packing. Former-commit-id: 5f85c4983a8754ebae03980242b896a79954a73e --- qpms/qpms_error.h | 4 +- qpms/scatsystem.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++ qpms/scatsystem.h | 20 ++++++++ 3 files changed, 136 insertions(+), 2 deletions(-) diff --git a/qpms/qpms_error.h b/qpms/qpms_error.h index 33597ab..798b5d7 100644 --- a/qpms/qpms_error.h +++ b/qpms/qpms_error.h @@ -19,9 +19,9 @@ void qpms_pr_debug_at_flf(const char *filename, unsigned int linenum, //void qpms_error_at_line(const char *filename, unsigned int linenum, // const char *fmt, ...); -#define QPMS_CRASHING_MALLOC(pointer, size) {(pointer) = malloc(size); if(!pointer) qpms_pr_debug_at_flf(__FILE__,__LINE__,__func__, "Allocation of %zd bytes for " #pointer " failed.", (size_t) (size));} +#define QPMS_CRASHING_MALLOC(pointer, size) {(pointer) = malloc(size); if(!pointer && (size)) qpms_pr_debug_at_flf(__FILE__,__LINE__,__func__, "Allocation of %zd bytes for " #pointer " failed.", (size_t) (size));} -#define QPMS_CRASHING_REALLOC(pointer, size) {(pointer) = realloc(pointer, size); if(!pointer) qpms_pr_debug_at_flf(__FILE__,__LINE__,__func__, "Rellocation of %zd bytes for " #pointer " failed.", (size_t) (size));} +#define QPMS_CRASHING_REALLOC(pointer, size) {(pointer) = realloc(pointer, size); if(!pointer && (size)) qpms_pr_debug_at_flf(__FILE__,__LINE__,__func__, "Rellocation of %zd bytes for " #pointer " failed.", (size_t) (size));} #define QPMS_WTF qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,"Unexpected error.") diff --git a/qpms/scatsystem.c b/qpms/scatsystem.c index 341b838..600dada 100644 --- a/qpms/scatsystem.c +++ b/qpms/scatsystem.c @@ -866,6 +866,119 @@ complex double *qpms_orbit_irrep_basis(size_t *basis_size, return target; } +complex double *qpms_scatsys_irrep_transform_matrix(complex double *U, + const qpms_scatsys_t *ss, qpms_iri_t iri) { + const size_t packedlen = ss->saecv_sizes[iri]; + const size_t full_len = ss->fecv_size; + if (U == NULL) + QPMS_CRASHING_MALLOC(U,full_len * packedlen * sizeof(complex double)); + memset(U, 0, full_len * packedlen * sizeof(complex double)); + + size_t fullvec_offset = 0; + + for(qpms_ss_pi_t pi = 0; pi < ss->p_count; ++pi) { + const qpms_ss_oti_t oti = ss->p_orbitinfo[pi].t; + const qpms_ss_orbit_type_t *const ot = ss->orbit_types + oti; + const qpms_ss_osn_t osn = ss->p_orbitinfo[pi].osn; + const qpms_ss_orbit_pi_t opi = ss->p_orbitinfo[pi].p; + // This is where the particle's orbit starts in the "packed" vector: + const size_t packed_orbit_offset = ss->saecv_ot_offsets[iri*ss->orbit_type_count + oti] + + osn * ot->irbase_sizes[iri]; + // Orbit coeff vector's full size: + const size_t orbit_fullsize = ot->size * ot->bspecn; + const size_t particle_fullsize = ot->bspecn; + const size_t orbit_packedsize = ot->irbase_sizes[iri]; + // This is the orbit-level matrix projecting the whole orbit onto the irrep. + const complex double *om = ot->irbases + ot->irbase_offsets[iri]; + + for (size_t prow = 0; prow < orbit_packedsize; ++prow) + for (size_t pcol = 0; pcol < ot->bspecn; ++pcol) + U[full_len * (packed_orbit_offset + prow) + (fullvec_offset + pcol)] + = om[orbit_fullsize * prow + (opi * ot->bspecn + pcol)]; + fullvec_offset += ot->bspecn; + } + + return U; +} + +complex double *qpms_scatsys_irrep_pack_matrix_stupid(complex double *target_packed, + const complex double *orig_full, const qpms_scatsys_t *ss, + qpms_iri_t iri){ + const size_t packedlen = ss->saecv_sizes[iri]; + if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps? + return target_packed; + const size_t full_len = ss->fecv_size; + if (target_packed == NULL) + target_packed = malloc(SQ(packedlen)*sizeof(complex double)); + if (target_packed == NULL) abort(); + memset(target_packed, 0, SQ(packedlen)*sizeof(complex double)); + + // Workspace for the intermediate matrix + complex double *tmp; + QPMS_CRASHING_MALLOC(tmp, full_len * packedlen * sizeof(complex double)); + + complex double *U = qpms_scatsys_irrep_transform_matrix(NULL, ss, iri); + + const complex double one = 1, zero = 0; + + // tmp = F U* + cblas_zgemm( + CblasRowMajor, CblasNoTrans, CblasConjTrans, + full_len /*M*/, packedlen /*N*/, full_len /*K*/, + &one /*alpha*/, orig_full/*A*/, full_len/*ldA*/, + U /*B*/, full_len/*ldB*/, + &zero /*beta*/, tmp /*C*/, packedlen /*LDC*/); + // target = U tmp + cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + packedlen /*M*/, packedlen /*N*/, full_len /*K*/, + &one /*alpha*/, U/*A*/, full_len/*ldA*/, + tmp /*B*/, packedlen /*ldB*/, &zero /*beta*/, + target_packed /*C*/, packedlen /*ldC*/); + + free(tmp); + free(U); + return target_packed; +} + +/// Transforms a big "packed" matrix into the full basis (trivial implementation). +complex double *qpms_scatsys_irrep_unpack_matrix_stupid(complex double *target_full, + const complex double *orig_packed, const qpms_scatsys_t *ss, + qpms_iri_t iri, bool add){ + const size_t packedlen = ss->saecv_sizes[iri]; + const size_t full_len = ss->fecv_size; + if (target_full == NULL) + target_full = malloc(SQ(full_len)*sizeof(complex double)); + if (target_full == NULL) abort(); + if(!add) memset(target_full, 0, SQ(full_len)*sizeof(complex double)); + + if(!packedlen) return target_full; // Empty irrep, do nothing. + + // Workspace for the intermediate matrix result + complex double *tmp; + QPMS_CRASHING_MALLOC(tmp, full_len * packedlen * sizeof(complex double)); + + complex double *U = qpms_scatsys_irrep_transform_matrix(NULL, ss, iri); + + const complex double one = 1, zero = 0; + + // tmp = P U + cblas_zgemm( + CblasRowMajor, CblasNoTrans, CblasNoTrans, + packedlen /*M*/, full_len /*N*/, packedlen /*K*/, + &one /*alpha*/, orig_packed/*A*/, packedlen/*ldA*/, + U /*B*/, full_len/*ldB*/, + &zero /*beta*/, tmp /*C*/, full_len /*LDC*/); + // target += U* tmp + cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans, + full_len /*M*/, full_len /*N*/, packedlen /*K*/, + &one /*alpha*/, U/*A*/, full_len/*ldA*/, + tmp /*B*/, full_len /*ldB*/, &one /*beta*/, + target_full /*C*/, full_len /*ldC*/); + free(tmp); + free(U); + return target_full; +} + complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed, const complex double *orig_full, const qpms_scatsys_t *ss, qpms_iri_t iri){ @@ -947,6 +1060,7 @@ complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed, return target_packed; } + /// Transforms a big "packed" matrix into the full basis. /** TODO doc */ complex double *qpms_scatsys_irrep_unpack_matrix(complex double *target_full, diff --git a/qpms/scatsystem.h b/qpms/scatsystem.h index be503f3..439756c 100644 --- a/qpms/scatsystem.h +++ b/qpms/scatsystem.h @@ -426,6 +426,26 @@ qpms_scatsys_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig, const st /// Destroys the result of qpms_scatsys_apply_symmetry or qpms_scatsys_load. void qpms_scatsys_free(qpms_scatsys_t *s); +/// Creates a "full" transformation matrix U that takes a full vector and projects it onto an symmetry adapted basis. +/** Mostly as a reference and a debugging tool, as multiplicating these big matrices would be inefficient. + * + * TODO doc about shape etc. + */ +complex double *qpms_scatsys_irrep_transform_matrix(complex double *target_U, + const qpms_scatsys_t *ss, qpms_iri_t iri); + +/// Projects a "big" matrix onto an irrep (slow reference implementation). +/** TODO doc */ +complex double *qpms_scatsys_irrep_pack_matrix_stupid(complex double *target_packed, + const complex double *orig_full, const qpms_scatsys_t *ss, + qpms_iri_t iri); + +/// Transforms a big "packed" matrix into the full basis (slow reference implementation). +/** TODO doc */ +complex double *qpms_scatsys_irrep_unpack_matrix_stupid(complex double *target_full, + const complex double *orig_packed, const qpms_scatsys_t *ss, + qpms_iri_t iri, bool add); + /// Projects a "big" matrix onto an irrep. /** TODO doc */ complex double *qpms_scatsys_irrep_pack_matrix(complex double *target_packed,