qpms/qpms/qpmsblas.h

35 lines
1.6 KiB
C

/*! \file qpmsblas.h
* \brief Naïve implementation of BLAS functions to work around OpenBLAS bugs.
*
* There is a bug in certain versions of OpenBLAS causing crashes if many
* BLAS functions are run from different threads in parallel.
* Typically, this happens when one calculates some large matrix (such as the
* global translation matrix) in several threads and cblas_zgemm()
* is called on relatively small submatrices.
* Because the submatrices are small, we can use a naïve,
* serial implementation of cblas_zgemm() as a workaround without a substantial
* impact on performance.
*
* If included together with <cblas.h>, this must be included _afterwards_ because of the typedefs!
*/
#ifndef QPMSBLAS_H
#define QPMSBLAS_H
/// Integer type used for the matrix dimensions and leading dimensions passed to qpms_zgemm().
#define QPMS_BLAS_INDEX_T long long int
#ifndef CBLAS_H
/*
 * Fallback declarations of the CBLAS selector enums.
 *
 * They are compiled only when CBLAS_H is not defined, i.e. when no header
 * using that include guard has been included before this one; otherwise the
 * types already declared there are used as they are.
 */

/// Matrix storage layout selector (row-major or column-major).
typedef enum {
	CblasRowMajor = 101,
	CblasColMajor = 102
} CBLAS_LAYOUT;

/// Selector of the operation applied to a matrix operand.
typedef enum {
	CblasNoTrans   = 111,
	CblasTrans     = 112,
	CblasConjTrans = 113
} CBLAS_TRANSPOSE;

/// Upper/lower triangle selector.
typedef enum {
	CblasUpper = 121,
	CblasLower = 122
} CBLAS_UPLO;

/// Unit/non-unit diagonal selector.
typedef enum {
	CblasNonUnit = 131,
	CblasUnit    = 132
} CBLAS_DIAG;

/// Left/right side selector.
typedef enum {
	CblasLeft  = 141,
	CblasRight = 142
} CBLAS_SIDE;
#endif /* CBLAS_H */
/*! \brief Naïve serial reimplementation of cblas_zgemm().
 *
 * Declared as a substitute for cblas_zgemm() with the same parameter order;
 * the difference visible in this declaration is that the dimensions and
 * leading dimensions have type QPMS_BLAS_INDEX_T.
 *
 * NOTE(review): the implementation is not part of this header. The parameter
 * descriptions below assume the usual zgemm contract
 * (C := alpha * op(A) * op(B) + beta * C) -- confirm against the definition,
 * including which Order / TransA / TransB combinations are actually supported.
 *
 * \param Order  Storage layout of the matrices.
 * \param TransA Operation applied to A.
 * \param TransB Operation applied to B.
 * \param M      Presumably the number of rows of op(A) and of C.
 * \param N      Presumably the number of columns of op(B) and of C.
 * \param K      Presumably the number of columns of op(A) / rows of op(B).
 * \param alpha  Pointer to the complex scalar alpha.
 * \param A      Input matrix A (not modified through this pointer).
 * \param lda    Leading dimension of A.
 * \param B      Input matrix B (not modified through this pointer).
 * \param ldb    Leading dimension of B.
 * \param beta   Pointer to the complex scalar beta.
 * \param C      Matrix C; the only array argument that is not const-qualified.
 * \param ldc    Leading dimension of C.
 */
void qpms_zgemm(
		CBLAS_LAYOUT Order,
		CBLAS_TRANSPOSE TransA,
		CBLAS_TRANSPOSE TransB,
		const QPMS_BLAS_INDEX_T M,
		const QPMS_BLAS_INDEX_T N,
		const QPMS_BLAS_INDEX_T K,
		const _Complex double *alpha,
		const _Complex double *A,
		const QPMS_BLAS_INDEX_T lda,
		const _Complex double *B,
		const QPMS_BLAS_INDEX_T ldb,
		const _Complex double *beta,
		_Complex double *C,
		const QPMS_BLAS_INDEX_T ldc
		);
#endif //QPMSBLAS_H