Pure BLAS Beyn: cleanup commented out GSL code

This commit is contained in:
Marek Nečada 2021-06-11 10:38:49 +03:00
parent 2be3521333
commit f19c590d6e
1 changed files with 2 additions and 72 deletions

View File

@ -269,29 +269,18 @@ BeynSolver *BeynSolver_create(int M, int L)
QPMS_ENSURE(L <= M, "We expect L <= M, but we got L = %d, M = %d ", L, M); QPMS_ENSURE(L <= M, "We expect L <= M, but we got L = %d, M = %d ", L, M);
// storage for eigenvalues and eigenvectors // storage for eigenvalues and eigenvectors
//solver->eigenvalues = gsl_vector_complex_calloc(L);
QPMS_CRASHING_CALLOC(solver->eigenvalues, L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->eigenvalues, L, sizeof(complex double));
//solver->eigenvalue_errors = gsl_vector_complex_calloc(L);
QPMS_CRASHING_CALLOC(solver->eigenvalue_errors, L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->eigenvalue_errors, L, sizeof(complex double));
//solver->residuals = gsl_vector_calloc(L);
QPMS_CRASHING_CALLOC(solver->residuals, L, sizeof(double)); QPMS_CRASHING_CALLOC(solver->residuals, L, sizeof(double));
//solver->eigenvectors = gsl_matrix_complex_calloc(L, M);
QPMS_CRASHING_CALLOC(solver->eigenvectors, L * M, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->eigenvectors, L * M, sizeof(complex double));
// storage for singular values, random VHat matrix, etc. used in algorithm // storage for singular values, random VHat matrix, etc. used in algorithm
//solver->A0 = gsl_matrix_complex_calloc(M,L);
QPMS_CRASHING_CALLOC(solver->A0, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->A0, M * L, sizeof(complex double));
//solver->A1 = gsl_matrix_complex_calloc(M,L);
QPMS_CRASHING_CALLOC(solver->A1, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->A1, M * L, sizeof(complex double));
//solver->A0_coarse = gsl_matrix_complex_calloc(M,L);
QPMS_CRASHING_CALLOC(solver->A0_coarse, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->A0_coarse, M * L, sizeof(complex double));
//solver->A1_coarse = gsl_matrix_complex_calloc(M,L);
QPMS_CRASHING_CALLOC(solver->A1_coarse, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->A1_coarse, M * L, sizeof(complex double));
//solver->MInvVHat = gsl_matrix_complex_calloc(M,L);
QPMS_CRASHING_CALLOC(solver->MInvVHat, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->MInvVHat, M * L, sizeof(complex double));
//solver->VHat = gsl_matrix_complex_calloc(M,L);
QPMS_CRASHING_CALLOC(solver->VHat, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->VHat, M * L, sizeof(complex double));
//solver->Sigma = gsl_vector_calloc(L);
QPMS_CRASHING_CALLOC(solver->Sigma, L, sizeof(double)); QPMS_CRASHING_CALLOC(solver->Sigma, L, sizeof(double));
// Beyn Step 1: Generate random matrix VHat // Beyn Step 1: Generate random matrix VHat
BeynSolver_srandom(solver,(unsigned)time(NULL)); BeynSolver_srandom(solver,(unsigned)time(NULL));
@ -359,14 +348,9 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_t M_functio
// Beyn Step 3: Compute SVD: A0 = V0_full * Sigma * W0T_full // Beyn Step 3: Compute SVD: A0 = V0_full * Sigma * W0T_full
if(verbose) printf(" Beyn: computing SVD...\n"); if(verbose) printf(" Beyn: computing SVD...\n");
//gsl_matrix_complex* V0_full = gsl_matrix_complex_alloc(m,l);
//gsl_matrix_complex_memcpy(V0_full,A0);
complex double *V0_full; complex double *V0_full;
QPMS_CRASHING_MALLOCPY(V0_full, A0, m * l * sizeof(complex double)); QPMS_CRASHING_MALLOCPY(V0_full, A0, m * l * sizeof(complex double));
//gsl_matrix_complex* W0T_full = gsl_matrix_complex_alloc(l,l);
complex double *W0T_full; QPMS_CRASHING_MALLOC(W0T_full, l * l * sizeof(complex double)); complex double *W0T_full; QPMS_CRASHING_MALLOC(W0T_full, l * l * sizeof(complex double));
//QPMS_ENSURE(Sigma->stride == 1, "Sigma vector stride must be 1 for LAPACKE_zgesdd, got %zd.", Sigma->stride);
//QPMS_ENSURE(V0_full->size1 >= V0_full->size2, "m = %zd, l = %zd, what the hell?");
QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR, // A = U*Σ*conjg(V') QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR, // A = U*Σ*conjg(V')
'O' /*jobz, 'O' overwrites a with U and saves conjg(V') in vt if m >= n, i.e. if M >= L, which holds*/, 'O' /*jobz, 'O' overwrites a with U and saves conjg(V') in vt if m >= n, i.e. if M >= L, which holds*/,
m, // V0_full->size1 /* m, number of rows */, m, // V0_full->size1 /* m, number of rows */,
@ -385,8 +369,8 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_t M_functio
// compute effective rank K (number of eigenvalue candidates) // compute effective rank K (number of eigenvalue candidates)
int K=0; int K=0;
for (int k=0; k < l/* k<Sigma->size*/ /* this is l, actually */; k++) { for (int k=0; k < l/* k<Sigma->size*/ /* this is l, actually */; k++) {
if (verbose) printf("Beyn: SV(%d)=%e\n",k, Sigma[k] /*gsl_vector_get(Sigma, k)*/); if (verbose) printf("Beyn: SV(%d)=%e\n",k, Sigma[k] );
if (k < rank_sel_min || Sigma[k] /*gsl_vector_get(Sigma, k)*/ > rank_tol) if (k < rank_sel_min || Sigma[k] > rank_tol)
K++; K++;
} }
if (verbose)printf(" Beyn: %d/%zd relevant singular values\n",K,l); if (verbose)printf(" Beyn: %d/%zd relevant singular values\n",K,l);
@ -397,41 +381,26 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_t M_functio
// Beyn step 5: B = V0' * A1 * W0 * Sigma^-1 // Beyn step 5: B = V0' * A1 * W0 * Sigma^-1
// set V0, W0T = matrices of first K right, left singular vectors // set V0, W0T = matrices of first K right, left singular vectors
//gsl_matrix_complex *V0 = gsl_matrix_complex_alloc(m,K);
//gsl_matrix_complex *W0T= gsl_matrix_complex_alloc(K,l);
complex double *V0, *W0T; complex double *V0, *W0T;
QPMS_CRASHING_MALLOC(V0, m * K * sizeof(complex double)); QPMS_CRASHING_MALLOC(V0, m * K * sizeof(complex double));
QPMS_CRASHING_MALLOC(W0T, K * l * sizeof(complex double)); QPMS_CRASHING_MALLOC(W0T, K * l * sizeof(complex double));
// TODO this is stupid, some parts could be handled simply by realloc. // TODO this is stupid, some parts could be handled simply by realloc.
for (int k = 0; k < K; ++k) { for (int k = 0; k < K; ++k) {
//gsl_vector_complex_view tmp;
//tmp = gsl_matrix_complex_column(V0_full, k);
//gsl_matrix_complex_set_col(V0, k, &(tmp.vector));
for(int i = 0; i < m; ++i) for(int i = 0; i < m; ++i)
MAT(V0, m, K, i, k) = MAT(V0_full, m, l, i, k); MAT(V0, m, K, i, k) = MAT(V0_full, m, l, i, k);
//tmp = gsl_matrix_complex_row(W0T_full, k);
//gsl_matrix_complex_set_row(W0T, k, &(tmp.vector));
for(int j = 0; j < l; ++j) for(int j = 0; j < l; ++j)
MAT(W0T, K, l, k, j) = MAT(W0T_full, l, l, k, j); MAT(W0T, K, l, k, j) = MAT(W0T_full, l, l, k, j);
} }
//gsl_matrix_complex_free(V0_full);
free(V0_full); free(V0_full);
//gsl_matrix_complex_free(W0T_full);
free(W0T_full); free(W0T_full);
//gsl_matrix_complex *TM2 = gsl_matrix_complex_calloc(K,l);
//gsl_matrix_complex *B = gsl_matrix_complex_calloc(K,K);
complex double *TM2, *B; complex double *TM2, *B;
QPMS_CRASHING_CALLOC(TM2, K * l, sizeof(complex double)); QPMS_CRASHING_CALLOC(TM2, K * l, sizeof(complex double));
QPMS_CRASHING_CALLOC(B, K * K, sizeof(complex double)); QPMS_CRASHING_CALLOC(B, K * K, sizeof(complex double));
if(verbose) printf(" Multiplying V0*A1->TM...\n"); if(verbose) printf(" Multiplying V0*A1->TM...\n");
//const gsl_complex one = gsl_complex_rect(1,0);
//const gsl_complex zero = gsl_complex_rect(0,0);
//gsl_blas_zgemm(CblasConjTrans, CblasNoTrans, one,
// V0, A1, zero, TM2);
// dims: V0[m,K], A1[m,l], TM2[K,l] // dims: V0[m,K], A1[m,l], TM2[K,l]
const complex double one = 1, zero = 0; const complex double one = 1, zero = 0;
cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans, K, l, m, cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans, K, l, m,
@ -440,29 +409,19 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_t M_functio
if(verbose) printf(" Multiplying TM*W0T->B...\n"); if(verbose) printf(" Multiplying TM*W0T->B...\n");
//gsl_blas_zgemm(CblasNoTrans, CblasConjTrans, one,
// TM2, W0T, zero, B); // TM2, W0T, zero, B);
// DIMS: TM2[K,l], W0T[K,l], B[K,K] // DIMS: TM2[K,l], W0T[K,l], B[K,K]
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, K, K, l, cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, K, K, l,
&one, TM2, l, W0T, l, &zero, B, K); &one, TM2, l, W0T, l, &zero, B, K);
//gsl_matrix_complex_free(W0T);
//gsl_matrix_complex_free(TM2);
free(W0T); free(W0T);
free(TM2); free(TM2);
if(verbose) printf(" Scaling B <- B*Sigma^{-1}\n"); if(verbose) printf(" Scaling B <- B*Sigma^{-1}\n");
//gsl_vector_complex *tmp = gsl_vector_complex_calloc(K);
for(int i = 0; i < K; i++) { for(int i = 0; i < K; i++) {
//gsl_matrix_complex_get_col(tmp, B, i);
//gsl_vector_complex_scale(tmp, gsl_complex_rect(1.0/gsl_vector_get(Sigma,i), 0));
//gsl_matrix_complex_set_col(B,i,tmp);
for(int j = 0; j < K; j++) for(int j = 0; j < K; j++)
MAT(B, K, K, j, i) /= Sigma[i]; MAT(B, K, K, j, i) /= Sigma[i];
} }
//gsl_vector_complex_free(tmp);
//for(int m=0; m<K; m++) // B <- B * Sigma^{-1}
// Beyn step 6. // Beyn step 6.
// Eigenvalue decomposition B -> S*Lambda*S' // Eigenvalue decomposition B -> S*Lambda*S'
@ -475,14 +434,10 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_t M_functio
*/ */
if(verbose) printf(" Eigensolving (%i,%i)\n",K,K); if(verbose) printf(" Eigensolving (%i,%i)\n",K,K);
//gsl_vector_complex *Lambda = gsl_vector_complex_alloc(K); // eigenvalues
//gsl_matrix_complex *S = gsl_matrix_complex_alloc(K,K); // eigenvectors
complex double *Lambda /* eigenvalues */ , *S /* eigenvector */; complex double *Lambda /* eigenvalues */ , *S /* eigenvector */;
QPMS_CRASHING_MALLOC(Lambda, K * sizeof(complex double)); QPMS_CRASHING_MALLOC(Lambda, K * sizeof(complex double));
QPMS_CRASHING_MALLOC(S, K * K * sizeof(complex double)); QPMS_CRASHING_MALLOC(S, K * K * sizeof(complex double));
//QPMS_ENSURE(Sigma->stride == 1, "Sigma vector stride must be 1 for LAPACKE_zgesdd, got %zd.", Sigma->stride);
//QPMS_ENSURE(Lambda->stride == 1, "Lambda vector stride must be 1 for LAPACKE_zgesdd, got %zd.", Sigma->stride);
// dims: B[K,K], S[K,K], Lambda[K] // dims: B[K,K], S[K,K], Lambda[K]
QPMS_ENSURE_SUCCESS(LAPACKE_zgeev( QPMS_ENSURE_SUCCESS(LAPACKE_zgeev(
LAPACK_ROW_MAJOR, LAPACK_ROW_MAJOR,
@ -498,59 +453,44 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_t M_functio
K //S->tda/* ldvr */ K //S->tda/* ldvr */
)); ));
//gsl_matrix_complex_free(B);
free(B); free(B);
// V0S <- V0*S // V0S <- V0*S
printf("Multiplying V0*S...\n"); printf("Multiplying V0*S...\n");
//gsl_matrix_complex *V0S = gsl_matrix_complex_alloc(m, K);
//QPMS_ENSURE_SUCCESS(gsl_blas_zgemm(CblasNoTrans, CblasNoTrans,
// one, V0, S, zero, V0S));
complex double *V0S; complex double *V0S;
QPMS_CRASHING_MALLOC(V0S, m * K * sizeof(complex double)); QPMS_CRASHING_MALLOC(V0S, m * K * sizeof(complex double));
// dims: V0[m,K], S[K,K], V0S[m,K] // dims: V0[m,K], S[K,K], V0S[m,K]
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, K, K, cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, K, K,
&one, V0, K, S, K, &zero, V0S, K); &one, V0, K, S, K, &zero, V0S, K);
//gsl_matrix_complex_free(V0);
free(V0); free(V0);
// FIXME!!! make clear relation between KRetained and K in the results! // FIXME!!! make clear relation between KRetained and K in the results!
// (If they differ, there are possibly some spurious eigenvalues.) // (If they differ, there are possibly some spurious eigenvalues.)
int KRetained = 0; int KRetained = 0;
//gsl_matrix_complex *Mmat = gsl_matrix_complex_alloc(m, m);
//gsl_vector_complex *MVk = gsl_vector_complex_alloc(m);
complex double *Mmat, *MVk; complex double *Mmat, *MVk;
QPMS_CRASHING_MALLOC(Mmat, m * m * sizeof(complex double)); QPMS_CRASHING_MALLOC(Mmat, m * m * sizeof(complex double));
QPMS_CRASHING_MALLOC(MVk, m * sizeof(complex double)); QPMS_CRASHING_MALLOC(MVk, m * sizeof(complex double));
for (int k = 0; k < K; ++k) { for (int k = 0; k < K; ++k) {
//const gsl_complex zgsl = gsl_complex_add(gsl_complex_rect(creal(z0), cimag(z0)), gsl_vector_complex_get(Lambda, k));
//const complex double z = GSL_REAL(zgsl) + I*GSL_IMAG(zgsl);
const complex double z = z0 + Lambda[k]; const complex double z = z0 + Lambda[k];
//gsl_vector_complex_const_view Vk = gsl_matrix_complex_const_column(V0S, k);
double residual = 0; double residual = 0;
if(res_tol > 0) { if(res_tol > 0) {
QPMS_ENSURE_SUCCESS(M_function(Mmat, m, z, Params)); QPMS_ENSURE_SUCCESS(M_function(Mmat, m, z, Params));
//QPMS_ENSURE_SUCCESS(gsl_blas_zgemv(CblasNoTrans, one, Mmat, &(Vk.vector), zero, MVk));
// Vk[i] == V0S[i, k]; dims: Mmat[m,m], Vk[m] (V0S[m, K]), MVk[m] // Vk[i] == V0S[i, k]; dims: Mmat[m,m], Vk[m] (V0S[m, K]), MVk[m]
cblas_zgemv(CblasRowMajor, CblasNoTrans, m, m, cblas_zgemv(CblasRowMajor, CblasNoTrans, m, m,
&one, Mmat, m, &MAT(V0S, m, K, 0, k), K /* stride of Vk in V0S */, &one, Mmat, m, &MAT(V0S, m, K, 0, k), K /* stride of Vk in V0S */,
&zero, MVk, 1); &zero, MVk, 1);
//residual = gsl_blas_dznrm2(MVk);
residual = cblas_dznrm2(m, MVk, 1); residual = cblas_dznrm2(m, MVk, 1);
if (verbose) printf("Beyn: Residual(%i)=%e\n",k,residual); if (verbose) printf("Beyn: Residual(%i)=%e\n",k,residual);
} }
if (res_tol > 0 && residual > res_tol) continue; if (res_tol > 0 && residual > res_tol) continue;
//gsl_vector_complex_set(eigenvalues, KRetained, zgsl);
eigenvalues[KRetained] = z; eigenvalues[KRetained] = z;
if(eigenvectors) { if(eigenvectors) {
//gsl_matrix_complex_set_row(eigenvectors, KRetained, &(Vk.vector));
for(int j = 0; j < m; ++j) for(int j = 0; j < m; ++j)
MAT(eigenvectors, l, m, KRetained, j) = MAT(V0S, m, K, j, k); MAT(eigenvectors, l, m, KRetained, j) = MAT(V0S, m, K, j, k);
//gsl_vector_set(solver->residuals, KRetained, residual);
} }
++KRetained; ++KRetained;
@ -607,7 +547,6 @@ beyn_result_t *beyn_solve(const size_t m, const size_t l,
QPMS_ENSURE_SUCCESS(M_inv_Vhat_function(MInvVHat, m, l, VHat, z, params)); QPMS_ENSURE_SUCCESS(M_inv_Vhat_function(MInvVHat, m, l, VHat, z, params));
} else { } else {
lapack_int *pivot; lapack_int *pivot;
//gsl_matrix_complex *Mmat = gsl_matrix_complex_alloc(m,m);
complex double *Mmat; complex double *Mmat;
QPMS_CRASHING_MALLOC(Mmat, m * m * sizeof(complex double)); QPMS_CRASHING_MALLOC(Mmat, m * m * sizeof(complex double));
QPMS_ENSURE_SUCCESS(M_function(Mmat, m, z, params)); QPMS_ENSURE_SUCCESS(M_function(Mmat, m, z, params));
@ -630,30 +569,22 @@ beyn_result_t *beyn_solve(const size_t m, const size_t l,
free(Mmat); free(Mmat);
} }
// gsl_matrix_complex_scale(MInvVHat, cs2g(dz));
for(size_t i = 0; i < m * l; ++i) for(size_t i = 0; i < m * l; ++i)
MInvVHat[i] *= dz; MInvVHat[i] *= dz;
//gsl_matrix_complex_add(A0, MInvVHat);
for(size_t i = 0; i < m * l; ++i) for(size_t i = 0; i < m * l; ++i)
A0[i] += MInvVHat[i]; A0[i] += MInvVHat[i];
if((n%2)==0) { if((n%2)==0) {
//gsl_matrix_complex_add(A0_coarse, MInvVHat);
//gsl_matrix_complex_add(A0_coarse, MInvVHat);
for(size_t i = 0; i < m * l; ++i) for(size_t i = 0; i < m * l; ++i)
A0_coarse[i] += 2 * MInvVHat[i]; A0_coarse[i] += 2 * MInvVHat[i];
} }
// A_1 scaling as in Beyn's Remark 3.2(b) for numerical stability. // A_1 scaling as in Beyn's Remark 3.2(b) for numerical stability.
//gsl_matrix_complex_scale(MInvVHat, cs2g(z - z0));
for(size_t i = 0; i < m * l; ++i) for(size_t i = 0; i < m * l; ++i)
MInvVHat[i] *= (z - z0); MInvVHat[i] *= (z - z0);
//gsl_matrix_complex_add(A1, MInvVHat);
for(size_t i = 0; i < m * l; ++i) for(size_t i = 0; i < m * l; ++i)
A1[i] += MInvVHat[i]; A1[i] += MInvVHat[i];
if((n%2)==0) { if((n%2)==0) {
for(size_t i = 0; i < m * l; ++i) for(size_t i = 0; i < m * l; ++i)
//gsl_matrix_complex_add(A1_coarse, MInvVHat);
//gsl_matrix_complex_add(A1_coarse, MInvVHat);
A1_coarse[i] += 2 * MInvVHat[i]; A1_coarse[i] += 2 * MInvVHat[i];
} }
} }
@ -669,7 +600,6 @@ beyn_result_t *beyn_solve(const size_t m, const size_t l,
// Beyn Steps 36 // Beyn Steps 36
int K = beyn_process_matrices(solver, M_function, params, A0, A1, z0, eigenvalues, eigenvectors, rank_tol, rank_sel_min, res_tol); int K = beyn_process_matrices(solver, M_function, params, A0, A1, z0, eigenvalues, eigenvectors, rank_tol, rank_sel_min, res_tol);
//gsl_blas_zaxpy(gsl_complex_rect(-1,0), eigenvalues, eigenvalue_errors);
const complex double minusone = -1.; const complex double minusone = -1.;
//TODO maybe change the sizes to correspend to retained eigval count K, not l //TODO maybe change the sizes to correspend to retained eigval count K, not l
cblas_zaxpy(l, &minusone, eigenvalues, 1, eigenvalue_errors, 1); cblas_zaxpy(l, &minusone, eigenvalues, 1, eigenvalue_errors, 1);