#define STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(COND)?1:-1] #include #include #include #include #include #include #include "qpms_error.h" #include #include #include "beyn.h" #define SQ(x) ((x)*(x)) // matrix access #define MAT(mat_, n_rows_, n_cols_, i_row_, i_col_) (mat_[(n_cols_) * (i_row_) + (i_col_)]) typedef struct BeynSolver { int M; // dimension of matrices int L; // number of columns of VHat matrix complex double *eigenvalues, *eigenvalue_errors; // [L] complex double *eigenvectors; // [L][M] (!!!) complex double *A0, *A1, *A0_coarse, *A1_coarse, *MInvVHat; // [M][L] complex double *VHat; // [M][L] double *Sigma, *residuals; // [L] } BeynSolver; // constructor, destructor BeynSolver *BeynSolver_create(int M, int L); void BeynSolver_free(BeynSolver *solver); // reset the random matrix VHat used in Beyn's algorithm void BeynSolver_srandom(BeynSolver *solver, unsigned int RandSeed); // Uniformly random number from interval [a, b]. static double randU(double a, double b) { return a + (b-a) * random() * (1. / RAND_MAX); } // Random number from normal distribution (via Box-Muller transform, which is enough for our purposes). static double randN(double Sigma, double Mu) { double u1 = randU(0, 1); double u2 = randU(0, 1); return Mu + Sigma*sqrt(-2*log(u1))*cos(2.*M_PI*u2); } static complex double zrandN(double sigma, double mu){ return randN(sigma, mu) + I*randN(sigma, mu); } static inline double dsq(double a) { return a * a; } static _Bool beyn_contour_ellipse_inside_test(struct beyn_contour_t *c, complex double z) { double rRe = c->z_dz[c->n][0]; double rIm = c->z_dz[c->n][1]; complex double zn = z - c->centre; return dsq(creal(zn)/rRe) + dsq(cimag(zn)/rIm) < 1; } beyn_contour_t *beyn_contour_ellipse(complex double centre, double rRe, double rIm, size_t n) { beyn_contour_t *c; QPMS_CRASHING_MALLOC(c, sizeof(beyn_contour_t) + (n+1)*sizeof(c->z_dz[0])); c->centre = centre; c->n = n; for(size_t i = 0; i < n; ++i) { double t = i*2*M_PI/n; double st = sin(t), ct = cos(t); c->z_dz[i][0] = centre + ct*rRe + I*st*rIm; c->z_dz[i][1] = (-rRe*st + I*rIm*ct) * (2*M_PI / n); } // We hide the half-axes metadata after the discretisation points. c->z_dz[n][0] = rRe; c->z_dz[n][1] = rIm; c->inside_test = beyn_contour_ellipse_inside_test; return c; } // Sets correct sign to zero for a given branch cut orientation static inline complex double align_zero(complex double z, beyn_contour_halfellipse_orientation or) { // Maybe redundant, TODO check the standard. const double positive_zero = copysign(0., +1.); const double negative_zero = copysign(0., -1.); switch(or) { // ensure correct zero signs; CHECKME!!! case BEYN_CONTOUR_HALFELLIPSE_RE_PLUS: if(creal(z) == 0 && signbit(creal(z))) z = positive_zero + I * cimag(z); break; case BEYN_CONTOUR_HALFELLIPSE_RE_MINUS: if(creal(z) == 0 && !signbit(creal(z))) z = negative_zero + I * cimag(z); break; case BEYN_CONTOUR_HALFELLIPSE_IM_PLUS: if(cimag(z) == 0 && signbit(cimag(z))) z = creal(z) + I * positive_zero; break; case BEYN_CONTOUR_HALFELLIPSE_IM_MINUS: if(cimag(z) == 0 && !signbit(cimag(z))) z = creal(z) + I * negative_zero; break; default: QPMS_WTF; } return z; } beyn_contour_t *beyn_contour_halfellipse(complex double centre, double rRe, double rIm, size_t n, beyn_contour_halfellipse_orientation or) { beyn_contour_t *c; QPMS_CRASHING_MALLOC(c, sizeof(beyn_contour_t) + (n+1)*sizeof(c->z_dz[0]) + sizeof(beyn_contour_halfellipse_orientation)); c->centre = centre; c->n = n; const size_t nline = n/2; const size_t narc = n - nline; complex double faktor; double l = rRe, h = rIm; switch(or) { case BEYN_CONTOUR_HALFELLIPSE_RE_PLUS: faktor = -I; l = rIm; h = rRe; break; case BEYN_CONTOUR_HALFELLIPSE_RE_MINUS: faktor = I; l = rIm; h = rRe; break; case BEYN_CONTOUR_HALFELLIPSE_IM_PLUS: faktor = 1; break; case BEYN_CONTOUR_HALFELLIPSE_IM_MINUS: faktor = -1; break; default: QPMS_WTF; } for(size_t i = 0; i < narc; ++i) { double t = (i+0.5)*M_PI/narc; double st = sin(t), ct = cos(t); c->z_dz[i][0] = centre + faktor*(ct*l + I*st*h); c->z_dz[i][1] = faktor * (-l*st + I*h*ct) * (M_PI / narc); } for(size_t i = 0; i < nline; ++i) { double t = 0.5 * (1 - (double) nline) + i; c->z_dz[narc + i][0] = align_zero(centre + faktor * t * 2 * l / nline, or); c->z_dz[narc + i][1] = faktor * 2 * l / nline; } // We hide the half-axes metadata after the discretisation points. c->z_dz[n][0] = rRe; c->z_dz[n][1] = rIm; // ugly... *((beyn_contour_halfellipse_orientation *) &c->z_dz[n+1][0]) = or; c->inside_test = NULL; // TODO beyn_contour_halfellipse_inside_test; return c; } beyn_contour_t *beyn_contour_kidney(complex double centre, double rRe, double rIm, const double rounding, const size_t n, beyn_contour_halfellipse_orientation or) { beyn_contour_t *c; QPMS_ENSURE(rounding >= 0 && rounding < .5, "rounding must lie in the interval [0, 0.5)"); QPMS_CRASHING_MALLOC(c, sizeof(beyn_contour_t) + (n+1)*sizeof(c->z_dz[0]) + sizeof(beyn_contour_halfellipse_orientation)); c->centre = centre; c->n = n; complex double faktor; double l = rRe, h = rIm; switch(or) { case BEYN_CONTOUR_HALFELLIPSE_RE_PLUS: faktor = -I; l = rIm; h = rRe; break; case BEYN_CONTOUR_HALFELLIPSE_RE_MINUS: faktor = I; l = rIm; h = rRe; break; case BEYN_CONTOUR_HALFELLIPSE_IM_PLUS: faktor = 1; break; case BEYN_CONTOUR_HALFELLIPSE_IM_MINUS: faktor = -1; break; default: QPMS_WTF; } // Small circle centre coordinates. const double y = rounding * h; // distance from the cut / straight line const double x = sqrt(SQ(h - y) - SQ(y)); const double alpha = asin(y/(h-y)); const double ar = l/h; // aspect ratio // Parameter range (equal to the contour length if ar == 1) const double tmax = 2 * (x + (M_PI_2 + alpha) * y + (M_PI_2 - alpha) * h); const double dt = tmax / n; size_t i = 0; double t; // Straight line, first part double t_lo = 0, t_hi = x; for(; t = i * dt, t <= t_hi; ++i) { c->z_dz[i][0] = align_zero(centre + (t - t_lo) * ar * faktor, or); c->z_dz[i][1] = dt * ar * faktor; } // First small arc t_lo = t_hi; t_hi = t_lo + (M_PI_2 + alpha) * y; for(; t = i * dt, t < t_hi; ++i) { double phi = (t - t_lo) / y - M_PI_2; c->z_dz[i][0] = centre + (ar * (x + y * cos(phi)) + y * (1 + sin(phi)) * I) * faktor; c->z_dz[i][1] = dt * (- ar * sin(phi) + cos(phi) * I) * faktor; } // Big arc t_lo = t_hi; t_hi = t_lo + (M_PI - 2 * alpha) * h; for(; t = i * dt, t < t_hi; ++i) { double phi = (t - t_lo) / h + alpha; c->z_dz[i][0] = centre + (ar * (h * cos(phi)) + h * sin(phi) * I) * faktor; c->z_dz[i][1] = dt * (- ar * sin(phi) + cos(phi) * I) * faktor; } // Second small arc t_lo = t_hi; t_hi = t_lo + (M_PI_2 + alpha) * y; for(; t = i * dt, t < t_hi; ++i) { double phi = (t - t_lo) / y + M_PI - alpha; c->z_dz[i][0] = centre + (ar * (- x + y * cos(phi)) + y * (1 + sin(phi)) * I) * faktor; c->z_dz[i][1] = dt * (- ar * sin(phi) + cos(phi) * I) * faktor; } // Straight line, second part t_lo = t_hi; t_hi = tmax; for(; t = i * dt, i < n; ++i) { c->z_dz[i][0] = align_zero(centre + (t - t_lo - x) * ar * faktor, or); c->z_dz[i][1] = dt * ar * faktor; } #if 0 // TODO later // We hide the half-axes metadata after the discretisation points. c->z_dz[n][0] = rRe; c->z_dz[n][1] = rIm; // ugly... *((beyn_contour_halfellipse_orientation *) &c->z_dz[n+1][0]) = or; #endif c->inside_test = NULL; // TODO beyn_contour_halfellipse_inside_test; return c; } void beyn_result_free(beyn_result_t *r) { if(r) { free(r->eigval); free(r->eigval_err); free(r->residuals); free(r->eigvec); free(r->ranktest_SV); free(r); } } BeynSolver *BeynSolver_create(int M, int L) { BeynSolver *solver; QPMS_CRASHING_CALLOC(solver, 1, sizeof(*solver)); solver->M = M; solver->L = L; QPMS_ENSURE(L <= M, "We expect L <= M, but we got L = %d, M = %d ", L, M); // storage for eigenvalues and eigenvectors QPMS_CRASHING_CALLOC(solver->eigenvalues, L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->eigenvalue_errors, L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->residuals, L, sizeof(double)); QPMS_CRASHING_CALLOC(solver->eigenvectors, L * M, sizeof(complex double)); // storage for singular values, random VHat matrix, etc. used in algorithm QPMS_CRASHING_CALLOC(solver->A0, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->A1, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->A0_coarse, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->A1_coarse, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->MInvVHat, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->VHat, M * L, sizeof(complex double)); QPMS_CRASHING_CALLOC(solver->Sigma, L, sizeof(double)); // Beyn Step 1: Generate random matrix VHat BeynSolver_srandom(solver,(unsigned)time(NULL)); return solver; } void BeynSolver_free(BeynSolver *solver) { free(solver->eigenvalues); free(solver->eigenvalue_errors); free(solver->eigenvectors); free(solver->A0); free(solver->A1); free(solver->A0_coarse); free(solver->A1_coarse); free(solver->MInvVHat); free(solver->Sigma); free(solver->residuals); free(solver->VHat); free(solver); } void BeynSolver_free_partial(BeynSolver *solver) { free(solver->A0); free(solver->A1); free(solver->A0_coarse); free(solver->A1_coarse); free(solver->MInvVHat); free(solver->VHat); free(solver); } void BeynSolver_srandom(BeynSolver *solver, unsigned int RandSeed) { if (RandSeed==0) RandSeed=time(0); srandom(RandSeed); for(size_t i = 0; i < solver->M * solver->L; ++i) solver->VHat[i] = zrandN(1, 0); } /* * linear-algebra manipulations on the A0 and A1 matrices * (obtained via numerical quadrature) to extract eigenvalues * and eigenvectors */ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_t M_function, void *Params, complex double *A0, complex double *A1, double complex z0, complex double *eigenvalues, complex double *eigenvectors, const double rank_tol, size_t rank_sel_min, const double res_tol) { const size_t m = solver->M; const size_t l = solver->L; double *Sigma = solver->Sigma; int verbose = 1; // TODO // Beyn Step 3: Compute SVD: A0 = V0_full * Sigma * W0T_full if(verbose) printf(" Beyn: computing SVD...\n"); complex double *V0_full; QPMS_CRASHING_MALLOCPY(V0_full, A0, m * l * sizeof(complex double)); complex double *W0T_full; QPMS_CRASHING_MALLOC(W0T_full, l * l * sizeof(complex double)); QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR, // A = U*Σ*conjg(V') 'O' /*jobz, 'O' overwrites a with U and saves conjg(V') in vt if m >= n, i.e. if M >= L, which holds*/, m, // V0_full->size1 /* m, number of rows */, l, // V0_full->size2 /* n, number of columns */, V0_full, //(lapack_complex_double *)(V0_full->data) /*a*/, l, //V0_full->tda /*lda*/, Sigma, //Sigma->data /*s*/, NULL /*u; not used*/, m /*ldu; should not be really used but lapacke requires it for some obscure reason*/, W0T_full, //(lapack_complex_double *)W0T_full->data /*vt*/, l //W0T_full->tda /*ldvt*/ )); // Beyn Step 4: Rank test for Sigma // compute effective rank K (number of eigenvalue candidates) int K=0; for (int k=0; k < l/* ksize*/ /* this is l, actually */; k++) { if (verbose) printf("Beyn: SV(%d)=%e\n",k, Sigma[k] ); if (k < rank_sel_min || Sigma[k] > rank_tol) K++; } if (verbose)printf(" Beyn: %d/%zd relevant singular values\n",K,l); if (K==0) { QPMS_WARN("no singular values found in Beyn eigensolver\n"); return 0; } // Beyn step 5: B = V0' * A1 * W0 * Sigma^-1 // set V0, W0T = matrices of first K right, left singular vectors complex double *V0, *W0T; QPMS_CRASHING_MALLOC(V0, m * K * sizeof(complex double)); QPMS_CRASHING_MALLOC(W0T, K * l * sizeof(complex double)); // TODO this is stupid, some parts could be handled simply by realloc. for (int k = 0; k < K; ++k) { for(int i = 0; i < m; ++i) MAT(V0, m, K, i, k) = MAT(V0_full, m, l, i, k); for(int j = 0; j < l; ++j) MAT(W0T, K, l, k, j) = MAT(W0T_full, l, l, k, j); } free(V0_full); free(W0T_full); complex double *TM2, *B; QPMS_CRASHING_CALLOC(TM2, K * l, sizeof(complex double)); QPMS_CRASHING_CALLOC(B, K * K, sizeof(complex double)); if(verbose) printf(" Multiplying V0*A1->TM...\n"); // dims: V0[m,K], A1[m,l], TM2[K,l] const complex double one = 1, zero = 0; cblas_zgemm(CblasRowMajor, CblasConjTrans, CblasNoTrans, K, l, m, &one, V0, K, A1, l, &zero, TM2, l); if(verbose) printf(" Multiplying TM*W0T->B...\n"); // TM2, W0T, zero, B); // DIMS: TM2[K,l], W0T[K,l], B[K,K] cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasConjTrans, K, K, l, &one, TM2, l, W0T, l, &zero, B, K); free(W0T); free(TM2); if(verbose) printf(" Scaling B <- B*Sigma^{-1}\n"); for(int i = 0; i < K; i++) { for(int j = 0; j < K; j++) MAT(B, K, K, j, i) /= Sigma[i]; } // Beyn step 6. // Eigenvalue decomposition B -> S*Lambda*S' /* According to Beyn's paper (Algorithm 1), one should check conditioning * of the eigenvalues; if they are ill-conditioned, one should perform * a procedure involving Schur decomposition and reordering. * * Beyn refers to MATLAB routines eig, condeig, schur, ordschur. * I am not sure about the equivalents in LAPACK, TODO check zgeevx, zgeesx. */ if(verbose) printf(" Eigensolving (%i,%i)\n",K,K); complex double *Lambda /* eigenvalues */ , *S /* eigenvector */; QPMS_CRASHING_MALLOC(Lambda, K * sizeof(complex double)); QPMS_CRASHING_MALLOC(S, K * K * sizeof(complex double)); // dims: B[K,K], S[K,K], Lambda[K] QPMS_ENSURE_SUCCESS(LAPACKE_zgeev( LAPACK_ROW_MAJOR, 'N' /* jobvl; don't compute left eigenvectors */, 'V' /* jobvr; do compute right eigenvectors */, K /* n */, B, //(lapack_complex_double *)(B->data) /* a */, K, //B->tda /* lda */, Lambda, //(lapack_complex_double *) Lambda->data /* w */, NULL /* vl */, m /* ldvl, not used but for some reason needed */, S, //(lapack_complex_double *)(S->data)/* vr */, K //S->tda/* ldvr */ )); free(B); // V0S <- V0*S printf("Multiplying V0*S...\n"); complex double *V0S; QPMS_CRASHING_MALLOC(V0S, m * K * sizeof(complex double)); // dims: V0[m,K], S[K,K], V0S[m,K] cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, K, K, &one, V0, K, S, K, &zero, V0S, K); free(V0); // FIXME!!! make clear relation between KRetained and K in the results! // (If they differ, there are possibly some spurious eigenvalues.) int KRetained = 0; complex double *Mmat, *MVk; QPMS_CRASHING_MALLOC(Mmat, m * m * sizeof(complex double)); QPMS_CRASHING_MALLOC(MVk, m * sizeof(complex double)); for (int k = 0; k < K; ++k) { const complex double z = z0 + Lambda[k]; double residual = 0; if(res_tol > 0) { QPMS_ENSURE_SUCCESS(M_function(Mmat, m, z, Params)); // Vk[i] == V0S[i, k]; dims: Mmat[m,m], Vk[m] (V0S[m, K]), MVk[m] cblas_zgemv(CblasRowMajor, CblasNoTrans, m, m, &one, Mmat, m, &MAT(V0S, m, K, 0, k), K /* stride of Vk in V0S */, &zero, MVk, 1); residual = cblas_dznrm2(m, MVk, 1); if (verbose) printf("Beyn: Residual(%i)=%e\n",k,residual); } if (res_tol > 0 && residual > res_tol) continue; eigenvalues[KRetained] = z; if(eigenvectors) { // eigenvectors is NULL when calculating the "coarse" contour for error estimates for(int j = 0; j < m; ++j) MAT(eigenvectors, l, m, KRetained, j) = MAT(V0S, m, K, j, k); solver->residuals[KRetained] = residual; } ++KRetained; } free(V0S); free(Mmat); free(MVk); free(S); free(Lambda); return KRetained; } beyn_result_t *beyn_solve(const size_t m, const size_t l, beyn_function_M_t M_function, beyn_function_M_inv_Vhat_t M_inv_Vhat_function, void *params, const beyn_contour_t *contour, double rank_tol, size_t rank_sel_min, double res_tol) { BeynSolver *solver = BeynSolver_create(m, l); complex double *A0 = solver->A0; complex double *A1 = solver->A1; complex double *A0_coarse = solver->A0_coarse; complex double *A1_coarse = solver->A1_coarse; complex double *MInvVHat = solver->MInvVHat; complex double *VHat = solver->VHat; /***************************************************************/ /* evaluate contour integrals by numerical quadrature to get */ /* A0 and A1 matrices */ /***************************************************************/ // TODO zeroing probably redundant (Used calloc...) memset(A0, 0, m * l * sizeof(complex double)); memset(A1, 0, m * l * sizeof(complex double)); memset(A0_coarse, 0, m * l * sizeof(complex double)); memset(A1_coarse, 0, m * l * sizeof(complex double)); const size_t N = contour->n; if(N & 1) QPMS_WARN("Contour discretisation point number is odd (%zd)," " the error estimates might be a bit off.", N); // Beyn Step 2: Computa A0, A1 const complex double z0 = contour->centre; for(int n=0; nz_dz[n][0]; const complex double dz = contour->z_dz[n][1]; memcpy(MInvVHat, VHat, m * l * sizeof(complex double)); if(M_inv_Vhat_function) { QPMS_ENSURE_SUCCESS(M_inv_Vhat_function(MInvVHat, m, l, VHat, z, params)); } else { lapack_int *pivot; complex double *Mmat; QPMS_CRASHING_MALLOC(Mmat, m * m * sizeof(complex double)); QPMS_ENSURE_SUCCESS(M_function(Mmat, m, z, params)); QPMS_CRASHING_MALLOC(pivot, sizeof(lapack_int) * m); #if 0 QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR, m /*m*/, m /*n*/,(lapack_complex_double *) Mmat->data /*a*/, Mmat->tda /*lda*/, pivot /*ipiv*/)); QPMS_ENSURE(MInvVHat->tda == l, "wut?"); QPMS_ENSURE_SUCCESS(LAPACKE_zgetrs(LAPACK_ROW_MAJOR, 'N' /*trans*/, m /*n*/, l/*nrhs*/, (lapack_complex_double *)(Mmat->data) /*a*/, Mmat->tda /*lda*/, pivot/*ipiv*/, (lapack_complex_double *)(MInvVHat->data) /*b*/, MInvVHat->tda/*ldb*/)); #endif QPMS_ENSURE_SUCCESS(LAPACKE_zgetrf(LAPACK_ROW_MAJOR, m /*m*/, m /*n*/, Mmat /*a*/, m /*lda*/, pivot /*ipiv*/)); QPMS_ENSURE_SUCCESS(LAPACKE_zgetrs(LAPACK_ROW_MAJOR, 'N' /*trans*/, m /*n*/, l/*nrhs*/, Mmat /*a*/, m /*lda*/, pivot/*ipiv*/, MInvVHat /*b*/, l /*ldb*/)); free(pivot); free(Mmat); } for(size_t i = 0; i < m * l; ++i) MInvVHat[i] *= dz; for(size_t i = 0; i < m * l; ++i) A0[i] += MInvVHat[i]; if((n%2)==0) { for(size_t i = 0; i < m * l; ++i) A0_coarse[i] += 2 * MInvVHat[i]; } // A_1 scaling as in Beyn's Remark 3.2(b) for numerical stability. for(size_t i = 0; i < m * l; ++i) MInvVHat[i] *= (z - z0); for(size_t i = 0; i < m * l; ++i) A1[i] += MInvVHat[i]; if((n%2)==0) { for(size_t i = 0; i < m * l; ++i) A1_coarse[i] += 2 * MInvVHat[i]; } } complex double *eigenvalues = solver->eigenvalues; complex double *eigenvalue_errors = solver->eigenvalue_errors; complex double *eigenvectors = solver->eigenvectors; // Repeat Steps 3–6 with rougher contour approximation to get an error estimate. int K_coarse = beyn_process_matrices(solver, M_function, params, A0_coarse, A1_coarse, z0, eigenvalue_errors, /*eigenvectors_coarse*/ NULL, rank_tol, rank_sel_min, res_tol); // Reid did also fabs on the complex and real parts ^^^. // Beyn Steps 3–6 int K = beyn_process_matrices(solver, M_function, params, A0, A1, z0, eigenvalues, eigenvectors, rank_tol, rank_sel_min, res_tol); const complex double minusone = -1.; //TODO maybe change the sizes to correspend to retained eigval count K, not l cblas_zaxpy(l, &minusone, eigenvalues, 1, eigenvalue_errors, 1); beyn_result_t *result; QPMS_CRASHING_MALLOC(result, sizeof(*result)); result->eigval = solver->eigenvalues; result->eigval_err = solver->eigenvalue_errors; result->residuals = solver->residuals; result->eigvec = solver->eigenvectors; result->ranktest_SV = solver->Sigma; result->neig = K; result->vlen = m; BeynSolver_free_partial(solver); return result; }