Compare commits

...

26 Commits

Author SHA1 Message Date
Marek Nečada acc08f8863 Fix finiterectlat-modes.py obvious errors.
Former-commit-id: a538ed6c3c84cebffccd41272994027039e46b57
2020-01-30 02:03:43 +02:00
Marek Nečada b4381bd13d Simple finite řectangular lattice mode search script
Former-commit-id: 374eec706353088dfc3a1248b96be31172bdaefb
2020-01-30 01:42:03 +02:00
Marek Nečada 6233e1c210 Avoid tmgen multiplicities (->slowdown) in ScatteringSystem constructor
Former-commit-id: d4d20d3f019dee1765681d4b2f2fce95ea49fb37
2020-01-28 21:28:07 +02:00
Marek Nečada e3834fdad7 Remove build type hardcode spec.
Also add QPMS_NORETURN attribute/macro.

TODO cherry-pick this


Former-commit-id: 1e5b9ae308ce958f6970ddc343d22ed5f8e5661c
2020-01-28 18:12:35 +02:00
Marek Nečada acec5bed98 Legendre function cache.
Former-commit-id: 17370bcc6d24cebdbfc80c9a3b2801c68f2686ff
2020-01-28 17:25:30 +02:00
Marek Nečada 96c9e95ea0 Parallel modeproblem matrix fixed?
Former-commit-id: 9ad51b186a68689a754ce986d7f8bf2f97ac258f
2020-01-28 13:04:05 +02:00
Marek Nečada 8f4a8c7c7b Fix memory leaks; use error macros
TODO cherry-pick this.


Former-commit-id: be5d23a5b880c46636719c98e5b818388cc9a4c3
2020-01-28 09:51:20 +02:00
Marek Nečada 338fc00bfe Translation-cached version of modeproblem matrix.
Former-commit-id: c76945d915138870e1a4f150038705a5fa82ce48
2020-01-28 07:24:54 +02:00
Marek Nečada 775976816e Use translation cache in beyn's algorithm (full matrix only)
Former-commit-id: b9f95a726a8f4de7e6822c38089ca149e4fad1c9
2020-01-26 11:04:57 +02:00
Marek Nečada 00ab187510 Fix stupid bugs
Former-commit-id: 91cc762c0b228037241536aa18e662596587f0eb
2020-01-26 10:47:42 +02:00
Marek Nečada b62f1dadc5 WIP fixes, cython interface
Former-commit-id: 8a97ee8adf11b21c6fbaf2d0afe6c6d2e81a8d69
2020-01-26 09:33:20 +02:00
Marek Nečada 7d19bed4cd Booster constructor/destructor calls etc.
Former-commit-id: 8bf3c410498ae79a7bfb22b6644b465657b62752
2020-01-26 08:03:47 +02:00
Marek Nečada 7a80c9e0f2 Jdu spát
Former-commit-id: fa35c9818ffc5d5a57dfe1881a2a8489039dadb6
2020-01-25 23:39:04 +02:00
Marek Nečada 1f63d2b529 Build modeproblem matrix full w cached Bessels
Former-commit-id: 4cfd631317511ee8765f4a98a179f6295e4142c9
2020-01-23 20:18:26 +02:00
Marek Nečada 2f03fc58b4 Expose qpms_trans_calculator_get_AB_arrays_precalcbuf()
Former-commit-id: 151d3f7bf615366cad3da5589f3165f452c00474
2020-01-23 17:01:31 +02:00
Marek Nečada f33b102768 Extract and inline translation matrix reordering procedure.
Former-commit-id: 9aee9e199b9c3aed76207b1314031278bc4614ea
2020-01-23 16:12:55 +02:00
Marek Nečada e4d84b3b25 Minor translations refactoring.
Former-commit-id: cb3a6e9e5fb1dcfc69f1bc84a46e128bebd27fde
2020-01-23 11:59:13 +02:00
Marek Nečada 5cc29210d7 Jdu spát.
Former-commit-id: e253bb5068bdb4ad8ccef9879dadb8e1463a8968
2020-01-23 00:23:33 +02:00
Marek Nečada effe59bc50 Translation booster: pre-calculate Bessel funs.
Former-commit-id: 85548558b3f65ac9e0a88c72adb4874dab98ca9e
2020-01-23 00:10:34 +02:00
Marek Nečada 8209e9df6e WIP translation booster
Former-commit-id: ce2523b52f5c7bc9a8c6edd79f7e6d329c9634c1
2020-01-22 18:13:10 +02:00
Marek Nečada 8251eba955 WIP translation booster
Former-commit-id: 4ed4c1f7c7948013c4b89bf6cb4c665d541ca3d8
2020-01-22 17:07:15 +02:00
Marek Nečada af12f2301f WIP scatsys translation booster.
Former-commit-id: 2909ff20c1805a9c4a16f0fcd8a82f4c54e1a84a
2020-01-22 14:45:31 +02:00
Marek Nečada a0acdfdc5d Irrep-decomposed scatsys beyn; fix missing FinitePointGroup reference
Former-commit-id: 2829bd16ef4dd30afac5482537dc120c6ad896cc
2020-01-22 13:37:22 +02:00
Marek Nečada 36c6826b5a Beyn algorithm cython wrapper (finite systems)
Former-commit-id: 6dde6db2c89c32e26803cd393e1c7310d21427bd
2020-01-21 18:51:06 +02:00
Marek Nečada ed3322ec93 Beyn wrappers for finite system, doxygen
Former-commit-id: 065d8f5efb10d014a3b52f63b64feaeec6233ae7
2020-01-21 18:20:22 +02:00
Marek Nečada f082838c5f Beyn algorithm "cherry-pick" from 'newbeyn_unitcell'
- Add rank_min_sel argument to beyn_solve() and beyn_solve_gsl()
- Fix order of K and K_coarse evaluation (K_coarse should probably
  be removed).


Former-commit-id: c0a241f8712439ba84e7c907658ebb6071528482
2020-01-21 15:31:38 +02:00
19 changed files with 1173 additions and 115 deletions

View File

@ -5,8 +5,6 @@ include(GNUInstallDirs)
project (QPMS) project (QPMS)
set(CMAKE_BUILD_TYPE Debug)
macro(use_c99) macro(use_c99)
if (CMAKE_VERSION VERSION_LESS "3.1") if (CMAKE_VERSION VERSION_LESS "3.1")
if (CMAKE_C_COMPILER_ID STREQUAL "GNU") if (CMAKE_C_COMPILER_ID STREQUAL "GNU")

122
misc/finiterectlat-modes.py Executable file
View File

@ -0,0 +1,122 @@
#!/usr/bin/env python3
import math
from qpms.argproc import ArgParser
ap = ArgParser(['rectlattice2d_finite', 'single_particle', 'single_lMax', ])
ap.add_argument("-t", "--rank-tolerance", type=float, default=1e11)
ap.add_argument("-c", "--min-candidates", type=int, default=1, help='always try at least this many eigenvalue candidates, even if their SVs in the rank tests are lower than rank_tolerance')
ap.add_argument("-T", "--residual-tolerance", type=float, default=0.)
ap.add_argument("-o", "--output", type=str, required=False, help='output path (if not provided, will be generated automatically)')
#ap.add_argument("-O", "--plot-out", type=str, required=False, help="path to plot output (optional)")
#ap.add_argument("-P", "--plot", action='store_true', help="if -p not given, plot to a default path")
#ap.add_argument("-g", "--save-gradually", action='store_true', help="saves the partial result after computing each irrep")
ap.add_argument("-f", "--centre", type=complex, required=True, help='Contour centre in eV')
ap.add_argument("--ai", type=float, default=0.05, help="Contour imaginary half-axis in eV")
ap.add_argument("--ar", type=float, default=0.05, help="Contour real half-axis in eV")
ap.add_argument("-N", type=int, default="150", help="Integration contour discretisation size")
ap.add_argument("--irrep", type=str, default="none", help="Irrep subspace (irrep index from 0 to 7, irrep label, or 'none' for no irrep decomposition")
a=ap.parse_args()
import logging
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
Nx, Ny = a.size
px, py = a.period
particlestr = ("sph" if a.height is None else "cyl") + ("_r%gnm" % (a.radius*1e9))
if a.height is not None: particlestr += "_h%gnm" % (a.height * 1e9)
defaultprefix = "%s_p%gnmx%gnm_%dx%d_m%s_n%g_L%d_c(%s±%g±%gj)eV_cn%d" % (
particlestr, px*1e9, py*1e9, Nx, Ny, str(a.material), a.refractive_index, a.lMax,
str(a.centre), a.ai, a.ar, a.N)
logging.info("Default file prefix: %s" % defaultprefix)
def inside_ellipse(point_xy, centre_xy, halfaxes_xy):
x = point_xy[0] - centre_xy[0]
y = point_xy[1] - centre_xy[1]
ax = halfaxes_xy[0]
ay = halfaxes_xy[1]
return ((x/ax)**2 + (y/ay)**2) <= 1
import numpy as np
import qpms
from qpms.cybspec import BaseSpec
from qpms.cytmatrices import CTMatrix, TMatrixGenerator
from qpms.qpms_c import Particle
from qpms.cymaterials import EpsMu, EpsMuGenerator, LorentzDrudeModel, lorentz_drude
from qpms.cycommon import DebugFlags, dbgmsg_enable
from qpms import FinitePointGroup, ScatteringSystem, BesselType, eV, hbar
from qpms.symmetries import point_group_info
eh = eV/hbar
dbgmsg_enable(DebugFlags.INTEGRATION)
#Particle positions
orig_x = (np.arange(Nx/2) + (0 if (Nx % 2) else .5)) * px
orig_y = (np.arange(Ny/2) + (0 if (Ny % 2) else .5)) * py
orig_xy = np.stack(np.meshgrid(orig_x, orig_y), axis = -1)
bspec = BaseSpec(lMax = a.lMax)
medium = EpsMuGenerator(ap.background_epsmu)
particles= [Particle(orig_xy[i], ap.tmgen, bspec) for i in np.ndindex(orig_xy.shape[:-1])]
sym = FinitePointGroup(point_group_info['D2h'])
logging.info("Creating scattering system object")
ss, ssw = ScatteringSystem.create(particles, medium, sym, a.centre * eh)
if a.irrep == 'none':
iri = None # no irrep decomposition
irname = 'full'
logging.info("Not performing irrep decomposition and working with the full space of dimension %d." % ss.fecv_size)
else:
try:
iri = int(a.irrep)
except ValueError:
iri = ss.irrep_names.index(a.irrep)
irname = ss.irrep_names[iri]
logging.info("Using irrep subspace %s (iri = %d) of dimension %d." % (irname, iri, ss.saecv_sizes[iri]))
outfile_tmp = defaultprefix + ".tmp" if a.output is None else a.output + ".tmp"
logging.info("Starting Beyn's algorithm")
results = ss.find_modes(iri=iri, omega_centre = a.centre*eh, omega_rr=a.ar*eh, omega_ri=a.ai*eh, contour_points=a.N,
rank_tol=a.rank_tolerance, rank_min_sel=a.min_candidates, res_tol=a.residual_tolerance)
results['inside_contour'] = inside_ellipse((results['eigval'].real, results['eigval'].imag),
(a.centre.real*eh, a.centre.imag*eh), (a.ar*eh, a.ai*eh))
results['refractive_index_internal'] = [medium(om).n for om in results['eigval']]
outfile = defaultprefix + (('_ir%s_%s.npz' % (str(iri), irname)) if iri is not None else '.npz') if a.output is None else a.output
np.savez(outfile, meta=vars(a), **results)
logging.info("Saved to %s" % outfile)
exit(0)
# TODO plots.
if a.plot or (a.plot_out is not None):
import matplotlib
matplotlib.use('pdf')
from matplotlib import pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(sinalpha_list, σ_ext*1e12,label='$\sigma_\mathrm{ext}$')
ax.plot(sinalpha_list, σ_scat*1e12, label='$\sigma_\mathrm{scat}$')
ax.plot(sinalpha_list, σ_abs*1e12, label='$\sigma_\mathrm{abs}$')
ax.legend()
ax.set_xlabel('$\sin\\alpha$')
ax.set_ylabel('$\sigma/\mathrm{\mu m^2}$')
plotfile = defaultprefix + ".pdf" if a.plot_out is None else a.plot_out
fig.savefig(plotfile)
exit(0)

View File

@ -15,7 +15,7 @@ add_library (qpms SHARED translations.c tmatrices.c vecprint.c vswf.c wigner.c e
ewaldsf.c pointgroups.c latticegens.c ewaldsf.c pointgroups.c latticegens.c
lattices2d.c gaunt.c error.c legendre.c symmetries.c vecprint.c lattices2d.c gaunt.c error.c legendre.c symmetries.c vecprint.c
bessel.c own_zgemm.c parsing.c scatsystem.c materials.c drudeparam_data.c bessel.c own_zgemm.c parsing.c scatsystem.c materials.c drudeparam_data.c
lll.c beyn.c lll.c beyn.c scatsys_translation_booster.c
) )
use_c99() use_c99()
@ -31,7 +31,7 @@ target_include_directories (qpms PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
target_compile_options(qpms PRIVATE -Wall -Wno-return-type -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label) target_compile_options(qpms PRIVATE -Wall -Wno-return-type -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label)
target_compile_definitions(qpms PRIVATE LATTICESUMS32 QPMS_VECTORS_NICE_TRANSFORMATIONS target_compile_definitions(qpms PRIVATE LATTICESUMS32 QPMS_VECTORS_NICE_TRANSFORMATIONS
EWALD_AUTO_CUTOFF EWALD_AUTO_CUTOFF QPMS_EVALUATE_PARANOID_ASSERTS
) )
install(TARGETS qpms install(TARGETS qpms

View File

@ -7,7 +7,7 @@ from sys import platform as __platform
import warnings as __warnings import warnings as __warnings
try: try:
from .qpms_c import PointGroup, FinitePointGroup, FinitePointGroupElement, Particle, scatsystem_set_nthreads, ScatteringSystem, ScatteringMatrix, pitau, set_gsl_pythonic_error_handling, pgsl_ignore_error, gamma_inc, lll_reduce from .qpms_c import PointGroup, FinitePointGroup, FinitePointGroupElement, Particle, scatsystem_set_nthreads, ScatteringSystem, ScatteringMatrix, pitau, set_gsl_pythonic_error_handling, pgsl_ignore_error, gamma_inc, lll_reduce, ScatteringSystemCachingMode
except ImportError as ex: except ImportError as ex:
if __platform == "linux" or __platform == "linux2": if __platform == "linux" or __platform == "linux2":
if 'LD_LIBRARY_PATH' not in __os.environ.keys(): if 'LD_LIBRARY_PATH' not in __os.environ.keys():

View File

@ -345,7 +345,7 @@ void BeynSolver_srandom(BeynSolver *solver, unsigned int RandSeed)
static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_function, static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_function,
void *Params, void *Params,
gsl_matrix_complex *A0, gsl_matrix_complex *A1, double complex z0, gsl_matrix_complex *A0, gsl_matrix_complex *A1, double complex z0,
gsl_vector_complex *eigenvalues, gsl_matrix_complex *eigenvectors, const double rank_tol, const double res_tol) gsl_vector_complex *eigenvalues, gsl_matrix_complex *eigenvectors, const double rank_tol, size_t rank_sel_min, const double res_tol)
{ {
const size_t m = solver->M; const size_t m = solver->M;
const size_t l = solver->L; const size_t l = solver->L;
@ -379,7 +379,7 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_fun
int K=0; int K=0;
for (int k=0; k<Sigma->size /* this is l, actually */; k++) { for (int k=0; k<Sigma->size /* this is l, actually */; k++) {
if (verbose) printf("Beyn: SV(%d)=%e\n",k,gsl_vector_get(Sigma, k)); if (verbose) printf("Beyn: SV(%d)=%e\n",k,gsl_vector_get(Sigma, k));
if (gsl_vector_get(Sigma, k) > rank_tol) if (k < rank_sel_min || gsl_vector_get(Sigma, k) > rank_tol)
K++; K++;
} }
if (verbose)printf(" Beyn: %d/%zd relevant singular values\n",K,l); if (verbose)printf(" Beyn: %d/%zd relevant singular values\n",K,l);
@ -494,7 +494,7 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_fun
gsl_vector_complex_set(eigenvalues, KRetained, zgsl); gsl_vector_complex_set(eigenvalues, KRetained, zgsl);
if(eigenvectors) { if(eigenvectors) {
gsl_matrix_complex_set_col(eigenvectors, KRetained, &(Vk.vector)); gsl_matrix_complex_set_row(eigenvectors, KRetained, &(Vk.vector));
gsl_vector_set(solver->residuals, KRetained, residual); gsl_vector_set(solver->residuals, KRetained, residual);
} }
++KRetained; ++KRetained;
@ -513,7 +513,7 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_fun
beyn_result_gsl_t *beyn_solve_gsl(const size_t m, const size_t l, beyn_result_gsl_t *beyn_solve_gsl(const size_t m, const size_t l,
beyn_function_M_gsl_t M_function, beyn_function_M_inv_Vhat_gsl_t M_inv_Vhat_function, beyn_function_M_gsl_t M_function, beyn_function_M_inv_Vhat_gsl_t M_inv_Vhat_function,
void *params, const beyn_contour_t *contour, void *params, const beyn_contour_t *contour,
double rank_tol, double res_tol) double rank_tol, size_t rank_sel_min, double res_tol)
{ {
BeynSolver *solver = BeynSolver_create(m, l); BeynSolver *solver = BeynSolver_create(m, l);
@ -583,14 +583,14 @@ beyn_result_gsl_t *beyn_solve_gsl(const size_t m, const size_t l,
gsl_vector_complex *eigenvalue_errors = solver->eigenvalue_errors; gsl_vector_complex *eigenvalue_errors = solver->eigenvalue_errors;
gsl_matrix_complex *eigenvectors = solver->eigenvectors; gsl_matrix_complex *eigenvectors = solver->eigenvectors;
// Beyn Steps 36
int K = beyn_process_matrices(solver, M_function, params, A0, A1, z0, eigenvalues, eigenvectors, rank_tol, res_tol);
// Repeat Steps 36 with rougher contour approximation to get an error estimate. // Repeat Steps 36 with rougher contour approximation to get an error estimate.
int K_coarse = beyn_process_matrices(solver, M_function, params, A0_coarse, A1_coarse, z0, eigenvalue_errors, eigenvectors, rank_tol, res_tol); int K_coarse = beyn_process_matrices(solver, M_function, params, A0_coarse, A1_coarse, z0, eigenvalue_errors, /*eigenvectors_coarse*/ NULL, rank_tol, rank_sel_min, res_tol);
gsl_blas_zaxpy(gsl_complex_rect(-1,0), eigenvalues, eigenvalue_errors);
// Reid did also fabs on the complex and real parts ^^^. // Reid did also fabs on the complex and real parts ^^^.
// Beyn Steps 36
int K = beyn_process_matrices(solver, M_function, params, A0, A1, z0, eigenvalues, eigenvectors, rank_tol, rank_sel_min, res_tol);
gsl_blas_zaxpy(gsl_complex_rect(-1,0), eigenvalues, eigenvalue_errors);
beyn_result_gsl_t *result; beyn_result_gsl_t *result;
QPMS_CRASHING_MALLOC(result, sizeof(beyn_result_gsl_t)); QPMS_CRASHING_MALLOC(result, sizeof(beyn_result_gsl_t));
result->eigval = solver->eigenvalues; result->eigval = solver->eigenvalues;
@ -636,12 +636,12 @@ static int beyn_function_M_inv_Vhat_carr2gsl(gsl_matrix_complex *target,
} }
beyn_result_t *beyn_solve(size_t m, size_t l, beyn_function_M_t M, beyn_function_M_inv_Vhat_t M_inv_Vhat, beyn_result_t *beyn_solve(size_t m, size_t l, beyn_function_M_t M, beyn_function_M_inv_Vhat_t M_inv_Vhat,
void *params, const beyn_contour_t *contour, double rank_tol, double res_tol) { void *params, const beyn_contour_t *contour, double rank_tol, size_t rank_sel_min, double res_tol) {
struct beyn_function_M_carr2gsl_param p = {M, M_inv_Vhat, params}; struct beyn_function_M_carr2gsl_param p = {M, M_inv_Vhat, params};
return beyn_result_from_beyn_result_gsl( return beyn_result_from_beyn_result_gsl(
beyn_solve_gsl(m, l, beyn_function_M_carr2gsl, beyn_solve_gsl(m, l, beyn_function_M_carr2gsl,
(p.M_inv_Vhat_function) ? beyn_function_M_inv_Vhat_carr2gsl : NULL, (p.M_inv_Vhat_function) ? beyn_function_M_inv_Vhat_carr2gsl : NULL,
(void *) &p, contour, rank_tol, res_tol) (void *) &p, contour, rank_tol, rank_sel_min, res_tol)
); );
} }

View File

@ -72,7 +72,7 @@ typedef struct beyn_result_gsl_t {
gsl_vector_complex *eigval; gsl_vector_complex *eigval;
gsl_vector_complex *eigval_err; gsl_vector_complex *eigval_err;
gsl_vector *residuals; gsl_vector *residuals;
gsl_matrix_complex *eigvec; gsl_matrix_complex *eigvec; // Rows are the eigenvectors
gsl_vector *ranktest_SV; gsl_vector *ranktest_SV;
} beyn_result_gsl_t; } beyn_result_gsl_t;
@ -85,7 +85,7 @@ typedef struct beyn_result_t {
complex double *eigval; complex double *eigval;
complex double *eigval_err; complex double *eigval_err;
double *residuals; double *residuals;
complex double *eigvec; complex double *eigvec; // Rows are the eigenvectors
double *ranktest_SV; double *ranktest_SV;
/// Private, we wrap it around beyn_result_gsl_t for now. /// Private, we wrap it around beyn_result_gsl_t for now.
@ -109,6 +109,7 @@ beyn_result_gsl_t *beyn_solve_gsl(
void *params, ///< Parameter pointer passed to M() and M_inv_Vhat(). void *params, ///< Parameter pointer passed to M() and M_inv_Vhat().
const beyn_contour_t *contour, ///< Integration contour. const beyn_contour_t *contour, ///< Integration contour.
double rank_tol, ///< (default: `1e-4`) TODO DOC. double rank_tol, ///< (default: `1e-4`) TODO DOC.
size_t rank_min_sel, ///< Minimum number of eigenvalue candidates, even if they don't pass \a rank_tol.
double res_tol ///< (default: `0.0`) TODO DOC. double res_tol ///< (default: `0.0`) TODO DOC.
); );
@ -120,6 +121,7 @@ beyn_result_t *beyn_solve(
void *params, ///< Parameter pointer passed to M() and M_inv_Vhat(). void *params, ///< Parameter pointer passed to M() and M_inv_Vhat().
const beyn_contour_t *contour, ///< Integration contour. const beyn_contour_t *contour, ///< Integration contour.
double rank_tol, ///< (default: `1e-4`) TODO DOC. double rank_tol, ///< (default: `1e-4`) TODO DOC.
size_t rank_min_sel, ///< Minimum number of eigenvalue candidates, even if they don't pass \a rank_tol.
double res_tol ///< (default: `0.0`) TODO DOC. double res_tol ///< (default: `0.0`) TODO DOC.
); );

View File

@ -81,6 +81,12 @@ static inline qpms_gmi_t qpms_finite_group_inv(const qpms_finite_group_t *G,
return G->invi[a]; return G->invi[a];
} }
static inline _Bool qpms_iri_is_valid(const qpms_finite_group_t *G, qpms_iri_t iri) {
return (iri > G->nirreps || iri < 0) ? 0 : 1;
}
/// NOT IMPLEMENTED Get irrep index by name. /// NOT IMPLEMENTED Get irrep index by name.
qpms_iri_t qpms_finite_group_find_irrep_by_name(qpms_finite_group_t *G, char *name); qpms_iri_t qpms_finite_group_find_irrep_by_name(qpms_finite_group_t *G, char *name);

View File

@ -19,4 +19,10 @@
#define QPMS_EXPECT(exp,c) (exp) #define QPMS_EXPECT(exp,c) (exp)
#endif #endif
#if (defined(__GNUC__) && __GNUC__ >= 3) || \
(defined(__GNUC__) && defined(__GNUC_MINOR__) && __GNUC__ == 2 && __GNUC_MINOR__ >= 8)
// TODO clang
#define QPMS_NORETURN __attribute__((noreturn))
#endif
#endif // OPTIM_H #endif // OPTIM_H

View File

@ -16,7 +16,7 @@ from .cytmatrices cimport CTMatrix, TMatrixFunction, TMatrixGenerator
from .cymaterials cimport EpsMuGenerator from .cymaterials cimport EpsMuGenerator
from libc.stdlib cimport malloc, free, calloc from libc.stdlib cimport malloc, free, calloc
import warnings import warnings
import enum
# Set custom GSL error handler. N.B. this is obviously not thread-safe. # Set custom GSL error handler. N.B. this is obviously not thread-safe.
cdef char *pgsl_err_reason cdef char *pgsl_err_reason
@ -280,7 +280,7 @@ cdef class Particle:
if isinstance(tgen.holder, CTMatrix): if isinstance(tgen.holder, CTMatrix):
spec = (<CTMatrix>tgen.holder).spec spec = (<CTMatrix>tgen.holder).spec
else: else:
raise ValueError("bspec argument must be specified separately for str(type(t))") raise ValueError("bspec argument must be specified separately for %s" % str(type(t)))
self.f = TMatrixFunction(tgen, spec) self.f = TMatrixFunction(tgen, spec)
self.p.tmg = self.f.rawpointer() self.p.tmg = self.f.rawpointer()
# TODO non-trivial transformations later; if modified, do not forget to update ScatteringSystem constructor # TODO non-trivial transformations later; if modified, do not forget to update ScatteringSystem constructor
@ -331,6 +331,10 @@ cpdef void scatsystem_set_nthreads(long n):
qpms_scatsystem_set_nthreads(n) qpms_scatsystem_set_nthreads(n)
return return
class ScatteringSystemCachingMode(enum.IntEnum):
NEVER = QPMS_SS_CACHE_NEVER
AUTO = QPMS_SS_CACHE_AUTO
ALWAYS = QPMS_SS_CACHE_ALWAYS
cdef class ScatteringSystem: cdef class ScatteringSystem:
''' '''
@ -340,6 +344,16 @@ cdef class ScatteringSystem:
#cdef list Tmatrices # Here we keep the references to occuring T-matrices #cdef list Tmatrices # Here we keep the references to occuring T-matrices
cdef EpsMuGenerator medium_holder # Here we keep the reference to medium generator cdef EpsMuGenerator medium_holder # Here we keep the reference to medium generator
cdef qpms_scatsys_t *s cdef qpms_scatsys_t *s
cdef FinitePointGroup sym
cdef qpms_iri_t iri_py2c(self, iri, allow_None = True):
if iri is None and allow_None:
return QPMS_NO_IRREP
cdef qpms_iri_t nir = self.nirreps
cdef qpms_iri_t ciri = iri
if ciri < 0 or ciri > nir:
raise ValueError("Invalid irrep index %s (of %d irreps)", str(iri), self.nirreps)
return ciri
def check_s(self): # cdef instead? def check_s(self): # cdef instead?
if self.s == <qpms_scatsys_t *>NULL: if self.s == <qpms_scatsys_t *>NULL:
@ -347,7 +361,9 @@ cdef class ScatteringSystem:
#TODO is there a way to disable the constructor outside this module? #TODO is there a way to disable the constructor outside this module?
@staticmethod # We don't have any "standard" constructor for this right now @staticmethod # We don't have any "standard" constructor for this right now
def create(particles, medium, FinitePointGroup sym, cdouble omega): # TODO tolerances def create(particles, medium, FinitePointGroup sym, cdouble omega,
caching_mode = QPMS_SS_CACHE_DEFAULT): # TODO tolerances
caching_mode = ScatteringSystemCachingMode(caching_mode)
# These we are going to construct # These we are going to construct
cdef ScatteringSystem self cdef ScatteringSystem self
cdef _ScatteringSystemAtOmega pyssw cdef _ScatteringSystemAtOmega pyssw
@ -369,7 +385,8 @@ cdef class ScatteringSystem:
for p in particles: # find and enumerate unique t-matrix generators for p in particles: # find and enumerate unique t-matrix generators
if p.p.op.typ != QPMS_TMATRIX_OPERATION_NOOP: if p.p.op.typ != QPMS_TMATRIX_OPERATION_NOOP:
raise NotImplementedError("currently, only no-op T-matrix operations are allowed in ScatteringSystem constructor") raise NotImplementedError("currently, only no-op T-matrix operations are allowed in ScatteringSystem constructor")
tmg_key = id(p.f) #tmg_key = id(p.f) # This causes a different generator for each particle -> SUPER SLOW
tmg_key = (id(p.f.generator), id(p.f.spec))
if tmg_key not in tmgindices: if tmg_key not in tmgindices:
tmgindices[tmg_key] = tmg_count tmgindices[tmg_key] = tmg_count
tmgobjs.append(p.f) # Save the references on BaseSpecs and TMatrixGenerators (via TMatrixFunctions) tmgobjs.append(p.f) # Save the references on BaseSpecs and TMatrixGenerators (via TMatrixFunctions)
@ -401,12 +418,13 @@ cdef class ScatteringSystem:
orig.tm[tmi].op = qpms_tmatrix_operation_noop # TODO adjust when notrivial operations allowed orig.tm[tmi].op = qpms_tmatrix_operation_noop # TODO adjust when notrivial operations allowed
for pi in range(p_count): for pi in range(p_count):
p = particles[pi] p = particles[pi]
tmg_key = id(p.f) tmg_key = (id(p.f.generator), id(p.f.spec))
tm_derived_key = (tmg_key, None) # TODO unique representation of p.p.op instead of None tm_derived_key = (tmg_key, None) # TODO unique representation of p.p.op instead of None
orig.p[pi].pos = p.cval().pos orig.p[pi].pos = p.cval().pos
orig.p[pi].tmatrix_id = tmindices[tm_derived_key] orig.p[pi].tmatrix_id = tmindices[tm_derived_key]
ssw = qpms_scatsys_apply_symmetry(&orig, sym.rawpointer(), omega, &QPMS_TOLERANCE_DEFAULT) ssw = qpms_scatsys_apply_symmetry(&orig, sym.rawpointer(), omega, &QPMS_TOLERANCE_DEFAULT)
ss = ssw[0].ss ss = ssw[0].ss
qpms_ss_create_translation_cache(ss, caching_mode)
finally: finally:
free(orig.tmg) free(orig.tmg)
free(orig.tm) free(orig.tm)
@ -415,6 +433,7 @@ cdef class ScatteringSystem:
self.medium_holder = mediumgen self.medium_holder = mediumgen
self.s = ss self.s = ss
self.tmgobjs = tmgobjs self.tmgobjs = tmgobjs
self.sym = sym
pyssw = _ScatteringSystemAtOmega() pyssw = _ScatteringSystemAtOmega()
pyssw.ssw = ssw pyssw.ssw = ssw
pyssw.ss_pyref = self pyssw.ss_pyref = self
@ -588,6 +607,65 @@ cdef class ScatteringSystem:
self.s, qpms_incfield_planewave, <void *>&p, 0) self.s, qpms_incfield_planewave, <void *>&p, 0)
return target_np return target_np
property has_translation_cache:
def __get__(self):
self.check_s()
return True if qpms_scatsys_has_translation_cache(self.s) else False
def find_modes(self, cdouble omega_centre, double omega_rr, double omega_ri, iri = None,
size_t contour_points = 20, double rank_tol = 1e-4, size_t rank_min_sel=1,
double res_tol = 0):
"""
Attempts to find the eigenvalues and eigenvectors using Beyn's algorithm.
"""
cdef beyn_result_t *res = qpms_scatsys_finite_find_eigenmodes(self.s,
self.iri_py2c(iri),
omega_centre, omega_rr, omega_ri, contour_points,
rank_tol, rank_min_sel, res_tol)
if res == NULL: raise RuntimeError
cdef size_t neig = res[0].neig
cdef size_t vlen = res[0].vlen # should be equal to self.s.fecv_size
cdef np.ndarray[complex, ndim=1] eigval = np.empty((neig,), dtype=complex)
cdef cdouble[::1] eigval_v = eigval
cdef np.ndarray[complex, ndim=1] eigval_err = np.empty((neig,), dtype=complex)
cdef cdouble[::1] eigval_err_v = eigval_err
cdef np.ndarray[double, ndim=1] residuals = np.empty((neig,), dtype=np.double)
cdef double[::1] residuals_v = residuals
cdef np.ndarray[complex, ndim=2] eigvec = np.empty((neig,vlen),dtype=complex)
cdef cdouble[:,::1] eigvec_v = eigvec
cdef np.ndarray[double, ndim=1] ranktest_SV = np.empty((vlen), dtype=np.double)
cdef double[::1] ranktest_SV_v = ranktest_SV
cdef size_t i, j
for i in range(neig):
eigval_v[i] = res[0].eigval[i]
eigval_err_v[i] = res[0].eigval_err[i]
residuals_v[i] = res[0].residuals[i]
for j in range(vlen):
eigvec_v[i,j] = res[0].eigvec[i*vlen + j]
for i in range(vlen):
ranktest_SV_v[i] = res[0].ranktest_SV[i]
zdist = eigval - omega_centre
eigval_inside_metric = np.hypot(zdist.real / omega_rr, zdist.imag / omega_ri)
beyn_result_free(res)
retdict = {
'eigval':eigval,
'eigval_inside_metric':eigval_inside_metric,
'eigvec':eigvec,
'residuals':residuals,
'eigval_err':eigval_err,
'ranktest_SV':ranktest_SV,
'iri': iri,
}
return retdict
cdef class _ScatteringSystemAtOmega: cdef class _ScatteringSystemAtOmega:
''' '''
Wrapper over the C qpms_scatsys_at_omega_t structure Wrapper over the C qpms_scatsys_at_omega_t structure
@ -635,6 +713,21 @@ cdef class _ScatteringSystemAtOmega:
def __get__(self): return self.ss_pyref.irrep_names def __get__(self): return self.ss_pyref.irrep_names
property nirreps: property nirreps:
def __get__(self): return self.ss_pyref.nirreps def __get__(self): return self.ss_pyref.nirreps
property has_translation_cache:
def __get__(self):
self.check()
return True if qpms_scatsysw_has_translation_cache(self.ssw) else False
def add_translation_cache(self):
'''
Adds translation cache if the parent ScatteringSystem
contains the required metadata (this is not done automatically,
as the cache is not useful if no translation operators are to
be evaluated
'''
self.check()
qpms_ssw_create_translation_cache(self.ssw)
return self.has_translation_cache
def modeproblem_matrix_full(self): def modeproblem_matrix_full(self):
self.check() self.check()

View File

@ -107,6 +107,7 @@ cdef extern from "qpms_types.h":
ctypedef int32_t qpms_ss_pi_t ctypedef int32_t qpms_ss_pi_t
ctypedef int qpms_gmi_t ctypedef int qpms_gmi_t
ctypedef int qpms_iri_t ctypedef int qpms_iri_t
qpms_iri_t QPMS_NO_IRREP
ctypedef const char * qpms_permutation_t ctypedef const char * qpms_permutation_t
struct qpms_tmatrix_t: struct qpms_tmatrix_t:
qpms_vswf_set_spec_t *spec qpms_vswf_set_spec_t *spec
@ -549,10 +550,17 @@ cdef extern from "scatsystem.h":
cdouble omega cdouble omega
qpms_epsmu_t medium qpms_epsmu_t medium
cdouble wavenumber cdouble wavenumber
ctypedef enum qpms_ss_caching_mode_t:
QPMS_SS_CACHE_NEVER
QPMS_SS_CACHE_ALWAYS
QPMS_SS_CACHE_DEFAULT
QPMS_SS_CACHE_AUTO
qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig, const qpms_finite_group_t *sym, qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig, const qpms_finite_group_t *sym,
cdouble omega, const qpms_tolerance_spec_t *tol) cdouble omega, const qpms_tolerance_spec_t *tol)
qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss, cdouble omega) qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss, cdouble omega)
void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw) void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw)
int qpms_ss_create_translation_cache(qpms_scatsys_t *ss, qpms_ss_caching_mode_t m)
int qpms_ssw_create_translation_cache(qpms_scatsys_at_omega_t *ssw)
cdouble *qpms_scatsys_irrep_pack_matrix(cdouble *target_packed, cdouble *qpms_scatsys_irrep_pack_matrix(cdouble *target_packed,
const cdouble *orig_full, const qpms_scatsys_t *ss, qpms_iri_t iri) const cdouble *orig_full, const qpms_scatsys_t *ss, qpms_iri_t iri)
cdouble *qpms_scatsys_irrep_unpack_matrix(cdouble *target_full, cdouble *qpms_scatsys_irrep_unpack_matrix(cdouble *target_full,
@ -598,6 +606,11 @@ cdef extern from "scatsystem.h":
cdouble *qpms_scatsys_scatter_solve(cdouble *target_f, const cdouble *a_inc, qpms_ss_LU ludata) cdouble *qpms_scatsys_scatter_solve(cdouble *target_f, const cdouble *a_inc, qpms_ss_LU ludata)
const qpms_vswf_set_spec_t *qpms_ss_bspec_tmi(const qpms_scatsys_t *ss, qpms_ss_tmi_t tmi) const qpms_vswf_set_spec_t *qpms_ss_bspec_tmi(const qpms_scatsys_t *ss, qpms_ss_tmi_t tmi)
const qpms_vswf_set_spec_t *qpms_ss_bspec_pi(const qpms_scatsys_t *ss, qpms_ss_pi_t pi) const qpms_vswf_set_spec_t *qpms_ss_bspec_pi(const qpms_scatsys_t *ss, qpms_ss_pi_t pi)
beyn_result_t *qpms_scatsys_finite_find_eigenmodes(const qpms_scatsys_t *ss, qpms_iri_t iri,
cdouble omega_centre, double omega_rr, double omega_ri, size_t contour_npoints,
double rank_tol, size_t rank_sel_min, double res_tol)
bint qpms_scatsysw_has_translation_cache(const qpms_scatsys_at_omega_t *ssw)
bint qpms_scatsys_has_translation_cache(const qpms_scatsys_t *ss)
cdef extern from "ewald.h": cdef extern from "ewald.h":
struct qpms_csf_result: struct qpms_csf_result:
@ -674,11 +687,11 @@ cdef extern from "beyn.h":
beyn_result_gsl_t *beyn_solve_gsl(size_t m, size_t l, beyn_function_M_gsl_t M, beyn_result_gsl_t *beyn_solve_gsl(size_t m, size_t l, beyn_function_M_gsl_t M,
beyn_function_M_inv_Vhat_gsl_t M_inv_Vhat, void *params, const beyn_contour_t *contour, beyn_function_M_inv_Vhat_gsl_t M_inv_Vhat, void *params, const beyn_contour_t *contour,
double rank_tol, double res_tol) double rank_tol, size_t rank_min_sel, double res_tol)
beyn_result_t *beyn_solve(size_t m, size_t l, beyn_function_M_t M, beyn_result_t *beyn_solve(size_t m, size_t l, beyn_function_M_t M,
beyn_function_M_inv_Vhat_t M_inv_Vhat, void *params, const beyn_contour_t *contour, beyn_function_M_inv_Vhat_t M_inv_Vhat, void *params, const beyn_contour_t *contour,
double rank_tol, double res_tol) double rank_tol, size_t rank_min_sel, double res_tol)
beyn_contour_t *beyn_contour_ellipse(cdouble centre, double halfax_re, double halfax_im, size_t npoints) beyn_contour_t *beyn_contour_ellipse(cdouble centre, double halfax_re, double halfax_im, size_t npoints)
beyn_contour_t *beyn_contour_halfellipse(cdouble centre, double halfax_re, double halfax_im, size_t npoints, beyn_contour_t *beyn_contour_halfellipse(cdouble centre, double halfax_re, double halfax_im, size_t npoints,

View File

@ -4,14 +4,14 @@
#include "optim.h" #include "optim.h"
/// Provisional error message with abort(); /// Provisional error message with abort();
void qpms_pr_error(const char *fmt, ...); QPMS_NORETURN void qpms_pr_error(const char *fmt, ...);
//void qpms_error(const char *fmt, ...); //void qpms_error(const char *fmt, ...);
/// Provisional error message with abort(), indicating source and line number. /// Provisional error message with abort(), indicating source and line number.
void qpms_pr_error_at_line(const char *filename, unsigned int linenum, void qpms_pr_error_at_line(const char *filename, unsigned int linenum,
const char *fmt, ...); const char *fmt, ...);
void qpms_pr_error_at_flf(const char *filename, unsigned int linenum, QPMS_NORETURN void qpms_pr_error_at_flf(const char *filename, unsigned int linenum,
const char *func, const char *func,
const char *fmt, ...); const char *fmt, ...);
@ -100,6 +100,13 @@ qpms_dbgmsg_flags qpms_dbgmsg_enable(qpms_dbgmsg_flags types);
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,"Unexpected error (%d)", errorcode); \ qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,"Unexpected error (%d)", errorcode); \
} }
// Same as previous, but with message.
#define QPMS_ENSURE_SUCCESS_M(x, msg, ...) { \
int errorcode = (x); \
if(QPMS_UNLIKELY(errorcode)) \
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,msg, ##__VA_ARGS__); \
}
#define QPMS_ENSURE(x, msg, ...) {if(QPMS_UNLIKELY(!(x))) qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,msg, ##__VA_ARGS__); } #define QPMS_ENSURE(x, msg, ...) {if(QPMS_UNLIKELY(!(x))) qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,msg, ##__VA_ARGS__); }
@ -109,6 +116,16 @@ qpms_dbgmsg_flags qpms_dbgmsg_enable(qpms_dbgmsg_flags types);
"Unexpected error. This is most certainly a bug.");\ "Unexpected error. This is most certainly a bug.");\
} }
#ifdef QPMS_EVALUATE_PARANOID_ASSERTS
#define QPMS_PARANOID_ASSERT(x) {\
if(QPMS_UNLIKELY(!(x)))\
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,\
"Unexpected error. This is most certainly a bug.");\
}
#else
#define QPMS_PARANOID_ASSERT(x) {;}
#endif
#define QPMS_NOT_IMPLEMENTED(msg, ...) qpms_pr_error_at_flf(__FILE__,__LINE__,__func__, \ #define QPMS_NOT_IMPLEMENTED(msg, ...) qpms_pr_error_at_flf(__FILE__,__LINE__,__func__, \
"Not implemented:" msg, ##__VA_ARGS__) "Not implemented:" msg, ##__VA_ARGS__)

View File

@ -309,6 +309,8 @@ typedef int qpms_gmi_t;
/// Irreducible representation index. See also groups.h. /// Irreducible representation index. See also groups.h.
typedef int qpms_iri_t; typedef int qpms_iri_t;
/// Constant labeling that no irrep decomposition is done (full system solved instead).
#define QPMS_NO_IRREP ((qpms_iri_t) -1)
/// Permutation representation of a group element. /// Permutation representation of a group element.
/** For now, it's just a string of the form "(0,1)(3,4,5)" /** For now, it's just a string of the form "(0,1)(3,4,5)"

41
qpms/scatsys_private.h Normal file
View File

@ -0,0 +1,41 @@
#ifndef QPMS_SCATSYS_PRIVATE_H
#define QPMS_SCATSYS_PRIVATE_H
/*
* This file includes some definitions shared between scatsystem.c
* and scatsys_translation_booster.c that are not needed anywhere
* else.
*/
#include "scatsystem.h"
#include <pthread.h>
complex double *qpms_scatsysw_build_modeproblem_matrix_full_boosted(
complex double *target, const qpms_scatsys_at_omega_t *ssw);
complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_boosted(
complex double *target_packed, const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri);
/// "private" destructor, called by qpms_scatsys_free()
void qpms_scatsys_translation_booster_free(struct qpms_scatsys_translation_booster *);
/// "private" constructor, use qpms_ss_create_translation_cache() instead.
struct qpms_scatsys_translation_booster *qpms_scatsys_translation_booster_create(
const qpms_scatsys_t *ss);
struct qpms_scatsysw_translation_booster *
qpms_scatsysw_translation_booster_create(const qpms_scatsys_at_omega_t *ssw);
/// "private" destructor, called by qpms_scatsys_at_omega_free()
void qpms_scatsysw_translation_booster_free(struct qpms_scatsysw_translation_booster *);
struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg{
const qpms_scatsys_at_omega_t *ssw;
qpms_ss_pi_t *opistartR_ptr;
pthread_mutex_t *opistartR_mutex;
qpms_iri_t iri;
complex double *target_packed;
};
void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boosted(void *arg);
#endif //QPMS_SCATSYS_PRIVATE_H

View File

@ -0,0 +1,562 @@
// Functionality to speedup translation matrix computations in large finite arrays
// by caching Bessel function values etc.
#include <cblas.h>
#include "scatsys_private.h"
#include "translations_inlines.h"
#include <assert.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include "vectors.h"
#include "qpms_error.h"
#include "qpms_specfunc.h"
#include "groups.h"
#define SQ(x) ((x)*(x))
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
#include "qpmsblas.h"
#define SERIAL_ZGEMM qpms_zgemm
#else
#define SERIAL_ZGEMM cblas_zgemm
#endif
typedef size_t ppid_t;
typedef size_t uoppid_t;
// Unordered exclusive pair ID count.
static inline uoppid_t uopairarr_len(qpms_ss_pi_t pn_total) {
return pn_total * (pn_total - 1) / 2;
}
// Unordered exclusive pair ID.
static inline uoppid_t uopairid(qpms_ss_pi_t pn_total, qpms_ss_pi_t p1, qpms_ss_pi_t p2) {
qpms_ss_pi_t hi, lo;
QPMS_ASSERT(p1 != p2);
if (p1 < p2) {
lo = p1;
hi = p2;
} else {
lo = p2;
hi = p1;
}
QPMS_ASSERT(lo >= 0);
QPMS_ASSERT(hi < pn_total);
return lo + hi * (hi - 1) / 2;
}
// Ordered pair ID.
static inline ppid_t pairid(qpms_ss_pi_t pn_total, qpms_ss_pi_t p1, qpms_ss_pi_t p2) {
return pn_total * p1 + p2;
}
// Ordered pair ID count.
static inline ppid_t pairarr_len(qpms_ss_pi_t pn_total) {
return SQ(pn_total);
}
typedef struct qpms_scatsys_translation_booster {
double *r; // Unique distances array, indices are ppid_t
qpms_l_t *lMax_r; // lMaxes associated with the r's (use same indices as with r)
size_t r_count; // Number of different interparticle distances (length of r[])
size_t *r_map; // maps pairs to the corresponding distances (index of uoppid_t type)
// Offsets of the Bessel function values: bessel_offsets_r[i] has the cumulative sums
// of 2*lMax_r[j]+2, j < i. bessel_offsets_r[r_count] is the total length of the
// Bessel function "cache"
size_t *bessel_offsets_r;
double *theta;
qpms_l_t *lMax_theta;
size_t theta_count;
size_t *theta_map;
size_t *legendre_offsets_theta;
double *legendre_base;
double *phi;
qpms_l_t *lMax_phi;
size_t phi_count;
size_t *phi_map;
} booster_t;
/// Sorts an array and throws away duplicit elements.
/**
* The unique elements (according to the compare function)
* are aligned to the left of the original array.
*
* \returns Number of kept unique array members
*/
static size_t sort_and_eliminate(void *base, size_t nmemb, size_t size,
int (*compar)(const void *, const void *)) {
if (nmemb == 0) return 0; // corner case
qsort(base, nmemb, size, compar);
size_t left = 0;
for (size_t src = 1; src < nmemb; ++src) {
assert(left <= src);
if (compar((const char *)base + left*size, (const char *)base + src*size)) {
left += 1;
if (left < src) // Avoid copying to itself (when it's not needed and memcpy behaviour undef'd
memcpy((char *)base + left*size, (char *)base + src*size, size);
}
}
return left + 1;
}
static int cmp_double(const void *aa, const void *bb) {
const double a = *(double*)aa;
const double b = *(double*)bb;
if (a < b) return -1;
if (a == b) return 0;
if (a > b) return 1;
QPMS_WTF; // NaN or similar
}
booster_t *qpms_scatsys_translation_booster_create(
const qpms_scatsys_t *ss) {
const qpms_ss_pi_t np = ss->p_count;
booster_t *b;
QPMS_CRASHING_MALLOC(b, sizeof(*b));
QPMS_CRASHING_MALLOC(b->r, sizeof(double) * uopairarr_len(np));
QPMS_CRASHING_MALLOC(b->theta, sizeof(double) * pairarr_len(np)); // TODO only one theta per pair
QPMS_CRASHING_MALLOC(b->phi, sizeof(double) * pairarr_len(np)); // TODO only one theta per pair
for(qpms_ss_pi_t di = 0; di < np; ++di)
for(qpms_ss_pi_t si = 0; si < np; ++si) {
const ppid_t opid = pairid(np, di, si);
if (si != di) {
const sph_t dlj = cart2sph(cart3_substract(ss->p[di].pos, ss->p[si].pos));
b->r[uopairid(np, di, si)] = dlj.r;
b->theta[opid] = dlj.theta;
b->phi[opid] = dlj.phi;
}
else { // NOT USED IN ACTUAL CALCULATIONS, but must be valid doubles for sorting (infinities might be ok as well)
b->theta[opid] = M_PI_2;
b->phi[opid] = 0;
}
}
b->r_count = sort_and_eliminate(b->r, uopairarr_len(np), sizeof(*b->r), cmp_double);
QPMS_CRASHING_REALLOC(b->r, b->r_count * sizeof(*b->r));
b->theta_count = sort_and_eliminate(b->theta, pairarr_len(np), sizeof(*b->theta), cmp_double);
QPMS_CRASHING_REALLOC(b->theta, b->theta_count * sizeof(*b->theta));
b->phi_count = sort_and_eliminate(b->phi, pairarr_len(np), sizeof(*b->phi), cmp_double);
QPMS_CRASHING_REALLOC(b->phi, b->phi_count * sizeof(*b->phi));
QPMS_CRASHING_CALLOC(b->lMax_r, b->r_count, sizeof(*b->lMax_r));
QPMS_CRASHING_MALLOC(b->r_map, uopairarr_len(np) * sizeof(*b->r_map));
QPMS_CRASHING_CALLOC(b->lMax_theta, b->theta_count, sizeof(*b->lMax_theta));
QPMS_CRASHING_MALLOC(b->theta_map, pairarr_len(np) * sizeof(*b->theta_map));
QPMS_CRASHING_CALLOC(b->lMax_phi, b->phi_count, sizeof(*b->lMax_phi));
QPMS_CRASHING_MALLOC(b->phi_map, pairarr_len(np) * sizeof(*b->phi_map));
for(qpms_ss_pi_t di = 0; di < np; ++di)
for(qpms_ss_pi_t si = 0; si < np; ++si) {
if (si != di) {
const ppid_t opid = pairid(np, di, si);
const uoppid_t uopid = uopairid(np, di, si);
const sph_t dlj = cart2sph(cart3_substract(ss->p[di].pos, ss->p[si].pos));
double *hit = bsearch(&dlj.r, b->r, b->r_count, sizeof(*b->r), cmp_double);
QPMS_ASSERT(hit != NULL);
QPMS_ASSERT(hit >= b->r);
const size_t ri = b->r_map[uopid] = hit - b->r;
b->lMax_r[ri] = MAX(b->lMax_r[ri],
MAX(qpms_ss_bspec_pi(ss, di)->lMax, qpms_ss_bspec_pi(ss, si)->lMax));
hit = bsearch(&dlj.theta, b->theta, b->theta_count, sizeof(*b->theta), cmp_double);
QPMS_ASSERT(hit != NULL);
QPMS_ASSERT(hit >= b->theta);
const size_t thi = b->theta_map[opid] = hit - b->theta;
b->lMax_theta[thi] = MAX(b->lMax_theta[thi],
MAX(qpms_ss_bspec_pi(ss, di)->lMax, qpms_ss_bspec_pi(ss, si)->lMax));
hit = bsearch(&dlj.phi, b->phi, b->phi_count, sizeof(*b->phi), cmp_double);
QPMS_ASSERT(hit != NULL);
QPMS_ASSERT(hit >= b->phi);
const size_t phj = b->phi_map[opid] = hit - b->phi;
b->lMax_phi[phj] = MAX(b->lMax_phi[phj],
MAX(qpms_ss_bspec_pi(ss, di)->lMax, qpms_ss_bspec_pi(ss, si)->lMax));
// TODO ri, thi, phj (+bspec?) -tuples here
}
}
{ // Bessel offsets
QPMS_CRASHING_MALLOC(b->bessel_offsets_r, (b->r_count + 1) * sizeof(*b->bessel_offsets_r));
size_t bessel_offset = 0;
for(size_t ri = 0; ri < b->r_count; ++ri) {
b->bessel_offsets_r[ri] = bessel_offset;
bessel_offset += 2 * b->lMax_r[ri] + 2;
}
b->bessel_offsets_r[b->r_count] = bessel_offset;
}
{ // Legendres
QPMS_CRASHING_MALLOC(b->legendre_offsets_theta, (b->theta_count + 1) * sizeof(*b->legendre_offsets_theta));
size_t legendre_offset = 0;
for(size_t thi = 0; thi < b->theta_count; ++thi) {
b->legendre_offsets_theta[thi] = legendre_offset;
legendre_offset += gsl_sf_legendre_array_n(2*b->lMax_theta[thi] + 1);
}
b->legendre_offsets_theta[b->theta_count] = legendre_offset;
QPMS_CRASHING_MALLOC(b->legendre_base, legendre_offset * sizeof(*b->legendre_base));
for(size_t thi = 0; thi < b->theta_count; ++thi) {
const double costheta = cos(b->theta[thi]);
double *legendre_buf = b->legendre_base + b->legendre_offsets_theta[thi];
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
2*b->lMax_theta[thi]+1, costheta, -1, legendre_buf));
}
}
return b;
}
void qpms_scatsys_translation_booster_free(booster_t *b) {
if (b) {
free(b->bessel_offsets_r);
free(b->lMax_r);
free(b->r);
free(b->r_map);
free(b->legendre_base);
free(b->legendre_offsets_theta);
free(b->lMax_theta);
free(b->theta);
free(b->theta_map);
free(b->lMax_phi);
free(b->phi);
free(b->phi_map);
free(b);
}
}
int qpms_ss_create_translation_cache(qpms_scatsys_t *ss, qpms_ss_caching_mode_t m) {
QPMS_ASSERT(ss);
if (ss->tbooster) {
QPMS_WARN("Translation cache already created?");
return 0;
}
switch(m) {
case QPMS_SS_CACHE_NEVER:
return 0;
case QPMS_SS_CACHE_AUTO:
QPMS_WARN("Translation operator cache heuristics not implemented, creating the cache");
case QPMS_SS_CACHE_ALWAYS:
ss->tbooster = qpms_scatsys_translation_booster_create(ss);
if (ss->tbooster) return 0;
else {
QPMS_WARN("Failed to create tranlation operator cache");
return -1;
}
default:
QPMS_WTF;
}
QPMS_WTF;
}
static qpms_errno_t qpms_scatsys_translation_booster_eval_bessels(
const booster_t *b, complex double *target, complex double k // includes ref. ind. effect
) {
for(size_t ri = 0; ri < b->r_count; ++ri) {
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // Is there a case for different J?
2*b->lMax_r[ri]+1, k * b->r[ri],
target + b->bessel_offsets_r[ri]));
}
return QPMS_SUCCESS;
}
typedef struct qpms_scatsysw_translation_booster {
// _Bool owned_by_ssw; // if False, this is not deallocated by parent ssw
const booster_t *b;
complex double *bessels;
} boosterw_t;
boosterw_t *qpms_scatsysw_translation_booster_create(
const qpms_scatsys_at_omega_t *ssw) {
QPMS_PARANOID_ASSERT(ssw->ss);
const booster_t *const b = ssw->ss->tbooster;
QPMS_ASSERT(b);
boosterw_t *bw;
QPMS_CRASHING_MALLOC(bw, sizeof(*bw));
// Evaluate bessel functions
QPMS_CRASHING_MALLOC(bw->bessels, b->bessel_offsets_r[b->r_count] * sizeof(*bw->bessels));
for(size_t ri = 0; ri < b->r_count; ++ri) {
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // is there a case for other J?
b->bessel_offsets_r[ri+1]-b->bessel_offsets_r[ri]-1,
b->r[ri] * ssw->wavenumber,
bw->bessels + b->bessel_offsets_r[ri]));
}
bw->b = b;
return bw;
}
void qpms_scatsysw_translation_booster_free(boosterw_t *bw) {
if (bw) {
free (bw->bessels);
}
free (bw);
}
complex double *qpms_scatsysw_build_modeproblem_matrix_full_boosted(
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
complex double *target,
const qpms_scatsys_at_omega_t *ssw)
{
QPMS_ASSERT(ssw->translation_cache && ssw->ss->tbooster);
const qpms_scatsys_t *const ss = ssw->ss;
const booster_t *const b = ss->tbooster;
const boosterw_t *const bw = ssw->translation_cache;
const qpms_trans_calculator *const c = ss->c;
const complex double k = ssw->wavenumber;
const size_t full_len = ss->fecv_size;
if(!target)
QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double));
complex double *tmp;
QPMS_CRASHING_MALLOC(tmp, SQ(ss->max_bspecn) * sizeof(complex double));
// Workspaces for the translation operator A and B matrices
complex double *A, *B;
QPMS_CRASHING_MALLOC(A, SQ(c->nelem) * sizeof(*A));
QPMS_CRASHING_MALLOC(B, SQ(c->nelem) * sizeof(*B));
memset(target, 0, SQ(full_len) * sizeof(complex double)); //unnecessary?
double legendre_buf[gsl_sf_legendre_array_n(2*c->lMax + 1)]; //VLA, workspace for legendre arrays
const complex double zero = 0, minusone = -1;
{ // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
size_t fullvec_offsetR = 0;
for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) {
const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[piR].tmatrix_id]->spec;
const cart3_t posR = ss->p[piR].pos;
size_t fullvec_offsetC = 0;
// dest particle T-matrix
const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) {
const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
if(piC != piR) { // The diagonal will be dealt with later.
const cart3_t posC = ss->p[piC].pos;
#if 0
QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
tmp, // tmp is S(piR<-piC)
bspecR, bspecC->n, bspecC, 1,
k, posR, posC, QPMS_HANKEL_PLUS));
#else
{ // this replaces qpms_trans_calculator_get_trans_array():
// R is dest, C is src
const sph_t dlj = cart2sph(cart3_substract(posR, posC));
const uoppid_t pid = uopairid(ss->p_count, piR, piC);
const size_t ri = b->r_map[pid];
QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]);
const qpms_l_t pair_lMax = b->lMax_r[ri];
const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax);
QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm);
QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax);
QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0);
{ // this replaces qpms_trans_calculator_get_AB_arrays() and ..._buf()
const double costheta = cos(dlj.theta);
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
2*pair_lMax+1, costheta, -1, legendre_buf));
const double * const legendres = legendre_buf;
const complex double * const bessels = bw->bessels + b->bessel_offsets_r[ri];
qpms_trans_calculator_get_AB_arrays_precalcbuf(c, pair_lMax, A, B,
/*deststride*/ pair_nelem, /*srcstride*/ 1, dlj.phi, bessels, legendres);
qpms_trans_array_from_AB(tmp,// tmp is S(piR<-piC)
bspecR, bspecC->n, bspecC, 1, A, B, pair_lMax);
}
}
#endif
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
&minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
tmp/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
target + fullvec_offsetR*full_len + fullvec_offsetC /*c*/,
full_len /*ldc*/);
}
fullvec_offsetC += bspecC->n;
}
fullvec_offsetR += bspecR->n;
}
}
// diagonal part M[pi,pi] = +1
for (size_t i = 0; i < full_len; ++i) target[full_len * i + i] = +1;
free(tmp);
free(A);
free(B);
return target;
}
void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boosted(void *arg)
{
const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
*a = arg;
const qpms_scatsys_at_omega_t *ssw = a->ssw;
QPMS_ASSERT(ssw->translation_cache && ssw->ss->tbooster);
const qpms_scatsys_t * const ss = ssw->ss;
const qpms_trans_calculator *const c = ss->c;
const booster_t *const b = ss->tbooster;
const boosterw_t *const bw = ssw->translation_cache;
const complex double k = ssw->wavenumber;
const qpms_iri_t iri = a->iri;
const size_t packedlen = ss->saecv_sizes[iri];
// some of the following workspaces are probably redundant; TODO optimize later.
// workspaces for the uncompressed particle<-particle tranlation matrix block
// and the result of multiplying with a T-matrix (times -1)
complex double *Sblock, *TSblock;
QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
// Workspace for the intermediate particle-orbit matrix result
complex double *tmp;
QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
// Workspace for A, B arrays
complex double *A, *B;
QPMS_CRASHING_MALLOC(A, SQ(c->nelem) * sizeof(*A));
QPMS_CRASHING_MALLOC(B, SQ(c->nelem) * sizeof(*B));
double legendre_buf[gsl_sf_legendre_array_n(2*c->lMax + 1)]; //VLA, workspace for legendre arrays
const complex double one = 1, zero = 0, minusone = -1;
while(1) {
// In the beginning, pick a target (row) orbit for this thread
QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a->opistartR_mutex));
if(*(a->opistartR_ptr) >= ss->p_count) {// Everything is already done, end
QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
break;
}
const qpms_ss_pi_t opistartR = *(a->opistartR_ptr);
// Now increment it for another thread:
*(a->opistartR_ptr) += ss->orbit_types[ss->p_orbitinfo[ss->p_by_orbit[opistartR]].t].size;
QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
// Orbit picked (defined by opistartR), do the work.
const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[orbitstartpiR].tmatrix_id]->spec;
// This is the orbit-level matrix projecting the whole orbit onto the irrep.
const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
// Orbit coeff vector's full size:
const size_t orbit_fullsizeR = otR->size * otR->bspecn;
// This is where the orbit starts in the "packed" vector:
const size_t packed_orbit_offsetR =
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
+ osnR * otR->irbase_sizes[iri];
for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
assert(opiR == ss->p_orbitinfo[piR].p);
assert(otiR == ss->p_orbitinfo[piR].t);
assert(ss->p_orbitinfo[piR].osn == osnR);
const cart3_t posR = ss->p[piR].pos;
// dest particle T-matrix
const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
// This is where the particle's orbit starts in the "packed" vector:
const size_t packed_orbit_offsetC =
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
+ osnC * otC->irbase_sizes[iri];
const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
// Orbit coeff vector's full size:
const size_t orbit_fullsizeC = otC->size * otC->bspecn;
const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
// This is the orbit-level matrix projecting the whole orbit onto the irrep.
const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
if(piC != piR) { // non-diagonal, calculate TS
const cart3_t posC = ss->p[piC].pos;
#if 0
QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
Sblock, // Sblock is S(piR->piC)
bspecR, bspecC->n, bspecC, 1,
k, posR, posC, QPMS_HANKEL_PLUS));
#else
{ // this block replaces qpms_trans_calculator_get_trans_array():
// R is dest, C is src
const sph_t dlj = cart2sph(cart3_substract(posR, posC));
const uoppid_t uopid = uopairid(ss->p_count, piR, piC);
const ppid_t opid = pairid(ss->p_count, piR, piC);
const size_t ri = b->r_map[uopid];
const size_t thi = b->theta_map[opid];
QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]);
QPMS_PARANOID_ASSERT(dlj.theta == b->theta[thi]);
const qpms_l_t pair_lMax = b->lMax_r[ri];
const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax);
QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm);
QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax);
QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0);
{ // this replaces qpms_trans_calculator_get_AB_arrays() and _buf()
#if 0
const double costheta = cos(dlj.theta);
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
2*pair_lMax+1, costheta, -1, legendre_buf));
const double * const legendres = legendre_buf;
#else
const double * const legendres = b->legendre_base + b->legendre_offsets_theta[thi];
#endif
const complex double * const bessels = bw->bessels + b->bessel_offsets_r[ri];
qpms_trans_calculator_get_AB_arrays_precalcbuf(c, pair_lMax, A, B,
/*deststride*/ pair_nelem, /*srcstride*/ 1, dlj.phi, bessels, legendres);
}
qpms_trans_array_from_AB(Sblock, // Sblock is S(piR->piC)
bspecR, bspecC->n, bspecC, 1, A, B, pair_lMax);
}
#endif
SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
&minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
TSblock /*c*/, bspecC->n /*ldc*/);
} else { // diagonal, fill with diagonal +1
for (size_t row = 0; row < bspecR->n; ++row)
for (size_t col = 0; col < bspecC->n; ++col)
TSblock[row * bspecC->n + col] = (row == col)? +1 : 0;
}
// tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasConjTrans,
particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
&one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
omC + opiC*particle_fullsizeC /*B*/,
orbit_fullsizeC/*ldB*/, &zero /*beta*/,
tmp /*C*/, orbit_packedsizeC /*LDC*/);
// target[oiR|piR,oiC|piC] += U[...] tmp[...]
SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
&one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
a->target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
packedlen /*ldC*/);
}
}
}
}
}
free(A);
free(B);
free(tmp);
free(Sblock);
free(TSblock);
return NULL;
}

View File

@ -6,7 +6,7 @@
#include <lapacke.h> #include <lapacke.h>
#include <cblas.h> #include <cblas.h>
#include <lapacke.h> #include <lapacke.h>
#include "scatsystem.h" #include "scatsys_private.h"
#include "indexing.h" #include "indexing.h"
#include "vswf.h" #include "vswf.h"
#include "groups.h" #include "groups.h"
@ -22,6 +22,7 @@
#include <pthread.h> #include <pthread.h>
#include "kahansum.h" #include "kahansum.h"
#include "tolerances.h" #include "tolerances.h"
#include "beyn.h"
#ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS #ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
#include "qpmsblas.h" #include "qpmsblas.h"
@ -133,8 +134,7 @@ static void add_orbit_type(qpms_scatsys_t *ss, const qpms_ss_orbit_type_t *ot_cu
bs_cumsum += lastbs; bs_cumsum += lastbs;
ot_new->irbase_cumsizes[iri] = bs_cumsum; ot_new->irbase_cumsizes[iri] = bs_cumsum;
} }
if(bs_cumsum != ot_new->size * bspecn) QPMS_ENSURE(bs_cumsum == ot_new->size * bspecn,
qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
"The cumulative size of the symmetry-adapted bases is wrong; " "The cumulative size of the symmetry-adapted bases is wrong; "
"expected %d = %d * %d, got %d.", "expected %d = %d * %d, got %d.",
ot_new->size * bspecn, ot_new->size, bspecn, bs_cumsum); ot_new->size * bspecn, ot_new->size, bspecn, bs_cumsum);
@ -142,7 +142,6 @@ static void add_orbit_type(qpms_scatsys_t *ss, const qpms_ss_orbit_type_t *ot_cu
ss->orbit_type_count++; ss->orbit_type_count++;
} }
// Almost 200 lines. This whole thing deserves a rewrite! // Almost 200 lines. This whole thing deserves a rewrite!
qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig, qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
const qpms_finite_group_t *sym, complex double omega, const qpms_finite_group_t *sym, complex double omega,
@ -181,6 +180,7 @@ qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
ss->lenscale = lenscale; ss->lenscale = lenscale;
ss->sym = sym; ss->sym = sym;
ss->medium = orig->medium; ss->medium = orig->medium;
ss->tbooster = NULL;
// Copy the qpms_tmatrix_fuction_t from orig // Copy the qpms_tmatrix_fuction_t from orig
ss->tmg_count = orig->tmg_count; ss->tmg_count = orig->tmg_count;
@ -209,6 +209,7 @@ qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
QPMS_CRASHING_MALLOC(ssw, sizeof(*ssw)); // returned QPMS_CRASHING_MALLOC(ssw, sizeof(*ssw)); // returned
ssw->ss = ss; ssw->ss = ss;
ssw->omega = omega; ssw->omega = omega;
ssw->translation_cache = NULL;
ssw->medium = qpms_epsmu_generator_eval(ss->medium, omega); ssw->medium = qpms_epsmu_generator_eval(ss->medium, omega);
ssw->wavenumber = qpms_wavenumber(omega, ssw->medium); ssw->wavenumber = qpms_wavenumber(omega, ssw->medium);
// we will be using ss->tm_capacity also for ssw->tm // we will be using ss->tm_capacity also for ssw->tm
@ -275,15 +276,17 @@ qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
if (tmj < ss->tm_count) { // HIT, transformed T-matrix already exists if (tmj < ss->tm_count) { // HIT, transformed T-matrix already exists
//TODO some "rounding error cleanup" symmetrisation could be performed here? //TODO some "rounding error cleanup" symmetrisation could be performed here?
qpms_tmatrix_free(transformed); qpms_tmatrix_free(transformed);
free(m);
} else { // MISS, save the matrix (also the "abstract" one) } else { // MISS, save the matrix (also the "abstract" one)
ssw->tm[ss->tm_count] = transformed; ssw->tm[ss->tm_count] = transformed;
ss->tm[ss->tm_count].tmgi = ss->tm[tmi].tmgi; ss->tm[ss->tm_count].tmgi = ss->tm[tmi].tmgi;
qpms_tmatrix_operation_compose_chain_init(&(ss->tm[ss->tm_count].op), 2, 1); qpms_tmatrix_operation_compose_chain_init(&(ss->tm[ss->tm_count].op), 2, 1); // FIXME MEMLEAK
struct qpms_tmatrix_operation_compose_chain * const o = &(ss->tm[ss->tm_count].op.op.compose_chain); struct qpms_tmatrix_operation_compose_chain * const o = &(ss->tm[ss->tm_count].op.op.compose_chain);
o->ops[0] = & ss->tm[tmi].op; // Let's just borrow this o->ops[0] = & ss->tm[tmi].op; // Let's just borrow this
o->ops_owned[0] = false; o->ops_owned[0] = false;
o->opmem[0].typ = QPMS_TMATRIX_OPERATION_LRMATRIX; o->opmem[0].typ = QPMS_TMATRIX_OPERATION_LRMATRIX;
o->opmem[0].op.lrmatrix.m = m; o->opmem[0].op.lrmatrix.m = m;
o->opmem[0].op.lrmatrix.owns_m = true;
o->opmem[0].op.lrmatrix.m_size = d * d; o->opmem[0].op.lrmatrix.m_size = d * d;
o->ops[1] = o->opmem; o->ops[1] = o->opmem;
o->ops_owned[1] = true; o->ops_owned[1] = true;
@ -462,6 +465,9 @@ qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
void qpms_scatsys_free(qpms_scatsys_t *ss) { void qpms_scatsys_free(qpms_scatsys_t *ss) {
if(ss) { if(ss) {
QPMS_ASSERT(ss->tm);
for(qpms_ss_tmi_t tmi = 0; tmi < ss->tm_count; ++tmi)
qpms_tmatrix_operation_clear(&ss->tm[tmi].op);
free(ss->tm); free(ss->tm);
free(ss->tmg); free(ss->tmg);
free(ss->p); free(ss->p);
@ -471,8 +477,11 @@ void qpms_scatsys_free(qpms_scatsys_t *ss) {
free(ss->otspace); free(ss->otspace);
free(ss->p_orbitinfo); free(ss->p_orbitinfo);
free(ss->orbit_types); free(ss->orbit_types);
free(ss->saecv_ot_offsets);
free(ss->saecv_sizes); free(ss->saecv_sizes);
free(ss->p_by_orbit); free(ss->p_by_orbit);
if(ss->tbooster)
qpms_scatsys_translation_booster_free(ss->tbooster);
qpms_trans_calculator_free(ss->c); qpms_trans_calculator_free(ss->c);
} }
free(ss); free(ss);
@ -519,15 +528,26 @@ qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss,
for (qpms_ss_tmgi_t tmgi = 0; tmgi < ss->tmg_count; ++tmgi) for (qpms_ss_tmgi_t tmgi = 0; tmgi < ss->tmg_count; ++tmgi)
qpms_tmatrix_free(tmatrices_preop[tmgi]); qpms_tmatrix_free(tmatrices_preop[tmgi]);
free(tmatrices_preop); free(tmatrices_preop);
ssw->translation_cache= NULL;
return ssw; return ssw;
} }
int qpms_ssw_create_translation_cache(qpms_scatsys_at_omega_t *ssw) {
if(!ssw->translation_cache)
if(ssw->ss->tbooster)
ssw->translation_cache = qpms_scatsysw_translation_booster_create(ssw);
QPMS_ASSERT(ssw->translation_cache);
return 0;
}
void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw) { void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw) {
if (ssw) { if (ssw) {
if(ssw->tm) if(ssw->tm)
for(qpms_ss_tmi_t i = 0; i < ssw->ss->tm_count; ++i) for(qpms_ss_tmi_t i = 0; i < ssw->ss->tm_count; ++i)
qpms_tmatrix_free(ssw->tm[i]); qpms_tmatrix_free(ssw->tm[i]);
free(ssw->tm); free(ssw->tm);
if(ssw->translation_cache)
qpms_scatsysw_translation_booster_free(ssw->translation_cache);
} }
free(ssw); free(ssw);
} }
@ -667,25 +687,16 @@ complex double *qpms_orbit_irrep_basis(size_t *basis_size,
complex double *U; QPMS_CRASHING_MALLOC(U, n*n*N*N*sizeof(complex double)); complex double *U; QPMS_CRASHING_MALLOC(U, n*n*N*N*sizeof(complex double));
double *s; QPMS_CRASHING_MALLOC(s, n*N*sizeof(double)); double *s; QPMS_CRASHING_MALLOC(s, n*N*sizeof(double));
int info = LAPACKE_zgesdd(LAPACK_ROW_MAJOR, QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR,
'A', // jobz; overwrite projector with left sg.vec. and write right into V_H 'A', // jobz; overwrite projector with left sg.vec. and write right into V_H
n*N /* m */, n*N /* n */, projector /* a */, n*N /* lda */, n*N /* m */, n*N /* n */, projector /* a */, n*N /* lda */,
s /* s */, U /* u */, n*N /* ldu, irrelev. */, V_H /* vt */, s /* s */, U /* u */, n*N /* ldu, irrelev. */, V_H /* vt */,
n*N /* ldvt */); n*N /* ldvt */));
if (info) qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
"Something went wrong with the SVD.");
size_t bs; size_t bs;
for(bs = 0; bs < n*N; ++bs) { for(bs = 0; bs < n*N; ++bs) {
#if 0 QPMS_ENSURE(s[bs] <= 1 + SVD_ATOL, "%zd. SV too large: %.16lf", bs, s[bs]);
qpms_pr_debug_at_flf(__FILE__, __LINE__, __func__, QPMS_ENSURE(!(s[bs] > SVD_ATOL && fabs(1-s[bs]) > SVD_ATOL),
"%d. irrep, %zd. SV: %.16lf", (int) iri, bs, s[bs]);
#endif
if(s[bs] > 1 + SVD_ATOL) qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
"%zd. SV too large: %.16lf", bs, s[bs]);
if(s[bs] > SVD_ATOL && fabs(1-s[bs]) > SVD_ATOL)
qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
"%zd. SV in the 'wrong' interval: %.16lf", bs, s[bs]); "%zd. SV in the 'wrong' interval: %.16lf", bs, s[bs]);
if(s[bs] < SVD_ATOL) break; if(s[bs] < SVD_ATOL) break;
} }
@ -694,6 +705,7 @@ complex double *qpms_orbit_irrep_basis(size_t *basis_size,
if(newtarget) QPMS_CRASHING_REALLOC(target, bs*N*n*sizeof(complex double)); if(newtarget) QPMS_CRASHING_REALLOC(target, bs*N*n*sizeof(complex double));
if(basis_size) *basis_size = bs; if(basis_size) *basis_size = bs;
free(s);
free(U); free(U);
free(V_H); free(V_H);
free(projector); free(projector);
@ -1110,6 +1122,8 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_full(
const qpms_scatsys_at_omega_t *ssw const qpms_scatsys_at_omega_t *ssw
) )
{ {
if (ssw->translation_cache)
return qpms_scatsysw_build_modeproblem_matrix_full_boosted(target, ssw);
const complex double k = ssw->wavenumber; const complex double k = ssw->wavenumber;
const qpms_scatsys_t *ss = ssw->ss; const qpms_scatsys_t *ss = ssw->ss;
const size_t full_len = ss->fecv_size; const size_t full_len = ss->fecv_size;
@ -1386,14 +1400,6 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR(
return target_packed; return target_packed;
} }
struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg{
const qpms_scatsys_at_omega_t *ssw;
qpms_ss_pi_t *opistartR_ptr;
pthread_mutex_t *opistartR_mutex;
qpms_iri_t iri;
complex double *target_packed;
};
static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg) static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg)
{ {
const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
@ -1512,9 +1518,6 @@ static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_threa
} }
} }
} }
} }
free(tmp); free(tmp);
free(Sblock); free(Sblock);
@ -1644,9 +1647,6 @@ static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thre
} }
} }
} }
} }
free(tmp); free(tmp);
free(Sblock); free(Sblock);
@ -1716,6 +1716,7 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri
) )
{ {
const _Bool use_translation_cache = (ssw->translation_cache != NULL);
const size_t packedlen = ssw->ss->saecv_sizes[iri]; const size_t packedlen = ssw->ss->saecv_sizes[iri];
if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps? if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
return target_packed; return target_packed;
@ -1748,7 +1749,9 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
pthread_t thread_ids[nthreads]; pthread_t thread_ids[nthreads];
for(long thi = 0; thi < nthreads; ++thi) for(long thi = 0; thi < nthreads; ++thi)
QPMS_ENSURE_SUCCESS(pthread_create(thread_ids + thi, NULL, QPMS_ENSURE_SUCCESS(pthread_create(thread_ids + thi, NULL,
qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread, use_translation_cache
? qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boosted
: qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread,
(void *) &arg)); (void *) &arg));
for(long thi = 0; thi < nthreads; ++thi) { for(long thi = 0; thi < nthreads; ++thi) {
void *retval; void *retval;
@ -1804,7 +1807,6 @@ complex double *qpms_scatsysw_apply_Tmatrices_full(
} }
ccart3_t qpms_scatsys_eval_E(const qpms_scatsys_t *ss, ccart3_t qpms_scatsys_eval_E(const qpms_scatsys_t *ss,
const complex double *cvf, const cart3_t where, const complex double *cvf, const cart3_t where,
const complex double k) { const complex double k) {
@ -1897,3 +1899,55 @@ complex double *qpms_scatsys_scatter_solve(
return f; return f;
} }
struct qpms_scatsys_finite_eval_Beyn_ImTS_param {
const qpms_scatsys_t *ss;
qpms_iri_t iri;
};
/// Wrapper for Beyn algorithm (non-periodic system)
static int qpms_scatsys_finite_eval_Beyn_ImTS(complex double *target,
size_t m, complex double omega, void *params) {
const struct qpms_scatsys_finite_eval_Beyn_ImTS_param *p = params;
qpms_scatsys_at_omega_t *ssw = qpms_scatsys_at_omega(p->ss, omega);
QPMS_ENSURE(ssw != NULL, "qpms_scatsys_at_omega() returned NULL");
if(p->ss->tbooster) qpms_ssw_create_translation_cache(ssw);
if (p->iri == QPMS_NO_IRREP) {
QPMS_ASSERT(m == p->ss->fecv_size);
QPMS_ENSURE(NULL != qpms_scatsysw_build_modeproblem_matrix_full(
target, ssw),
"qpms_scatsysw_build_modeproblem_matrix_full() returned NULL");
} else {
QPMS_ASSERT(m == p->ss->saecv_sizes[p->iri]);
QPMS_ENSURE(NULL != qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
target, ssw, p->iri),
"qpms_scatsysw_build_modeproblem_matrix_irrep_packed() returned NULL");
}
qpms_scatsys_at_omega_free(ssw);
return QPMS_SUCCESS;
}
beyn_result_t *qpms_scatsys_finite_find_eigenmodes(
const qpms_scatsys_t * const ss, const qpms_iri_t iri,
complex double omega_centre, double omega_rr, double omega_ri,
size_t contour_npoints,
double rank_tol, size_t rank_sel_min, double res_tol) {
size_t n; // matrix dimension
if (qpms_iri_is_valid(ss->sym, iri)) {
n = ss->saecv_sizes[iri];
} else if (iri == QPMS_NO_IRREP) {
n = ss->fecv_size;
} else QPMS_WTF;
beyn_contour_t *contour = beyn_contour_ellipse(omega_centre,
omega_rr, omega_ri, contour_npoints);
struct qpms_scatsys_finite_eval_Beyn_ImTS_param p = {ss, iri};
beyn_result_t *result = beyn_solve(n, n /* possibly make smaller? */,
qpms_scatsys_finite_eval_Beyn_ImTS, NULL, (void *) &p,
contour, rank_tol, rank_sel_min, res_tol);
QPMS_ENSURE(result != NULL, "beyn_solve() returned NULL");
free(contour);
return result;
}

View File

@ -133,7 +133,7 @@ typedef struct qpms_ss_derived_tmatrix_t {
struct qpms_trans_calculator; struct qpms_trans_calculator;
struct qpms_epsmu_generator_t; struct qpms_scatsys_translation_booster;
typedef struct qpms_scatsys_t { typedef struct qpms_scatsys_t {
struct qpms_epsmu_generator_t medium; ///< Optical properties of the background medium. struct qpms_epsmu_generator_t medium; ///< Optical properties of the background medium.
@ -188,8 +188,16 @@ typedef struct qpms_scatsys_t {
char *otspace_end; char *otspace_end;
double lenscale; // radius of the array, used as a relative tolerance measure double lenscale; // radius of the array, used as a relative tolerance measure
struct qpms_trans_calculator *c; struct qpms_trans_calculator *c;
// Internal metadata for faster translation matrix evaluation
struct qpms_scatsys_translation_booster *tbooster;
} qpms_scatsys_t; } qpms_scatsys_t;
/// Returns true if ss has the translation caching metadata populated.
static inline _Bool qpms_scatsys_has_translation_cache(const qpms_scatsys_t *ss) {
return ss->tbooster != NULL;
}
/// Retrieve the bspec of \a tmi'th element of \a ss->tm. /// Retrieve the bspec of \a tmi'th element of \a ss->tm.
static inline const qpms_vswf_set_spec_t *qpms_ss_bspec_tmi(const qpms_scatsys_t *ss, qpms_ss_tmi_t tmi) { static inline const qpms_vswf_set_spec_t *qpms_ss_bspec_tmi(const qpms_scatsys_t *ss, qpms_ss_tmi_t tmi) {
return ss->tmg[ss->tm[tmi].tmgi].spec; return ss->tmg[ss->tm[tmi].tmgi].spec;
@ -200,6 +208,9 @@ static inline const qpms_vswf_set_spec_t *qpms_ss_bspec_pi(const qpms_scatsys_t
return ss->tmg[ss->tm[ss->p[pi].tmatrix_id].tmgi].spec; return ss->tmg[ss->tm[ss->p[pi].tmatrix_id].tmgi].spec;
} }
struct qpms_scatsysw_translation_booster;
void qpms_scatsysw_translation_booster_free(struct qpms_scatsysw_translation_booster *);
typedef struct qpms_scatsys_at_omega_t { typedef struct qpms_scatsys_at_omega_t {
const qpms_scatsys_t *ss; ///< Parent scattering system. const qpms_scatsys_t *ss; ///< Parent scattering system.
/// T-matrices from \a ss, evaluated at \a omega. /// T-matrices from \a ss, evaluated at \a omega.
@ -210,8 +221,11 @@ typedef struct qpms_scatsys_at_omega_t {
complex double omega; ///< Angular frequency complex double omega; ///< Angular frequency
qpms_epsmu_t medium; ///< Background medium optical properties at the given frequency qpms_epsmu_t medium; ///< Background medium optical properties at the given frequency
complex double wavenumber; ///< Background medium wavenumber complex double wavenumber; ///< Background medium wavenumber
struct qpms_scatsysw_translation_booster *translation_cache; ///< (private) cache to speedup translations
} qpms_scatsys_at_omega_t; } qpms_scatsys_at_omega_t;
/// Creates a new scatsys by applying a symmetry group onto a "proto-scatsys", copying particles if needed. /// Creates a new scatsys by applying a symmetry group onto a "proto-scatsys", copying particles if needed.
/** In fact, it copies everything except the vswf set specs and qpms_abstract_tmatrix_t instances, /** In fact, it copies everything except the vswf set specs and qpms_abstract_tmatrix_t instances,
* so keep them alive until scatsys is destroyed. * so keep them alive until scatsys is destroyed.
@ -261,6 +275,34 @@ void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw);
qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss, qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss,
complex double omega); complex double omega);
/// Determines behaviour of qpms_ss_create_translation_cache().
typedef enum qpms_ss_caching_mode {
/// Don't create the translation operator cache.
/** Use this if the particle positions are random. */
QPMS_SS_CACHE_NEVER,
/// Always create the translation operator cache.
/** Use this for highly regular large finite structures. */
QPMS_SS_CACHE_ALWAYS,
/// Evaluate the need of caching automatically.
QPMS_SS_CACHE_AUTO,
QPMS_SS_CACHE_DEFAULT = QPMS_SS_CACHE_AUTO
} qpms_ss_caching_mode_t;
/// Returns true if ssw has the translation operator cache populated.
static inline _Bool qpms_scatsysw_has_translation_cache(const qpms_scatsys_at_omega_t *ssw) {
return ssw->translation_cache != NULL;
}
/// Creates some data structures for speeding up translation operator calculations.
/** This is mostly useful for "finite lattices", where many pairs of nanoparticles
* share the same relative positions.
*/
int qpms_ss_create_translation_cache(qpms_scatsys_t *ss, qpms_ss_caching_mode_t m);
/// Pre-calculates the "translation cache" of a qpms_scatsys_at_omega_t.
/** If ssw->ss->tbooster is NULL, does nothing. */
int qpms_ssw_create_translation_cache(qpms_scatsys_at_omega_t *ssw);
/// Creates a "full" transformation matrix U that takes a full vector and projects it onto an symmetry adapted basis. /// Creates a "full" transformation matrix U that takes a full vector and projects it onto an symmetry adapted basis.
/** Mostly as a reference and a debugging tool, as multiplicating these big matrices would be inefficient. /** Mostly as a reference and a debugging tool, as multiplicating these big matrices would be inefficient.
* *
@ -343,31 +385,43 @@ complex double *qpms_scatsys_build_translation_matrix_e_irrep_packed(
); );
/// Creates the full \f$ (I - TS) \f$ matrix of the scattering system. /// Creates the full \f$ (I - TS) \f$ matrix of the scattering system.
/**
* \returns \a target on success, NULL on error.
*/
complex double *qpms_scatsysw_build_modeproblem_matrix_full( complex double *qpms_scatsysw_build_modeproblem_matrix_full(
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated. /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
complex double *target, complex double *target,
const qpms_scatsys_at_omega_t *ssw const qpms_scatsys_at_omega_t *ssw
); );
/// Creates the mode problem matrix \f$ (I - TS) \f$ directly in the irrep-packed form. /// Creates the mode problem matrix \f$ (I - TS) \f$ directly in the irrep-packed form.
/**
* \returns \a target on success, NULL on error.
*/
complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed( complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated. /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
complex double *target, complex double *target,
const qpms_scatsys_at_omega_t *ssw, const qpms_scatsys_at_omega_t *ssw,
qpms_iri_t iri qpms_iri_t iri ///< Index of the irreducible representation in ssw->ss->sym
); );
/// Alternative implementation of qpms_scatsysw_build_modeproblem_matrix_irrep_packed(). /// Alternative implementation of qpms_scatsysw_build_modeproblem_matrix_irrep_packed().
/**
* \returns \a target on success, NULL on error.
*/
complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR( complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR(
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated. /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
complex double *target, complex double *target,
const qpms_scatsys_at_omega_t *ssw, const qpms_scatsys_at_omega_t *ssw,
qpms_iri_t iri qpms_iri_t iri ///< Index of the irreducible representation in ssw->ss->sym
); );
/// Alternative (serial reference) implementation of qpms_scatsysw_build_modeproblem_matrix_irrep_packed(). /// Alternative (serial reference) implementation of qpms_scatsysw_build_modeproblem_matrix_irrep_packed().
/**
* \returns \a target on success, NULL on error.
*/
complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_serial( complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_serial(
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated. /// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
complex double *target, complex double *target,
const qpms_scatsys_at_omega_t *ssw, const qpms_scatsys_at_omega_t *ssw,
qpms_iri_t iri qpms_iri_t iri ///< Index of the irreducible representation in ssw->ss->sym
); );
/// LU factorisation (LAPACKE_zgetrf) result holder. /// LU factorisation (LAPACKE_zgetrf) result holder.
@ -504,6 +558,55 @@ complex double *qpms_scatsysw_apply_Tmatrices_full(
const qpms_scatsys_at_omega_t *ssw const qpms_scatsys_at_omega_t *ssw
); );
struct beyn_result_t; // See beyn.h for full definition
/// Searches for finite scattering system's eigenmodes using Beyn's algorithm.
/**
* Currently, elliptical contour is used.
*
* TODO In the future, this will probably support irrep decomposition as well,
* but it does not make much sense for periodic / small systems, as in their
* case the bottleneck is the T-matrix and translation matrix evaluation
* rather than the linear algebra.
*/
struct beyn_result_t *qpms_scatsys_finite_find_eigenmodes(
const qpms_scatsys_t *ss,
/// A valid irrep index to search only in one irrep, or QPMS_NO_IRREP for solving the full system.
qpms_iri_t iri,
complex double omega_centre, ///< Center of the ellipse inside which the eigenfreqs are searched for.
double omega_rr, ///< Real half-axis of the ellipse inside which the eigenfreqs are searched for.
double omega_ri, ///< Imaginary half-axis of the ellipse inside which the eigenfreqs are searched for.
size_t contour_npoints, ///< Number of elliptic contour discretisation points (preferably even number)
double rank_tol, ///< (default: `1e-4`) TODO DOC.
size_t rank_sel_min, ///< Minimum number of eigenvalue candidates, even if they don't pass \a rank_tol.
double res_tol ///< (default: `0.0`) TODO DOC.
);
#if 0
/// Searches for scattering system's eigenmodes using Beyn's algorithm.
/**
* Currently, elliptical contour is used.
*
* TODO In the future, this will probably support irrep decomposition as well,
* but it does not make much sense for periodic / small systems, as in their
* case the bottleneck is the T-matrix and translation matrix evaluation
* rather than the linear algebra.
*/
struct beyn_result_t *qpms_scatsys_find_eigenmodes(
const qpms_scatsys_t *ss,
double eta, ///< Ewald sum parameter
const double *beta_, ///< k-vector of corresponding dimensionality, NULL/ignored for finite system.
complex double omega_centre, ///< Center of the ellipse inside which the eigenfreqs are searched for.
double omega_rr, ///< Real half-axis of the ellipse inside which the eigenfreqs are searched for.
double omega_ri, ///< Imaginary half-axis of the ellipse inside which the eigenfreqs are searched for.
size_t contour_npoints, ///< Number of elliptic contour discretisation points (preferably even number)
double rank_tol, ///< (default: `1e-4`) TODO DOC.
size_t rank_sel_min, ///< Minimum number of eigenvalue candidates, even if they don't pass \a rank_tol.
double res_tol ///< (default: `0.0`) TODO DOC.
);
#endif
#if 0 #if 0
/// Creates a (partial) incident field vector in the symmetry-adapted basis, given a function that evaluates the field expansions at points. /// Creates a (partial) incident field vector in the symmetry-adapted basis, given a function that evaluates the field expansions at points.

View File

@ -429,12 +429,12 @@ qpms_errno_t qpms_read_scuff_tmatrix(
switch(PAlpha) { switch(PAlpha) {
case 0: TAlpha = QPMS_VSWF_MAGNETIC; break; case 0: TAlpha = QPMS_VSWF_MAGNETIC; break;
case 1: TAlpha = QPMS_VSWF_ELECTRIC; break; case 1: TAlpha = QPMS_VSWF_ELECTRIC; break;
default: assert(0); default: QPMS_WTF;
} }
switch(PBeta) { switch(PBeta) {
case 0: TBeta = QPMS_VSWF_MAGNETIC; break; case 0: TBeta = QPMS_VSWF_MAGNETIC; break;
case 1: TBeta = QPMS_VSWF_ELECTRIC; break; case 1: TBeta = QPMS_VSWF_ELECTRIC; break;
default: assert(0); default: QPMS_WTF;
} }
qpms_uvswfi_t srcui = qpms_tmn2uvswfi(TAlpha, MAlpha, LAlpha), qpms_uvswfi_t srcui = qpms_tmn2uvswfi(TAlpha, MAlpha, LAlpha),
destui = qpms_tmn2uvswfi(TBeta, MBeta, LBeta); destui = qpms_tmn2uvswfi(TBeta, MBeta, LBeta);

View File

@ -13,6 +13,7 @@
#include <gsl/gsl_sf_coupling.h> #include <gsl/gsl_sf_coupling.h>
#include "qpms_error.h" #include "qpms_error.h"
#include "normalisation.h" #include "normalisation.h"
#include "translations_inlines.h"
/* /*
* Define macros with additional factors that "should not be there" according * Define macros with additional factors that "should not be there" according
@ -289,8 +290,8 @@ void qpms_trans_calculator_free(qpms_trans_calculator *c) {
free(c->A_multipliers); free(c->A_multipliers);
free(c->B_multipliers[0]); free(c->B_multipliers[0]);
free(c->B_multipliers); free(c->B_multipliers);
#ifdef LATTICESUMS #ifdef LATTICESUMS32
qpms_ewald3_constants_free(e3c); qpms_ewald3_constants_free(c->e3c);
#endif #endif
free(c->legendre0); free(c->legendre0);
free(c); free(c);
@ -509,8 +510,7 @@ qpms_trans_calculator
} }
static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_trans_calculator *c, static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, csph_t kdlj, int m, int n, int mu, int nu, double kdlj_phi,
bool r_ge_d, qpms_bessel_t J,
const complex double *bessel_buf, const double *legendre_buf) { const complex double *bessel_buf, const double *legendre_buf) {
TROPS_ONLY_EIMF_IMPLEMENTED(c->normalisation); TROPS_ONLY_EIMF_IMPLEMENTED(c->normalisation);
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
@ -525,7 +525,7 @@ static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_t
complex double multiplier = c->A_multipliers[i][q]; complex double multiplier = c->A_multipliers[i][q];
ckahanadd(&sum, &kahanc, Pp * zp * multiplier); ckahanadd(&sum, &kahanc, Pp * zp * multiplier);
} }
complex double eimf = cexp(I*(mu-m)*kdlj.phi); complex double eimf = cexp(I*(mu-m)*kdlj_phi);
return sum * eimf; return sum * eimf;
} }
@ -545,12 +545,11 @@ complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c,
costheta,csphase,legendre_buf)); costheta,csphase,legendre_buf));
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf)); QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf));
return qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu, return qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
kdlj,r_ge_d,J,bessel_buf,legendre_buf); kdlj.phi,bessel_buf,legendre_buf);
} }
static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_trans_calculator *c, static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, csph_t kdlj, int m, int n, int mu, int nu, double kdlj_phi,
bool r_ge_d, qpms_bessel_t J,
const complex double *bessel_buf, const double *legendre_buf) { const complex double *bessel_buf, const double *legendre_buf) {
TROPS_ONLY_EIMF_IMPLEMENTED(c->normalisation); TROPS_ONLY_EIMF_IMPLEMENTED(c->normalisation);
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
@ -565,7 +564,7 @@ static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_t
complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET]; complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
ckahanadd(&sum, &kahanc, Pp_ * zp_ * multiplier); ckahanadd(&sum, &kahanc, Pp_ * zp_ * multiplier);
} }
complex double eimf = cexp(I*(mu-m)*kdlj.phi); complex double eimf = cexp(I*(mu-m)*kdlj_phi);
return sum * eimf; return sum * eimf;
} }
@ -584,7 +583,7 @@ complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c,
costheta,csphase,legendre_buf)); costheta,csphase,legendre_buf));
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf)); QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf));
return qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu, return qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
kdlj,r_ge_d,J,bessel_buf,legendre_buf); kdlj.phi,bessel_buf,legendre_buf);
} }
int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c, int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c,
@ -604,12 +603,38 @@ int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c,
costheta,-1,legendre_buf)); costheta,-1,legendre_buf));
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf)); QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf));
*Adest = qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu, *Adest = qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
kdlj,r_ge_d,J,bessel_buf,legendre_buf); kdlj.phi,bessel_buf,legendre_buf);
*Bdest = qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu, *Bdest = qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
kdlj,r_ge_d,J,bessel_buf,legendre_buf); kdlj.phi,bessel_buf,legendre_buf);
return 0; return 0;
} }
int qpms_trans_calculator_get_AB_arrays_precalcbuf(const qpms_trans_calculator *c,
qpms_y_t lMax, complex double *Adest, complex double *Bdest,
size_t deststride, size_t srcstride, double kdlj_phi,
const complex double *bessel_buf, const double *legendre_buf) {
if(lMax == 0) lMax = c->lMax;
QPMS_ASSERT(lMax <= c->lMax);
size_t desti = 0, srci = 0;
for (int n = 1; n <= lMax; ++n) for (int m = -n; m <= n; ++m) {
for (int nu = 1; nu <= lMax; ++nu) for (int mu = -nu; mu <= nu; ++mu) {
#ifndef NDEBUG
size_t assertindex = qpms_trans_calculator_index_mnmunu(c,m,n,mu,nu);
#endif
assert(assertindex == desti*c->nelem + srci);
*(Adest + deststride * desti + srcstride * srci) =
qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
kdlj_phi, bessel_buf, legendre_buf);
*(Bdest + deststride * desti + srcstride * srci) =
qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
kdlj_phi,bessel_buf,legendre_buf);
++srci;
}
++desti;
srci = 0;
}
return 0;
}
int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator *c, int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator *c,
complex double *Adest, complex double *Bdest, complex double *Adest, complex double *Bdest,
@ -631,26 +656,9 @@ int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator *c,
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,2*c->lMax+1, QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,2*c->lMax+1,
costheta,-1,legendre_buf)); costheta,-1,legendre_buf));
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, 2*c->lMax+1, kdlj.r, bessel_buf)); QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, 2*c->lMax+1, kdlj.r, bessel_buf));
size_t desti = 0, srci = 0;
for (int n = 1; n <= c->lMax; ++n) for (int m = -n; m <= n; ++m) {
for (int nu = 1; nu <= c->lMax; ++nu) for (int mu = -nu; mu <= nu; ++mu) {
#ifndef NDEBUG
size_t assertindex = qpms_trans_calculator_index_mnmunu(c,m,n,mu,nu);
#endif
assert(assertindex == desti*c->nelem + srci);
*(Adest + deststride * desti + srcstride * srci) =
qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
*(Bdest + deststride * desti + srcstride * srci) =
qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
++srci;
}
++desti;
srci = 0;
}
return 0;
} }
return qpms_trans_calculator_get_AB_arrays_precalcbuf(c, c->lMax, Adest, Bdest,
deststride, srcstride, kdlj.phi, bessel_buf, legendre_buf);
} }
complex double qpms_trans_calculator_get_A(const qpms_trans_calculator *c, complex double qpms_trans_calculator_get_A(const qpms_trans_calculator *c,
@ -712,20 +720,8 @@ qpms_errno_t qpms_trans_calculator_get_trans_array(const qpms_trans_calculator *
qpms_errno_t retval = qpms_trans_calculator_get_AB_arrays(c, qpms_errno_t retval = qpms_trans_calculator_get_AB_arrays(c,
A[0], B[0], c->nelem, 1, A[0], B[0], c->nelem, 1,
kdlj, r_ge_d, J); kdlj, r_ge_d, J);
for (size_t desti = 0; desti < destspec->n; ++desti) { qpms_trans_array_from_AB(target, destspec, deststride, srcspec, srcstride,
qpms_y_t desty; qpms_vswf_type_t destt; A[0], B[0], c->lMax);
if(QPMS_SUCCESS != qpms_uvswfi2ty(destspec->ilist[desti], &destt, &desty))
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,
"Invalid u. vswf index %llx.", destspec->ilist[desti]);
for (size_t srci = 0; srci < srcspec->n; ++srci){
qpms_y_t srcy; qpms_vswf_type_t srct;
if(QPMS_SUCCESS != qpms_uvswfi2ty(srcspec->ilist[srci], &srct, &srcy))
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,
"Invalid u. vswf index %llx.", srcspec->ilist[srci]);
target[srci * srcstride + desti * deststride]
= (srct == destt) ? A[desty][srcy] : B[desty][srcy];
}
}
return retval; return retval;
} }
@ -762,6 +758,7 @@ qpms_errno_t qpms_trans_calculator_get_trans_array_e32_e(const qpms_trans_calcul
A, Aerr, B, Berr, ldAB, 1, A, Aerr, B, Berr, ldAB, 1,
eta, k, b1, b2, beta, particle_shift, maxR, maxK, parts); eta, k, b1, b2, beta, particle_shift, maxR, maxK, parts);
for (size_t desti = 0; desti < destspec->n; ++desti) { for (size_t desti = 0; desti < destspec->n; ++desti) {
// TODO replace with (modified) qpms_trans_array_from_AB()
qpms_y_t desty; qpms_vswf_type_t destt; qpms_y_t desty; qpms_vswf_type_t destt;
if(QPMS_SUCCESS != qpms_uvswfi2ty(destspec->ilist[desti], &destt, &desty)) if(QPMS_SUCCESS != qpms_uvswfi2ty(destspec->ilist[desti], &destt, &desty))
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__, qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,

View File

@ -0,0 +1,42 @@
#ifndef TRANSLATIONS_INLINES_H
#define TRANSLATIONS_INLINES_H
#include "translations.h"
#include "indexing.h"
/// Rearranges the default-ordered "A,B" array elements into "bspec"-defined matrix.
// TODO DOC
static inline void qpms_trans_array_from_AB(
complex double *t,
const qpms_vswf_set_spec_t *const t_destspec,
const size_t t_deststride,
const qpms_vswf_set_spec_t *const t_srcspec,
const size_t t_srcstride,
const complex double *const A, const complex double *const B,
/// A and B matrices' lMax.
/** This also determines their size and stride: they are assumed to
* be square matrices of size `nelem * nelem` where
* `nelem = qpms_lMax2nelem(lMax_AB)`
*/
const qpms_l_t lMax_AB
) {
QPMS_PARANOID_ASSERT(lMax_AB >= t_srcspec->lMax && lMax_AB >= t_destspec->lMax);
const qpms_y_t nelem_AB = qpms_lMax2nelem(lMax_AB);
for (size_t desti = 0; desti < t_destspec->n; ++desti) {
qpms_y_t desty; qpms_vswf_type_t destt;
QPMS_ENSURE_SUCCESS_M(qpms_uvswfi2ty(t_destspec->ilist[desti], &destt, &desty),
"Invalid u. vswf index %llx.", t_destspec->ilist[desti]);
for (size_t srci = 0; srci < t_srcspec->n; ++srci){
qpms_y_t srcy; qpms_vswf_type_t srct;
QPMS_ENSURE_SUCCESS_M(qpms_uvswfi2ty(t_srcspec->ilist[srci], &srct, &srcy),
"Invalid u. vswf index %llx.", t_srcspec->ilist[srci]); t[srci * t_srcstride + desti * t_deststride]
= (srct == destt) ? A[desty*nelem_AB + srcy] : B[desty*nelem_AB + srcy];
}
}
}
int qpms_trans_calculator_get_AB_arrays_precalcbuf(const qpms_trans_calculator *c,
qpms_y_t lMax, complex double *Adest, complex double *Bdest,
size_t deststride, size_t srcstride, double kdlj_phi,
const complex double *bessel_buf, const double *legendre_buf);
#endif