Compare commits
26 Commits
master
...
finite_lat
Author | SHA1 | Date |
---|---|---|
Marek Nečada | acc08f8863 | |
Marek Nečada | b4381bd13d | |
Marek Nečada | 6233e1c210 | |
Marek Nečada | e3834fdad7 | |
Marek Nečada | acec5bed98 | |
Marek Nečada | 96c9e95ea0 | |
Marek Nečada | 8f4a8c7c7b | |
Marek Nečada | 338fc00bfe | |
Marek Nečada | 775976816e | |
Marek Nečada | 00ab187510 | |
Marek Nečada | b62f1dadc5 | |
Marek Nečada | 7d19bed4cd | |
Marek Nečada | 7a80c9e0f2 | |
Marek Nečada | 1f63d2b529 | |
Marek Nečada | 2f03fc58b4 | |
Marek Nečada | f33b102768 | |
Marek Nečada | e4d84b3b25 | |
Marek Nečada | 5cc29210d7 | |
Marek Nečada | effe59bc50 | |
Marek Nečada | 8209e9df6e | |
Marek Nečada | 8251eba955 | |
Marek Nečada | af12f2301f | |
Marek Nečada | a0acdfdc5d | |
Marek Nečada | 36c6826b5a | |
Marek Nečada | ed3322ec93 | |
Marek Nečada | f082838c5f |
|
@ -5,8 +5,6 @@ include(GNUInstallDirs)
|
|||
|
||||
project (QPMS)
|
||||
|
||||
set(CMAKE_BUILD_TYPE Debug)
|
||||
|
||||
macro(use_c99)
|
||||
if (CMAKE_VERSION VERSION_LESS "3.1")
|
||||
if (CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import math
|
||||
from qpms.argproc import ArgParser
|
||||
|
||||
|
||||
ap = ArgParser(['rectlattice2d_finite', 'single_particle', 'single_lMax', ])
|
||||
|
||||
ap.add_argument("-t", "--rank-tolerance", type=float, default=1e11)
|
||||
ap.add_argument("-c", "--min-candidates", type=int, default=1, help='always try at least this many eigenvalue candidates, even if their SVs in the rank tests are lower than rank_tolerance')
|
||||
ap.add_argument("-T", "--residual-tolerance", type=float, default=0.)
|
||||
|
||||
ap.add_argument("-o", "--output", type=str, required=False, help='output path (if not provided, will be generated automatically)')
|
||||
#ap.add_argument("-O", "--plot-out", type=str, required=False, help="path to plot output (optional)")
|
||||
#ap.add_argument("-P", "--plot", action='store_true', help="if -p not given, plot to a default path")
|
||||
#ap.add_argument("-g", "--save-gradually", action='store_true', help="saves the partial result after computing each irrep")
|
||||
|
||||
ap.add_argument("-f", "--centre", type=complex, required=True, help='Contour centre in eV')
|
||||
ap.add_argument("--ai", type=float, default=0.05, help="Contour imaginary half-axis in eV")
|
||||
ap.add_argument("--ar", type=float, default=0.05, help="Contour real half-axis in eV")
|
||||
ap.add_argument("-N", type=int, default="150", help="Integration contour discretisation size")
|
||||
|
||||
ap.add_argument("--irrep", type=str, default="none", help="Irrep subspace (irrep index from 0 to 7, irrep label, or 'none' for no irrep decomposition")
|
||||
|
||||
a=ap.parse_args()
|
||||
|
||||
import logging
|
||||
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
|
||||
|
||||
Nx, Ny = a.size
|
||||
px, py = a.period
|
||||
|
||||
particlestr = ("sph" if a.height is None else "cyl") + ("_r%gnm" % (a.radius*1e9))
|
||||
if a.height is not None: particlestr += "_h%gnm" % (a.height * 1e9)
|
||||
defaultprefix = "%s_p%gnmx%gnm_%dx%d_m%s_n%g_L%d_c(%s±%g±%gj)eV_cn%d" % (
|
||||
particlestr, px*1e9, py*1e9, Nx, Ny, str(a.material), a.refractive_index, a.lMax,
|
||||
str(a.centre), a.ai, a.ar, a.N)
|
||||
logging.info("Default file prefix: %s" % defaultprefix)
|
||||
|
||||
def inside_ellipse(point_xy, centre_xy, halfaxes_xy):
|
||||
x = point_xy[0] - centre_xy[0]
|
||||
y = point_xy[1] - centre_xy[1]
|
||||
ax = halfaxes_xy[0]
|
||||
ay = halfaxes_xy[1]
|
||||
return ((x/ax)**2 + (y/ay)**2) <= 1
|
||||
|
||||
|
||||
import numpy as np
|
||||
import qpms
|
||||
from qpms.cybspec import BaseSpec
|
||||
from qpms.cytmatrices import CTMatrix, TMatrixGenerator
|
||||
from qpms.qpms_c import Particle
|
||||
from qpms.cymaterials import EpsMu, EpsMuGenerator, LorentzDrudeModel, lorentz_drude
|
||||
from qpms.cycommon import DebugFlags, dbgmsg_enable
|
||||
from qpms import FinitePointGroup, ScatteringSystem, BesselType, eV, hbar
|
||||
from qpms.symmetries import point_group_info
|
||||
eh = eV/hbar
|
||||
|
||||
dbgmsg_enable(DebugFlags.INTEGRATION)
|
||||
|
||||
#Particle positions
|
||||
orig_x = (np.arange(Nx/2) + (0 if (Nx % 2) else .5)) * px
|
||||
orig_y = (np.arange(Ny/2) + (0 if (Ny % 2) else .5)) * py
|
||||
|
||||
orig_xy = np.stack(np.meshgrid(orig_x, orig_y), axis = -1)
|
||||
|
||||
|
||||
bspec = BaseSpec(lMax = a.lMax)
|
||||
medium = EpsMuGenerator(ap.background_epsmu)
|
||||
particles= [Particle(orig_xy[i], ap.tmgen, bspec) for i in np.ndindex(orig_xy.shape[:-1])]
|
||||
|
||||
sym = FinitePointGroup(point_group_info['D2h'])
|
||||
logging.info("Creating scattering system object")
|
||||
ss, ssw = ScatteringSystem.create(particles, medium, sym, a.centre * eh)
|
||||
|
||||
if a.irrep == 'none':
|
||||
iri = None # no irrep decomposition
|
||||
irname = 'full'
|
||||
logging.info("Not performing irrep decomposition and working with the full space of dimension %d." % ss.fecv_size)
|
||||
else:
|
||||
try:
|
||||
iri = int(a.irrep)
|
||||
except ValueError:
|
||||
iri = ss.irrep_names.index(a.irrep)
|
||||
irname = ss.irrep_names[iri]
|
||||
logging.info("Using irrep subspace %s (iri = %d) of dimension %d." % (irname, iri, ss.saecv_sizes[iri]))
|
||||
|
||||
outfile_tmp = defaultprefix + ".tmp" if a.output is None else a.output + ".tmp"
|
||||
|
||||
logging.info("Starting Beyn's algorithm")
|
||||
results = ss.find_modes(iri=iri, omega_centre = a.centre*eh, omega_rr=a.ar*eh, omega_ri=a.ai*eh, contour_points=a.N,
|
||||
rank_tol=a.rank_tolerance, rank_min_sel=a.min_candidates, res_tol=a.residual_tolerance)
|
||||
results['inside_contour'] = inside_ellipse((results['eigval'].real, results['eigval'].imag),
|
||||
(a.centre.real*eh, a.centre.imag*eh), (a.ar*eh, a.ai*eh))
|
||||
results['refractive_index_internal'] = [medium(om).n for om in results['eigval']]
|
||||
|
||||
outfile = defaultprefix + (('_ir%s_%s.npz' % (str(iri), irname)) if iri is not None else '.npz') if a.output is None else a.output
|
||||
np.savez(outfile, meta=vars(a), **results)
|
||||
logging.info("Saved to %s" % outfile)
|
||||
|
||||
exit(0)
|
||||
|
||||
# TODO plots.
|
||||
if a.plot or (a.plot_out is not None):
|
||||
import matplotlib
|
||||
matplotlib.use('pdf')
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
fig = plt.figure()
|
||||
ax = fig.add_subplot(111)
|
||||
ax.plot(sinalpha_list, σ_ext*1e12,label='$\sigma_\mathrm{ext}$')
|
||||
ax.plot(sinalpha_list, σ_scat*1e12, label='$\sigma_\mathrm{scat}$')
|
||||
ax.plot(sinalpha_list, σ_abs*1e12, label='$\sigma_\mathrm{abs}$')
|
||||
ax.legend()
|
||||
ax.set_xlabel('$\sin\\alpha$')
|
||||
ax.set_ylabel('$\sigma/\mathrm{\mu m^2}$')
|
||||
|
||||
plotfile = defaultprefix + ".pdf" if a.plot_out is None else a.plot_out
|
||||
fig.savefig(plotfile)
|
||||
|
||||
exit(0)
|
||||
|
|
@ -15,7 +15,7 @@ add_library (qpms SHARED translations.c tmatrices.c vecprint.c vswf.c wigner.c e
|
|||
ewaldsf.c pointgroups.c latticegens.c
|
||||
lattices2d.c gaunt.c error.c legendre.c symmetries.c vecprint.c
|
||||
bessel.c own_zgemm.c parsing.c scatsystem.c materials.c drudeparam_data.c
|
||||
lll.c beyn.c
|
||||
lll.c beyn.c scatsys_translation_booster.c
|
||||
)
|
||||
use_c99()
|
||||
|
||||
|
@ -31,7 +31,7 @@ target_include_directories (qpms PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
|||
|
||||
target_compile_options(qpms PRIVATE -Wall -Wno-return-type -Wno-unused-variable -Wno-unused-function -Wno-unused-but-set-variable -Wno-unused-label)
|
||||
target_compile_definitions(qpms PRIVATE LATTICESUMS32 QPMS_VECTORS_NICE_TRANSFORMATIONS
|
||||
EWALD_AUTO_CUTOFF
|
||||
EWALD_AUTO_CUTOFF QPMS_EVALUATE_PARANOID_ASSERTS
|
||||
)
|
||||
|
||||
install(TARGETS qpms
|
||||
|
|
|
@ -7,7 +7,7 @@ from sys import platform as __platform
|
|||
import warnings as __warnings
|
||||
|
||||
try:
|
||||
from .qpms_c import PointGroup, FinitePointGroup, FinitePointGroupElement, Particle, scatsystem_set_nthreads, ScatteringSystem, ScatteringMatrix, pitau, set_gsl_pythonic_error_handling, pgsl_ignore_error, gamma_inc, lll_reduce
|
||||
from .qpms_c import PointGroup, FinitePointGroup, FinitePointGroupElement, Particle, scatsystem_set_nthreads, ScatteringSystem, ScatteringMatrix, pitau, set_gsl_pythonic_error_handling, pgsl_ignore_error, gamma_inc, lll_reduce, ScatteringSystemCachingMode
|
||||
except ImportError as ex:
|
||||
if __platform == "linux" or __platform == "linux2":
|
||||
if 'LD_LIBRARY_PATH' not in __os.environ.keys():
|
||||
|
|
22
qpms/beyn.c
22
qpms/beyn.c
|
@ -345,7 +345,7 @@ void BeynSolver_srandom(BeynSolver *solver, unsigned int RandSeed)
|
|||
static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_function,
|
||||
void *Params,
|
||||
gsl_matrix_complex *A0, gsl_matrix_complex *A1, double complex z0,
|
||||
gsl_vector_complex *eigenvalues, gsl_matrix_complex *eigenvectors, const double rank_tol, const double res_tol)
|
||||
gsl_vector_complex *eigenvalues, gsl_matrix_complex *eigenvectors, const double rank_tol, size_t rank_sel_min, const double res_tol)
|
||||
{
|
||||
const size_t m = solver->M;
|
||||
const size_t l = solver->L;
|
||||
|
@ -379,7 +379,7 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_fun
|
|||
int K=0;
|
||||
for (int k=0; k<Sigma->size /* this is l, actually */; k++) {
|
||||
if (verbose) printf("Beyn: SV(%d)=%e\n",k,gsl_vector_get(Sigma, k));
|
||||
if (gsl_vector_get(Sigma, k) > rank_tol)
|
||||
if (k < rank_sel_min || gsl_vector_get(Sigma, k) > rank_tol)
|
||||
K++;
|
||||
}
|
||||
if (verbose)printf(" Beyn: %d/%zd relevant singular values\n",K,l);
|
||||
|
@ -494,7 +494,7 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_fun
|
|||
|
||||
gsl_vector_complex_set(eigenvalues, KRetained, zgsl);
|
||||
if(eigenvectors) {
|
||||
gsl_matrix_complex_set_col(eigenvectors, KRetained, &(Vk.vector));
|
||||
gsl_matrix_complex_set_row(eigenvectors, KRetained, &(Vk.vector));
|
||||
gsl_vector_set(solver->residuals, KRetained, residual);
|
||||
}
|
||||
++KRetained;
|
||||
|
@ -513,7 +513,7 @@ static int beyn_process_matrices(BeynSolver *solver, beyn_function_M_gsl_t M_fun
|
|||
beyn_result_gsl_t *beyn_solve_gsl(const size_t m, const size_t l,
|
||||
beyn_function_M_gsl_t M_function, beyn_function_M_inv_Vhat_gsl_t M_inv_Vhat_function,
|
||||
void *params, const beyn_contour_t *contour,
|
||||
double rank_tol, double res_tol)
|
||||
double rank_tol, size_t rank_sel_min, double res_tol)
|
||||
{
|
||||
BeynSolver *solver = BeynSolver_create(m, l);
|
||||
|
||||
|
@ -583,14 +583,14 @@ beyn_result_gsl_t *beyn_solve_gsl(const size_t m, const size_t l,
|
|||
gsl_vector_complex *eigenvalue_errors = solver->eigenvalue_errors;
|
||||
gsl_matrix_complex *eigenvectors = solver->eigenvectors;
|
||||
|
||||
// Beyn Steps 3–6
|
||||
int K = beyn_process_matrices(solver, M_function, params, A0, A1, z0, eigenvalues, eigenvectors, rank_tol, res_tol);
|
||||
// Repeat Steps 3–6 with rougher contour approximation to get an error estimate.
|
||||
int K_coarse = beyn_process_matrices(solver, M_function, params, A0_coarse, A1_coarse, z0, eigenvalue_errors, eigenvectors, rank_tol, res_tol);
|
||||
|
||||
gsl_blas_zaxpy(gsl_complex_rect(-1,0), eigenvalues, eigenvalue_errors);
|
||||
int K_coarse = beyn_process_matrices(solver, M_function, params, A0_coarse, A1_coarse, z0, eigenvalue_errors, /*eigenvectors_coarse*/ NULL, rank_tol, rank_sel_min, res_tol);
|
||||
// Reid did also fabs on the complex and real parts ^^^.
|
||||
|
||||
// Beyn Steps 3–6
|
||||
int K = beyn_process_matrices(solver, M_function, params, A0, A1, z0, eigenvalues, eigenvectors, rank_tol, rank_sel_min, res_tol);
|
||||
gsl_blas_zaxpy(gsl_complex_rect(-1,0), eigenvalues, eigenvalue_errors);
|
||||
|
||||
beyn_result_gsl_t *result;
|
||||
QPMS_CRASHING_MALLOC(result, sizeof(beyn_result_gsl_t));
|
||||
result->eigval = solver->eigenvalues;
|
||||
|
@ -636,12 +636,12 @@ static int beyn_function_M_inv_Vhat_carr2gsl(gsl_matrix_complex *target,
|
|||
}
|
||||
|
||||
beyn_result_t *beyn_solve(size_t m, size_t l, beyn_function_M_t M, beyn_function_M_inv_Vhat_t M_inv_Vhat,
|
||||
void *params, const beyn_contour_t *contour, double rank_tol, double res_tol) {
|
||||
void *params, const beyn_contour_t *contour, double rank_tol, size_t rank_sel_min, double res_tol) {
|
||||
struct beyn_function_M_carr2gsl_param p = {M, M_inv_Vhat, params};
|
||||
return beyn_result_from_beyn_result_gsl(
|
||||
beyn_solve_gsl(m, l, beyn_function_M_carr2gsl,
|
||||
(p.M_inv_Vhat_function) ? beyn_function_M_inv_Vhat_carr2gsl : NULL,
|
||||
(void *) &p, contour, rank_tol, res_tol)
|
||||
(void *) &p, contour, rank_tol, rank_sel_min, res_tol)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ typedef struct beyn_result_gsl_t {
|
|||
gsl_vector_complex *eigval;
|
||||
gsl_vector_complex *eigval_err;
|
||||
gsl_vector *residuals;
|
||||
gsl_matrix_complex *eigvec;
|
||||
gsl_matrix_complex *eigvec; // Rows are the eigenvectors
|
||||
gsl_vector *ranktest_SV;
|
||||
} beyn_result_gsl_t;
|
||||
|
||||
|
@ -85,7 +85,7 @@ typedef struct beyn_result_t {
|
|||
complex double *eigval;
|
||||
complex double *eigval_err;
|
||||
double *residuals;
|
||||
complex double *eigvec;
|
||||
complex double *eigvec; // Rows are the eigenvectors
|
||||
double *ranktest_SV;
|
||||
|
||||
/// Private, we wrap it around beyn_result_gsl_t for now.
|
||||
|
@ -109,6 +109,7 @@ beyn_result_gsl_t *beyn_solve_gsl(
|
|||
void *params, ///< Parameter pointer passed to M() and M_inv_Vhat().
|
||||
const beyn_contour_t *contour, ///< Integration contour.
|
||||
double rank_tol, ///< (default: `1e-4`) TODO DOC.
|
||||
size_t rank_min_sel, ///< Minimum number of eigenvalue candidates, even if they don't pass \a rank_tol.
|
||||
double res_tol ///< (default: `0.0`) TODO DOC.
|
||||
);
|
||||
|
||||
|
@ -120,6 +121,7 @@ beyn_result_t *beyn_solve(
|
|||
void *params, ///< Parameter pointer passed to M() and M_inv_Vhat().
|
||||
const beyn_contour_t *contour, ///< Integration contour.
|
||||
double rank_tol, ///< (default: `1e-4`) TODO DOC.
|
||||
size_t rank_min_sel, ///< Minimum number of eigenvalue candidates, even if they don't pass \a rank_tol.
|
||||
double res_tol ///< (default: `0.0`) TODO DOC.
|
||||
);
|
||||
|
||||
|
|
|
@ -81,6 +81,12 @@ static inline qpms_gmi_t qpms_finite_group_inv(const qpms_finite_group_t *G,
|
|||
return G->invi[a];
|
||||
}
|
||||
|
||||
static inline _Bool qpms_iri_is_valid(const qpms_finite_group_t *G, qpms_iri_t iri) {
|
||||
return (iri > G->nirreps || iri < 0) ? 0 : 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// NOT IMPLEMENTED Get irrep index by name.
|
||||
qpms_iri_t qpms_finite_group_find_irrep_by_name(qpms_finite_group_t *G, char *name);
|
||||
|
||||
|
|
|
@ -19,4 +19,10 @@
|
|||
#define QPMS_EXPECT(exp,c) (exp)
|
||||
#endif
|
||||
|
||||
#if (defined(__GNUC__) && __GNUC__ >= 3) || \
|
||||
(defined(__GNUC__) && defined(__GNUC_MINOR__) && __GNUC__ == 2 && __GNUC_MINOR__ >= 8)
|
||||
// TODO clang
|
||||
#define QPMS_NORETURN __attribute__((noreturn))
|
||||
#endif
|
||||
|
||||
#endif // OPTIM_H
|
||||
|
|
103
qpms/qpms_c.pyx
103
qpms/qpms_c.pyx
|
@ -16,7 +16,7 @@ from .cytmatrices cimport CTMatrix, TMatrixFunction, TMatrixGenerator
|
|||
from .cymaterials cimport EpsMuGenerator
|
||||
from libc.stdlib cimport malloc, free, calloc
|
||||
import warnings
|
||||
|
||||
import enum
|
||||
|
||||
# Set custom GSL error handler. N.B. this is obviously not thread-safe.
|
||||
cdef char *pgsl_err_reason
|
||||
|
@ -280,7 +280,7 @@ cdef class Particle:
|
|||
if isinstance(tgen.holder, CTMatrix):
|
||||
spec = (<CTMatrix>tgen.holder).spec
|
||||
else:
|
||||
raise ValueError("bspec argument must be specified separately for str(type(t))")
|
||||
raise ValueError("bspec argument must be specified separately for %s" % str(type(t)))
|
||||
self.f = TMatrixFunction(tgen, spec)
|
||||
self.p.tmg = self.f.rawpointer()
|
||||
# TODO non-trivial transformations later; if modified, do not forget to update ScatteringSystem constructor
|
||||
|
@ -331,6 +331,10 @@ cpdef void scatsystem_set_nthreads(long n):
|
|||
qpms_scatsystem_set_nthreads(n)
|
||||
return
|
||||
|
||||
class ScatteringSystemCachingMode(enum.IntEnum):
|
||||
NEVER = QPMS_SS_CACHE_NEVER
|
||||
AUTO = QPMS_SS_CACHE_AUTO
|
||||
ALWAYS = QPMS_SS_CACHE_ALWAYS
|
||||
|
||||
cdef class ScatteringSystem:
|
||||
'''
|
||||
|
@ -340,6 +344,16 @@ cdef class ScatteringSystem:
|
|||
#cdef list Tmatrices # Here we keep the references to occuring T-matrices
|
||||
cdef EpsMuGenerator medium_holder # Here we keep the reference to medium generator
|
||||
cdef qpms_scatsys_t *s
|
||||
cdef FinitePointGroup sym
|
||||
|
||||
cdef qpms_iri_t iri_py2c(self, iri, allow_None = True):
|
||||
if iri is None and allow_None:
|
||||
return QPMS_NO_IRREP
|
||||
cdef qpms_iri_t nir = self.nirreps
|
||||
cdef qpms_iri_t ciri = iri
|
||||
if ciri < 0 or ciri > nir:
|
||||
raise ValueError("Invalid irrep index %s (of %d irreps)", str(iri), self.nirreps)
|
||||
return ciri
|
||||
|
||||
def check_s(self): # cdef instead?
|
||||
if self.s == <qpms_scatsys_t *>NULL:
|
||||
|
@ -347,7 +361,9 @@ cdef class ScatteringSystem:
|
|||
#TODO is there a way to disable the constructor outside this module?
|
||||
|
||||
@staticmethod # We don't have any "standard" constructor for this right now
|
||||
def create(particles, medium, FinitePointGroup sym, cdouble omega): # TODO tolerances
|
||||
def create(particles, medium, FinitePointGroup sym, cdouble omega,
|
||||
caching_mode = QPMS_SS_CACHE_DEFAULT): # TODO tolerances
|
||||
caching_mode = ScatteringSystemCachingMode(caching_mode)
|
||||
# These we are going to construct
|
||||
cdef ScatteringSystem self
|
||||
cdef _ScatteringSystemAtOmega pyssw
|
||||
|
@ -369,7 +385,8 @@ cdef class ScatteringSystem:
|
|||
for p in particles: # find and enumerate unique t-matrix generators
|
||||
if p.p.op.typ != QPMS_TMATRIX_OPERATION_NOOP:
|
||||
raise NotImplementedError("currently, only no-op T-matrix operations are allowed in ScatteringSystem constructor")
|
||||
tmg_key = id(p.f)
|
||||
#tmg_key = id(p.f) # This causes a different generator for each particle -> SUPER SLOW
|
||||
tmg_key = (id(p.f.generator), id(p.f.spec))
|
||||
if tmg_key not in tmgindices:
|
||||
tmgindices[tmg_key] = tmg_count
|
||||
tmgobjs.append(p.f) # Save the references on BaseSpecs and TMatrixGenerators (via TMatrixFunctions)
|
||||
|
@ -401,12 +418,13 @@ cdef class ScatteringSystem:
|
|||
orig.tm[tmi].op = qpms_tmatrix_operation_noop # TODO adjust when notrivial operations allowed
|
||||
for pi in range(p_count):
|
||||
p = particles[pi]
|
||||
tmg_key = id(p.f)
|
||||
tmg_key = (id(p.f.generator), id(p.f.spec))
|
||||
tm_derived_key = (tmg_key, None) # TODO unique representation of p.p.op instead of None
|
||||
orig.p[pi].pos = p.cval().pos
|
||||
orig.p[pi].tmatrix_id = tmindices[tm_derived_key]
|
||||
ssw = qpms_scatsys_apply_symmetry(&orig, sym.rawpointer(), omega, &QPMS_TOLERANCE_DEFAULT)
|
||||
ss = ssw[0].ss
|
||||
qpms_ss_create_translation_cache(ss, caching_mode)
|
||||
finally:
|
||||
free(orig.tmg)
|
||||
free(orig.tm)
|
||||
|
@ -415,6 +433,7 @@ cdef class ScatteringSystem:
|
|||
self.medium_holder = mediumgen
|
||||
self.s = ss
|
||||
self.tmgobjs = tmgobjs
|
||||
self.sym = sym
|
||||
pyssw = _ScatteringSystemAtOmega()
|
||||
pyssw.ssw = ssw
|
||||
pyssw.ss_pyref = self
|
||||
|
@ -588,6 +607,65 @@ cdef class ScatteringSystem:
|
|||
self.s, qpms_incfield_planewave, <void *>&p, 0)
|
||||
return target_np
|
||||
|
||||
property has_translation_cache:
|
||||
def __get__(self):
|
||||
self.check_s()
|
||||
return True if qpms_scatsys_has_translation_cache(self.s) else False
|
||||
|
||||
def find_modes(self, cdouble omega_centre, double omega_rr, double omega_ri, iri = None,
|
||||
size_t contour_points = 20, double rank_tol = 1e-4, size_t rank_min_sel=1,
|
||||
double res_tol = 0):
|
||||
"""
|
||||
Attempts to find the eigenvalues and eigenvectors using Beyn's algorithm.
|
||||
"""
|
||||
|
||||
cdef beyn_result_t *res = qpms_scatsys_finite_find_eigenmodes(self.s,
|
||||
self.iri_py2c(iri),
|
||||
omega_centre, omega_rr, omega_ri, contour_points,
|
||||
rank_tol, rank_min_sel, res_tol)
|
||||
if res == NULL: raise RuntimeError
|
||||
|
||||
cdef size_t neig = res[0].neig
|
||||
cdef size_t vlen = res[0].vlen # should be equal to self.s.fecv_size
|
||||
|
||||
cdef np.ndarray[complex, ndim=1] eigval = np.empty((neig,), dtype=complex)
|
||||
cdef cdouble[::1] eigval_v = eigval
|
||||
cdef np.ndarray[complex, ndim=1] eigval_err = np.empty((neig,), dtype=complex)
|
||||
cdef cdouble[::1] eigval_err_v = eigval_err
|
||||
cdef np.ndarray[double, ndim=1] residuals = np.empty((neig,), dtype=np.double)
|
||||
cdef double[::1] residuals_v = residuals
|
||||
cdef np.ndarray[complex, ndim=2] eigvec = np.empty((neig,vlen),dtype=complex)
|
||||
cdef cdouble[:,::1] eigvec_v = eigvec
|
||||
cdef np.ndarray[double, ndim=1] ranktest_SV = np.empty((vlen), dtype=np.double)
|
||||
cdef double[::1] ranktest_SV_v = ranktest_SV
|
||||
|
||||
cdef size_t i, j
|
||||
|
||||
for i in range(neig):
|
||||
eigval_v[i] = res[0].eigval[i]
|
||||
eigval_err_v[i] = res[0].eigval_err[i]
|
||||
residuals_v[i] = res[0].residuals[i]
|
||||
for j in range(vlen):
|
||||
eigvec_v[i,j] = res[0].eigvec[i*vlen + j]
|
||||
for i in range(vlen):
|
||||
ranktest_SV_v[i] = res[0].ranktest_SV[i]
|
||||
|
||||
zdist = eigval - omega_centre
|
||||
eigval_inside_metric = np.hypot(zdist.real / omega_rr, zdist.imag / omega_ri)
|
||||
|
||||
beyn_result_free(res)
|
||||
retdict = {
|
||||
'eigval':eigval,
|
||||
'eigval_inside_metric':eigval_inside_metric,
|
||||
'eigvec':eigvec,
|
||||
'residuals':residuals,
|
||||
'eigval_err':eigval_err,
|
||||
'ranktest_SV':ranktest_SV,
|
||||
'iri': iri,
|
||||
}
|
||||
|
||||
return retdict
|
||||
|
||||
cdef class _ScatteringSystemAtOmega:
|
||||
'''
|
||||
Wrapper over the C qpms_scatsys_at_omega_t structure
|
||||
|
@ -635,6 +713,21 @@ cdef class _ScatteringSystemAtOmega:
|
|||
def __get__(self): return self.ss_pyref.irrep_names
|
||||
property nirreps:
|
||||
def __get__(self): return self.ss_pyref.nirreps
|
||||
property has_translation_cache:
|
||||
def __get__(self):
|
||||
self.check()
|
||||
return True if qpms_scatsysw_has_translation_cache(self.ssw) else False
|
||||
|
||||
def add_translation_cache(self):
|
||||
'''
|
||||
Adds translation cache if the parent ScatteringSystem
|
||||
contains the required metadata (this is not done automatically,
|
||||
as the cache is not useful if no translation operators are to
|
||||
be evaluated
|
||||
'''
|
||||
self.check()
|
||||
qpms_ssw_create_translation_cache(self.ssw)
|
||||
return self.has_translation_cache
|
||||
|
||||
def modeproblem_matrix_full(self):
|
||||
self.check()
|
||||
|
|
|
@ -107,6 +107,7 @@ cdef extern from "qpms_types.h":
|
|||
ctypedef int32_t qpms_ss_pi_t
|
||||
ctypedef int qpms_gmi_t
|
||||
ctypedef int qpms_iri_t
|
||||
qpms_iri_t QPMS_NO_IRREP
|
||||
ctypedef const char * qpms_permutation_t
|
||||
struct qpms_tmatrix_t:
|
||||
qpms_vswf_set_spec_t *spec
|
||||
|
@ -549,10 +550,17 @@ cdef extern from "scatsystem.h":
|
|||
cdouble omega
|
||||
qpms_epsmu_t medium
|
||||
cdouble wavenumber
|
||||
ctypedef enum qpms_ss_caching_mode_t:
|
||||
QPMS_SS_CACHE_NEVER
|
||||
QPMS_SS_CACHE_ALWAYS
|
||||
QPMS_SS_CACHE_DEFAULT
|
||||
QPMS_SS_CACHE_AUTO
|
||||
qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig, const qpms_finite_group_t *sym,
|
||||
cdouble omega, const qpms_tolerance_spec_t *tol)
|
||||
qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss, cdouble omega)
|
||||
void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw)
|
||||
int qpms_ss_create_translation_cache(qpms_scatsys_t *ss, qpms_ss_caching_mode_t m)
|
||||
int qpms_ssw_create_translation_cache(qpms_scatsys_at_omega_t *ssw)
|
||||
cdouble *qpms_scatsys_irrep_pack_matrix(cdouble *target_packed,
|
||||
const cdouble *orig_full, const qpms_scatsys_t *ss, qpms_iri_t iri)
|
||||
cdouble *qpms_scatsys_irrep_unpack_matrix(cdouble *target_full,
|
||||
|
@ -598,6 +606,11 @@ cdef extern from "scatsystem.h":
|
|||
cdouble *qpms_scatsys_scatter_solve(cdouble *target_f, const cdouble *a_inc, qpms_ss_LU ludata)
|
||||
const qpms_vswf_set_spec_t *qpms_ss_bspec_tmi(const qpms_scatsys_t *ss, qpms_ss_tmi_t tmi)
|
||||
const qpms_vswf_set_spec_t *qpms_ss_bspec_pi(const qpms_scatsys_t *ss, qpms_ss_pi_t pi)
|
||||
beyn_result_t *qpms_scatsys_finite_find_eigenmodes(const qpms_scatsys_t *ss, qpms_iri_t iri,
|
||||
cdouble omega_centre, double omega_rr, double omega_ri, size_t contour_npoints,
|
||||
double rank_tol, size_t rank_sel_min, double res_tol)
|
||||
bint qpms_scatsysw_has_translation_cache(const qpms_scatsys_at_omega_t *ssw)
|
||||
bint qpms_scatsys_has_translation_cache(const qpms_scatsys_t *ss)
|
||||
|
||||
cdef extern from "ewald.h":
|
||||
struct qpms_csf_result:
|
||||
|
@ -674,11 +687,11 @@ cdef extern from "beyn.h":
|
|||
|
||||
beyn_result_gsl_t *beyn_solve_gsl(size_t m, size_t l, beyn_function_M_gsl_t M,
|
||||
beyn_function_M_inv_Vhat_gsl_t M_inv_Vhat, void *params, const beyn_contour_t *contour,
|
||||
double rank_tol, double res_tol)
|
||||
double rank_tol, size_t rank_min_sel, double res_tol)
|
||||
|
||||
beyn_result_t *beyn_solve(size_t m, size_t l, beyn_function_M_t M,
|
||||
beyn_function_M_inv_Vhat_t M_inv_Vhat, void *params, const beyn_contour_t *contour,
|
||||
double rank_tol, double res_tol)
|
||||
double rank_tol, size_t rank_min_sel, double res_tol)
|
||||
|
||||
beyn_contour_t *beyn_contour_ellipse(cdouble centre, double halfax_re, double halfax_im, size_t npoints)
|
||||
beyn_contour_t *beyn_contour_halfellipse(cdouble centre, double halfax_re, double halfax_im, size_t npoints,
|
||||
|
|
|
@ -4,14 +4,14 @@
|
|||
#include "optim.h"
|
||||
|
||||
/// Provisional error message with abort();
|
||||
void qpms_pr_error(const char *fmt, ...);
|
||||
QPMS_NORETURN void qpms_pr_error(const char *fmt, ...);
|
||||
//void qpms_error(const char *fmt, ...);
|
||||
|
||||
/// Provisional error message with abort(), indicating source and line number.
|
||||
void qpms_pr_error_at_line(const char *filename, unsigned int linenum,
|
||||
const char *fmt, ...);
|
||||
|
||||
void qpms_pr_error_at_flf(const char *filename, unsigned int linenum,
|
||||
QPMS_NORETURN void qpms_pr_error_at_flf(const char *filename, unsigned int linenum,
|
||||
const char *func,
|
||||
const char *fmt, ...);
|
||||
|
||||
|
@ -100,6 +100,13 @@ qpms_dbgmsg_flags qpms_dbgmsg_enable(qpms_dbgmsg_flags types);
|
|||
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,"Unexpected error (%d)", errorcode); \
|
||||
}
|
||||
|
||||
// Same as previous, but with message.
|
||||
#define QPMS_ENSURE_SUCCESS_M(x, msg, ...) { \
|
||||
int errorcode = (x); \
|
||||
if(QPMS_UNLIKELY(errorcode)) \
|
||||
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,msg, ##__VA_ARGS__); \
|
||||
}
|
||||
|
||||
|
||||
#define QPMS_ENSURE(x, msg, ...) {if(QPMS_UNLIKELY(!(x))) qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,msg, ##__VA_ARGS__); }
|
||||
|
||||
|
@ -109,6 +116,16 @@ qpms_dbgmsg_flags qpms_dbgmsg_enable(qpms_dbgmsg_flags types);
|
|||
"Unexpected error. This is most certainly a bug.");\
|
||||
}
|
||||
|
||||
#ifdef QPMS_EVALUATE_PARANOID_ASSERTS
|
||||
#define QPMS_PARANOID_ASSERT(x) {\
|
||||
if(QPMS_UNLIKELY(!(x)))\
|
||||
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,\
|
||||
"Unexpected error. This is most certainly a bug.");\
|
||||
}
|
||||
#else
|
||||
#define QPMS_PARANOID_ASSERT(x) {;}
|
||||
#endif
|
||||
|
||||
#define QPMS_NOT_IMPLEMENTED(msg, ...) qpms_pr_error_at_flf(__FILE__,__LINE__,__func__, \
|
||||
"Not implemented:" msg, ##__VA_ARGS__)
|
||||
|
||||
|
|
|
@ -309,6 +309,8 @@ typedef int qpms_gmi_t;
|
|||
|
||||
/// Irreducible representation index. See also groups.h.
|
||||
typedef int qpms_iri_t;
|
||||
/// Constant labeling that no irrep decomposition is done (full system solved instead).
|
||||
#define QPMS_NO_IRREP ((qpms_iri_t) -1)
|
||||
|
||||
/// Permutation representation of a group element.
|
||||
/** For now, it's just a string of the form "(0,1)(3,4,5)"
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
#ifndef QPMS_SCATSYS_PRIVATE_H
|
||||
#define QPMS_SCATSYS_PRIVATE_H
|
||||
/*
|
||||
* This file includes some definitions shared between scatsystem.c
|
||||
* and scatsys_translation_booster.c that are not needed anywhere
|
||||
* else.
|
||||
*/
|
||||
|
||||
#include "scatsystem.h"
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
complex double *qpms_scatsysw_build_modeproblem_matrix_full_boosted(
|
||||
complex double *target, const qpms_scatsys_at_omega_t *ssw);
|
||||
complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_boosted(
|
||||
complex double *target_packed, const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri);
|
||||
/// "private" destructor, called by qpms_scatsys_free()
|
||||
void qpms_scatsys_translation_booster_free(struct qpms_scatsys_translation_booster *);
|
||||
/// "private" constructor, use qpms_ss_create_translation_cache() instead.
|
||||
struct qpms_scatsys_translation_booster *qpms_scatsys_translation_booster_create(
|
||||
const qpms_scatsys_t *ss);
|
||||
|
||||
|
||||
struct qpms_scatsysw_translation_booster *
|
||||
qpms_scatsysw_translation_booster_create(const qpms_scatsys_at_omega_t *ssw);
|
||||
|
||||
/// "private" destructor, called by qpms_scatsys_at_omega_free()
|
||||
void qpms_scatsysw_translation_booster_free(struct qpms_scatsysw_translation_booster *);
|
||||
|
||||
|
||||
struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg{
|
||||
const qpms_scatsys_at_omega_t *ssw;
|
||||
qpms_ss_pi_t *opistartR_ptr;
|
||||
pthread_mutex_t *opistartR_mutex;
|
||||
qpms_iri_t iri;
|
||||
complex double *target_packed;
|
||||
};
|
||||
|
||||
void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boosted(void *arg);
|
||||
|
||||
#endif //QPMS_SCATSYS_PRIVATE_H
|
|
@ -0,0 +1,562 @@
|
|||
// Functionality to speedup translation matrix computations in large finite arrays
|
||||
// by caching Bessel function values etc.
|
||||
#include <cblas.h>
|
||||
#include "scatsys_private.h"
|
||||
#include "translations_inlines.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "vectors.h"
|
||||
#include "qpms_error.h"
|
||||
#include "qpms_specfunc.h"
|
||||
#include "groups.h"
|
||||
|
||||
#define SQ(x) ((x)*(x))
|
||||
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
|
||||
|
||||
#ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
|
||||
#include "qpmsblas.h"
|
||||
#define SERIAL_ZGEMM qpms_zgemm
|
||||
#else
|
||||
#define SERIAL_ZGEMM cblas_zgemm
|
||||
#endif
|
||||
|
||||
typedef size_t ppid_t;
|
||||
typedef size_t uoppid_t;
|
||||
|
||||
|
||||
// Unordered exclusive pair ID count.
|
||||
static inline uoppid_t uopairarr_len(qpms_ss_pi_t pn_total) {
|
||||
return pn_total * (pn_total - 1) / 2;
|
||||
}
|
||||
|
||||
// Unordered exclusive pair ID.
|
||||
static inline uoppid_t uopairid(qpms_ss_pi_t pn_total, qpms_ss_pi_t p1, qpms_ss_pi_t p2) {
|
||||
qpms_ss_pi_t hi, lo;
|
||||
QPMS_ASSERT(p1 != p2);
|
||||
if (p1 < p2) {
|
||||
lo = p1;
|
||||
hi = p2;
|
||||
} else {
|
||||
lo = p2;
|
||||
hi = p1;
|
||||
}
|
||||
QPMS_ASSERT(lo >= 0);
|
||||
QPMS_ASSERT(hi < pn_total);
|
||||
return lo + hi * (hi - 1) / 2;
|
||||
}
|
||||
|
||||
// Ordered pair ID.
|
||||
static inline ppid_t pairid(qpms_ss_pi_t pn_total, qpms_ss_pi_t p1, qpms_ss_pi_t p2) {
|
||||
return pn_total * p1 + p2;
|
||||
}
|
||||
|
||||
// Ordered pair ID count.
|
||||
static inline ppid_t pairarr_len(qpms_ss_pi_t pn_total) {
|
||||
return SQ(pn_total);
|
||||
}
|
||||
|
||||
typedef struct qpms_scatsys_translation_booster {
|
||||
double *r; // Unique distances array, indices are ppid_t
|
||||
qpms_l_t *lMax_r; // lMaxes associated with the r's (use same indices as with r)
|
||||
size_t r_count; // Number of different interparticle distances (length of r[])
|
||||
size_t *r_map; // maps pairs to the corresponding distances (index of uoppid_t type)
|
||||
// Offsets of the Bessel function values: bessel_offsets_r[i] has the cumulative sums
|
||||
// of 2*lMax_r[j]+2, j < i. bessel_offsets_r[r_count] is the total length of the
|
||||
// Bessel function "cache"
|
||||
size_t *bessel_offsets_r;
|
||||
|
||||
double *theta;
|
||||
qpms_l_t *lMax_theta;
|
||||
size_t theta_count;
|
||||
size_t *theta_map;
|
||||
size_t *legendre_offsets_theta;
|
||||
double *legendre_base;
|
||||
|
||||
double *phi;
|
||||
qpms_l_t *lMax_phi;
|
||||
size_t phi_count;
|
||||
size_t *phi_map;
|
||||
} booster_t;
|
||||
|
||||
/// Sorts an array and throws away duplicit elements.
|
||||
/**
|
||||
* The unique elements (according to the compare function)
|
||||
* are aligned to the left of the original array.
|
||||
*
|
||||
* \returns Number of kept unique array members
|
||||
*/
|
||||
static size_t sort_and_eliminate(void *base, size_t nmemb, size_t size,
|
||||
int (*compar)(const void *, const void *)) {
|
||||
if (nmemb == 0) return 0; // corner case
|
||||
qsort(base, nmemb, size, compar);
|
||||
size_t left = 0;
|
||||
for (size_t src = 1; src < nmemb; ++src) {
|
||||
assert(left <= src);
|
||||
if (compar((const char *)base + left*size, (const char *)base + src*size)) {
|
||||
left += 1;
|
||||
if (left < src) // Avoid copying to itself (when it's not needed and memcpy behaviour undef'd
|
||||
memcpy((char *)base + left*size, (char *)base + src*size, size);
|
||||
}
|
||||
}
|
||||
return left + 1;
|
||||
}
|
||||
|
||||
static int cmp_double(const void *aa, const void *bb) {
|
||||
const double a = *(double*)aa;
|
||||
const double b = *(double*)bb;
|
||||
if (a < b) return -1;
|
||||
if (a == b) return 0;
|
||||
if (a > b) return 1;
|
||||
QPMS_WTF; // NaN or similar
|
||||
}
|
||||
|
||||
booster_t *qpms_scatsys_translation_booster_create(
|
||||
const qpms_scatsys_t *ss) {
|
||||
const qpms_ss_pi_t np = ss->p_count;
|
||||
booster_t *b;
|
||||
QPMS_CRASHING_MALLOC(b, sizeof(*b));
|
||||
|
||||
QPMS_CRASHING_MALLOC(b->r, sizeof(double) * uopairarr_len(np));
|
||||
QPMS_CRASHING_MALLOC(b->theta, sizeof(double) * pairarr_len(np)); // TODO only one theta per pair
|
||||
QPMS_CRASHING_MALLOC(b->phi, sizeof(double) * pairarr_len(np)); // TODO only one theta per pair
|
||||
for(qpms_ss_pi_t di = 0; di < np; ++di)
|
||||
for(qpms_ss_pi_t si = 0; si < np; ++si) {
|
||||
const ppid_t opid = pairid(np, di, si);
|
||||
if (si != di) {
|
||||
const sph_t dlj = cart2sph(cart3_substract(ss->p[di].pos, ss->p[si].pos));
|
||||
b->r[uopairid(np, di, si)] = dlj.r;
|
||||
b->theta[opid] = dlj.theta;
|
||||
b->phi[opid] = dlj.phi;
|
||||
}
|
||||
else { // NOT USED IN ACTUAL CALCULATIONS, but must be valid doubles for sorting (infinities might be ok as well)
|
||||
b->theta[opid] = M_PI_2;
|
||||
b->phi[opid] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
b->r_count = sort_and_eliminate(b->r, uopairarr_len(np), sizeof(*b->r), cmp_double);
|
||||
QPMS_CRASHING_REALLOC(b->r, b->r_count * sizeof(*b->r));
|
||||
b->theta_count = sort_and_eliminate(b->theta, pairarr_len(np), sizeof(*b->theta), cmp_double);
|
||||
QPMS_CRASHING_REALLOC(b->theta, b->theta_count * sizeof(*b->theta));
|
||||
b->phi_count = sort_and_eliminate(b->phi, pairarr_len(np), sizeof(*b->phi), cmp_double);
|
||||
QPMS_CRASHING_REALLOC(b->phi, b->phi_count * sizeof(*b->phi));
|
||||
|
||||
QPMS_CRASHING_CALLOC(b->lMax_r, b->r_count, sizeof(*b->lMax_r));
|
||||
QPMS_CRASHING_MALLOC(b->r_map, uopairarr_len(np) * sizeof(*b->r_map));
|
||||
|
||||
QPMS_CRASHING_CALLOC(b->lMax_theta, b->theta_count, sizeof(*b->lMax_theta));
|
||||
QPMS_CRASHING_MALLOC(b->theta_map, pairarr_len(np) * sizeof(*b->theta_map));
|
||||
|
||||
QPMS_CRASHING_CALLOC(b->lMax_phi, b->phi_count, sizeof(*b->lMax_phi));
|
||||
QPMS_CRASHING_MALLOC(b->phi_map, pairarr_len(np) * sizeof(*b->phi_map));
|
||||
|
||||
for(qpms_ss_pi_t di = 0; di < np; ++di)
|
||||
for(qpms_ss_pi_t si = 0; si < np; ++si) {
|
||||
if (si != di) {
|
||||
const ppid_t opid = pairid(np, di, si);
|
||||
const uoppid_t uopid = uopairid(np, di, si);
|
||||
const sph_t dlj = cart2sph(cart3_substract(ss->p[di].pos, ss->p[si].pos));
|
||||
|
||||
double *hit = bsearch(&dlj.r, b->r, b->r_count, sizeof(*b->r), cmp_double);
|
||||
QPMS_ASSERT(hit != NULL);
|
||||
QPMS_ASSERT(hit >= b->r);
|
||||
const size_t ri = b->r_map[uopid] = hit - b->r;
|
||||
b->lMax_r[ri] = MAX(b->lMax_r[ri],
|
||||
MAX(qpms_ss_bspec_pi(ss, di)->lMax, qpms_ss_bspec_pi(ss, si)->lMax));
|
||||
|
||||
hit = bsearch(&dlj.theta, b->theta, b->theta_count, sizeof(*b->theta), cmp_double);
|
||||
QPMS_ASSERT(hit != NULL);
|
||||
QPMS_ASSERT(hit >= b->theta);
|
||||
const size_t thi = b->theta_map[opid] = hit - b->theta;
|
||||
b->lMax_theta[thi] = MAX(b->lMax_theta[thi],
|
||||
MAX(qpms_ss_bspec_pi(ss, di)->lMax, qpms_ss_bspec_pi(ss, si)->lMax));
|
||||
|
||||
hit = bsearch(&dlj.phi, b->phi, b->phi_count, sizeof(*b->phi), cmp_double);
|
||||
QPMS_ASSERT(hit != NULL);
|
||||
QPMS_ASSERT(hit >= b->phi);
|
||||
const size_t phj = b->phi_map[opid] = hit - b->phi;
|
||||
b->lMax_phi[phj] = MAX(b->lMax_phi[phj],
|
||||
MAX(qpms_ss_bspec_pi(ss, di)->lMax, qpms_ss_bspec_pi(ss, si)->lMax));
|
||||
|
||||
// TODO ri, thi, phj (+bspec?) -tuples here
|
||||
}
|
||||
}
|
||||
|
||||
{ // Bessel offsets
|
||||
QPMS_CRASHING_MALLOC(b->bessel_offsets_r, (b->r_count + 1) * sizeof(*b->bessel_offsets_r));
|
||||
size_t bessel_offset = 0;
|
||||
for(size_t ri = 0; ri < b->r_count; ++ri) {
|
||||
b->bessel_offsets_r[ri] = bessel_offset;
|
||||
bessel_offset += 2 * b->lMax_r[ri] + 2;
|
||||
}
|
||||
b->bessel_offsets_r[b->r_count] = bessel_offset;
|
||||
}
|
||||
|
||||
{ // Legendres
|
||||
QPMS_CRASHING_MALLOC(b->legendre_offsets_theta, (b->theta_count + 1) * sizeof(*b->legendre_offsets_theta));
|
||||
size_t legendre_offset = 0;
|
||||
for(size_t thi = 0; thi < b->theta_count; ++thi) {
|
||||
b->legendre_offsets_theta[thi] = legendre_offset;
|
||||
legendre_offset += gsl_sf_legendre_array_n(2*b->lMax_theta[thi] + 1);
|
||||
}
|
||||
b->legendre_offsets_theta[b->theta_count] = legendre_offset;
|
||||
QPMS_CRASHING_MALLOC(b->legendre_base, legendre_offset * sizeof(*b->legendre_base));
|
||||
for(size_t thi = 0; thi < b->theta_count; ++thi) {
|
||||
const double costheta = cos(b->theta[thi]);
|
||||
double *legendre_buf = b->legendre_base + b->legendre_offsets_theta[thi];
|
||||
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
|
||||
2*b->lMax_theta[thi]+1, costheta, -1, legendre_buf));
|
||||
}
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
void qpms_scatsys_translation_booster_free(booster_t *b) {
|
||||
if (b) {
|
||||
free(b->bessel_offsets_r);
|
||||
free(b->lMax_r);
|
||||
free(b->r);
|
||||
free(b->r_map);
|
||||
free(b->legendre_base);
|
||||
free(b->legendre_offsets_theta);
|
||||
free(b->lMax_theta);
|
||||
free(b->theta);
|
||||
free(b->theta_map);
|
||||
free(b->lMax_phi);
|
||||
free(b->phi);
|
||||
free(b->phi_map);
|
||||
free(b);
|
||||
}
|
||||
}
|
||||
|
||||
int qpms_ss_create_translation_cache(qpms_scatsys_t *ss, qpms_ss_caching_mode_t m) {
|
||||
QPMS_ASSERT(ss);
|
||||
if (ss->tbooster) {
|
||||
QPMS_WARN("Translation cache already created?");
|
||||
return 0;
|
||||
}
|
||||
switch(m) {
|
||||
case QPMS_SS_CACHE_NEVER:
|
||||
return 0;
|
||||
case QPMS_SS_CACHE_AUTO:
|
||||
QPMS_WARN("Translation operator cache heuristics not implemented, creating the cache");
|
||||
case QPMS_SS_CACHE_ALWAYS:
|
||||
ss->tbooster = qpms_scatsys_translation_booster_create(ss);
|
||||
if (ss->tbooster) return 0;
|
||||
else {
|
||||
QPMS_WARN("Failed to create tranlation operator cache");
|
||||
return -1;
|
||||
}
|
||||
default:
|
||||
QPMS_WTF;
|
||||
}
|
||||
QPMS_WTF;
|
||||
}
|
||||
|
||||
static qpms_errno_t qpms_scatsys_translation_booster_eval_bessels(
|
||||
const booster_t *b, complex double *target, complex double k // includes ref. ind. effect
|
||||
) {
|
||||
for(size_t ri = 0; ri < b->r_count; ++ri) {
|
||||
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // Is there a case for different J?
|
||||
2*b->lMax_r[ri]+1, k * b->r[ri],
|
||||
target + b->bessel_offsets_r[ri]));
|
||||
}
|
||||
return QPMS_SUCCESS;
|
||||
}
|
||||
|
||||
typedef struct qpms_scatsysw_translation_booster {
|
||||
// _Bool owned_by_ssw; // if False, this is not deallocated by parent ssw
|
||||
const booster_t *b;
|
||||
complex double *bessels;
|
||||
} boosterw_t;
|
||||
|
||||
boosterw_t *qpms_scatsysw_translation_booster_create(
|
||||
const qpms_scatsys_at_omega_t *ssw) {
|
||||
QPMS_PARANOID_ASSERT(ssw->ss);
|
||||
const booster_t *const b = ssw->ss->tbooster;
|
||||
QPMS_ASSERT(b);
|
||||
boosterw_t *bw;
|
||||
QPMS_CRASHING_MALLOC(bw, sizeof(*bw));
|
||||
|
||||
// Evaluate bessel functions
|
||||
QPMS_CRASHING_MALLOC(bw->bessels, b->bessel_offsets_r[b->r_count] * sizeof(*bw->bessels));
|
||||
for(size_t ri = 0; ri < b->r_count; ++ri) {
|
||||
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(QPMS_HANKEL_PLUS, // is there a case for other J?
|
||||
b->bessel_offsets_r[ri+1]-b->bessel_offsets_r[ri]-1,
|
||||
b->r[ri] * ssw->wavenumber,
|
||||
bw->bessels + b->bessel_offsets_r[ri]));
|
||||
}
|
||||
|
||||
bw->b = b;
|
||||
return bw;
|
||||
}
|
||||
|
||||
void qpms_scatsysw_translation_booster_free(boosterw_t *bw) {
|
||||
if (bw) {
|
||||
free (bw->bessels);
|
||||
}
|
||||
free (bw);
|
||||
}
|
||||
|
||||
complex double *qpms_scatsysw_build_modeproblem_matrix_full_boosted(
|
||||
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
|
||||
complex double *target,
|
||||
const qpms_scatsys_at_omega_t *ssw)
|
||||
{
|
||||
QPMS_ASSERT(ssw->translation_cache && ssw->ss->tbooster);
|
||||
const qpms_scatsys_t *const ss = ssw->ss;
|
||||
const booster_t *const b = ss->tbooster;
|
||||
const boosterw_t *const bw = ssw->translation_cache;
|
||||
const qpms_trans_calculator *const c = ss->c;
|
||||
|
||||
const complex double k = ssw->wavenumber;
|
||||
const size_t full_len = ss->fecv_size;
|
||||
if(!target)
|
||||
QPMS_CRASHING_MALLOC(target, SQ(full_len) * sizeof(complex double));
|
||||
complex double *tmp;
|
||||
QPMS_CRASHING_MALLOC(tmp, SQ(ss->max_bspecn) * sizeof(complex double));
|
||||
// Workspaces for the translation operator A and B matrices
|
||||
complex double *A, *B;
|
||||
QPMS_CRASHING_MALLOC(A, SQ(c->nelem) * sizeof(*A));
|
||||
QPMS_CRASHING_MALLOC(B, SQ(c->nelem) * sizeof(*B));
|
||||
memset(target, 0, SQ(full_len) * sizeof(complex double)); //unnecessary?
|
||||
double legendre_buf[gsl_sf_legendre_array_n(2*c->lMax + 1)]; //VLA, workspace for legendre arrays
|
||||
const complex double zero = 0, minusone = -1;
|
||||
{ // Non-diagonal part; M[piR, piC] = -T[piR] S(piR<-piC)
|
||||
size_t fullvec_offsetR = 0;
|
||||
for(qpms_ss_pi_t piR = 0; piR < ss->p_count; ++piR) {
|
||||
const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[piR].tmatrix_id]->spec;
|
||||
const cart3_t posR = ss->p[piR].pos;
|
||||
size_t fullvec_offsetC = 0;
|
||||
// dest particle T-matrix
|
||||
const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
|
||||
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) {
|
||||
const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
|
||||
if(piC != piR) { // The diagonal will be dealt with later.
|
||||
const cart3_t posC = ss->p[piC].pos;
|
||||
#if 0
|
||||
QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
|
||||
tmp, // tmp is S(piR<-piC)
|
||||
bspecR, bspecC->n, bspecC, 1,
|
||||
k, posR, posC, QPMS_HANKEL_PLUS));
|
||||
#else
|
||||
{ // this replaces qpms_trans_calculator_get_trans_array():
|
||||
// R is dest, C is src
|
||||
const sph_t dlj = cart2sph(cart3_substract(posR, posC));
|
||||
const uoppid_t pid = uopairid(ss->p_count, piR, piC);
|
||||
const size_t ri = b->r_map[pid];
|
||||
QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]);
|
||||
const qpms_l_t pair_lMax = b->lMax_r[ri];
|
||||
const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax);
|
||||
QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm);
|
||||
QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax);
|
||||
QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0);
|
||||
{ // this replaces qpms_trans_calculator_get_AB_arrays() and ..._buf()
|
||||
const double costheta = cos(dlj.theta);
|
||||
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
|
||||
2*pair_lMax+1, costheta, -1, legendre_buf));
|
||||
const double * const legendres = legendre_buf;
|
||||
const complex double * const bessels = bw->bessels + b->bessel_offsets_r[ri];
|
||||
qpms_trans_calculator_get_AB_arrays_precalcbuf(c, pair_lMax, A, B,
|
||||
/*deststride*/ pair_nelem, /*srcstride*/ 1, dlj.phi, bessels, legendres);
|
||||
qpms_trans_array_from_AB(tmp,// tmp is S(piR<-piC)
|
||||
bspecR, bspecC->n, bspecC, 1, A, B, pair_lMax);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
cblas_zgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
|
||||
bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
|
||||
&minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
|
||||
tmp/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
|
||||
target + fullvec_offsetR*full_len + fullvec_offsetC /*c*/,
|
||||
full_len /*ldc*/);
|
||||
}
|
||||
fullvec_offsetC += bspecC->n;
|
||||
}
|
||||
fullvec_offsetR += bspecR->n;
|
||||
}
|
||||
}
|
||||
// diagonal part M[pi,pi] = +1
|
||||
for (size_t i = 0; i < full_len; ++i) target[full_len * i + i] = +1;
|
||||
|
||||
free(tmp);
|
||||
free(A);
|
||||
free(B);
|
||||
return target;
|
||||
}
|
||||
|
||||
void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boosted(void *arg)
|
||||
{
|
||||
const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
|
||||
*a = arg;
|
||||
const qpms_scatsys_at_omega_t *ssw = a->ssw;
|
||||
QPMS_ASSERT(ssw->translation_cache && ssw->ss->tbooster);
|
||||
const qpms_scatsys_t * const ss = ssw->ss;
|
||||
const qpms_trans_calculator *const c = ss->c;
|
||||
const booster_t *const b = ss->tbooster;
|
||||
const boosterw_t *const bw = ssw->translation_cache;
|
||||
const complex double k = ssw->wavenumber;
|
||||
const qpms_iri_t iri = a->iri;
|
||||
const size_t packedlen = ss->saecv_sizes[iri];
|
||||
|
||||
// some of the following workspaces are probably redundant; TODO optimize later.
|
||||
|
||||
// workspaces for the uncompressed particle<-particle tranlation matrix block
|
||||
// and the result of multiplying with a T-matrix (times -1)
|
||||
complex double *Sblock, *TSblock;
|
||||
QPMS_CRASHING_MALLOC(Sblock, sizeof(complex double)*SQ(ss->max_bspecn));
|
||||
QPMS_CRASHING_MALLOC(TSblock, sizeof(complex double)*SQ(ss->max_bspecn));
|
||||
|
||||
// Workspace for the intermediate particle-orbit matrix result
|
||||
complex double *tmp;
|
||||
QPMS_CRASHING_MALLOC(tmp, sizeof(complex double) * SQ(ss->max_bspecn) * ss->sym->order);
|
||||
// Workspace for A, B arrays
|
||||
complex double *A, *B;
|
||||
QPMS_CRASHING_MALLOC(A, SQ(c->nelem) * sizeof(*A));
|
||||
QPMS_CRASHING_MALLOC(B, SQ(c->nelem) * sizeof(*B));
|
||||
double legendre_buf[gsl_sf_legendre_array_n(2*c->lMax + 1)]; //VLA, workspace for legendre arrays
|
||||
|
||||
const complex double one = 1, zero = 0, minusone = -1;
|
||||
|
||||
while(1) {
|
||||
// In the beginning, pick a target (row) orbit for this thread
|
||||
QPMS_ENSURE_SUCCESS(pthread_mutex_lock(a->opistartR_mutex));
|
||||
if(*(a->opistartR_ptr) >= ss->p_count) {// Everything is already done, end
|
||||
QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
|
||||
break;
|
||||
}
|
||||
const qpms_ss_pi_t opistartR = *(a->opistartR_ptr);
|
||||
// Now increment it for another thread:
|
||||
*(a->opistartR_ptr) += ss->orbit_types[ss->p_orbitinfo[ss->p_by_orbit[opistartR]].t].size;
|
||||
QPMS_ENSURE_SUCCESS(pthread_mutex_unlock(a->opistartR_mutex));
|
||||
|
||||
// Orbit picked (defined by opistartR), do the work.
|
||||
const qpms_ss_pi_t orbitstartpiR = ss->p_by_orbit[opistartR];
|
||||
const qpms_ss_oti_t otiR = ss->p_orbitinfo[orbitstartpiR].t;
|
||||
const qpms_ss_osn_t osnR = ss->p_orbitinfo[orbitstartpiR].osn;
|
||||
const qpms_ss_orbit_type_t *const otR = ss->orbit_types + otiR;
|
||||
const qpms_ss_orbit_pi_t orbit_p_countR = otR->size;
|
||||
const size_t orbit_packedsizeR = otR->irbase_sizes[iri];
|
||||
|
||||
if(orbit_packedsizeR) { // avoid zgemm crash on empty irrep
|
||||
const size_t particle_fullsizeR = otR->bspecn; // == bspecR->n
|
||||
const qpms_vswf_set_spec_t *bspecR = ssw->tm[ss->p[orbitstartpiR].tmatrix_id]->spec;
|
||||
// This is the orbit-level matrix projecting the whole orbit onto the irrep.
|
||||
const complex double *omR = otR->irbases + otR->irbase_offsets[iri];
|
||||
// Orbit coeff vector's full size:
|
||||
const size_t orbit_fullsizeR = otR->size * otR->bspecn;
|
||||
// This is where the orbit starts in the "packed" vector:
|
||||
const size_t packed_orbit_offsetR =
|
||||
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiR]
|
||||
+ osnR * otR->irbase_sizes[iri];
|
||||
for(qpms_ss_orbit_pi_t opiR = 0; opiR < orbit_p_countR; ++opiR) {
|
||||
qpms_ss_pi_t piR = ss->p_by_orbit[opistartR + opiR];
|
||||
assert(opiR == ss->p_orbitinfo[piR].p);
|
||||
assert(otiR == ss->p_orbitinfo[piR].t);
|
||||
assert(ss->p_orbitinfo[piR].osn == osnR);
|
||||
const cart3_t posR = ss->p[piR].pos;
|
||||
// dest particle T-matrix
|
||||
const complex double *tmmR = ssw->tm[ss->p[piR].tmatrix_id]->m;
|
||||
for(qpms_ss_pi_t piC = 0; piC < ss->p_count; ++piC) { //Column loop
|
||||
const qpms_ss_oti_t otiC = ss->p_orbitinfo[piC].t;
|
||||
const qpms_ss_orbit_type_t *const otC = ss->orbit_types + otiC;
|
||||
const qpms_ss_osn_t osnC = ss->p_orbitinfo[piC].osn;
|
||||
const qpms_ss_orbit_pi_t opiC = ss->p_orbitinfo[piC].p;
|
||||
// This is where the particle's orbit starts in the "packed" vector:
|
||||
const size_t packed_orbit_offsetC =
|
||||
ss->saecv_ot_offsets[iri*ss->orbit_type_count + otiC]
|
||||
+ osnC * otC->irbase_sizes[iri];
|
||||
const qpms_vswf_set_spec_t *bspecC = ssw->tm[ss->p[piC].tmatrix_id]->spec;
|
||||
// Orbit coeff vector's full size:
|
||||
const size_t orbit_fullsizeC = otC->size * otC->bspecn;
|
||||
const size_t particle_fullsizeC = otC->bspecn; // == bspecC->n
|
||||
const size_t orbit_packedsizeC = otC->irbase_sizes[iri];
|
||||
// This is the orbit-level matrix projecting the whole orbit onto the irrep.
|
||||
const complex double *omC = otC->irbases + otC->irbase_offsets[iri];
|
||||
|
||||
if(orbit_packedsizeC) { // avoid zgemm crash on empty irrep
|
||||
if(piC != piR) { // non-diagonal, calculate TS
|
||||
const cart3_t posC = ss->p[piC].pos;
|
||||
#if 0
|
||||
QPMS_ENSURE_SUCCESS(qpms_trans_calculator_get_trans_array_lc3p(ss->c,
|
||||
Sblock, // Sblock is S(piR->piC)
|
||||
bspecR, bspecC->n, bspecC, 1,
|
||||
k, posR, posC, QPMS_HANKEL_PLUS));
|
||||
#else
|
||||
{ // this block replaces qpms_trans_calculator_get_trans_array():
|
||||
// R is dest, C is src
|
||||
const sph_t dlj = cart2sph(cart3_substract(posR, posC));
|
||||
const uoppid_t uopid = uopairid(ss->p_count, piR, piC);
|
||||
const ppid_t opid = pairid(ss->p_count, piR, piC);
|
||||
const size_t ri = b->r_map[uopid];
|
||||
const size_t thi = b->theta_map[opid];
|
||||
QPMS_PARANOID_ASSERT(dlj.r == b->r[ri]);
|
||||
QPMS_PARANOID_ASSERT(dlj.theta == b->theta[thi]);
|
||||
const qpms_l_t pair_lMax = b->lMax_r[ri];
|
||||
const qpms_y_t pair_nelem = qpms_lMax2nelem(pair_lMax);
|
||||
QPMS_PARANOID_ASSERT(c->normalisation == bspecC->norm && c->normalisation == bspecR->norm);
|
||||
QPMS_PARANOID_ASSERT(c->lMax >= bspecC->lMax && c->lMax >= bspecR->lMax);
|
||||
QPMS_PARANOID_ASSERT(bspecC->lMax_L < 0 && bspecR->lMax_L < 0);
|
||||
{ // this replaces qpms_trans_calculator_get_AB_arrays() and _buf()
|
||||
#if 0
|
||||
const double costheta = cos(dlj.theta);
|
||||
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,
|
||||
2*pair_lMax+1, costheta, -1, legendre_buf));
|
||||
const double * const legendres = legendre_buf;
|
||||
#else
|
||||
const double * const legendres = b->legendre_base + b->legendre_offsets_theta[thi];
|
||||
#endif
|
||||
const complex double * const bessels = bw->bessels + b->bessel_offsets_r[ri];
|
||||
qpms_trans_calculator_get_AB_arrays_precalcbuf(c, pair_lMax, A, B,
|
||||
/*deststride*/ pair_nelem, /*srcstride*/ 1, dlj.phi, bessels, legendres);
|
||||
}
|
||||
qpms_trans_array_from_AB(Sblock, // Sblock is S(piR->piC)
|
||||
bspecR, bspecC->n, bspecC, 1, A, B, pair_lMax);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
|
||||
bspecR->n /*m*/, bspecC->n /*n*/, bspecR->n /*k*/,
|
||||
&minusone/*alpha*/, tmmR/*a*/, bspecR->n/*lda*/,
|
||||
Sblock/*b*/, bspecC->n/*ldb*/, &zero/*beta*/,
|
||||
TSblock /*c*/, bspecC->n /*ldc*/);
|
||||
} else { // diagonal, fill with diagonal +1
|
||||
for (size_t row = 0; row < bspecR->n; ++row)
|
||||
for (size_t col = 0; col < bspecC->n; ++col)
|
||||
TSblock[row * bspecC->n + col] = (row == col)? +1 : 0;
|
||||
}
|
||||
|
||||
// tmp[oiR|piR,piC] = ∑_K M[piR,K] U*[K,piC]
|
||||
SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasConjTrans,
|
||||
particle_fullsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeC /*K*/,
|
||||
&one /*alpha*/, TSblock/*A*/, particle_fullsizeC/*ldA*/,
|
||||
omC + opiC*particle_fullsizeC /*B*/,
|
||||
orbit_fullsizeC/*ldB*/, &zero /*beta*/,
|
||||
tmp /*C*/, orbit_packedsizeC /*LDC*/);
|
||||
|
||||
// target[oiR|piR,oiC|piC] += U[...] tmp[...]
|
||||
SERIAL_ZGEMM(CblasRowMajor, CblasNoTrans, CblasNoTrans,
|
||||
orbit_packedsizeR /*M*/, orbit_packedsizeC /*N*/, particle_fullsizeR /*K*/,
|
||||
&one /*alpha*/, omR + opiR*particle_fullsizeR/*A*/, orbit_fullsizeR/*ldA*/,
|
||||
tmp /*B*/, orbit_packedsizeC /*ldB*/, &one /*beta*/,
|
||||
a->target_packed + packedlen*packed_orbit_offsetR + packed_orbit_offsetC /*C*/,
|
||||
packedlen /*ldC*/);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(tmp);
|
||||
free(Sblock);
|
||||
free(TSblock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
#include <lapacke.h>
|
||||
#include <cblas.h>
|
||||
#include <lapacke.h>
|
||||
#include "scatsystem.h"
|
||||
#include "scatsys_private.h"
|
||||
#include "indexing.h"
|
||||
#include "vswf.h"
|
||||
#include "groups.h"
|
||||
|
@ -22,6 +22,7 @@
|
|||
#include <pthread.h>
|
||||
#include "kahansum.h"
|
||||
#include "tolerances.h"
|
||||
#include "beyn.h"
|
||||
|
||||
#ifdef QPMS_SCATSYSTEM_USE_OWN_BLAS
|
||||
#include "qpmsblas.h"
|
||||
|
@ -133,8 +134,7 @@ static void add_orbit_type(qpms_scatsys_t *ss, const qpms_ss_orbit_type_t *ot_cu
|
|||
bs_cumsum += lastbs;
|
||||
ot_new->irbase_cumsizes[iri] = bs_cumsum;
|
||||
}
|
||||
if(bs_cumsum != ot_new->size * bspecn)
|
||||
qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
|
||||
QPMS_ENSURE(bs_cumsum == ot_new->size * bspecn,
|
||||
"The cumulative size of the symmetry-adapted bases is wrong; "
|
||||
"expected %d = %d * %d, got %d.",
|
||||
ot_new->size * bspecn, ot_new->size, bspecn, bs_cumsum);
|
||||
|
@ -142,7 +142,6 @@ static void add_orbit_type(qpms_scatsys_t *ss, const qpms_ss_orbit_type_t *ot_cu
|
|||
ss->orbit_type_count++;
|
||||
}
|
||||
|
||||
|
||||
// Almost 200 lines. This whole thing deserves a rewrite!
|
||||
qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
|
||||
const qpms_finite_group_t *sym, complex double omega,
|
||||
|
@ -181,6 +180,7 @@ qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
|
|||
ss->lenscale = lenscale;
|
||||
ss->sym = sym;
|
||||
ss->medium = orig->medium;
|
||||
ss->tbooster = NULL;
|
||||
|
||||
// Copy the qpms_tmatrix_fuction_t from orig
|
||||
ss->tmg_count = orig->tmg_count;
|
||||
|
@ -209,6 +209,7 @@ qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
|
|||
QPMS_CRASHING_MALLOC(ssw, sizeof(*ssw)); // returned
|
||||
ssw->ss = ss;
|
||||
ssw->omega = omega;
|
||||
ssw->translation_cache = NULL;
|
||||
ssw->medium = qpms_epsmu_generator_eval(ss->medium, omega);
|
||||
ssw->wavenumber = qpms_wavenumber(omega, ssw->medium);
|
||||
// we will be using ss->tm_capacity also for ssw->tm
|
||||
|
@ -275,15 +276,17 @@ qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
|
|||
if (tmj < ss->tm_count) { // HIT, transformed T-matrix already exists
|
||||
//TODO some "rounding error cleanup" symmetrisation could be performed here?
|
||||
qpms_tmatrix_free(transformed);
|
||||
free(m);
|
||||
} else { // MISS, save the matrix (also the "abstract" one)
|
||||
ssw->tm[ss->tm_count] = transformed;
|
||||
ss->tm[ss->tm_count].tmgi = ss->tm[tmi].tmgi;
|
||||
qpms_tmatrix_operation_compose_chain_init(&(ss->tm[ss->tm_count].op), 2, 1);
|
||||
qpms_tmatrix_operation_compose_chain_init(&(ss->tm[ss->tm_count].op), 2, 1); // FIXME MEMLEAK
|
||||
struct qpms_tmatrix_operation_compose_chain * const o = &(ss->tm[ss->tm_count].op.op.compose_chain);
|
||||
o->ops[0] = & ss->tm[tmi].op; // Let's just borrow this
|
||||
o->ops_owned[0] = false;
|
||||
o->opmem[0].typ = QPMS_TMATRIX_OPERATION_LRMATRIX;
|
||||
o->opmem[0].op.lrmatrix.m = m;
|
||||
o->opmem[0].op.lrmatrix.owns_m = true;
|
||||
o->opmem[0].op.lrmatrix.m_size = d * d;
|
||||
o->ops[1] = o->opmem;
|
||||
o->ops_owned[1] = true;
|
||||
|
@ -462,6 +465,9 @@ qpms_scatsys_at_omega_t *qpms_scatsys_apply_symmetry(const qpms_scatsys_t *orig,
|
|||
|
||||
void qpms_scatsys_free(qpms_scatsys_t *ss) {
|
||||
if(ss) {
|
||||
QPMS_ASSERT(ss->tm);
|
||||
for(qpms_ss_tmi_t tmi = 0; tmi < ss->tm_count; ++tmi)
|
||||
qpms_tmatrix_operation_clear(&ss->tm[tmi].op);
|
||||
free(ss->tm);
|
||||
free(ss->tmg);
|
||||
free(ss->p);
|
||||
|
@ -471,8 +477,11 @@ void qpms_scatsys_free(qpms_scatsys_t *ss) {
|
|||
free(ss->otspace);
|
||||
free(ss->p_orbitinfo);
|
||||
free(ss->orbit_types);
|
||||
free(ss->saecv_ot_offsets);
|
||||
free(ss->saecv_sizes);
|
||||
free(ss->p_by_orbit);
|
||||
if(ss->tbooster)
|
||||
qpms_scatsys_translation_booster_free(ss->tbooster);
|
||||
qpms_trans_calculator_free(ss->c);
|
||||
}
|
||||
free(ss);
|
||||
|
@ -519,15 +528,26 @@ qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss,
|
|||
for (qpms_ss_tmgi_t tmgi = 0; tmgi < ss->tmg_count; ++tmgi)
|
||||
qpms_tmatrix_free(tmatrices_preop[tmgi]);
|
||||
free(tmatrices_preop);
|
||||
ssw->translation_cache= NULL;
|
||||
return ssw;
|
||||
}
|
||||
|
||||
int qpms_ssw_create_translation_cache(qpms_scatsys_at_omega_t *ssw) {
|
||||
if(!ssw->translation_cache)
|
||||
if(ssw->ss->tbooster)
|
||||
ssw->translation_cache = qpms_scatsysw_translation_booster_create(ssw);
|
||||
QPMS_ASSERT(ssw->translation_cache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw) {
|
||||
if (ssw) {
|
||||
if(ssw->tm)
|
||||
for(qpms_ss_tmi_t i = 0; i < ssw->ss->tm_count; ++i)
|
||||
qpms_tmatrix_free(ssw->tm[i]);
|
||||
free(ssw->tm);
|
||||
if(ssw->translation_cache)
|
||||
qpms_scatsysw_translation_booster_free(ssw->translation_cache);
|
||||
}
|
||||
free(ssw);
|
||||
}
|
||||
|
@ -667,25 +687,16 @@ complex double *qpms_orbit_irrep_basis(size_t *basis_size,
|
|||
complex double *U; QPMS_CRASHING_MALLOC(U, n*n*N*N*sizeof(complex double));
|
||||
double *s; QPMS_CRASHING_MALLOC(s, n*N*sizeof(double));
|
||||
|
||||
int info = LAPACKE_zgesdd(LAPACK_ROW_MAJOR,
|
||||
QPMS_ENSURE_SUCCESS(LAPACKE_zgesdd(LAPACK_ROW_MAJOR,
|
||||
'A', // jobz; overwrite projector with left sg.vec. and write right into V_H
|
||||
n*N /* m */, n*N /* n */, projector /* a */, n*N /* lda */,
|
||||
s /* s */, U /* u */, n*N /* ldu, irrelev. */, V_H /* vt */,
|
||||
n*N /* ldvt */);
|
||||
if (info) qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
|
||||
"Something went wrong with the SVD.");
|
||||
|
||||
n*N /* ldvt */));
|
||||
|
||||
size_t bs;
|
||||
for(bs = 0; bs < n*N; ++bs) {
|
||||
#if 0
|
||||
qpms_pr_debug_at_flf(__FILE__, __LINE__, __func__,
|
||||
"%d. irrep, %zd. SV: %.16lf", (int) iri, bs, s[bs]);
|
||||
#endif
|
||||
if(s[bs] > 1 + SVD_ATOL) qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
|
||||
"%zd. SV too large: %.16lf", bs, s[bs]);
|
||||
if(s[bs] > SVD_ATOL && fabs(1-s[bs]) > SVD_ATOL)
|
||||
qpms_pr_error_at_flf(__FILE__, __LINE__, __func__,
|
||||
QPMS_ENSURE(s[bs] <= 1 + SVD_ATOL, "%zd. SV too large: %.16lf", bs, s[bs]);
|
||||
QPMS_ENSURE(!(s[bs] > SVD_ATOL && fabs(1-s[bs]) > SVD_ATOL),
|
||||
"%zd. SV in the 'wrong' interval: %.16lf", bs, s[bs]);
|
||||
if(s[bs] < SVD_ATOL) break;
|
||||
}
|
||||
|
@ -694,6 +705,7 @@ complex double *qpms_orbit_irrep_basis(size_t *basis_size,
|
|||
if(newtarget) QPMS_CRASHING_REALLOC(target, bs*N*n*sizeof(complex double));
|
||||
if(basis_size) *basis_size = bs;
|
||||
|
||||
free(s);
|
||||
free(U);
|
||||
free(V_H);
|
||||
free(projector);
|
||||
|
@ -1110,6 +1122,8 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_full(
|
|||
const qpms_scatsys_at_omega_t *ssw
|
||||
)
|
||||
{
|
||||
if (ssw->translation_cache)
|
||||
return qpms_scatsysw_build_modeproblem_matrix_full_boosted(target, ssw);
|
||||
const complex double k = ssw->wavenumber;
|
||||
const qpms_scatsys_t *ss = ssw->ss;
|
||||
const size_t full_len = ss->fecv_size;
|
||||
|
@ -1386,14 +1400,6 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR(
|
|||
return target_packed;
|
||||
}
|
||||
|
||||
struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg{
|
||||
const qpms_scatsys_at_omega_t *ssw;
|
||||
qpms_ss_pi_t *opistartR_ptr;
|
||||
pthread_mutex_t *opistartR_mutex;
|
||||
qpms_iri_t iri;
|
||||
complex double *target_packed;
|
||||
};
|
||||
|
||||
static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread(void *arg)
|
||||
{
|
||||
const struct qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_arg
|
||||
|
@ -1512,9 +1518,6 @@ static void *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_threa
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
free(tmp);
|
||||
free(Sblock);
|
||||
|
@ -1644,9 +1647,6 @@ static void *qpms_scatsys_build_translation_matrix_e_irrep_packed_parallelR_thre
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
free(tmp);
|
||||
free(Sblock);
|
||||
|
@ -1716,6 +1716,7 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
|
|||
const qpms_scatsys_at_omega_t *ssw, qpms_iri_t iri
|
||||
)
|
||||
{
|
||||
const _Bool use_translation_cache = (ssw->translation_cache != NULL);
|
||||
const size_t packedlen = ssw->ss->saecv_sizes[iri];
|
||||
if (!packedlen) // THIS IS A BIT PROBLEMATIC, TODO how to deal with empty irreps?
|
||||
return target_packed;
|
||||
|
@ -1748,7 +1749,9 @@ complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
|
|||
pthread_t thread_ids[nthreads];
|
||||
for(long thi = 0; thi < nthreads; ++thi)
|
||||
QPMS_ENSURE_SUCCESS(pthread_create(thread_ids + thi, NULL,
|
||||
qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread,
|
||||
use_translation_cache
|
||||
? qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread_boosted
|
||||
: qpms_scatsysw_build_modeproblem_matrix_irrep_packed_parallelR_thread,
|
||||
(void *) &arg));
|
||||
for(long thi = 0; thi < nthreads; ++thi) {
|
||||
void *retval;
|
||||
|
@ -1804,7 +1807,6 @@ complex double *qpms_scatsysw_apply_Tmatrices_full(
|
|||
}
|
||||
|
||||
|
||||
|
||||
ccart3_t qpms_scatsys_eval_E(const qpms_scatsys_t *ss,
|
||||
const complex double *cvf, const cart3_t where,
|
||||
const complex double k) {
|
||||
|
@ -1897,3 +1899,55 @@ complex double *qpms_scatsys_scatter_solve(
|
|||
return f;
|
||||
}
|
||||
|
||||
struct qpms_scatsys_finite_eval_Beyn_ImTS_param {
|
||||
const qpms_scatsys_t *ss;
|
||||
qpms_iri_t iri;
|
||||
};
|
||||
|
||||
/// Wrapper for Beyn algorithm (non-periodic system)
|
||||
static int qpms_scatsys_finite_eval_Beyn_ImTS(complex double *target,
|
||||
size_t m, complex double omega, void *params) {
|
||||
const struct qpms_scatsys_finite_eval_Beyn_ImTS_param *p = params;
|
||||
qpms_scatsys_at_omega_t *ssw = qpms_scatsys_at_omega(p->ss, omega);
|
||||
QPMS_ENSURE(ssw != NULL, "qpms_scatsys_at_omega() returned NULL");
|
||||
if(p->ss->tbooster) qpms_ssw_create_translation_cache(ssw);
|
||||
if (p->iri == QPMS_NO_IRREP) {
|
||||
QPMS_ASSERT(m == p->ss->fecv_size);
|
||||
QPMS_ENSURE(NULL != qpms_scatsysw_build_modeproblem_matrix_full(
|
||||
target, ssw),
|
||||
"qpms_scatsysw_build_modeproblem_matrix_full() returned NULL");
|
||||
} else {
|
||||
QPMS_ASSERT(m == p->ss->saecv_sizes[p->iri]);
|
||||
QPMS_ENSURE(NULL != qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
|
||||
target, ssw, p->iri),
|
||||
"qpms_scatsysw_build_modeproblem_matrix_irrep_packed() returned NULL");
|
||||
}
|
||||
qpms_scatsys_at_omega_free(ssw);
|
||||
return QPMS_SUCCESS;
|
||||
}
|
||||
|
||||
beyn_result_t *qpms_scatsys_finite_find_eigenmodes(
|
||||
const qpms_scatsys_t * const ss, const qpms_iri_t iri,
|
||||
complex double omega_centre, double omega_rr, double omega_ri,
|
||||
size_t contour_npoints,
|
||||
double rank_tol, size_t rank_sel_min, double res_tol) {
|
||||
size_t n; // matrix dimension
|
||||
if (qpms_iri_is_valid(ss->sym, iri)) {
|
||||
n = ss->saecv_sizes[iri];
|
||||
} else if (iri == QPMS_NO_IRREP) {
|
||||
n = ss->fecv_size;
|
||||
} else QPMS_WTF;
|
||||
|
||||
beyn_contour_t *contour = beyn_contour_ellipse(omega_centre,
|
||||
omega_rr, omega_ri, contour_npoints);
|
||||
|
||||
struct qpms_scatsys_finite_eval_Beyn_ImTS_param p = {ss, iri};
|
||||
beyn_result_t *result = beyn_solve(n, n /* possibly make smaller? */,
|
||||
qpms_scatsys_finite_eval_Beyn_ImTS, NULL, (void *) &p,
|
||||
contour, rank_tol, rank_sel_min, res_tol);
|
||||
QPMS_ENSURE(result != NULL, "beyn_solve() returned NULL");
|
||||
|
||||
free(contour);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -133,7 +133,7 @@ typedef struct qpms_ss_derived_tmatrix_t {
|
|||
|
||||
|
||||
struct qpms_trans_calculator;
|
||||
struct qpms_epsmu_generator_t;
|
||||
struct qpms_scatsys_translation_booster;
|
||||
|
||||
typedef struct qpms_scatsys_t {
|
||||
struct qpms_epsmu_generator_t medium; ///< Optical properties of the background medium.
|
||||
|
@ -188,8 +188,16 @@ typedef struct qpms_scatsys_t {
|
|||
char *otspace_end;
|
||||
double lenscale; // radius of the array, used as a relative tolerance measure
|
||||
struct qpms_trans_calculator *c;
|
||||
// Internal metadata for faster translation matrix evaluation
|
||||
struct qpms_scatsys_translation_booster *tbooster;
|
||||
|
||||
} qpms_scatsys_t;
|
||||
|
||||
/// Returns true if ss has the translation caching metadata populated.
|
||||
static inline _Bool qpms_scatsys_has_translation_cache(const qpms_scatsys_t *ss) {
|
||||
return ss->tbooster != NULL;
|
||||
}
|
||||
|
||||
/// Retrieve the bspec of \a tmi'th element of \a ss->tm.
|
||||
static inline const qpms_vswf_set_spec_t *qpms_ss_bspec_tmi(const qpms_scatsys_t *ss, qpms_ss_tmi_t tmi) {
|
||||
return ss->tmg[ss->tm[tmi].tmgi].spec;
|
||||
|
@ -200,6 +208,9 @@ static inline const qpms_vswf_set_spec_t *qpms_ss_bspec_pi(const qpms_scatsys_t
|
|||
return ss->tmg[ss->tm[ss->p[pi].tmatrix_id].tmgi].spec;
|
||||
}
|
||||
|
||||
struct qpms_scatsysw_translation_booster;
|
||||
void qpms_scatsysw_translation_booster_free(struct qpms_scatsysw_translation_booster *);
|
||||
|
||||
typedef struct qpms_scatsys_at_omega_t {
|
||||
const qpms_scatsys_t *ss; ///< Parent scattering system.
|
||||
/// T-matrices from \a ss, evaluated at \a omega.
|
||||
|
@ -210,8 +221,11 @@ typedef struct qpms_scatsys_at_omega_t {
|
|||
complex double omega; ///< Angular frequency
|
||||
qpms_epsmu_t medium; ///< Background medium optical properties at the given frequency
|
||||
complex double wavenumber; ///< Background medium wavenumber
|
||||
|
||||
struct qpms_scatsysw_translation_booster *translation_cache; ///< (private) cache to speedup translations
|
||||
} qpms_scatsys_at_omega_t;
|
||||
|
||||
|
||||
/// Creates a new scatsys by applying a symmetry group onto a "proto-scatsys", copying particles if needed.
|
||||
/** In fact, it copies everything except the vswf set specs and qpms_abstract_tmatrix_t instances,
|
||||
* so keep them alive until scatsys is destroyed.
|
||||
|
@ -261,6 +275,34 @@ void qpms_scatsys_at_omega_free(qpms_scatsys_at_omega_t *ssw);
|
|||
qpms_scatsys_at_omega_t *qpms_scatsys_at_omega(const qpms_scatsys_t *ss,
|
||||
complex double omega);
|
||||
|
||||
/// Determines behaviour of qpms_ss_create_translation_cache().
|
||||
typedef enum qpms_ss_caching_mode {
|
||||
/// Don't create the translation operator cache.
|
||||
/** Use this if the particle positions are random. */
|
||||
QPMS_SS_CACHE_NEVER,
|
||||
/// Always create the translation operator cache.
|
||||
/** Use this for highly regular large finite structures. */
|
||||
QPMS_SS_CACHE_ALWAYS,
|
||||
/// Evaluate the need of caching automatically.
|
||||
QPMS_SS_CACHE_AUTO,
|
||||
QPMS_SS_CACHE_DEFAULT = QPMS_SS_CACHE_AUTO
|
||||
} qpms_ss_caching_mode_t;
|
||||
|
||||
/// Returns true if ssw has the translation operator cache populated.
|
||||
static inline _Bool qpms_scatsysw_has_translation_cache(const qpms_scatsys_at_omega_t *ssw) {
|
||||
return ssw->translation_cache != NULL;
|
||||
}
|
||||
|
||||
/// Creates some data structures for speeding up translation operator calculations.
|
||||
/** This is mostly useful for "finite lattices", where many pairs of nanoparticles
|
||||
* share the same relative positions.
|
||||
*/
|
||||
int qpms_ss_create_translation_cache(qpms_scatsys_t *ss, qpms_ss_caching_mode_t m);
|
||||
|
||||
/// Pre-calculates the "translation cache" of a qpms_scatsys_at_omega_t.
|
||||
/** If ssw->ss->tbooster is NULL, does nothing. */
|
||||
int qpms_ssw_create_translation_cache(qpms_scatsys_at_omega_t *ssw);
|
||||
|
||||
/// Creates a "full" transformation matrix U that takes a full vector and projects it onto an symmetry adapted basis.
|
||||
/** Mostly as a reference and a debugging tool, as multiplicating these big matrices would be inefficient.
|
||||
*
|
||||
|
@ -343,31 +385,43 @@ complex double *qpms_scatsys_build_translation_matrix_e_irrep_packed(
|
|||
);
|
||||
|
||||
/// Creates the full \f$ (I - TS) \f$ matrix of the scattering system.
|
||||
/**
|
||||
* \returns \a target on success, NULL on error.
|
||||
*/
|
||||
complex double *qpms_scatsysw_build_modeproblem_matrix_full(
|
||||
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
|
||||
complex double *target,
|
||||
const qpms_scatsys_at_omega_t *ssw
|
||||
);
|
||||
/// Creates the mode problem matrix \f$ (I - TS) \f$ directly in the irrep-packed form.
|
||||
/**
|
||||
* \returns \a target on success, NULL on error.
|
||||
*/
|
||||
complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed(
|
||||
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
|
||||
complex double *target,
|
||||
const qpms_scatsys_at_omega_t *ssw,
|
||||
qpms_iri_t iri
|
||||
qpms_iri_t iri ///< Index of the irreducible representation in ssw->ss->sym
|
||||
);
|
||||
/// Alternative implementation of qpms_scatsysw_build_modeproblem_matrix_irrep_packed().
|
||||
/**
|
||||
* \returns \a target on success, NULL on error.
|
||||
*/
|
||||
complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_orbitorderR(
|
||||
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
|
||||
complex double *target,
|
||||
const qpms_scatsys_at_omega_t *ssw,
|
||||
qpms_iri_t iri
|
||||
qpms_iri_t iri ///< Index of the irreducible representation in ssw->ss->sym
|
||||
);
|
||||
/// Alternative (serial reference) implementation of qpms_scatsysw_build_modeproblem_matrix_irrep_packed().
|
||||
/**
|
||||
* \returns \a target on success, NULL on error.
|
||||
*/
|
||||
complex double *qpms_scatsysw_build_modeproblem_matrix_irrep_packed_serial(
|
||||
/// Target memory with capacity for ss->fecv_size**2 elements. If NULL, new will be allocated.
|
||||
complex double *target,
|
||||
const qpms_scatsys_at_omega_t *ssw,
|
||||
qpms_iri_t iri
|
||||
qpms_iri_t iri ///< Index of the irreducible representation in ssw->ss->sym
|
||||
);
|
||||
|
||||
/// LU factorisation (LAPACKE_zgetrf) result holder.
|
||||
|
@ -504,6 +558,55 @@ complex double *qpms_scatsysw_apply_Tmatrices_full(
|
|||
const qpms_scatsys_at_omega_t *ssw
|
||||
);
|
||||
|
||||
struct beyn_result_t; // See beyn.h for full definition
|
||||
|
||||
/// Searches for finite scattering system's eigenmodes using Beyn's algorithm.
|
||||
/**
|
||||
* Currently, elliptical contour is used.
|
||||
*
|
||||
* TODO In the future, this will probably support irrep decomposition as well,
|
||||
* but it does not make much sense for periodic / small systems, as in their
|
||||
* case the bottleneck is the T-matrix and translation matrix evaluation
|
||||
* rather than the linear algebra.
|
||||
*/
|
||||
struct beyn_result_t *qpms_scatsys_finite_find_eigenmodes(
|
||||
const qpms_scatsys_t *ss,
|
||||
/// A valid irrep index to search only in one irrep, or QPMS_NO_IRREP for solving the full system.
|
||||
qpms_iri_t iri,
|
||||
complex double omega_centre, ///< Center of the ellipse inside which the eigenfreqs are searched for.
|
||||
double omega_rr, ///< Real half-axis of the ellipse inside which the eigenfreqs are searched for.
|
||||
double omega_ri, ///< Imaginary half-axis of the ellipse inside which the eigenfreqs are searched for.
|
||||
size_t contour_npoints, ///< Number of elliptic contour discretisation points (preferably even number)
|
||||
double rank_tol, ///< (default: `1e-4`) TODO DOC.
|
||||
size_t rank_sel_min, ///< Minimum number of eigenvalue candidates, even if they don't pass \a rank_tol.
|
||||
double res_tol ///< (default: `0.0`) TODO DOC.
|
||||
);
|
||||
|
||||
|
||||
#if 0
|
||||
/// Searches for scattering system's eigenmodes using Beyn's algorithm.
|
||||
/**
|
||||
* Currently, elliptical contour is used.
|
||||
*
|
||||
* TODO In the future, this will probably support irrep decomposition as well,
|
||||
* but it does not make much sense for periodic / small systems, as in their
|
||||
* case the bottleneck is the T-matrix and translation matrix evaluation
|
||||
* rather than the linear algebra.
|
||||
*/
|
||||
struct beyn_result_t *qpms_scatsys_find_eigenmodes(
|
||||
const qpms_scatsys_t *ss,
|
||||
double eta, ///< Ewald sum parameter
|
||||
const double *beta_, ///< k-vector of corresponding dimensionality, NULL/ignored for finite system.
|
||||
complex double omega_centre, ///< Center of the ellipse inside which the eigenfreqs are searched for.
|
||||
double omega_rr, ///< Real half-axis of the ellipse inside which the eigenfreqs are searched for.
|
||||
double omega_ri, ///< Imaginary half-axis of the ellipse inside which the eigenfreqs are searched for.
|
||||
size_t contour_npoints, ///< Number of elliptic contour discretisation points (preferably even number)
|
||||
double rank_tol, ///< (default: `1e-4`) TODO DOC.
|
||||
size_t rank_sel_min, ///< Minimum number of eigenvalue candidates, even if they don't pass \a rank_tol.
|
||||
double res_tol ///< (default: `0.0`) TODO DOC.
|
||||
);
|
||||
#endif
|
||||
|
||||
|
||||
#if 0
|
||||
/// Creates a (partial) incident field vector in the symmetry-adapted basis, given a function that evaluates the field expansions at points.
|
||||
|
|
|
@ -429,12 +429,12 @@ qpms_errno_t qpms_read_scuff_tmatrix(
|
|||
switch(PAlpha) {
|
||||
case 0: TAlpha = QPMS_VSWF_MAGNETIC; break;
|
||||
case 1: TAlpha = QPMS_VSWF_ELECTRIC; break;
|
||||
default: assert(0);
|
||||
default: QPMS_WTF;
|
||||
}
|
||||
switch(PBeta) {
|
||||
case 0: TBeta = QPMS_VSWF_MAGNETIC; break;
|
||||
case 1: TBeta = QPMS_VSWF_ELECTRIC; break;
|
||||
default: assert(0);
|
||||
default: QPMS_WTF;
|
||||
}
|
||||
qpms_uvswfi_t srcui = qpms_tmn2uvswfi(TAlpha, MAlpha, LAlpha),
|
||||
destui = qpms_tmn2uvswfi(TBeta, MBeta, LBeta);
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <gsl/gsl_sf_coupling.h>
|
||||
#include "qpms_error.h"
|
||||
#include "normalisation.h"
|
||||
#include "translations_inlines.h"
|
||||
|
||||
/*
|
||||
* Define macros with additional factors that "should not be there" according
|
||||
|
@ -289,8 +290,8 @@ void qpms_trans_calculator_free(qpms_trans_calculator *c) {
|
|||
free(c->A_multipliers);
|
||||
free(c->B_multipliers[0]);
|
||||
free(c->B_multipliers);
|
||||
#ifdef LATTICESUMS
|
||||
qpms_ewald3_constants_free(e3c);
|
||||
#ifdef LATTICESUMS32
|
||||
qpms_ewald3_constants_free(c->e3c);
|
||||
#endif
|
||||
free(c->legendre0);
|
||||
free(c);
|
||||
|
@ -509,8 +510,7 @@ qpms_trans_calculator
|
|||
}
|
||||
|
||||
static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_trans_calculator *c,
|
||||
int m, int n, int mu, int nu, csph_t kdlj,
|
||||
bool r_ge_d, qpms_bessel_t J,
|
||||
int m, int n, int mu, int nu, double kdlj_phi,
|
||||
const complex double *bessel_buf, const double *legendre_buf) {
|
||||
TROPS_ONLY_EIMF_IMPLEMENTED(c->normalisation);
|
||||
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
|
||||
|
@ -523,9 +523,9 @@ static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_t
|
|||
double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))];
|
||||
complex double zp = bessel_buf[p];
|
||||
complex double multiplier = c->A_multipliers[i][q];
|
||||
ckahanadd(&sum, &kahanc, Pp * zp * multiplier);
|
||||
ckahanadd(&sum, &kahanc, Pp * zp * multiplier);
|
||||
}
|
||||
complex double eimf = cexp(I*(mu-m)*kdlj.phi);
|
||||
complex double eimf = cexp(I*(mu-m)*kdlj_phi);
|
||||
return sum * eimf;
|
||||
}
|
||||
|
||||
|
@ -545,12 +545,11 @@ complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c,
|
|||
costheta,csphase,legendre_buf));
|
||||
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf));
|
||||
return qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
|
||||
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
|
||||
kdlj.phi,bessel_buf,legendre_buf);
|
||||
}
|
||||
|
||||
static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_trans_calculator *c,
|
||||
int m, int n, int mu, int nu, csph_t kdlj,
|
||||
bool r_ge_d, qpms_bessel_t J,
|
||||
int m, int n, int mu, int nu, double kdlj_phi,
|
||||
const complex double *bessel_buf, const double *legendre_buf) {
|
||||
TROPS_ONLY_EIMF_IMPLEMENTED(c->normalisation);
|
||||
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
|
||||
|
@ -565,7 +564,7 @@ static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_t
|
|||
complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET];
|
||||
ckahanadd(&sum, &kahanc, Pp_ * zp_ * multiplier);
|
||||
}
|
||||
complex double eimf = cexp(I*(mu-m)*kdlj.phi);
|
||||
complex double eimf = cexp(I*(mu-m)*kdlj_phi);
|
||||
return sum * eimf;
|
||||
}
|
||||
|
||||
|
@ -584,7 +583,7 @@ complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c,
|
|||
costheta,csphase,legendre_buf));
|
||||
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf));
|
||||
return qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
|
||||
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
|
||||
kdlj.phi,bessel_buf,legendre_buf);
|
||||
}
|
||||
|
||||
int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c,
|
||||
|
@ -604,12 +603,38 @@ int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c,
|
|||
costheta,-1,legendre_buf));
|
||||
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, n+nu+1, kdlj.r, bessel_buf));
|
||||
*Adest = qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
|
||||
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
|
||||
kdlj.phi,bessel_buf,legendre_buf);
|
||||
*Bdest = qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
|
||||
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
|
||||
kdlj.phi,bessel_buf,legendre_buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int qpms_trans_calculator_get_AB_arrays_precalcbuf(const qpms_trans_calculator *c,
|
||||
qpms_y_t lMax, complex double *Adest, complex double *Bdest,
|
||||
size_t deststride, size_t srcstride, double kdlj_phi,
|
||||
const complex double *bessel_buf, const double *legendre_buf) {
|
||||
if(lMax == 0) lMax = c->lMax;
|
||||
QPMS_ASSERT(lMax <= c->lMax);
|
||||
size_t desti = 0, srci = 0;
|
||||
for (int n = 1; n <= lMax; ++n) for (int m = -n; m <= n; ++m) {
|
||||
for (int nu = 1; nu <= lMax; ++nu) for (int mu = -nu; mu <= nu; ++mu) {
|
||||
#ifndef NDEBUG
|
||||
size_t assertindex = qpms_trans_calculator_index_mnmunu(c,m,n,mu,nu);
|
||||
#endif
|
||||
assert(assertindex == desti*c->nelem + srci);
|
||||
*(Adest + deststride * desti + srcstride * srci) =
|
||||
qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
|
||||
kdlj_phi, bessel_buf, legendre_buf);
|
||||
*(Bdest + deststride * desti + srcstride * srci) =
|
||||
qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
|
||||
kdlj_phi,bessel_buf,legendre_buf);
|
||||
++srci;
|
||||
}
|
||||
++desti;
|
||||
srci = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator *c,
|
||||
complex double *Adest, complex double *Bdest,
|
||||
|
@ -631,26 +656,9 @@ int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator *c,
|
|||
QPMS_ENSURE_SUCCESS(gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,2*c->lMax+1,
|
||||
costheta,-1,legendre_buf));
|
||||
QPMS_ENSURE_SUCCESS(qpms_sph_bessel_fill(J, 2*c->lMax+1, kdlj.r, bessel_buf));
|
||||
size_t desti = 0, srci = 0;
|
||||
for (int n = 1; n <= c->lMax; ++n) for (int m = -n; m <= n; ++m) {
|
||||
for (int nu = 1; nu <= c->lMax; ++nu) for (int mu = -nu; mu <= nu; ++mu) {
|
||||
#ifndef NDEBUG
|
||||
size_t assertindex = qpms_trans_calculator_index_mnmunu(c,m,n,mu,nu);
|
||||
#endif
|
||||
assert(assertindex == desti*c->nelem + srci);
|
||||
*(Adest + deststride * desti + srcstride * srci) =
|
||||
qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
|
||||
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
|
||||
*(Bdest + deststride * desti + srcstride * srci) =
|
||||
qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
|
||||
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
|
||||
++srci;
|
||||
}
|
||||
++desti;
|
||||
srci = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return qpms_trans_calculator_get_AB_arrays_precalcbuf(c, c->lMax, Adest, Bdest,
|
||||
deststride, srcstride, kdlj.phi, bessel_buf, legendre_buf);
|
||||
}
|
||||
|
||||
complex double qpms_trans_calculator_get_A(const qpms_trans_calculator *c,
|
||||
|
@ -712,20 +720,8 @@ qpms_errno_t qpms_trans_calculator_get_trans_array(const qpms_trans_calculator *
|
|||
qpms_errno_t retval = qpms_trans_calculator_get_AB_arrays(c,
|
||||
A[0], B[0], c->nelem, 1,
|
||||
kdlj, r_ge_d, J);
|
||||
for (size_t desti = 0; desti < destspec->n; ++desti) {
|
||||
qpms_y_t desty; qpms_vswf_type_t destt;
|
||||
if(QPMS_SUCCESS != qpms_uvswfi2ty(destspec->ilist[desti], &destt, &desty))
|
||||
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,
|
||||
"Invalid u. vswf index %llx.", destspec->ilist[desti]);
|
||||
for (size_t srci = 0; srci < srcspec->n; ++srci){
|
||||
qpms_y_t srcy; qpms_vswf_type_t srct;
|
||||
if(QPMS_SUCCESS != qpms_uvswfi2ty(srcspec->ilist[srci], &srct, &srcy))
|
||||
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,
|
||||
"Invalid u. vswf index %llx.", srcspec->ilist[srci]);
|
||||
target[srci * srcstride + desti * deststride]
|
||||
= (srct == destt) ? A[desty][srcy] : B[desty][srcy];
|
||||
}
|
||||
}
|
||||
qpms_trans_array_from_AB(target, destspec, deststride, srcspec, srcstride,
|
||||
A[0], B[0], c->lMax);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
@ -762,6 +758,7 @@ qpms_errno_t qpms_trans_calculator_get_trans_array_e32_e(const qpms_trans_calcul
|
|||
A, Aerr, B, Berr, ldAB, 1,
|
||||
eta, k, b1, b2, beta, particle_shift, maxR, maxK, parts);
|
||||
for (size_t desti = 0; desti < destspec->n; ++desti) {
|
||||
// TODO replace with (modified) qpms_trans_array_from_AB()
|
||||
qpms_y_t desty; qpms_vswf_type_t destt;
|
||||
if(QPMS_SUCCESS != qpms_uvswfi2ty(destspec->ilist[desti], &destt, &desty))
|
||||
qpms_pr_error_at_flf(__FILE__,__LINE__,__func__,
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#ifndef TRANSLATIONS_INLINES_H
|
||||
#define TRANSLATIONS_INLINES_H
|
||||
#include "translations.h"
|
||||
#include "indexing.h"
|
||||
|
||||
/// Rearranges the default-ordered "A,B" array elements into "bspec"-defined matrix.
|
||||
// TODO DOC
|
||||
static inline void qpms_trans_array_from_AB(
|
||||
complex double *t,
|
||||
const qpms_vswf_set_spec_t *const t_destspec,
|
||||
const size_t t_deststride,
|
||||
const qpms_vswf_set_spec_t *const t_srcspec,
|
||||
const size_t t_srcstride,
|
||||
const complex double *const A, const complex double *const B,
|
||||
/// A and B matrices' lMax.
|
||||
/** This also determines their size and stride: they are assumed to
|
||||
* be square matrices of size `nelem * nelem` where
|
||||
* `nelem = qpms_lMax2nelem(lMax_AB)`
|
||||
*/
|
||||
const qpms_l_t lMax_AB
|
||||
) {
|
||||
QPMS_PARANOID_ASSERT(lMax_AB >= t_srcspec->lMax && lMax_AB >= t_destspec->lMax);
|
||||
const qpms_y_t nelem_AB = qpms_lMax2nelem(lMax_AB);
|
||||
for (size_t desti = 0; desti < t_destspec->n; ++desti) {
|
||||
qpms_y_t desty; qpms_vswf_type_t destt;
|
||||
QPMS_ENSURE_SUCCESS_M(qpms_uvswfi2ty(t_destspec->ilist[desti], &destt, &desty),
|
||||
"Invalid u. vswf index %llx.", t_destspec->ilist[desti]);
|
||||
for (size_t srci = 0; srci < t_srcspec->n; ++srci){
|
||||
qpms_y_t srcy; qpms_vswf_type_t srct;
|
||||
QPMS_ENSURE_SUCCESS_M(qpms_uvswfi2ty(t_srcspec->ilist[srci], &srct, &srcy),
|
||||
"Invalid u. vswf index %llx.", t_srcspec->ilist[srci]); t[srci * t_srcstride + desti * t_deststride]
|
||||
= (srct == destt) ? A[desty*nelem_AB + srcy] : B[desty*nelem_AB + srcy];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int qpms_trans_calculator_get_AB_arrays_precalcbuf(const qpms_trans_calculator *c,
|
||||
qpms_y_t lMax, complex double *Adest, complex double *Bdest,
|
||||
size_t deststride, size_t srcstride, double kdlj_phi,
|
||||
const complex double *bessel_buf, const double *legendre_buf);
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue