Whole translation matrix: it compiles; untested

Former-commit-id: 1465a1385a2f318148d7b3a21cbbf2c0391a13df
This commit is contained in:
Marek Nečada 2017-05-02 00:18:01 +03:00
parent e2a2100b57
commit 2d88a1ef0d
4 changed files with 456 additions and 182 deletions

View File

@ -166,21 +166,29 @@ cdef extern from "translations.h":
cdouble qpms_trans_single_B_Taylor_ext(int m, int n, int mu, int nu, cdouble qpms_trans_single_B_Taylor_ext(int m, int n, int mu, int nu,
double r, double th, double ph, int r_ge_d, int J) nogil double r, double th, double ph, int r_ge_d, int J) nogil
struct qpms_trans_calculator: struct qpms_trans_calculator:
pass int lMax
size_t nelem
cdouble** A_multipliers
cdouble** B_multipliers
enum qpms_normalization_t: enum qpms_normalization_t:
pass pass
qpms_trans_calculator* qpms_trans_calculator_init(int lMax, int nt) # should be qpms_normalization_t qpms_trans_calculator* qpms_trans_calculator_init(int lMax, int nt) # should be qpms_normalization_t
void qpms_trans_calculator_free(qpms_trans_calculator* c) void qpms_trans_calculator_free(qpms_trans_calculator* c)
cdouble qpms_trans_calculator_get_A_ext(const qpms_trans_calculator* c, cdouble qpms_trans_calculator_get_A_ext(const qpms_trans_calculator* c,
int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi, int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi,
int r_ge_d, int J) int r_ge_d, int J) nogil
cdouble qpms_trans_calculator_get_B_ext(const qpms_trans_calculator* c, cdouble qpms_trans_calculator_get_B_ext(const qpms_trans_calculator* c,
int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi, int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi,
int r_ge_d, int J) int r_ge_d, int J) nogil
int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator* c, int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator* c,
cdouble *Adest, cdouble *Bdest, cdouble *Adest, cdouble *Bdest,
int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi, int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi,
int r_ge_d, int J) int r_ge_d, int J) nogil
int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c,
cdouble *Adest, cdouble *Bdest,
size_t deststride, size_t srcstride,
double kdlj_r, double kdlj_theta, double kdlj_phi,
int r_ge_d, int J) nogil
@ -436,6 +444,20 @@ trans_calculator_get_AB_loop_funcs[0] = trans_calculator_loop_E_C_DD_iiiidddii_A
cdef void *trans_calculator_get_AB_elementwise_funcs[1] cdef void *trans_calculator_get_AB_elementwise_funcs[1]
trans_calculator_get_AB_elementwise_funcs[0] = <void *>qpms_trans_calculator_get_AB_p_ext trans_calculator_get_AB_elementwise_funcs[0] = <void *>qpms_trans_calculator_get_AB_p_ext
'''
cdef extern from "numpy/ndarrayobject.h":
struct PyArrayInterface:
int itemsize
np.npy_uintp *shape
np.npy_uintp *strides
void *data
'''
from libc.stdlib cimport malloc, free, calloc, abort
cdef class trans_calculator: cdef class trans_calculator:
cdef qpms_trans_calculator* c cdef qpms_trans_calculator* c
cdef trans_calculator_get_X_data_t get_A_data[1] cdef trans_calculator_get_X_data_t get_A_data[1]
@ -509,6 +531,178 @@ cdef class trans_calculator:
qpms_trans_calculator_free(self.c) qpms_trans_calculator_free(self.c)
# TODO Reference counts to get_A, get_B, get_AB? # TODO Reference counts to get_A, get_B, get_AB?
def lMax(self):
return self.c[0].lMax
def nelem(self):
return self.c[0].nelem
def get_AB_arrays(self, r, theta, phi, r_ge_d, int J,
destaxis=None, srcaxis=None, expand=True):
"""
Returns arrays of translation coefficients, inserting two new nelem-sized axes
(corresponding to the destination and source axes of the translation matrix,
respectively).
By default (expand==True), it inserts the new axes. or it can be provided with
the resulting shape (with the corresponding axes dimensions equal to one).
The provided axes positions are for the resulting array.
If none axis positions are provided, destaxis and srcaxis will be the second-to-last
and last, respectively.
"""
# TODO CHECK (and try to cast) INPUT ARRAY TYPES (now is done)
# BIG FIXME: make skalars valid arguments, now r, theta, phi, r_ge_d have to be ndarrays
cdef:
int daxis, saxis, smallaxis, bigaxis, reslen, longest_axis, i, j, d, ax, errval
np.npy_intp sstride, dstride, longi, longstride
int *local_indices
int *innerloop_shape
char *r_p
char *theta_p
char *phi_p
char *r_ge_d_p
char *a_p
char *b_p
# Process the array shapes
baseshape = np.broadcast.shape(r,theta,phi,r_ge_d)
if not expand:
reslen = len(baseshape)
if reslen < 2:
raise ValueError('Translation matrix arrays must have at least 2 dimensions!')
daxis = (reslen-2) if destaxis is None else destaxis
saxis = (reslen-1) if srcaxis is None else srcaxis
if daxis < 0:
daxis = reslen + daxis
if saxis < 0:
saxis = reslen + saxis
if daxis < 0 or saxis < 0 or daxis >= reslen or saxis >= reslen or daxis == saxis:
raise ValueError('invalid axes provided (destaxis = %d, srcaxis = %d, # of axes: %d'
% (daxis, saxis, reslen))
if baseshape[daxis] != 1 or baseshape[saxis] != 1:
raise ValueError('dimension mismatch (input argument dimensions have to be 1 both at'
'destaxis (==%d) and srcaxis (==%d) but are %d and %d' %
(daxis, saxis, baseshape[daxis], baseshape[saxis]))
resultshape = list(baseshape)
else:
reslen = len(baseshape)+2
if destaxis is None:
daxis = -2
if srcaxis is None:
saxis = -1
if daxis < 0:
daxis = reslen + daxis
if saxis < 0:
saxis = reslen + saxis
if daxis < 0 or saxis < 0 or daxis >= reslen or saxis >= reslen or daxis == saxis:
raise ValueError('invalid axes provided') # TODO better error formulation
resultshape = list(baseshape)
if daxis > saxis:
smallaxis = saxis
bigaxis = daxis
else:
smallaxis = daxis
bixagis = saxis
resultshape.insert(smallaxis,1)
resultshape.insert(bigaxis,1)
r = np.expand_dims(np.expand_dims(r.astype(np.float_, copy=False), smallaxis), bigaxis)
theta = np.expand_dims(np.expand_dims(theta.astype(np.float_, copy=False), smallaxis), bigaxis)
phi = np.expand_dims(np.expand_dims(phi.astype(np.float_, copy=False), smallaxis), bigaxis)
r_ge_d = np.expand_dims(np.expand_dims(r_ge_d(np.bool_, copy=False), smallaxis), bigaxis)
longestaxis = 0
# FIxME: the whole thing with longest_axis will fail if none is longer than 1
for i in range(reslen):
if resultshape[i] > resultshape[longest_axis]:
longestaxis = i
innerloop_shape = <int *> malloc(reslen * sizeof(int))
if innerloop_shape == NULL:
abort()
for i in range(reslen):
innerloop_shape[i] = resultshape[i]
innerloop_shape[longest_axis] = 1 # longest axis will be iterated in the outer (parallelized) loop. Therefore, longest axis, together with saxis and daxis, will not be iterated in the inner loop
resultshape[daxis] = self.c[0].nelem
resultshape[saxis] = self.c[0].nelem
cdef np.ndarray r_c = np.broadcast_to(r,resultshape)
cdef np.ndarray theta_c = np.broadcast_to(theta,resultshape)
cdef np.ndarray phi_c = np.broadcast_to(phi,resultshape)
cdef np.ndarray r_ge_d_c = np.broadcast_to(r_ge_d, resultshape)
cdef np.ndarray a = np.empty(resultshape, dtype=complex)
cdef np.ndarray b = np.empty(resultshape, dtype=complex)
dstride = a.strides[daxis]
sstride = a.strides[saxis]
longstride = a.strides[longest_axis]
# TODO write this in C (as a function) and parallelize there
with nogil: #, parallel(): # FIXME rewrite this part in C
local_indices = <int *> calloc(reslen, sizeof(int))
if local_indices == NULL: abort()
for longi in range(a.shape[longest_axis]): # outer loop (to be parallelized)
# this might be done also in the inverse order, but this is more 'c-contiguous' way of incrementing the indices
ax = reslen - 1
while ax >= 0:
# calculate the correct index/pointer for each array used. This can be further optimized from O(reslen * total size of the result array) to O(total size of the result array), but fick that now
r_p = r_c.data + r_c.strides[longest_axis] * longi
theta_p = theta_c.data + theta_c.strides[longest_axis] * longi
phi_p = phi_c.data + phi_c.strides[longest_axis] * longi
r_ge_d_p = r_ge_d_c.data + r_ge_d_c.strides[longest_axis] * longi
a_p = a.data + a.strides[longest_axis] * longi
b_p = b.data + b.strides[longest_axis] * longi
for i in range(reslen):
if i == longest_axis: continue
r_p += r_c.strides[i] * local_indices[i]
theta_p += theta_c.strides[i] * local_indices[i]
phi_p += phi_c.strides[i] * local_indices[i]
r_ge_d_p += r_ge_d_c.strides[i] * local_indices[i]
if i == saxis or i == daxis: continue
a_p += a.strides[i] * local_indices[i]
b_p += b.strides[i] * local_indices[i]
# perform the actual task here
errval = qpms_trans_calculator_get_AB_arrays_ext(self.c,
<cdouble*>a_p, <cdouble*>b_p,
dstride // sizeof(cdouble), sstride // sizeof(cdouble),
(<double*>r_p)[0], (<double*>theta_p)[0], (<double*>phi_p)[0], <int>((<np.npy_bool*>r_ge_d_p)[0]), J)
if errval: abort()
# increment the last index 'digit' (ax is now reslen-1; we don't have do-while loop in python)
local_indices[ax] += 1
while (local_indices[ax] == innerloop_shape[ax] and ax >= 0): # overflow to the next digit but stop when we reach below the last one
local_indices[ax] = 0
ax -= 1
local_indices[ax] += 1
if ax >= 0: # did not overflow, get back to the lowest index
ax = reslen - 1
for ax in range(a.ndim):
if (ax == longest_axis or ax == daxis or ax == saxis):
continue
free(local_indices)
free(innerloop_shape)
# TODO make possible to access the attributes (to show normalization etc) # TODO make possible to access the attributes (to show normalization etc)

View File

@ -48,6 +48,12 @@ int main() {
cabs(B - B2)/((cabs(B2) < cabs(B)) ? cabs(B2) : cabs(B)) cabs(B - B2)/((cabs(B2) < cabs(B)) ? cabs(B2) : cabs(B))
); );
} }
complex double A,B;
// Test of zero R
sph_t kdlj = {0, 1, 2};
int m = -1, n = 1, mu = -1, nu = 1;
qpms_trans_calculator_get_AB_p(c,&A,&B,m,n,mu,nu,kdlj,false,3);
printf("A = %.6e+%.6ej, B = %.6e+%.6ej\n", creal(A),cimag(A),creal(B),cimag(B));
qpms_trans_calculator_free(c); qpms_trans_calculator_free(c);
} }

View File

@ -64,102 +64,102 @@ int qpms_sph_bessel_array(qpms_bessel_t typ, int lmax, double x, complex double
complex double qpms_trans_single_A_Taylor(int m, int n, int mu, int nu, sph_t kdlj, complex double qpms_trans_single_A_Taylor(int m, int n, int mu, int nu, sph_t kdlj,
bool r_ge_d, qpms_bessel_t J) { // TODO make J enum bool r_ge_d, qpms_bessel_t J) { // TODO make J enum
if(r_ge_d) J = QPMS_BESSEL_REGULAR; if(r_ge_d) J = QPMS_BESSEL_REGULAR;
double costheta = cos(kdlj.theta); double costheta = cos(kdlj.theta);
int qmax = gaunt_q_max(-m,n,mu,nu); // nemá tu být +m? int qmax = gaunt_q_max(-m,n,mu,nu); // nemá tu být +m?
// N.B. -m !!!!!! // N.B. -m !!!!!!
double a1q[qmax+1]; double a1q[qmax+1];
int err; int err;
gaunt_xu(-m,n,mu,nu,qmax,a1q,&err); gaunt_xu(-m,n,mu,nu,qmax,a1q,&err);
double a1q0 = a1q[0]; double a1q0 = a1q[0];
if (err) abort(); if (err) abort();
double leg[gsl_sf_legendre_array_n(n+nu)]; double leg[gsl_sf_legendre_array_n(n+nu)];
if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,leg)) abort(); if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,leg)) abort();
complex double bes[n+nu+1]; complex double bes[n+nu+1];
if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bes)) abort(); if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bes)) abort();
complex double sum = 0; complex double sum = 0;
for(int q = 0; q <= qmax; ++q) { for(int q = 0; q <= qmax; ++q) {
int p = n+nu-2*q; int p = n+nu-2*q;
int Pp_order = mu-m; int Pp_order = mu-m;
//if(p < abs(Pp_order)) continue; // FIXME raději nastav lépe meze //if(p < abs(Pp_order)) continue; // FIXME raději nastav lépe meze
assert(p >= abs(Pp_order)); assert(p >= abs(Pp_order));
double a1q_n = a1q[q] / a1q0; double a1q_n = a1q[q] / a1q0;
double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))]; double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))];
if (Pp_order < 0) Pp *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); if (Pp_order < 0) Pp *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order));
complex double zp = bes[p]; complex double zp = bes[p];
complex double summandq = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n * zp * Pp; complex double summandq = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n * zp * Pp;
sum += summandq; sum += summandq;
} }
double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
+lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1) +lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1)
+lgamma(n+nu+1) - lgamma(2*(n+nu)+1)); +lgamma(n+nu+1) - lgamma(2*(n+nu)+1));
complex double presum = exp(exponent); complex double presum = exp(exponent);
presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n) / (4*n); presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n) / (4*n);
complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp( complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp(
lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1))); lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)));
return (presum / prenormratio) * sum; return (presum / prenormratio) * sum;
} }
complex double qpms_trans_single_B_Taylor(int m, int n, int mu, int nu, sph_t kdlj, complex double qpms_trans_single_B_Taylor(int m, int n, int mu, int nu, sph_t kdlj,
bool r_ge_d, qpms_bessel_t J) { bool r_ge_d, qpms_bessel_t J) {
if(r_ge_d) J = QPMS_BESSEL_REGULAR; if(r_ge_d) J = QPMS_BESSEL_REGULAR;
double costheta = cos(kdlj.theta); double costheta = cos(kdlj.theta);
int q2max = gaunt_q_max(-m-1,n+1,mu+1,nu); int q2max = gaunt_q_max(-m-1,n+1,mu+1,nu);
int Qmax = gaunt_q_max(-m,n+1,mu,nu); int Qmax = gaunt_q_max(-m,n+1,mu,nu);
double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0; double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0;
int err; int err;
if (mu == nu) { if (mu == nu) {
for (int q = 0; q <= q2max; ++q) for (int q = 0; q <= q2max; ++q)
a2q[q] = 0; a2q[q] = 0;
a2q0 = 1; a2q0 = 1;
} }
else { else {
gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort(); gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort();
a2q0 = a2q[0]; a2q0 = a2q[0];
} }
gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort(); gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort();
a3q0 = a3q[0]; a3q0 = a3q[0];
double leg[gsl_sf_legendre_array_n(n+nu+1)]; double leg[gsl_sf_legendre_array_n(n+nu+1)];
if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,costheta,-1,leg)) abort(); if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,costheta,-1,leg)) abort();
complex double bes[n+nu+2]; complex double bes[n+nu+2];
if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bes)) abort(); if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bes)) abort();
complex double sum = 0; complex double sum = 0;
for (int q = 0; q <= Qmax; ++q) { for (int q = 0; q <= Qmax; ++q) {
int p = n+nu-2*q; int p = n+nu-2*q;
double a2q_n = a2q[q]/a2q0; double a2q_n = a2q[q]/a2q0;
double a3q_n = a3q[q]/a3q0; double a3q_n = a3q[q]/a3q0;
complex double zp_ = bes[p+1]; complex double zp_ = bes[p+1];
int Pp_order_ = mu-m; int Pp_order_ = mu-m;
//if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze //if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze
assert(p+1 >= abs(Pp_order_)); assert(p+1 >= abs(Pp_order_));
double Pp_ = leg[gsl_sf_legendre_array_index(p+1, abs(Pp_order_))]; double Pp_ = leg[gsl_sf_legendre_array_index(p+1, abs(Pp_order_))];
if (Pp_order_ < 0) Pp_ *= min1pow(mu-m) * exp(lgamma(1+1+p+Pp_order_)-lgamma(1+1+p-Pp_order_)); if (Pp_order_ < 0) Pp_ *= min1pow(mu-m) * exp(lgamma(1+1+p+Pp_order_)-lgamma(1+1+p-Pp_order_));
complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n
-(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n) -(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n)
*min1pow(q) * zp_ * Pp_); *min1pow(q) * zp_ * Pp_);
sum += summandq; sum += summandq;
} }
double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
+lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1) +lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1)
+lgamma(n+nu+2) - lgamma(2*(n+nu)+3)); +lgamma(n+nu+2) - lgamma(2*(n+nu)+3));
complex double presum = exp(exponent); complex double presum = exp(exponent);
presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n+1) / ( presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n+1) / (
(4*n)*(n+1)*(n+m+1)); (4*n)*(n+1)*(n+m+1));
// Taylor normalisation v2, proven to be equivalent // Taylor normalisation v2, proven to be equivalent
complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp( complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp(
lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1))); lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)));
return (presum / prenormratio) * sum; return (presum / prenormratio) * sum;
} }
complex double qpms_trans_single_A_Taylor_ext(int m, int n, int mu, int nu, complex double qpms_trans_single_A_Taylor_ext(int m, int n, int mu, int nu,
@ -223,9 +223,9 @@ static void qpms_trans_calculator_multipliers_A_Taylor(
double a1q0 = a1q[0]; double a1q0 = a1q[0];
double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
+lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1) +lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1)
+lgamma(n+nu+1) - lgamma(2*(n+nu)+1)) - 0.5*( // ex-prenormratio +lgamma(n+nu+1) - lgamma(2*(n+nu)+1)) - 0.5*( // ex-prenormratio
lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)); lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1));
double presum = exp(exponent); double presum = exp(exponent);
presum *= min1pow(m+n) * sqrt((2.*n+1)/(2.*nu+1)) / (4*n); presum *= min1pow(m+n) * sqrt((2.*n+1)/(2.*nu+1)) / (4*n);
@ -236,7 +236,7 @@ static void qpms_trans_calculator_multipliers_A_Taylor(
double a1q_n = a1q[q] / a1q0; double a1q_n = a1q[q] / a1q0;
// Assuming non_normalized legendre polynomials! // Assuming non_normalized legendre polynomials!
double Ppfac = (Pp_order >= 0) ? 1 : double Ppfac = (Pp_order >= 0) ? 1 :
min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order));
double summandfac = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n; double summandfac = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n;
dest[q] = presum * summandfac * Ppfac; dest[q] = presum * summandfac * Ppfac;
// FIXME I might not need complex here // FIXME I might not need complex here
@ -260,7 +260,7 @@ static void qpms_trans_calculator_multipliers_A_Taylor(
double a1q_n = a1q[q] / a1q0; double a1q_n = a1q[q] / a1q0;
//double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))]; //double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))];
//complex double zp = bes[p]; //complex double zp = bes[p];
dest[q] = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n /* * zp * Pp*/; dest[q] = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n /* * zp * Pp*/;
if (Pp_order < 0) dest[q] *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); if (Pp_order < 0) dest[q] *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order));
//sum += summandq; //sum += summandq;
} }
@ -290,25 +290,25 @@ static void qpms_trans_calculator_multipliers_B_Taylor(
double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0; double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0;
int err; int err;
if (mu == nu) { if (mu == nu) {
for (int q = 0; q <= q2max; ++q) for (int q = 0; q <= q2max; ++q)
a2q[q] = 0; a2q[q] = 0;
a2q0 = 1; a2q0 = 1;
} }
else { else {
gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort(); gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort();
a2q0 = a2q[0]; a2q0 = a2q[0];
} }
gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort(); gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort();
a3q0 = a3q[0]; a3q0 = a3q[0];
double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
+lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1) +lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1)
+lgamma(n+nu+2) - lgamma(2*(n+nu)+3)) - 0.5 * ( +lgamma(n+nu+2) - lgamma(2*(n+nu)+3)) - 0.5 * (
lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1) lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)
-lgamma(nu+mu+1)); -lgamma(nu+mu+1));
complex double presum = exp(exponent); complex double presum = exp(exponent);
presum *= I * (min1pow(m+n) *sqrt((2.*n+1)/(2.*nu+1)) / ( presum *= I * (min1pow(m+n) *sqrt((2.*n+1)/(2.*nu+1)) / (
(4*n)*(n+1)*(n+m+1))); (4*n)*(n+1)*(n+m+1)));
for (int q = 0; q <= Qmax; ++q) { for (int q = 0; q <= Qmax; ++q) {
int p = n+nu-2*q; int p = n+nu-2*q;
@ -367,8 +367,8 @@ qpms_trans_calculator
qpms_y2mn_p(y,&m,&n); qpms_y2mn_p(y,&m,&n);
qpms_y2mn_p(yu,&mu,&nu); qpms_y2mn_p(yu,&mu,&nu);
qmaxsum += 1 + ( qmaxsum += 1 + (
qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)]
= gaunt_q_max(-m,n,mu,nu)); = gaunt_q_max(-m,n,mu,nu));
} }
c->A_multipliers[0] = malloc(qmaxsum * sizeof(complex double)); c->A_multipliers[0] = malloc(qmaxsum * sizeof(complex double));
// calculate multiplier beginnings // calculate multiplier beginnings
@ -382,7 +382,7 @@ qpms_trans_calculator
qpms_y2mn_p(y, &m, &n); qpms_y2mn_p(y, &m, &n);
qpms_y2mn_p(yu, &mu, &nu); qpms_y2mn_p(yu, &mu, &nu);
qpms_trans_calculator_multipliers_A(normalization, qpms_trans_calculator_multipliers_A(normalization,
c->A_multipliers[i], m, n, mu, nu, qmaxes[i]); c->A_multipliers[i], m, n, mu, nu, qmaxes[i]);
} }
qmaxsum = 0; qmaxsum = 0;
@ -392,8 +392,8 @@ qpms_trans_calculator
qpms_y2mn_p(y,&m,&n); qpms_y2mn_p(y,&m,&n);
qpms_y2mn_p(yu,&mu,&nu); qpms_y2mn_p(yu,&mu,&nu);
qmaxsum += 1 + ( qmaxsum += 1 + (
qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)]
= gaunt_q_max(-m,n+1,mu,nu)); = gaunt_q_max(-m,n+1,mu,nu));
} }
c->B_multipliers[0] = malloc(qmaxsum * sizeof(complex double)); c->B_multipliers[0] = malloc(qmaxsum * sizeof(complex double));
// calculate multiplier beginnings // calculate multiplier beginnings
@ -407,41 +407,50 @@ qpms_trans_calculator
qpms_y2mn_p(y, &m, &n); qpms_y2mn_p(y, &m, &n);
qpms_y2mn_p(yu, &mu, &nu); qpms_y2mn_p(yu, &mu, &nu);
qpms_trans_calculator_multipliers_B(normalization, qpms_trans_calculator_multipliers_B(normalization,
c->B_multipliers[i], m, n, mu, nu, qmaxes[i]); c->B_multipliers[i], m, n, mu, nu, qmaxes[i]);
} }
free(qmaxes); free(qmaxes);
return c; return c;
} }
complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c, static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, sph_t kdlj, int m, int n, int mu, int nu, sph_t kdlj,
bool r_ge_d, qpms_bessel_t J, bool r_ge_d, qpms_bessel_t J,
complex double *bessel_buf, double *legendre_buf) { const complex double *bessel_buf, const double *legendre_buf) {
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1;
assert(qmax == gaunt_q_max(-m,n,mu,nu));
complex double sum = 0;
for(size_t q = 0; q <= qmax; ++q) {
int p = n+nu-2*q;
double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))];
complex double zp = bessel_buf[p];
complex double multiplier = c->A_multipliers[i][q];
sum += Pp * zp * multiplier;
}
complex double eimf = cexp(I*(mu-m)*kdlj.phi);
return sum * eimf;
}
complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, sph_t kdlj,
bool r_ge_d, qpms_bessel_t J,
complex double *bessel_buf, double *legendre_buf) {
// This functions gets preallocated memory for bessel and legendre functions, but computes them itself
if (r_ge_d) J = QPMS_BESSEL_REGULAR; if (r_ge_d) J = QPMS_BESSEL_REGULAR;
if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR)
// TODO warn? // TODO warn?
return NAN+I*NAN; return NAN+I*NAN;
switch(c->normalization) { switch(c->normalization) {
case QPMS_NORMALIZATION_TAYLOR: case QPMS_NORMALIZATION_TAYLOR:
{ {
double costheta = cos(kdlj.theta); double costheta = cos(kdlj.theta);
if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,legendre_buf)) abort(); if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,
if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bessel_buf)) abort(); costheta,-1,legendre_buf)) abort();
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bessel_buf)) abort();
size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1; return qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
assert(qmax == gaunt_q_max(-m,n,mu,nu)); kdlj,r_ge_d,J,bessel_buf,legendre_buf);
complex double sum = 0;
for(size_t q = 0; q <= qmax; ++q) {
int p = n+nu-2*q;
double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))];
complex double zp = bessel_buf[p];
complex double multiplier = c->A_multipliers[i][q];
sum += Pp * zp * multiplier;
}
complex double eimf = cexp(I*(mu-m)*kdlj.phi);
return sum * eimf;
} }
break; break;
default: default:
@ -450,10 +459,30 @@ complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c,
assert(0); assert(0);
} }
complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c, static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, sph_t kdlj, int m, int n, int mu, int nu, sph_t kdlj,
bool r_ge_d, qpms_bessel_t J, bool r_ge_d, qpms_bessel_t J,
complex double *bessel_buf, double *legendre_buf) { const complex double *bessel_buf, const double *legendre_buf) {
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
size_t qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1;
assert(qmax == gaunt_q_max(-m,n+1,mu,nu));
complex double sum = 0;
for(int q = 0; q <= qmax; ++q) {
int p = n+nu-2*q;
double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))];
complex double zp_ = bessel_buf[p+1];
complex double multiplier = c->B_multipliers[i][q];
sum += Pp_ * zp_ * multiplier;
}
complex double eimf = cexp(I*(mu-m)*kdlj.phi);
return sum * eimf;
}
complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, sph_t kdlj,
bool r_ge_d, qpms_bessel_t J,
complex double *bessel_buf, double *legendre_buf) {
// This functions gets preallocated memory for bessel and legendre functions, but computes them itself
if (r_ge_d) J = QPMS_BESSEL_REGULAR; if (r_ge_d) J = QPMS_BESSEL_REGULAR;
if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR)
// TODO warn? // TODO warn?
@ -463,22 +492,10 @@ complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c,
{ {
double costheta = cos(kdlj.theta); double costheta = cos(kdlj.theta);
if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1, if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,
costheta,-1,legendre_buf)) abort(); costheta,-1,legendre_buf)) abort();
if (qpms_sph_bessel_array(J, n+nu+2, kdlj.r, bessel_buf)) abort(); if (qpms_sph_bessel_array(J, n+nu+2, kdlj.r, bessel_buf)) abort();
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); return qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
size_t qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1; kdlj,r_ge_d,J,bessel_buf,legendre_buf);
assert(qmax == gaunt_q_max(-m,n+1,mu,nu));
complex double sum = 0;
for(int q = 0; q <= qmax; ++q) {
int p = n+nu-2*q;
double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))];
complex double eimf = cexp(I * kdlj.phi);
complex double zp_ = bessel_buf[p+1];
complex double multiplier = c->B_multipliers[i][q];
sum += Pp_ * zp_ * multiplier;
}
complex double eimf = cexp(I*(mu-m)*kdlj.phi);
return sum * eimf;
} }
break; break;
default: default:
@ -504,35 +521,12 @@ int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c,
{ {
double costheta = cos(kdlj.theta); double costheta = cos(kdlj.theta);
if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1, if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,
costheta,-1,legendre_buf)) abort(); costheta,-1,legendre_buf)) abort();
if (qpms_sph_bessel_array(J, n+nu+2, kdlj.r, bessel_buf)) abort(); if (qpms_sph_bessel_array(J, n+nu+2, kdlj.r, bessel_buf)) abort();
size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); *Adest = qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
size_t Qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1; kdlj,r_ge_d,J,bessel_buf,legendre_buf);
assert(Qmax == gaunt_q_max(-m,n+1,mu,nu)); *Bdest = qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
complex double Bsum = 0; kdlj,r_ge_d,J,bessel_buf,legendre_buf);
for(int q = 0; q <= Qmax; ++q) {
int p = n+nu-2*q;
double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))];
complex double eimf = cexp(I * kdlj.phi);
complex double zp_ = bessel_buf[p+1];
complex double multiplier = c->B_multipliers[i][q];
Bsum += Pp_ * zp_ * multiplier;
}
size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1;
assert(qmax == gaunt_q_max(-m,n,mu,nu));
complex double Asum = 0;
for(size_t q = 0; q <= qmax; ++q) {
int p = n+nu-2*q;
double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))];
complex double zp = bessel_buf[p];
complex double multiplier = c->A_multipliers[i][q];
Asum += Pp * zp * multiplier;
}
complex double eimf = cexp(I*(mu-m)*kdlj.phi);
*Adest = Asum * eimf;
*Bdest = Bsum * eimf;
return 0; return 0;
} }
break; break;
@ -543,6 +537,53 @@ int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c,
} }
int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator *c,
complex double *Adest, complex double *Bdest,
size_t deststride, size_t srcstride,
sph_t kdlj, bool r_ge_d, qpms_bessel_t J,
complex double *bessel_buf, double *legendre_buf) {
if (r_ge_d) J = QPMS_BESSEL_REGULAR;
if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) {
for (size_t i = 0; i < c->nelem; ++i)
for (size_t j = 0; j < c->nelem; ++j) {
*(Adest + i*srcstride + j*deststride) = NAN+I*NAN;
*(Bdest + i*srcstride + j*deststride) = NAN+I*NAN;
}
// TODO warn? different return value?
return 0;
}
switch(c->normalization) {
case QPMS_NORMALIZATION_TAYLOR:
{
double costheta = cos(kdlj.theta);
if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,2*c->lMax+1,
costheta,-1,legendre_buf)) abort();
if (qpms_sph_bessel_array(J, 2*c->lMax+2, kdlj.r, bessel_buf)) abort();
size_t desti = 0, srci = 0;
for (int n = 1; n <= c->nelem; ++n) for (int m = -n; m <= n; ++m) {
for (int nu = 1; nu <= c->nelem; ++nu) for (int mu = -nu; mu <= nu; ++mu) {
assert(qpms_trans_calculator_index_mnmunu(c,m,n,mu,nu) == desti*c->nelem + srci);
*(Adest + deststride * desti + srcstride * srci) =
qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
*(Bdest + deststride * desti + srcstride * srci) =
qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
kdlj,r_ge_d,J,bessel_buf,legendre_buf);
++srci;
}
++desti;
}
return 0;
}
break;
default:
abort();
}
assert(0);
}
complex double qpms_trans_calculator_get_A(const qpms_trans_calculator *c, complex double qpms_trans_calculator_get_A(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, sph_t kdlj, int m, int n, int mu, int nu, sph_t kdlj,
bool r_ge_d, qpms_bessel_t J) { bool r_ge_d, qpms_bessel_t J) {
@ -565,12 +606,26 @@ int qpms_trans_calculator_get_AB_p(const qpms_trans_calculator *c,
complex double *Adest, complex double *Bdest, complex double *Adest, complex double *Bdest,
int m, int n, int mu, int nu, sph_t kdlj, int m, int n, int mu, int nu, sph_t kdlj,
bool r_ge_d, qpms_bessel_t J) { bool r_ge_d, qpms_bessel_t J) {
double leg[gsl_sf_legendre_array_n(n+nu+1)]; double leg[gsl_sf_legendre_array_n(2*c->lMax+1)];
complex double bes[n+nu+2]; complex double bes[2*c->lMax+2];
return qpms_trans_calculator_get_AB_buf_p(c,Adest, Bdest,m,n,mu,nu,kdlj,r_ge_d,J, return qpms_trans_calculator_get_AB_buf_p(c,Adest, Bdest,m,n,mu,nu,kdlj,r_ge_d,J,
bes,leg); bes,leg);
} }
int qpms_trans_calculator_get_AB_arrays(const qpms_trans_calculator *c,
complex double *Adest, complex double *Bdest,
size_t deststride, size_t srcstride,
sph_t kdlj, bool r_ge_d, qpms_bessel_t J) {
double leg[gsl_sf_legendre_array_n(c->lMax+c->lMax+1)];
complex double bes[c->lMax+c->lMax+2];
return qpms_trans_calculator_get_AB_arrays_buf(c,
Adest, Bdest, deststride, srcstride,
kdlj, r_ge_d, J,
bes, leg);
}
complex double qpms_trans_calculator_get_A_ext(const qpms_trans_calculator *c, complex double qpms_trans_calculator_get_A_ext(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, int m, int n, int mu, int nu,
double kdlj_r, double kdlj_theta, double kdlj_phi, double kdlj_r, double kdlj_theta, double kdlj_phi,
@ -596,3 +651,13 @@ int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator *c,
return qpms_trans_calculator_get_AB_p(c,Adest,Bdest,m,n,mu,nu,kdlj,r_ge_d,J); return qpms_trans_calculator_get_AB_p(c,Adest,Bdest,m,n,mu,nu,kdlj,r_ge_d,J);
} }
int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c,
complex double *Adest, complex double *Bdest,
size_t deststride, size_t srcstride,
double kdlj_r, double kdlj_theta, double kdlj_phi,
int r_ge_d, int J) {
sph_t kdlj = {kdlj_r, kdlj_theta, kdlj_phi};
return qpms_trans_calculator_get_AB_arrays(c,Adest,Bdest,deststride,srcstride,
kdlj, r_ge_d, J);
}

View File

@ -60,7 +60,6 @@ complex double qpms_trans_calculator_get_B_ext(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, double kdlj_r, int m, int n, int mu, int nu, double kdlj_r,
double kdlj_th, double kdlj_phi, int r_ge_d, int J); double kdlj_th, double kdlj_phi, int r_ge_d, int J);
int qpms_trans_calculator_get_AB_p(const qpms_trans_calculator *c, int qpms_trans_calculator_get_AB_p(const qpms_trans_calculator *c,
complex double *Adest, complex double *Bdest, complex double *Adest, complex double *Bdest,
int m, int n, int mu, int nu, sph_t kdlj, int m, int n, int mu, int nu, sph_t kdlj,
@ -70,5 +69,15 @@ int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator *c,
int m, int n, int mu, int nu, double kdlj_r, int m, int n, int mu, int nu, double kdlj_r,
double kdlj_th, double kdlj_phi, int r_ge_d, int J); double kdlj_th, double kdlj_phi, int r_ge_d, int J);
int qpms_trans_calculator_get_AB_arrays(const qpms_trans_calculator *c,
complex double *Adest, complex double *Bdest,
size_t deststride, size_t srcstride,
sph_t kdlj, bool r_ge_d, qpms_bessel_t J);
int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c,
complex double *Adest, complex double *Bdest,
size_t deststride, size_t srcstride,
double kdlj_r, double kdlj_theta, double kdlj_phi,
int r_ge_d, int J);
#endif // QPMS_TRANSLATIONS_H #endif // QPMS_TRANSLATIONS_H