diff --git a/qpms/qpms_c.pyx b/qpms/qpms_c.pyx index 9eb23d7..f88da5b 100644 --- a/qpms/qpms_c.pyx +++ b/qpms/qpms_c.pyx @@ -166,21 +166,29 @@ cdef extern from "translations.h": cdouble qpms_trans_single_B_Taylor_ext(int m, int n, int mu, int nu, double r, double th, double ph, int r_ge_d, int J) nogil struct qpms_trans_calculator: - pass + int lMax + size_t nelem + cdouble** A_multipliers + cdouble** B_multipliers enum qpms_normalization_t: pass qpms_trans_calculator* qpms_trans_calculator_init(int lMax, int nt) # should be qpms_normalization_t void qpms_trans_calculator_free(qpms_trans_calculator* c) cdouble qpms_trans_calculator_get_A_ext(const qpms_trans_calculator* c, int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi, - int r_ge_d, int J) + int r_ge_d, int J) nogil cdouble qpms_trans_calculator_get_B_ext(const qpms_trans_calculator* c, int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi, - int r_ge_d, int J) + int r_ge_d, int J) nogil int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator* c, cdouble *Adest, cdouble *Bdest, int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi, - int r_ge_d, int J) + int r_ge_d, int J) nogil + int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c, + cdouble *Adest, cdouble *Bdest, + size_t deststride, size_t srcstride, + double kdlj_r, double kdlj_theta, double kdlj_phi, + int r_ge_d, int J) nogil @@ -436,6 +444,20 @@ trans_calculator_get_AB_loop_funcs[0] = trans_calculator_loop_E_C_DD_iiiidddii_A cdef void *trans_calculator_get_AB_elementwise_funcs[1] trans_calculator_get_AB_elementwise_funcs[0] = qpms_trans_calculator_get_AB_p_ext +''' +cdef extern from "numpy/ndarrayobject.h": + struct PyArrayInterface: + int itemsize + np.npy_uintp *shape + np.npy_uintp *strides + void *data +''' + + +from libc.stdlib cimport malloc, free, calloc, abort + + + cdef class trans_calculator: cdef qpms_trans_calculator* c cdef trans_calculator_get_X_data_t get_A_data[1] @@ -509,6 +531,178 @@ cdef class trans_calculator: qpms_trans_calculator_free(self.c) # TODO Reference counts to get_A, get_B, get_AB? + def lMax(self): + return self.c[0].lMax + + def nelem(self): + return self.c[0].nelem + + def get_AB_arrays(self, r, theta, phi, r_ge_d, int J, + destaxis=None, srcaxis=None, expand=True): + """ + Returns arrays of translation coefficients, inserting two new nelem-sized axes + (corresponding to the destination and source axes of the translation matrix, + respectively). + + By default (expand==True), it inserts the new axes. or it can be provided with + the resulting shape (with the corresponding axes dimensions equal to one). + The provided axes positions are for the resulting array. + + If none axis positions are provided, destaxis and srcaxis will be the second-to-last + and last, respectively. + """ + # TODO CHECK (and try to cast) INPUT ARRAY TYPES (now is done) + # BIG FIXME: make skalars valid arguments, now r, theta, phi, r_ge_d have to be ndarrays + cdef: + int daxis, saxis, smallaxis, bigaxis, reslen, longest_axis, i, j, d, ax, errval + np.npy_intp sstride, dstride, longi, longstride + int *local_indices + int *innerloop_shape + char *r_p + char *theta_p + char *phi_p + char *r_ge_d_p + char *a_p + char *b_p + # Process the array shapes + baseshape = np.broadcast.shape(r,theta,phi,r_ge_d) + if not expand: + reslen = len(baseshape) + if reslen < 2: + raise ValueError('Translation matrix arrays must have at least 2 dimensions!') + daxis = (reslen-2) if destaxis is None else destaxis + saxis = (reslen-1) if srcaxis is None else srcaxis + if daxis < 0: + daxis = reslen + daxis + if saxis < 0: + saxis = reslen + saxis + if daxis < 0 or saxis < 0 or daxis >= reslen or saxis >= reslen or daxis == saxis: + raise ValueError('invalid axes provided (destaxis = %d, srcaxis = %d, # of axes: %d' + % (daxis, saxis, reslen)) + if baseshape[daxis] != 1 or baseshape[saxis] != 1: + raise ValueError('dimension mismatch (input argument dimensions have to be 1 both at' + 'destaxis (==%d) and srcaxis (==%d) but are %d and %d' % + (daxis, saxis, baseshape[daxis], baseshape[saxis])) + resultshape = list(baseshape) + else: + reslen = len(baseshape)+2 + if destaxis is None: + daxis = -2 + if srcaxis is None: + saxis = -1 + if daxis < 0: + daxis = reslen + daxis + if saxis < 0: + saxis = reslen + saxis + if daxis < 0 or saxis < 0 or daxis >= reslen or saxis >= reslen or daxis == saxis: + raise ValueError('invalid axes provided') # TODO better error formulation + resultshape = list(baseshape) + if daxis > saxis: + smallaxis = saxis + bigaxis = daxis + else: + smallaxis = daxis + bixagis = saxis + resultshape.insert(smallaxis,1) + resultshape.insert(bigaxis,1) + r = np.expand_dims(np.expand_dims(r.astype(np.float_, copy=False), smallaxis), bigaxis) + theta = np.expand_dims(np.expand_dims(theta.astype(np.float_, copy=False), smallaxis), bigaxis) + phi = np.expand_dims(np.expand_dims(phi.astype(np.float_, copy=False), smallaxis), bigaxis) + r_ge_d = np.expand_dims(np.expand_dims(r_ge_d(np.bool_, copy=False), smallaxis), bigaxis) + + longestaxis = 0 + # FIxME: the whole thing with longest_axis will fail if none is longer than 1 + for i in range(reslen): + if resultshape[i] > resultshape[longest_axis]: + longestaxis = i + innerloop_shape = malloc(reslen * sizeof(int)) + if innerloop_shape == NULL: + abort() + for i in range(reslen): + innerloop_shape[i] = resultshape[i] + innerloop_shape[longest_axis] = 1 # longest axis will be iterated in the outer (parallelized) loop. Therefore, longest axis, together with saxis and daxis, will not be iterated in the inner loop + resultshape[daxis] = self.c[0].nelem + resultshape[saxis] = self.c[0].nelem + cdef np.ndarray r_c = np.broadcast_to(r,resultshape) + cdef np.ndarray theta_c = np.broadcast_to(theta,resultshape) + cdef np.ndarray phi_c = np.broadcast_to(phi,resultshape) + cdef np.ndarray r_ge_d_c = np.broadcast_to(r_ge_d, resultshape) + cdef np.ndarray a = np.empty(resultshape, dtype=complex) + cdef np.ndarray b = np.empty(resultshape, dtype=complex) + dstride = a.strides[daxis] + sstride = a.strides[saxis] + longstride = a.strides[longest_axis] + # TODO write this in C (as a function) and parallelize there + with nogil: #, parallel(): # FIXME rewrite this part in C + local_indices = calloc(reslen, sizeof(int)) + if local_indices == NULL: abort() + for longi in range(a.shape[longest_axis]): # outer loop (to be parallelized) + # this might be done also in the inverse order, but this is more 'c-contiguous' way of incrementing the indices + ax = reslen - 1 + while ax >= 0: + # calculate the correct index/pointer for each array used. This can be further optimized from O(reslen * total size of the result array) to O(total size of the result array), but fick that now + r_p = r_c.data + r_c.strides[longest_axis] * longi + theta_p = theta_c.data + theta_c.strides[longest_axis] * longi + phi_p = phi_c.data + phi_c.strides[longest_axis] * longi + r_ge_d_p = r_ge_d_c.data + r_ge_d_c.strides[longest_axis] * longi + a_p = a.data + a.strides[longest_axis] * longi + b_p = b.data + b.strides[longest_axis] * longi + for i in range(reslen): + if i == longest_axis: continue + r_p += r_c.strides[i] * local_indices[i] + theta_p += theta_c.strides[i] * local_indices[i] + phi_p += phi_c.strides[i] * local_indices[i] + r_ge_d_p += r_ge_d_c.strides[i] * local_indices[i] + if i == saxis or i == daxis: continue + a_p += a.strides[i] * local_indices[i] + b_p += b.strides[i] * local_indices[i] + + # perform the actual task here + errval = qpms_trans_calculator_get_AB_arrays_ext(self.c, + a_p, b_p, + dstride // sizeof(cdouble), sstride // sizeof(cdouble), + (r_p)[0], (theta_p)[0], (phi_p)[0], ((r_ge_d_p)[0]), J) + if errval: abort() + + # increment the last index 'digit' (ax is now reslen-1; we don't have do-while loop in python) + local_indices[ax] += 1 + while (local_indices[ax] == innerloop_shape[ax] and ax >= 0): # overflow to the next digit but stop when we reach below the last one + local_indices[ax] = 0 + ax -= 1 + local_indices[ax] += 1 + if ax >= 0: # did not overflow, get back to the lowest index + ax = reslen - 1 + + + + + + + + for ax in range(a.ndim): + if (ax == longest_axis or ax == daxis or ax == saxis): + continue + free(local_indices) + free(innerloop_shape) + + + + + + + + + + + + + + + + + + + # TODO make possible to access the attributes (to show normalization etc) diff --git a/qpms/test_translations2.c b/qpms/test_translations2.c index ff7f958..5d24293 100644 --- a/qpms/test_translations2.c +++ b/qpms/test_translations2.c @@ -48,6 +48,12 @@ int main() { cabs(B - B2)/((cabs(B2) < cabs(B)) ? cabs(B2) : cabs(B)) ); } + complex double A,B; + // Test of zero R + sph_t kdlj = {0, 1, 2}; + int m = -1, n = 1, mu = -1, nu = 1; + qpms_trans_calculator_get_AB_p(c,&A,&B,m,n,mu,nu,kdlj,false,3); + printf("A = %.6e+%.6ej, B = %.6e+%.6ej\n", creal(A),cimag(A),creal(B),cimag(B)); qpms_trans_calculator_free(c); } diff --git a/qpms/translations.c b/qpms/translations.c index b77c06a..485a26d 100644 --- a/qpms/translations.c +++ b/qpms/translations.c @@ -64,102 +64,102 @@ int qpms_sph_bessel_array(qpms_bessel_t typ, int lmax, double x, complex double complex double qpms_trans_single_A_Taylor(int m, int n, int mu, int nu, sph_t kdlj, bool r_ge_d, qpms_bessel_t J) { // TODO make J enum - if(r_ge_d) J = QPMS_BESSEL_REGULAR; + if(r_ge_d) J = QPMS_BESSEL_REGULAR; - double costheta = cos(kdlj.theta); + double costheta = cos(kdlj.theta); - int qmax = gaunt_q_max(-m,n,mu,nu); // nemá tu být +m? - // N.B. -m !!!!!! - double a1q[qmax+1]; - int err; - gaunt_xu(-m,n,mu,nu,qmax,a1q,&err); - double a1q0 = a1q[0]; - if (err) abort(); + int qmax = gaunt_q_max(-m,n,mu,nu); // nemá tu být +m? + // N.B. -m !!!!!! + double a1q[qmax+1]; + int err; + gaunt_xu(-m,n,mu,nu,qmax,a1q,&err); + double a1q0 = a1q[0]; + if (err) abort(); - double leg[gsl_sf_legendre_array_n(n+nu)]; - if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,leg)) abort(); - complex double bes[n+nu+1]; - if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bes)) abort(); - complex double sum = 0; - for(int q = 0; q <= qmax; ++q) { - int p = n+nu-2*q; - int Pp_order = mu-m; - //if(p < abs(Pp_order)) continue; // FIXME raději nastav lépe meze - assert(p >= abs(Pp_order)); - double a1q_n = a1q[q] / a1q0; - double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))]; - if (Pp_order < 0) Pp *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); - complex double zp = bes[p]; - complex double summandq = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n * zp * Pp; - sum += summandq; - } - - double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) - +lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1) - +lgamma(n+nu+1) - lgamma(2*(n+nu)+1)); - complex double presum = exp(exponent); - presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n) / (4*n); - - complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp( - lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1))); - return (presum / prenormratio) * sum; + double leg[gsl_sf_legendre_array_n(n+nu)]; + if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,leg)) abort(); + complex double bes[n+nu+1]; + if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bes)) abort(); + complex double sum = 0; + for(int q = 0; q <= qmax; ++q) { + int p = n+nu-2*q; + int Pp_order = mu-m; + //if(p < abs(Pp_order)) continue; // FIXME raději nastav lépe meze + assert(p >= abs(Pp_order)); + double a1q_n = a1q[q] / a1q0; + double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))]; + if (Pp_order < 0) Pp *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); + complex double zp = bes[p]; + complex double summandq = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n * zp * Pp; + sum += summandq; + } + + double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) + +lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1) + +lgamma(n+nu+1) - lgamma(2*(n+nu)+1)); + complex double presum = exp(exponent); + presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n) / (4*n); + + complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp( + lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1))); + return (presum / prenormratio) * sum; } complex double qpms_trans_single_B_Taylor(int m, int n, int mu, int nu, sph_t kdlj, bool r_ge_d, qpms_bessel_t J) { - if(r_ge_d) J = QPMS_BESSEL_REGULAR; - double costheta = cos(kdlj.theta); - - int q2max = gaunt_q_max(-m-1,n+1,mu+1,nu); - int Qmax = gaunt_q_max(-m,n+1,mu,nu); - double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0; - int err; - if (mu == nu) { - for (int q = 0; q <= q2max; ++q) - a2q[q] = 0; - a2q0 = 1; - } - else { - gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort(); - a2q0 = a2q[0]; - } - gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort(); - a3q0 = a3q[0]; + if(r_ge_d) J = QPMS_BESSEL_REGULAR; + double costheta = cos(kdlj.theta); - double leg[gsl_sf_legendre_array_n(n+nu+1)]; - if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,costheta,-1,leg)) abort(); - complex double bes[n+nu+2]; - if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bes)) abort(); - - complex double sum = 0; - for (int q = 0; q <= Qmax; ++q) { - int p = n+nu-2*q; - double a2q_n = a2q[q]/a2q0; - double a3q_n = a3q[q]/a3q0; - complex double zp_ = bes[p+1]; - int Pp_order_ = mu-m; - //if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze - assert(p+1 >= abs(Pp_order_)); - double Pp_ = leg[gsl_sf_legendre_array_index(p+1, abs(Pp_order_))]; - if (Pp_order_ < 0) Pp_ *= min1pow(mu-m) * exp(lgamma(1+1+p+Pp_order_)-lgamma(1+1+p-Pp_order_)); - complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n - -(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n) - *min1pow(q) * zp_ * Pp_); - sum += summandq; - } + int q2max = gaunt_q_max(-m-1,n+1,mu+1,nu); + int Qmax = gaunt_q_max(-m,n+1,mu,nu); + double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0; + int err; + if (mu == nu) { + for (int q = 0; q <= q2max; ++q) + a2q[q] = 0; + a2q0 = 1; + } + else { + gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort(); + a2q0 = a2q[0]; + } + gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort(); + a3q0 = a3q[0]; - double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) - +lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1) - +lgamma(n+nu+2) - lgamma(2*(n+nu)+3)); - complex double presum = exp(exponent); - presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n+1) / ( - (4*n)*(n+1)*(n+m+1)); + double leg[gsl_sf_legendre_array_n(n+nu+1)]; + if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,costheta,-1,leg)) abort(); + complex double bes[n+nu+2]; + if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bes)) abort(); - // Taylor normalisation v2, proven to be equivalent - complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp( - lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1))); - - return (presum / prenormratio) * sum; + complex double sum = 0; + for (int q = 0; q <= Qmax; ++q) { + int p = n+nu-2*q; + double a2q_n = a2q[q]/a2q0; + double a3q_n = a3q[q]/a3q0; + complex double zp_ = bes[p+1]; + int Pp_order_ = mu-m; + //if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze + assert(p+1 >= abs(Pp_order_)); + double Pp_ = leg[gsl_sf_legendre_array_index(p+1, abs(Pp_order_))]; + if (Pp_order_ < 0) Pp_ *= min1pow(mu-m) * exp(lgamma(1+1+p+Pp_order_)-lgamma(1+1+p-Pp_order_)); + complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n + -(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n) + *min1pow(q) * zp_ * Pp_); + sum += summandq; + } + + double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) + +lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1) + +lgamma(n+nu+2) - lgamma(2*(n+nu)+3)); + complex double presum = exp(exponent); + presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n+1) / ( + (4*n)*(n+1)*(n+m+1)); + + // Taylor normalisation v2, proven to be equivalent + complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp( + lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1))); + + return (presum / prenormratio) * sum; } complex double qpms_trans_single_A_Taylor_ext(int m, int n, int mu, int nu, @@ -181,7 +181,7 @@ void qpms_trans_calculator_free(qpms_trans_calculator *c) { free(c->B_multipliers); free(c); } - + static inline size_t qpms_mn2y(int m, int n) { return (size_t) n * (n + 1) + m - 1; } @@ -223,9 +223,9 @@ static void qpms_trans_calculator_multipliers_A_Taylor( double a1q0 = a1q[0]; double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) - +lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1) - +lgamma(n+nu+1) - lgamma(2*(n+nu)+1)) - 0.5*( // ex-prenormratio - lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)); + +lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1) + +lgamma(n+nu+1) - lgamma(2*(n+nu)+1)) - 0.5*( // ex-prenormratio + lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)); double presum = exp(exponent); presum *= min1pow(m+n) * sqrt((2.*n+1)/(2.*nu+1)) / (4*n); @@ -236,7 +236,7 @@ static void qpms_trans_calculator_multipliers_A_Taylor( double a1q_n = a1q[q] / a1q0; // Assuming non_normalized legendre polynomials! double Ppfac = (Pp_order >= 0) ? 1 : - min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); + min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); double summandfac = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n; dest[q] = presum * summandfac * Ppfac; // FIXME I might not need complex here @@ -260,7 +260,7 @@ static void qpms_trans_calculator_multipliers_A_Taylor( double a1q_n = a1q[q] / a1q0; //double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))]; //complex double zp = bes[p]; - dest[q] = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n /* * zp * Pp*/; + dest[q] = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n /* * zp * Pp*/; if (Pp_order < 0) dest[q] *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); //sum += summandq; } @@ -290,25 +290,25 @@ static void qpms_trans_calculator_multipliers_B_Taylor( double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0; int err; if (mu == nu) { - for (int q = 0; q <= q2max; ++q) - a2q[q] = 0; - a2q0 = 1; - } - else { - gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort(); - a2q0 = a2q[0]; - } - gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort(); - a3q0 = a3q[0]; + for (int q = 0; q <= q2max; ++q) + a2q[q] = 0; + a2q0 = 1; + } + else { + gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort(); + a2q0 = a2q[0]; + } + gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort(); + a3q0 = a3q[0]; double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) +lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1) +lgamma(n+nu+2) - lgamma(2*(n+nu)+3)) - 0.5 * ( - lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1) - -lgamma(nu+mu+1)); + lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1) + -lgamma(nu+mu+1)); complex double presum = exp(exponent); presum *= I * (min1pow(m+n) *sqrt((2.*n+1)/(2.*nu+1)) / ( - (4*n)*(n+1)*(n+m+1))); + (4*n)*(n+1)*(n+m+1))); for (int q = 0; q <= Qmax; ++q) { int p = n+nu-2*q; @@ -318,7 +318,7 @@ static void qpms_trans_calculator_multipliers_B_Taylor( //if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze assert(p+1 >= abs(Pp_order_)); double Ppfac = (Pp_order_ >= 0) ? 1 : - + min1pow(mu-m) * exp(lgamma(1+1+p+Pp_order_)-lgamma(1+1+p-Pp_order_)); double summandq = ((2*(n+1)*(nu-mu)*a2q_n -(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n) @@ -367,8 +367,8 @@ qpms_trans_calculator qpms_y2mn_p(y,&m,&n); qpms_y2mn_p(yu,&mu,&nu); qmaxsum += 1 + ( - qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] - = gaunt_q_max(-m,n,mu,nu)); + qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] + = gaunt_q_max(-m,n,mu,nu)); } c->A_multipliers[0] = malloc(qmaxsum * sizeof(complex double)); // calculate multiplier beginnings @@ -382,7 +382,7 @@ qpms_trans_calculator qpms_y2mn_p(y, &m, &n); qpms_y2mn_p(yu, &mu, &nu); qpms_trans_calculator_multipliers_A(normalization, - c->A_multipliers[i], m, n, mu, nu, qmaxes[i]); + c->A_multipliers[i], m, n, mu, nu, qmaxes[i]); } qmaxsum = 0; @@ -392,8 +392,8 @@ qpms_trans_calculator qpms_y2mn_p(y,&m,&n); qpms_y2mn_p(yu,&mu,&nu); qmaxsum += 1 + ( - qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] - = gaunt_q_max(-m,n+1,mu,nu)); + qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] + = gaunt_q_max(-m,n+1,mu,nu)); } c->B_multipliers[0] = malloc(qmaxsum * sizeof(complex double)); // calculate multiplier beginnings @@ -407,41 +407,50 @@ qpms_trans_calculator qpms_y2mn_p(y, &m, &n); qpms_y2mn_p(yu, &mu, &nu); qpms_trans_calculator_multipliers_B(normalization, - c->B_multipliers[i], m, n, mu, nu, qmaxes[i]); + c->B_multipliers[i], m, n, mu, nu, qmaxes[i]); } free(qmaxes); return c; } -complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c, +static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_trans_calculator *c, int m, int n, int mu, int nu, sph_t kdlj, bool r_ge_d, qpms_bessel_t J, - complex double *bessel_buf, double *legendre_buf) { + const complex double *bessel_buf, const double *legendre_buf) { + size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); + size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1; + assert(qmax == gaunt_q_max(-m,n,mu,nu)); + complex double sum = 0; + for(size_t q = 0; q <= qmax; ++q) { + int p = n+nu-2*q; + double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))]; + complex double zp = bessel_buf[p]; + complex double multiplier = c->A_multipliers[i][q]; + sum += Pp * zp * multiplier; + } + complex double eimf = cexp(I*(mu-m)*kdlj.phi); + return sum * eimf; +} + +complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c, + int m, int n, int mu, int nu, sph_t kdlj, + bool r_ge_d, qpms_bessel_t J, + complex double *bessel_buf, double *legendre_buf) { + // This functions gets preallocated memory for bessel and legendre functions, but computes them itself if (r_ge_d) J = QPMS_BESSEL_REGULAR; if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) // TODO warn? return NAN+I*NAN; - switch(c->normalization) { case QPMS_NORMALIZATION_TAYLOR: { double costheta = cos(kdlj.theta); - if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,legendre_buf)) abort(); - if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bessel_buf)) abort(); - size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); - size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1; - assert(qmax == gaunt_q_max(-m,n,mu,nu)); - complex double sum = 0; - for(size_t q = 0; q <= qmax; ++q) { - int p = n+nu-2*q; - double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))]; - complex double zp = bessel_buf[p]; - complex double multiplier = c->A_multipliers[i][q]; - sum += Pp * zp * multiplier; - } - complex double eimf = cexp(I*(mu-m)*kdlj.phi); - return sum * eimf; + if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu, + costheta,-1,legendre_buf)) abort(); + if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bessel_buf)) abort(); + return qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu, + kdlj,r_ge_d,J,bessel_buf,legendre_buf); } break; default: @@ -450,10 +459,30 @@ complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c, assert(0); } -complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c, +static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_trans_calculator *c, int m, int n, int mu, int nu, sph_t kdlj, bool r_ge_d, qpms_bessel_t J, - complex double *bessel_buf, double *legendre_buf) { + const complex double *bessel_buf, const double *legendre_buf) { + size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); + size_t qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1; + assert(qmax == gaunt_q_max(-m,n+1,mu,nu)); + complex double sum = 0; + for(int q = 0; q <= qmax; ++q) { + int p = n+nu-2*q; + double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))]; + complex double zp_ = bessel_buf[p+1]; + complex double multiplier = c->B_multipliers[i][q]; + sum += Pp_ * zp_ * multiplier; + } + complex double eimf = cexp(I*(mu-m)*kdlj.phi); + return sum * eimf; +} + +complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c, + int m, int n, int mu, int nu, sph_t kdlj, + bool r_ge_d, qpms_bessel_t J, + complex double *bessel_buf, double *legendre_buf) { + // This functions gets preallocated memory for bessel and legendre functions, but computes them itself if (r_ge_d) J = QPMS_BESSEL_REGULAR; if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) // TODO warn? @@ -463,22 +492,10 @@ complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c, { double costheta = cos(kdlj.theta); if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1, - costheta,-1,legendre_buf)) abort(); + costheta,-1,legendre_buf)) abort(); if (qpms_sph_bessel_array(J, n+nu+2, kdlj.r, bessel_buf)) abort(); - size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); - size_t qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1; - assert(qmax == gaunt_q_max(-m,n+1,mu,nu)); - complex double sum = 0; - for(int q = 0; q <= qmax; ++q) { - int p = n+nu-2*q; - double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))]; - complex double eimf = cexp(I * kdlj.phi); - complex double zp_ = bessel_buf[p+1]; - complex double multiplier = c->B_multipliers[i][q]; - sum += Pp_ * zp_ * multiplier; - } - complex double eimf = cexp(I*(mu-m)*kdlj.phi); - return sum * eimf; + return qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu, + kdlj,r_ge_d,J,bessel_buf,legendre_buf); } break; default: @@ -504,35 +521,12 @@ int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c, { double costheta = cos(kdlj.theta); if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1, - costheta,-1,legendre_buf)) abort(); + costheta,-1,legendre_buf)) abort(); if (qpms_sph_bessel_array(J, n+nu+2, kdlj.r, bessel_buf)) abort(); - size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); - size_t Qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1; - assert(Qmax == gaunt_q_max(-m,n+1,mu,nu)); - complex double Bsum = 0; - for(int q = 0; q <= Qmax; ++q) { - int p = n+nu-2*q; - double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))]; - complex double eimf = cexp(I * kdlj.phi); - complex double zp_ = bessel_buf[p+1]; - complex double multiplier = c->B_multipliers[i][q]; - Bsum += Pp_ * zp_ * multiplier; - } - - size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1; - assert(qmax == gaunt_q_max(-m,n,mu,nu)); - complex double Asum = 0; - for(size_t q = 0; q <= qmax; ++q) { - int p = n+nu-2*q; - double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))]; - complex double zp = bessel_buf[p]; - complex double multiplier = c->A_multipliers[i][q]; - Asum += Pp * zp * multiplier; - } - complex double eimf = cexp(I*(mu-m)*kdlj.phi); - - *Adest = Asum * eimf; - *Bdest = Bsum * eimf; + *Adest = qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu, + kdlj,r_ge_d,J,bessel_buf,legendre_buf); + *Bdest = qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu, + kdlj,r_ge_d,J,bessel_buf,legendre_buf); return 0; } break; @@ -543,6 +537,53 @@ int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c, } + + +int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator *c, + complex double *Adest, complex double *Bdest, + size_t deststride, size_t srcstride, + sph_t kdlj, bool r_ge_d, qpms_bessel_t J, + complex double *bessel_buf, double *legendre_buf) { + if (r_ge_d) J = QPMS_BESSEL_REGULAR; + if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) { + for (size_t i = 0; i < c->nelem; ++i) + for (size_t j = 0; j < c->nelem; ++j) { + *(Adest + i*srcstride + j*deststride) = NAN+I*NAN; + *(Bdest + i*srcstride + j*deststride) = NAN+I*NAN; + } + // TODO warn? different return value? + return 0; + } + switch(c->normalization) { + case QPMS_NORMALIZATION_TAYLOR: + { + double costheta = cos(kdlj.theta); + if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,2*c->lMax+1, + costheta,-1,legendre_buf)) abort(); + if (qpms_sph_bessel_array(J, 2*c->lMax+2, kdlj.r, bessel_buf)) abort(); + size_t desti = 0, srci = 0; + for (int n = 1; n <= c->nelem; ++n) for (int m = -n; m <= n; ++m) { + for (int nu = 1; nu <= c->nelem; ++nu) for (int mu = -nu; mu <= nu; ++mu) { + assert(qpms_trans_calculator_index_mnmunu(c,m,n,mu,nu) == desti*c->nelem + srci); + *(Adest + deststride * desti + srcstride * srci) = + qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu, + kdlj,r_ge_d,J,bessel_buf,legendre_buf); + *(Bdest + deststride * desti + srcstride * srci) = + qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu, + kdlj,r_ge_d,J,bessel_buf,legendre_buf); + ++srci; + } + ++desti; + } + return 0; + } + break; + default: + abort(); + } + assert(0); +} + complex double qpms_trans_calculator_get_A(const qpms_trans_calculator *c, int m, int n, int mu, int nu, sph_t kdlj, bool r_ge_d, qpms_bessel_t J) { @@ -565,12 +606,26 @@ int qpms_trans_calculator_get_AB_p(const qpms_trans_calculator *c, complex double *Adest, complex double *Bdest, int m, int n, int mu, int nu, sph_t kdlj, bool r_ge_d, qpms_bessel_t J) { - double leg[gsl_sf_legendre_array_n(n+nu+1)]; - complex double bes[n+nu+2]; + double leg[gsl_sf_legendre_array_n(2*c->lMax+1)]; + complex double bes[2*c->lMax+2]; return qpms_trans_calculator_get_AB_buf_p(c,Adest, Bdest,m,n,mu,nu,kdlj,r_ge_d,J, bes,leg); } +int qpms_trans_calculator_get_AB_arrays(const qpms_trans_calculator *c, + complex double *Adest, complex double *Bdest, + size_t deststride, size_t srcstride, + sph_t kdlj, bool r_ge_d, qpms_bessel_t J) { + double leg[gsl_sf_legendre_array_n(c->lMax+c->lMax+1)]; + complex double bes[c->lMax+c->lMax+2]; + return qpms_trans_calculator_get_AB_arrays_buf(c, + Adest, Bdest, deststride, srcstride, + kdlj, r_ge_d, J, + bes, leg); +} + + + complex double qpms_trans_calculator_get_A_ext(const qpms_trans_calculator *c, int m, int n, int mu, int nu, double kdlj_r, double kdlj_theta, double kdlj_phi, @@ -596,3 +651,13 @@ int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator *c, return qpms_trans_calculator_get_AB_p(c,Adest,Bdest,m,n,mu,nu,kdlj,r_ge_d,J); } +int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c, + complex double *Adest, complex double *Bdest, + size_t deststride, size_t srcstride, + double kdlj_r, double kdlj_theta, double kdlj_phi, + int r_ge_d, int J) { + sph_t kdlj = {kdlj_r, kdlj_theta, kdlj_phi}; + return qpms_trans_calculator_get_AB_arrays(c,Adest,Bdest,deststride,srcstride, + kdlj, r_ge_d, J); +} + diff --git a/qpms/translations.h b/qpms/translations.h index 025ac6c..881bb5e 100644 --- a/qpms/translations.h +++ b/qpms/translations.h @@ -60,7 +60,6 @@ complex double qpms_trans_calculator_get_B_ext(const qpms_trans_calculator *c, int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi, int r_ge_d, int J); - int qpms_trans_calculator_get_AB_p(const qpms_trans_calculator *c, complex double *Adest, complex double *Bdest, int m, int n, int mu, int nu, sph_t kdlj, @@ -70,5 +69,15 @@ int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator *c, int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi, int r_ge_d, int J); +int qpms_trans_calculator_get_AB_arrays(const qpms_trans_calculator *c, + complex double *Adest, complex double *Bdest, + size_t deststride, size_t srcstride, + sph_t kdlj, bool r_ge_d, qpms_bessel_t J); +int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c, + complex double *Adest, complex double *Bdest, + size_t deststride, size_t srcstride, + double kdlj_r, double kdlj_theta, double kdlj_phi, + int r_ge_d, int J); + #endif // QPMS_TRANSLATIONS_H