Whole translation matrix: it compiles; untested

Former-commit-id: 1465a1385a2f318148d7b3a21cbbf2c0391a13df
2017-05-02 00:18:01 +03:00 · 2017-05-02 00:18:01 +03:00 · 2d88a1ef0d
parent e2a2100b57
commit 2d88a1ef0d
4 changed files with 456 additions and 182 deletions
--- a/qpms/qpms_c.pyx
+++ b/qpms/qpms_c.pyx
@ -166,21 +166,29 @@ cdef extern from "translations.h":
    cdouble qpms_trans_single_B_Taylor_ext(int m, int n, int mu, int nu,
        double r, double th, double ph, int r_ge_d, int J) nogil
    struct qpms_trans_calculator:
-        pass
+        int lMax
+        size_t nelem
+        cdouble** A_multipliers
+        cdouble** B_multipliers
    enum qpms_normalization_t:
        pass
    qpms_trans_calculator* qpms_trans_calculator_init(int lMax, int nt) # should be qpms_normalization_t
    void qpms_trans_calculator_free(qpms_trans_calculator* c)
    cdouble qpms_trans_calculator_get_A_ext(const qpms_trans_calculator* c,
            int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi,
-            int r_ge_d, int J)
+            int r_ge_d, int J) nogil
    cdouble qpms_trans_calculator_get_B_ext(const qpms_trans_calculator* c,
            int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi,
-            int r_ge_d, int J)
+            int r_ge_d, int J) nogil
    int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator* c,
            cdouble *Adest, cdouble *Bdest,
            int m, int n, int mu, int nu, double kdlj_r, double kdlj_th, double kdlj_phi,
-            int r_ge_d, int J)
+            int r_ge_d, int J) nogil
+    int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c,
+            cdouble *Adest, cdouble *Bdest,
+            size_t deststride, size_t srcstride,
+            double kdlj_r, double kdlj_theta, double kdlj_phi,
+            int r_ge_d, int J) nogil



@ -436,6 +444,20 @@ trans_calculator_get_AB_loop_funcs[0] = trans_calculator_loop_E_C_DD_iiiidddii_A
 cdef void *trans_calculator_get_AB_elementwise_funcs[1]
 trans_calculator_get_AB_elementwise_funcs[0] = <void *>qpms_trans_calculator_get_AB_p_ext

+'''
+cdef extern from "numpy/ndarrayobject.h":
+    struct PyArrayInterface:
+        int itemsize
+        np.npy_uintp *shape
+        np.npy_uintp *strides
+        void *data
+'''
+
+
+from libc.stdlib cimport malloc, free, calloc, abort
+
+
+
 cdef class trans_calculator:
    cdef qpms_trans_calculator* c
    cdef trans_calculator_get_X_data_t get_A_data[1]
@ -509,6 +531,178 @@ cdef class trans_calculator:
        qpms_trans_calculator_free(self.c)
        # TODO Reference counts to get_A, get_B, get_AB?

+    def lMax(self):
+        return self.c[0].lMax
+
+    def nelem(self):
+        return self.c[0].nelem
+
+    def get_AB_arrays(self, r, theta, phi, r_ge_d, int J, 
+            destaxis=None, srcaxis=None, expand=True):
+        """
+        Returns arrays of translation coefficients, inserting two new nelem-sized axes
+        (corresponding to the destination and source axes of the translation matrix,
+        respectively).
+
+        By default (expand==True), it inserts the new axes. or it can be provided with
+        the resulting shape (with the corresponding axes dimensions equal to one).
+        The provided axes positions are for the resulting array.
+
+        If none axis positions are provided, destaxis and srcaxis will be the second-to-last
+        and last, respectively.
+        """
+        # TODO CHECK (and try to cast) INPUT ARRAY TYPES (now is done)
+        # BIG FIXME: make skalars valid arguments, now r, theta, phi, r_ge_d have to be ndarrays
+        cdef:
+            int daxis, saxis, smallaxis, bigaxis, reslen, longest_axis, i, j, d, ax, errval
+            np.npy_intp sstride, dstride, longi, longstride
+            int *local_indices
+            int *innerloop_shape
+            char *r_p
+            char *theta_p
+            char *phi_p
+            char *r_ge_d_p
+            char *a_p
+            char *b_p
+        # Process the array shapes
+        baseshape = np.broadcast.shape(r,theta,phi,r_ge_d)
+        if not expand:
+            reslen = len(baseshape)
+            if reslen < 2:
+                raise ValueError('Translation matrix arrays must have at least 2 dimensions!')
+            daxis = (reslen-2) if destaxis is None else destaxis
+            saxis = (reslen-1) if srcaxis is None else srcaxis
+            if daxis < 0:
+                daxis = reslen + daxis
+            if saxis < 0:
+                saxis = reslen + saxis
+            if daxis < 0 or saxis < 0 or daxis >= reslen or saxis >= reslen or daxis == saxis:
+                raise ValueError('invalid axes provided (destaxis = %d, srcaxis = %d, # of axes: %d'
+                        % (daxis, saxis, reslen)) 
+            if baseshape[daxis] != 1 or baseshape[saxis] != 1:
+                raise ValueError('dimension mismatch (input argument dimensions have to be 1 both at'
+                        'destaxis (==%d) and srcaxis (==%d) but are %d and %d' % 
+                        (daxis, saxis, baseshape[daxis], baseshape[saxis]))
+            resultshape = list(baseshape)
+        else:
+            reslen = len(baseshape)+2
+            if destaxis is None:
+                daxis = -2
+            if srcaxis is None:
+                saxis = -1
+            if daxis < 0:
+                daxis = reslen + daxis
+            if saxis < 0:
+                saxis = reslen + saxis
+            if daxis < 0 or saxis < 0 or daxis >= reslen or saxis >= reslen or daxis == saxis:
+                raise ValueError('invalid axes provided') # TODO better error formulation
+            resultshape = list(baseshape)
+            if daxis > saxis:
+                smallaxis = saxis
+                bigaxis = daxis
+            else:
+                smallaxis = daxis
+                bixagis = saxis
+            resultshape.insert(smallaxis,1)
+            resultshape.insert(bigaxis,1)
+            r = np.expand_dims(np.expand_dims(r.astype(np.float_, copy=False), smallaxis), bigaxis)
+            theta = np.expand_dims(np.expand_dims(theta.astype(np.float_, copy=False), smallaxis), bigaxis)
+            phi = np.expand_dims(np.expand_dims(phi.astype(np.float_, copy=False), smallaxis), bigaxis)
+            r_ge_d = np.expand_dims(np.expand_dims(r_ge_d(np.bool_, copy=False), smallaxis), bigaxis)
+
+        longestaxis = 0
+        # FIxME: the whole thing with longest_axis will fail if none is longer than 1
+        for i in range(reslen):
+            if resultshape[i] > resultshape[longest_axis]:
+                longestaxis = i
+        innerloop_shape = <int *> malloc(reslen * sizeof(int))
+        if innerloop_shape == NULL:
+            abort()
+        for i in range(reslen):
+            innerloop_shape[i] = resultshape[i]
+        innerloop_shape[longest_axis] = 1 # longest axis will be iterated in the outer (parallelized) loop. Therefore, longest axis, together with saxis and daxis, will not be iterated in the inner loop
+        resultshape[daxis] = self.c[0].nelem
+        resultshape[saxis] = self.c[0].nelem
+        cdef np.ndarray r_c = np.broadcast_to(r,resultshape)
+        cdef np.ndarray theta_c = np.broadcast_to(theta,resultshape)
+        cdef np.ndarray phi_c = np.broadcast_to(phi,resultshape)
+        cdef np.ndarray r_ge_d_c = np.broadcast_to(r_ge_d, resultshape)
+        cdef np.ndarray a = np.empty(resultshape, dtype=complex)
+        cdef np.ndarray b = np.empty(resultshape, dtype=complex)
+        dstride = a.strides[daxis]
+        sstride = a.strides[saxis]
+        longstride = a.strides[longest_axis]
+        # TODO write this in C (as a function) and parallelize there
+        with nogil: #, parallel(): # FIXME rewrite this part in C
+            local_indices = <int *> calloc(reslen, sizeof(int))
+            if local_indices == NULL: abort()
+            for longi in range(a.shape[longest_axis]): # outer loop (to be parallelized)
+                # this might be done also in the inverse order, but this is more 'c-contiguous' way of incrementing the indices
+                ax = reslen - 1
+                while ax >= 0:
+                    # calculate the correct index/pointer for each array used. This can be further optimized from O(reslen * total size of the result array) to O(total size of the result array), but fick that now
+                    r_p = r_c.data + r_c.strides[longest_axis] * longi
+                    theta_p = theta_c.data + theta_c.strides[longest_axis] * longi
+                    phi_p = phi_c.data + phi_c.strides[longest_axis] * longi
+                    r_ge_d_p = r_ge_d_c.data + r_ge_d_c.strides[longest_axis] * longi
+                    a_p = a.data + a.strides[longest_axis] * longi
+                    b_p = b.data + b.strides[longest_axis] * longi
+                    for i in range(reslen):
+                        if i == longest_axis: continue
+                        r_p += r_c.strides[i] * local_indices[i]
+                        theta_p += theta_c.strides[i] * local_indices[i]
+                        phi_p += phi_c.strides[i] * local_indices[i]
+                        r_ge_d_p += r_ge_d_c.strides[i] * local_indices[i]
+                        if i == saxis or i == daxis: continue
+                        a_p += a.strides[i] * local_indices[i]
+                        b_p += b.strides[i] * local_indices[i]
+
+                    # perform the actual task here
+                    errval = qpms_trans_calculator_get_AB_arrays_ext(self.c,
+                            <cdouble*>a_p, <cdouble*>b_p,
+                            dstride // sizeof(cdouble), sstride // sizeof(cdouble),
+                            (<double*>r_p)[0], (<double*>theta_p)[0], (<double*>phi_p)[0], <int>((<np.npy_bool*>r_ge_d_p)[0]), J)
+                    if errval: abort()
+
+                    # increment the last index 'digit' (ax is now reslen-1; we don't have do-while loop in python)
+                    local_indices[ax] += 1
+                    while (local_indices[ax] == innerloop_shape[ax] and ax >= 0): # overflow to the next digit but stop when we reach below the last one
+                        local_indices[ax] = 0
+                        ax -= 1
+                        local_indices[ax] += 1
+                    if ax >= 0: # did not overflow, get back to the lowest index
+                        ax = reslen - 1
+
+
+
+
+
+
+
+                for ax in range(a.ndim):
+                    if (ax == longest_axis or ax == daxis or ax == saxis):
+                        continue
+            free(local_indices)
+        free(innerloop_shape)
+
+
+                
+
+
+
+
+
+
+
+        
+
+
+
+
+
+
+
+

    # TODO make possible to access the attributes (to show normalization etc)

--- a/qpms/test_translations2.c
+++ b/qpms/test_translations2.c
@ -48,6 +48,12 @@ int main() {
 				cabs(B - B2)/((cabs(B2) < cabs(B)) ? cabs(B2) : cabs(B))
 		      );
 	}
+	complex double A,B;
+	// Test of zero R
+	sph_t kdlj = {0, 1, 2};
+	int m = -1, n = 1, mu = -1, nu = 1;
+	qpms_trans_calculator_get_AB_p(c,&A,&B,m,n,mu,nu,kdlj,false,3);
+	printf("A = %.6e+%.6ej, B = %.6e+%.6ej\n", creal(A),cimag(A),creal(B),cimag(B));
 	qpms_trans_calculator_free(c);
 }

--- a/qpms/translations.c
+++ b/qpms/translations.c
@ -64,102 +64,102 @@ int qpms_sph_bessel_array(qpms_bessel_t typ, int lmax, double x, complex double

 complex double qpms_trans_single_A_Taylor(int m, int n, int mu, int nu, sph_t kdlj,
 		bool r_ge_d, qpms_bessel_t J) { // TODO make J enum
-    if(r_ge_d) J = QPMS_BESSEL_REGULAR;
+	if(r_ge_d) J = QPMS_BESSEL_REGULAR;

-    double costheta = cos(kdlj.theta);
+	double costheta = cos(kdlj.theta);

-    int qmax = gaunt_q_max(-m,n,mu,nu); // nemá tu být +m?
-    // N.B. -m !!!!!!
-    double a1q[qmax+1];
-    int err;
-    gaunt_xu(-m,n,mu,nu,qmax,a1q,&err);
-    double a1q0 = a1q[0];
-    if (err) abort();
+	int qmax = gaunt_q_max(-m,n,mu,nu); // nemá tu být +m?
+	// N.B. -m !!!!!!
+	double a1q[qmax+1];
+	int err;
+	gaunt_xu(-m,n,mu,nu,qmax,a1q,&err);
+	double a1q0 = a1q[0];
+	if (err) abort();

-    double leg[gsl_sf_legendre_array_n(n+nu)];
-    if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,leg)) abort();
-    complex double bes[n+nu+1];
-    if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bes)) abort();
-    complex double sum = 0;
-    for(int q = 0; q <= qmax; ++q) {
-	    int p = n+nu-2*q;
-	    int Pp_order = mu-m;
-	    //if(p < abs(Pp_order)) continue; // FIXME raději nastav lépe meze
-	    assert(p >= abs(Pp_order));
-	    double a1q_n = a1q[q] / a1q0;
-	    double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))];
-	    if (Pp_order < 0) Pp *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order));
-	    complex double zp = bes[p];
-	    complex double summandq = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n * zp * Pp;
-	    sum += summandq;
-    }
-    
-    double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
-                +lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1)
-                +lgamma(n+nu+1) - lgamma(2*(n+nu)+1));
-    complex double presum = exp(exponent);
-    presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n) / (4*n);
- 
-    complex double prenormratio = ipow(nu-n) *  sqrt(((2.*nu+1)/(2.*n+1))* exp(
-	        lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)));
-    return (presum / prenormratio) * sum;
+	double leg[gsl_sf_legendre_array_n(n+nu)];
+	if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,leg)) abort();
+	complex double bes[n+nu+1];
+	if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bes)) abort();
+	complex double sum = 0;
+	for(int q = 0; q <= qmax; ++q) {
+		int p = n+nu-2*q;
+		int Pp_order = mu-m;
+		//if(p < abs(Pp_order)) continue; // FIXME raději nastav lépe meze
+		assert(p >= abs(Pp_order));
+		double a1q_n = a1q[q] / a1q0;
+		double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))];
+		if (Pp_order < 0) Pp *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order));
+		complex double zp = bes[p];
+		complex double summandq = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n * zp * Pp;
+		sum += summandq;
+	}
+
+	double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
+			+lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1)
+			+lgamma(n+nu+1) - lgamma(2*(n+nu)+1));
+	complex double presum = exp(exponent);
+	presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n) / (4*n);
+
+	complex double prenormratio = ipow(nu-n) *  sqrt(((2.*nu+1)/(2.*n+1))* exp(
+				lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)));
+	return (presum / prenormratio) * sum;
 }

 complex double qpms_trans_single_B_Taylor(int m, int n, int mu, int nu, sph_t kdlj,
 		bool r_ge_d, qpms_bessel_t J) { 
-    if(r_ge_d) J = QPMS_BESSEL_REGULAR;
-    double costheta = cos(kdlj.theta);
-    
-    int q2max = gaunt_q_max(-m-1,n+1,mu+1,nu);
-    int Qmax = gaunt_q_max(-m,n+1,mu,nu);
-    double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0;
-    int err;
-    if (mu == nu) {
-	    for (int q = 0; q <= q2max; ++q) 
-	    	a2q[q] = 0;
-	    a2q0 = 1;
-    }
-    else { 
-	    gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort();
-	    a2q0 = a2q[0];
-    }
-    gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort();
-    a3q0 = a3q[0];
+	if(r_ge_d) J = QPMS_BESSEL_REGULAR;
+	double costheta = cos(kdlj.theta);

-    double leg[gsl_sf_legendre_array_n(n+nu+1)];
-    if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,costheta,-1,leg)) abort();
-    complex double bes[n+nu+2];
-    if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bes)) abort();
- 
-    complex double sum = 0;
-    for (int q = 0; q <= Qmax; ++q) {
-	    int p = n+nu-2*q;
-	    double a2q_n = a2q[q]/a2q0;
-	    double a3q_n = a3q[q]/a3q0;
-	    complex double zp_ = bes[p+1];
-	    int Pp_order_ = mu-m;
-	    //if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze
-	    assert(p+1 >= abs(Pp_order_));
-	    double Pp_ = leg[gsl_sf_legendre_array_index(p+1, abs(Pp_order_))];
-	    if (Pp_order_ < 0) Pp_ *= min1pow(mu-m) * exp(lgamma(1+1+p+Pp_order_)-lgamma(1+1+p-Pp_order_));
-	    complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n
-                 -(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n)
-                *min1pow(q) * zp_ * Pp_);
-	    sum += summandq;
-    }
+	int q2max = gaunt_q_max(-m-1,n+1,mu+1,nu);
+	int Qmax = gaunt_q_max(-m,n+1,mu,nu);
+	double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0;
+	int err;
+	if (mu == nu) {
+		for (int q = 0; q <= q2max; ++q) 
+			a2q[q] = 0;
+		a2q0 = 1;
+	}
+	else { 
+		gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort();
+		a2q0 = a2q[0];
+	}
+	gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort();
+	a3q0 = a3q[0];

-    double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
-                +lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1)
-                +lgamma(n+nu+2) - lgamma(2*(n+nu)+3));
-    complex double presum = exp(exponent);
-    presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n+1) / (
-        (4*n)*(n+1)*(n+m+1));
+	double leg[gsl_sf_legendre_array_n(n+nu+1)];
+	if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,costheta,-1,leg)) abort();
+	complex double bes[n+nu+2];
+	if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bes)) abort();

-    // Taylor normalisation v2, proven to be equivalent
-    complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp(
-        lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)));
-    
-    return (presum / prenormratio) * sum;
+	complex double sum = 0;
+	for (int q = 0; q <= Qmax; ++q) {
+		int p = n+nu-2*q;
+		double a2q_n = a2q[q]/a2q0;
+		double a3q_n = a3q[q]/a3q0;
+		complex double zp_ = bes[p+1];
+		int Pp_order_ = mu-m;
+		//if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze
+		assert(p+1 >= abs(Pp_order_));
+		double Pp_ = leg[gsl_sf_legendre_array_index(p+1, abs(Pp_order_))];
+		if (Pp_order_ < 0) Pp_ *= min1pow(mu-m) * exp(lgamma(1+1+p+Pp_order_)-lgamma(1+1+p-Pp_order_));
+		complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n
+					-(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n)
+				*min1pow(q) * zp_ * Pp_);
+		sum += summandq;
+	}
+
+	double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
+			+lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1)
+			+lgamma(n+nu+2) - lgamma(2*(n+nu)+3));
+	complex double presum = exp(exponent);
+	presum *= cexp(I*(mu-m)*kdlj.phi) * min1pow(m) * ipow(nu+n+1) / (
+			(4*n)*(n+1)*(n+m+1));
+
+	// Taylor normalisation v2, proven to be equivalent
+	complex double prenormratio = ipow(nu-n) * sqrt(((2.*nu+1)/(2.*n+1))* exp(
+				lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1)));
+
+	return (presum / prenormratio) * sum;
 }

 complex double qpms_trans_single_A_Taylor_ext(int m, int n, int mu, int nu, 
@ -181,7 +181,7 @@ void qpms_trans_calculator_free(qpms_trans_calculator *c) {
 	free(c->B_multipliers);
 	free(c);
 }
-	
+
 static inline size_t qpms_mn2y(int m, int n) {
 	return (size_t) n * (n + 1) + m - 1;
 }
@ -223,9 +223,9 @@ static void qpms_trans_calculator_multipliers_A_Taylor(
 	double a1q0 = a1q[0];

 	double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
-                +lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1)
-                +lgamma(n+nu+1) - lgamma(2*(n+nu)+1)) - 0.5*( // ex-prenormratio
-                lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1));
+			+lgamma(n+nu+m-mu+1)-lgamma(n-m+1)-lgamma(nu+mu+1)
+			+lgamma(n+nu+1) - lgamma(2*(n+nu)+1)) - 0.5*( // ex-prenormratio
+			lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)-lgamma(nu+mu+1));
 	double presum = exp(exponent);
 	presum *=  min1pow(m+n) * sqrt((2.*n+1)/(2.*nu+1)) / (4*n);

@ -236,7 +236,7 @@ static void qpms_trans_calculator_multipliers_A_Taylor(
 		double a1q_n = a1q[q] / a1q0;
 		// Assuming non_normalized legendre polynomials!
 		double Ppfac = (Pp_order >= 0) ? 1 :
-		       	min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order));
+			min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order));
 		double summandfac = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n;
 		dest[q] = presum * summandfac * Ppfac;
 		// FIXME I might not need complex here
@ -260,7 +260,7 @@ static void qpms_trans_calculator_multipliers_A_Taylor(
 		double a1q_n = a1q[q] / a1q0;
 		//double Pp = leg[gsl_sf_legendre_array_index(p, abs(Pp_order))];
 		//complex double zp = bes[p];
-		 dest[q] = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n /* * zp * Pp*/;
+		dest[q] = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n /* * zp * Pp*/;
 		if (Pp_order < 0) dest[q] *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order));
 		//sum += summandq;
 	}
@ -290,25 +290,25 @@ static void qpms_trans_calculator_multipliers_B_Taylor(
 	double a2q[q2max+1], a3q[Qmax+1], a2q0, a3q0;
 	int err;
 	if (mu == nu) {
-	for (int q = 0; q <= q2max; ++q)
-                a2q[q] = 0;
-          	a2q0 = 1;
-    	}
-    	else {
-            	gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort();
-            	a2q0 = a2q[0];
-    	}
-    	gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort();
-    	a3q0 = a3q[0];
+		for (int q = 0; q <= q2max; ++q)
+			a2q[q] = 0;
+		a2q0 = 1;
+	}
+	else {
+		gaunt_xu(-m-1,n+1,mu+1,nu,q2max,a2q,&err); if (err) abort();
+		a2q0 = a2q[0];
+	}
+	gaunt_xu(-m,n+1,mu,nu,Qmax,a3q,&err); if (err) abort();
+	a3q0 = a3q[0];

 	double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2)
 			+lgamma(n+nu+m-mu+2)-lgamma(n-m+1)-lgamma(nu+mu+1)
 			+lgamma(n+nu+2) - lgamma(2*(n+nu)+3)) - 0.5 * (
-				lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)
-				-lgamma(nu+mu+1));
+			lgamma(n+m+1)-lgamma(n-m+1)+lgamma(nu-mu+1)
+			-lgamma(nu+mu+1));
 	complex double presum = exp(exponent);
 	presum *= I * (min1pow(m+n) *sqrt((2.*n+1)/(2.*nu+1)) / (
-			(4*n)*(n+1)*(n+m+1)));
+				(4*n)*(n+1)*(n+m+1)));

 	for (int q = 0; q <= Qmax; ++q) {
 		int p = n+nu-2*q;
@ -318,7 +318,7 @@ static void qpms_trans_calculator_multipliers_B_Taylor(
 		//if(p+1 < abs(Pp_order_)) continue; // FIXME raději nastav lépe meze
 		assert(p+1 >= abs(Pp_order_));
 		double Ppfac = (Pp_order_ >= 0) ? 1 :		
-			
+
 			min1pow(mu-m) * exp(lgamma(1+1+p+Pp_order_)-lgamma(1+1+p-Pp_order_));
 		double summandq = ((2*(n+1)*(nu-mu)*a2q_n
 					-(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n)
@ -367,8 +367,8 @@ qpms_trans_calculator
 			qpms_y2mn_p(y,&m,&n);
 			qpms_y2mn_p(yu,&mu,&nu);
 			qmaxsum += 1 + (
-				qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] 
-				= gaunt_q_max(-m,n,mu,nu));
+					qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] 
+					= gaunt_q_max(-m,n,mu,nu));
 		}
 	c->A_multipliers[0] = malloc(qmaxsum * sizeof(complex double));
 	// calculate multiplier beginnings
@ -382,7 +382,7 @@ qpms_trans_calculator
 			qpms_y2mn_p(y, &m, &n);
 			qpms_y2mn_p(yu, &mu, &nu);
 			qpms_trans_calculator_multipliers_A(normalization,
-				c->A_multipliers[i], m, n, mu, nu, qmaxes[i]);
+					c->A_multipliers[i], m, n, mu, nu, qmaxes[i]);
 		}

 	qmaxsum = 0;
@ -392,8 +392,8 @@ qpms_trans_calculator
 			qpms_y2mn_p(y,&m,&n);
 			qpms_y2mn_p(yu,&mu,&nu);
 			qmaxsum += 1 + (
-				qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] 
-				= gaunt_q_max(-m,n+1,mu,nu));
+					qmaxes[qpms_trans_calculator_index_yyu(c,y,yu)] 
+					= gaunt_q_max(-m,n+1,mu,nu));
 		}
 	c->B_multipliers[0] = malloc(qmaxsum * sizeof(complex double));
 	// calculate multiplier beginnings
@ -407,41 +407,50 @@ qpms_trans_calculator
 			qpms_y2mn_p(y, &m, &n);
 			qpms_y2mn_p(yu, &mu, &nu);
 			qpms_trans_calculator_multipliers_B(normalization,
-				c->B_multipliers[i], m, n, mu, nu, qmaxes[i]);
+					c->B_multipliers[i], m, n, mu, nu, qmaxes[i]);
 		}

 	free(qmaxes);
 	return c;
 }

-complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c,
+static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_trans_calculator *c,
 		int m, int n, int mu, int nu, sph_t kdlj,
 		bool r_ge_d, qpms_bessel_t J,
-		complex double *bessel_buf, double *legendre_buf) {
+		const complex double *bessel_buf, const double *legendre_buf) {
+	size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
+	size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1;
+	assert(qmax == gaunt_q_max(-m,n,mu,nu));
+	complex double sum = 0;
+	for(size_t q = 0; q <= qmax; ++q) {
+		int p = n+nu-2*q;
+		double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))];
+		complex double zp = bessel_buf[p];
+		complex double multiplier = c->A_multipliers[i][q];
+		sum += Pp * zp *  multiplier;
+	}
+	complex double eimf =  cexp(I*(mu-m)*kdlj.phi);
+	return sum * eimf;
+}
+
+complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c,
+								int m, int n, int mu, int nu, sph_t kdlj,
+								bool r_ge_d, qpms_bessel_t J,
+								complex double *bessel_buf, double *legendre_buf) {
+	// This functions gets preallocated memory for bessel and legendre functions, but computes them itself
 	if (r_ge_d) J = QPMS_BESSEL_REGULAR;
 	if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) 
 		// TODO warn? 
 		return NAN+I*NAN;
-	
 	switch(c->normalization) {
 		case QPMS_NORMALIZATION_TAYLOR:
 			{
 				double costheta = cos(kdlj.theta);
-				if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,costheta,-1,legendre_buf)) abort();
-				if (qpms_sph_bessel_array(J, n+nu, kdlj.r, bessel_buf)) abort();
-				size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
-				size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1;
-				assert(qmax == gaunt_q_max(-m,n,mu,nu));
-				complex double sum = 0;
-				for(size_t q = 0; q <= qmax; ++q) {
-					int p = n+nu-2*q;
-					double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))];
-					complex double zp = bessel_buf[p];
-					complex double multiplier = c->A_multipliers[i][q];
-					sum += Pp * zp *  multiplier;
-				}
-				complex double eimf =  cexp(I*(mu-m)*kdlj.phi);
-				return sum * eimf;
+				if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu,
+							costheta,-1,legendre_buf)) abort();
+				if (qpms_sph_bessel_array(J, n+nu+1, kdlj.r, bessel_buf)) abort();
+				return qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
+						kdlj,r_ge_d,J,bessel_buf,legendre_buf);
 			}
 			break;
 		default:
@ -450,10 +459,30 @@ complex double qpms_trans_calculator_get_A_buf(const qpms_trans_calculator *c,
 	assert(0);
 }

-complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c,
+static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_trans_calculator *c,
 		int m, int n, int mu, int nu, sph_t kdlj,
 		bool r_ge_d, qpms_bessel_t J,
-		complex double *bessel_buf, double *legendre_buf) {
+		const complex double *bessel_buf, const double *legendre_buf) {
+	size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
+	size_t qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1;
+	assert(qmax == gaunt_q_max(-m,n+1,mu,nu));
+	complex double sum = 0;
+	for(int q = 0; q <= qmax; ++q) {
+		int p = n+nu-2*q;
+		double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))];
+		complex double zp_ = bessel_buf[p+1];
+		complex double multiplier = c->B_multipliers[i][q];
+		sum += Pp_ * zp_ * multiplier;
+	}
+	complex double eimf =  cexp(I*(mu-m)*kdlj.phi);
+	return sum * eimf;
+}
+
+complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c,
+								int m, int n, int mu, int nu, sph_t kdlj,
+								bool r_ge_d, qpms_bessel_t J,
+								complex double *bessel_buf, double *legendre_buf) {
+	// This functions gets preallocated memory for bessel and legendre functions, but computes them itself
 	if (r_ge_d) J = QPMS_BESSEL_REGULAR;
 	if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) 
 		// TODO warn? 
@ -463,22 +492,10 @@ complex double qpms_trans_calculator_get_B_buf(const qpms_trans_calculator *c,
 			{
 				double costheta = cos(kdlj.theta);
 				if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,
-																costheta,-1,legendre_buf)) abort();
+							costheta,-1,legendre_buf)) abort();
 				if (qpms_sph_bessel_array(J, n+nu+2, kdlj.r, bessel_buf)) abort();
-				size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
-				size_t qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1;
-				assert(qmax == gaunt_q_max(-m,n+1,mu,nu));
-				complex double sum = 0;
-				for(int q = 0; q <= qmax; ++q) {
-					int p = n+nu-2*q;
-					double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))];
-					complex double eimf = cexp(I * kdlj.phi);
-					complex double zp_ = bessel_buf[p+1];
-					complex double multiplier = c->B_multipliers[i][q];
-					sum += Pp_ * zp_ * multiplier;
-				}
-				complex double eimf =  cexp(I*(mu-m)*kdlj.phi);
-				return sum * eimf;
+				return qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
+						kdlj,r_ge_d,J,bessel_buf,legendre_buf);
 			}
 			break;
 		default:
@ -504,35 +521,12 @@ int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c,
 			{
 				double costheta = cos(kdlj.theta);
 				if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,n+nu+1,
-																costheta,-1,legendre_buf)) abort();
+							costheta,-1,legendre_buf)) abort();
 				if (qpms_sph_bessel_array(J, n+nu+2, kdlj.r, bessel_buf)) abort();
-				size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu);
-				size_t Qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - 1;
-				assert(Qmax == gaunt_q_max(-m,n+1,mu,nu));
-				complex double Bsum = 0;
-				for(int q = 0; q <= Qmax; ++q) {
-					int p = n+nu-2*q;
-					double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))];
-					complex double eimf = cexp(I * kdlj.phi);
-					complex double zp_ = bessel_buf[p+1];
-					complex double multiplier = c->B_multipliers[i][q];
-					Bsum += Pp_ * zp_ * multiplier;
-				}
-
-				size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1;
-				assert(qmax == gaunt_q_max(-m,n,mu,nu));
-				complex double Asum = 0;
-				for(size_t q = 0; q <= qmax; ++q) {
-					int p = n+nu-2*q;
-					double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))];
-					complex double zp = bessel_buf[p];
-					complex double multiplier = c->A_multipliers[i][q];
-					Asum += Pp * zp *  multiplier;
-				}
-				complex double eimf =  cexp(I*(mu-m)*kdlj.phi);
-
-				*Adest = Asum * eimf;
-				*Bdest = Bsum * eimf;
+				*Adest = qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
+						kdlj,r_ge_d,J,bessel_buf,legendre_buf);
+				*Bdest = qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
+						kdlj,r_ge_d,J,bessel_buf,legendre_buf);
 				return 0;
 			}
 			break;
@ -543,6 +537,53 @@ int qpms_trans_calculator_get_AB_buf_p(const qpms_trans_calculator *c,
 }


+
+
+int qpms_trans_calculator_get_AB_arrays_buf(const qpms_trans_calculator *c,
+		complex double *Adest, complex double *Bdest,
+		size_t deststride, size_t srcstride,
+		sph_t kdlj, bool r_ge_d, qpms_bessel_t J,
+		complex double *bessel_buf, double *legendre_buf) {
+	if (r_ge_d) J = QPMS_BESSEL_REGULAR;
+	if (0 == kdlj.r && J != QPMS_BESSEL_REGULAR) {
+		for (size_t i = 0; i < c->nelem; ++i)
+			for (size_t j = 0; j < c->nelem; ++j) {
+				*(Adest + i*srcstride + j*deststride) = NAN+I*NAN;
+				*(Bdest + i*srcstride + j*deststride) = NAN+I*NAN;
+			}
+		// TODO warn? different return value?
+		return 0;
+	}
+	switch(c->normalization) {
+		case QPMS_NORMALIZATION_TAYLOR:
+			{
+				double costheta = cos(kdlj.theta);
+				if (gsl_sf_legendre_array_e(GSL_SF_LEGENDRE_NONE,2*c->lMax+1,
+							costheta,-1,legendre_buf)) abort();
+				if (qpms_sph_bessel_array(J, 2*c->lMax+2, kdlj.r, bessel_buf)) abort();
+				size_t desti = 0, srci = 0;
+				for (int n = 1; n <= c->nelem; ++n) for (int m = -n; m <= n; ++m) {
+					for (int nu = 1; nu <= c->nelem; ++nu) for (int mu = -nu; mu <= nu; ++mu) {
+						assert(qpms_trans_calculator_index_mnmunu(c,m,n,mu,nu) == desti*c->nelem + srci);
+						*(Adest + deststride * desti + srcstride * srci) = 
+							qpms_trans_calculator_get_A_precalcbuf(c,m,n,mu,nu,
+									kdlj,r_ge_d,J,bessel_buf,legendre_buf);
+						*(Bdest + deststride * desti + srcstride * srci) = 
+							qpms_trans_calculator_get_B_precalcbuf(c,m,n,mu,nu,
+									kdlj,r_ge_d,J,bessel_buf,legendre_buf);
+						++srci;
+					}
+					++desti;
+				}
+				return 0;
+			}
+			break;
+		default:
+			abort();
+	}
+	assert(0);
+}
+
 complex double qpms_trans_calculator_get_A(const qpms_trans_calculator *c,
                int m, int n, int mu, int nu, sph_t kdlj,
                bool r_ge_d, qpms_bessel_t J) {
@ -565,12 +606,26 @@ int qpms_trans_calculator_get_AB_p(const qpms_trans_calculator *c,
 		complex double *Adest, complex double *Bdest,
                int m, int n, int mu, int nu, sph_t kdlj,
                bool r_ge_d, qpms_bessel_t J) {
-	double leg[gsl_sf_legendre_array_n(n+nu+1)];
-	complex double bes[n+nu+2];
+	double leg[gsl_sf_legendre_array_n(2*c->lMax+1)];
+	complex double bes[2*c->lMax+2];
 	return qpms_trans_calculator_get_AB_buf_p(c,Adest, Bdest,m,n,mu,nu,kdlj,r_ge_d,J,
 			bes,leg);
 }

+int qpms_trans_calculator_get_AB_arrays(const qpms_trans_calculator *c,
+		complex double *Adest, complex double *Bdest,
+		size_t deststride, size_t srcstride,
+		sph_t kdlj, bool r_ge_d, qpms_bessel_t J) {
+	double leg[gsl_sf_legendre_array_n(c->lMax+c->lMax+1)];
+	complex double bes[c->lMax+c->lMax+2];
+	return qpms_trans_calculator_get_AB_arrays_buf(c, 
+			Adest, Bdest, deststride, srcstride,
+			kdlj, r_ge_d, J, 
+			bes, leg);
+}
+
+
+
 complex double qpms_trans_calculator_get_A_ext(const qpms_trans_calculator *c,
                int m, int n, int mu, int nu,
 	       	double kdlj_r, double kdlj_theta, double kdlj_phi,
@ -596,3 +651,13 @@ int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator *c,
 	return qpms_trans_calculator_get_AB_p(c,Adest,Bdest,m,n,mu,nu,kdlj,r_ge_d,J);
 }

+int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c,
+		complex double *Adest, complex double *Bdest,
+		size_t deststride, size_t srcstride,
+		double kdlj_r, double kdlj_theta, double kdlj_phi,
+	 	int r_ge_d, int J) {
+	sph_t kdlj = {kdlj_r, kdlj_theta, kdlj_phi};
+	return qpms_trans_calculator_get_AB_arrays(c,Adest,Bdest,deststride,srcstride,
+						kdlj, r_ge_d, J);
+}
+
--- a/qpms/translations.h
+++ b/qpms/translations.h
@ -60,7 +60,6 @@ complex double qpms_trans_calculator_get_B_ext(const qpms_trans_calculator *c,
 		int m, int n, int mu, int nu, double kdlj_r,
 		double kdlj_th, double kdlj_phi, int r_ge_d, int J);

-
 int qpms_trans_calculator_get_AB_p(const qpms_trans_calculator *c,
 		complex double *Adest, complex double *Bdest,
 		int m, int n, int mu, int nu, sph_t kdlj,
@ -70,5 +69,15 @@ int qpms_trans_calculator_get_AB_p_ext(const qpms_trans_calculator *c,
 		int m, int n, int mu, int nu, double kdlj_r,
 		double kdlj_th, double kdlj_phi, int r_ge_d, int J);

+int qpms_trans_calculator_get_AB_arrays(const qpms_trans_calculator *c,
+		complex double *Adest, complex double *Bdest,
+		size_t deststride, size_t srcstride,
+		sph_t kdlj, bool r_ge_d, qpms_bessel_t J); 
+int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c,
+		complex double *Adest, complex double *Bdest,
+		size_t deststride, size_t srcstride,
+		double kdlj_r, double kdlj_theta, double kdlj_phi,
+		int r_ge_d, int J);
+	 

 #endif // QPMS_TRANSLATIONS_H