diff --git a/qpms/kahansum.h b/qpms/kahansum.h new file mode 100644 index 0000000..466c3dd --- /dev/null +++ b/qpms/kahansum.h @@ -0,0 +1,29 @@ +#ifndef KAHANSUM_H +#define KAHANSUM_H + +static inline void kahaninit(double *sum, double *compensation) { + sum = 0; + compensation = 0; +} + +static inline void kahaninc(double *sum, double *compensation, double input) { + double compensated_input = input - *compensation; + double nsum = *sum + compensated_input; + *compensation = (nsum - *sum) - compensated_input; + *sum = nsum; +} + + +static inline void ckahaninit(complex double *sum, complex double *compensation) { + sum = 0; + compensation = 0; +} + +static inline void ckahaninc(complex double *sum, complex double *compensation, complex double input) { + complex double compensated_input = input - *compensation; + complex double nsum = *sum + compensated_input; + *compensation = (nsum - *sum) - compensated_input; + *sum = nsum; +} + +#endif //KAHANSUM_H diff --git a/qpms/translations.c b/qpms/translations.c index ba93e9b..c6f2b03 100644 --- a/qpms/translations.c +++ b/qpms/translations.c @@ -7,6 +7,7 @@ #include #include #include "assert_cython_workaround.h" +#include "kahansum.h" #include //abort() #include @@ -165,7 +166,7 @@ complex double qpms_trans_single_A(qpms_normalisation_t norm, if (Pp_order < 0) Pp *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); complex double zp = bes[p]; complex double summandq = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n * zp * Pp; - sum += summandq; + sum += summandq; // TODO KAHAN } double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) @@ -211,7 +212,7 @@ complex double qpms_trans_single_A_Taylor(int m, int n, int mu, int nu, sph_t kd if (Pp_order < 0) Pp *= min1pow(mu-m) * exp(lgamma(1+p+Pp_order)-lgamma(1+p-Pp_order)); complex double zp = bes[p]; complex double summandq = (n*(n+1) + nu*(nu+1) - p*(p+1)) * min1pow(q) * a1q_n * zp * Pp; - sum += summandq; + sum += summandq; // TODO KAHAN } double exponent=(lgamma(2*n+1)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) @@ -270,7 +271,7 @@ complex double qpms_trans_single_B_Xu(int m, int n, int mu, int nu, sph_t kdlj, complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n -(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n) *min1pow(q) * zp_ * Pp_); - sum += summandq; + sum += summandq; // TODO KAHAN } double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) @@ -330,7 +331,7 @@ complex double qpms_trans_single_B(qpms_normalisation_t norm, complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n -(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n) *min1pow(q) * zp_ * Pp_); - sum += summandq; + sum += summandq; //TODO KAHAN } double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) @@ -391,7 +392,7 @@ complex double qpms_trans_single_B_Taylor(int m, int n, int mu, int nu, sph_t kd complex double summandq = ((2*(n+1)*(nu-mu)*a2q_n -(-nu*(nu+1) - n*(n+3) - 2*mu*(n+1)+p*(p+3))* a3q_n) *min1pow(q) * zp_ * Pp_); - sum += summandq; + sum += summandq; //TODO KAHAN } double exponent=(lgamma(2*n+3)-lgamma(n+2)+lgamma(2*nu+3)-lgamma(nu+2) @@ -812,13 +813,14 @@ static inline complex double qpms_trans_calculator_get_A_precalcbuf(const qpms_t size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); size_t qmax = c->A_multipliers[i+1] - c->A_multipliers[i] - 1; assert(qmax == gaunt_q_max(-m,n,mu,nu)); - complex double sum = 0; + complex double sum, kahanc; + ckahaninit(&sum, &kahanc); for(size_t q = 0; q <= qmax; ++q) { int p = n+nu-2*q; double Pp = legendre_buf[gsl_sf_legendre_array_index(p, abs(mu-m))]; complex double zp = bessel_buf[p]; complex double multiplier = c->A_multipliers[i][q]; - sum += Pp * zp * multiplier; + ckahaninc(&sum, &kahanc, Pp * zp * multiplier); } complex double eimf = cexp(I*(mu-m)*kdlj.phi); return sum * eimf; @@ -861,13 +863,14 @@ static inline complex double qpms_trans_calculator_get_B_precalcbuf(const qpms_t size_t i = qpms_trans_calculator_index_mnmunu(c, m, n, mu, nu); size_t qmax = c->B_multipliers[i+1] - c->B_multipliers[i] - (1 - BQ_OFFSET); assert(qmax == gauntB_Q_max(-m,n,mu,nu)); - complex double sum = 0; + complex double sum, kahanc; + kahaninit(&sum, &kahanc); for(int q = BQ_OFFSET; q <= qmax; ++q) { int p = n+nu-2*q; double Pp_ = legendre_buf[gsl_sf_legendre_array_index(p+1, abs(mu-m))]; complex double zp_ = bessel_buf[p+1]; complex double multiplier = c->B_multipliers[i][q-BQ_OFFSET]; - sum += Pp_ * zp_ * multiplier; + ckahaninc(&sum, &kahanc, Pp_ * zp_ * multiplier); } complex double eimf = cexp(I*(mu-m)*kdlj.phi); return sum * eimf; diff --git a/qpms/vectors.h b/qpms/vectors.h index af980fc..88bea09 100644 --- a/qpms/vectors.h +++ b/qpms/vectors.h @@ -34,6 +34,11 @@ static inline cart3_t cart3_add(const cart3_t a, const cart3_t b) { return res; } +static inline cart3_t cart3_substract(const cart3_t a, const cart3_t b) { + cart3_t res = {a.x-b.x, a.y-b.y, a.z-b.z}; + return res; +} + static inline cart3_t cart3_scale(const double c, const cart3_t v) { cart3_t res = {c * v.x, c * v.y, c * v.z}; return res; @@ -49,6 +54,11 @@ static inline csphvec_t csphvec_add(const csphvec_t a, const csphvec_t b) { return res; } +static inline csphvec_t csphvec_substract(const csphvec_t a, const csphvec_t b) { + csphvec_t res = {a.rc - b.rc, a.thetac - b.thetac, a.phic - b.phic}; + return res; +} + static inline csphvec_t csphvec_scale(complex double c, const csphvec_t v) { csphvec_t res = {c * v.rc, c * v.thetac, c * v.phic}; return res; @@ -112,4 +122,30 @@ void print_ccart3(ccart3_t); void print_cart3(cart3_t); void print_sph(sph_t); +// kahan sums for various types... TODO make generic code using macros + +static inline void ccart3_kahaninit(ccart3_t *sum, ccart3_t *compensation) { + sum->x = sum->y = sum->z = compensation->x = compensation->y = compensation->z = 0; +} +static inline void csphvec_kahaninit(csphvec_t *sum, csphvec_t *compensation) { + sum->rc = sum->thetac = sum->phic = compensation->rc = compensation->thetac = compensation->phic = 0; +} + +static inline void ccart3_kahanadd(ccart3_t *sum, ccart3_t *compensation, const *ccart3_t input) { + ccart3_t comped_input = ccart3_substract(*input, *compensation); + ccart3_t nsum = ccart3_add(*sum, comped_input); + *compensation = ccart3_substract(ccart3_substract(nsum, *sum), comped_input); + *sum = nsum; +} + + + + +static inline void csphvec_kahanadd(csphvec_t *sum, csphvec_t *compensation, const csphvec_t *input) { + csphvec_t comped_input = csphvec_substract(*input, *compensation); + csphvec_t nsum = csphvec_add(*sum, comped_input); + *compensation = csphvec_substract(csphvec_substract(nsum, *sum), comped_input); + *sum = nsum; +} + #endif //VECTORS_H