#include "ewald.h"
#include <stdlib.h>
#include "indexing.h"
#include "kahansum.h"
#include <assert.h>
#include <string.h>
#include <complex.h>
#include <math.h>   // pow, fabs, M_PI_2, INFINITY, ...
#include <float.h>  // DBL_EPSILON
#include "tiny_inlines.h"
#include "qpms_error.h"
#include <gsl/gsl_integration.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_sf_legendre.h>
#include <gsl/gsl_sf_expint.h>

// parameters for the quadrature of integral in (4.6)
#ifndef INTEGRATION_WORKSPACE_LIMIT
#define INTEGRATION_WORKSPACE_LIMIT 30000
#endif
#ifndef INTEGRATION_EPSABS
#define INTEGRATION_EPSABS 1e-13
#endif
#ifndef INTEGRATION_EPSREL
#define INTEGRATION_EPSREL 1e-13
#endif

#ifndef M_SQRTPI
#define M_SQRTPI 1.7724538509055160272981674833411452
#endif

#define MAX(x,y) ((x) < (y) ? (y) : (x))
#define MIN(x,y) ((x) < (y) ? (x) : (y))

// sloppy implementation of factorial
static inline double factorial(const int n) {
  assert(n >= 0);
  if (n < 0)
    return 0; // should not happen in the functions below (hence the assert above)
  else if (n <= 20) {
    double fac = 1;
    for (int i = 1; i <= n; ++i)
      fac *= i;
    return fac;
  }
  else
    return tgamma(n + 1); // hope it's precise and that overflow does not happen
}

static inline complex double csq(complex double x) { return x * x; }
static inline double sq(double x) { return x * x; }

2019-06-27 16:42:35 +03:00
/// Metadata describing the normalisation conventions used in ewald32_constants_t.
2018-09-05 09:07:03 +03:00
typedef enum {
EWALD32_CONSTANTS_ORIG , // As in [1, (4,5)], NOT USED right now.
EWALD32_CONSTANTS_AGNOSTIC /* Not depending on the spherical harmonic sign/normalisation
* convention – the $ e ^ { im \ alpha_pq } $ term in [ 1 , ( 4.5 ) ] being
* replaced by the respective $ Y_n ^ m ( \ pi / 2 , \ alpha ) $
* spherical harmonic . See notes / ewald . lyx .
*/
2018-12-21 21:31:07 +02:00
} ewald3_constants_option ;
2018-09-05 09:07:03 +03:00
2018-12-21 21:31:07 +02:00
static const ewald3_constants_option type = EWALD32_CONSTANTS_AGNOSTIC ;
2018-09-05 09:07:03 +03:00
2018-12-21 21:31:07 +02:00
qpms_ewald3_constants_t *qpms_ewald3_constants_init(const qpms_l_t lMax /*, const ewald3_constants_option type */,
    const int csphase)
{
  qpms_ewald3_constants_t *c = malloc(sizeof(qpms_ewald3_constants_t));
  //if (c == NULL) return NULL; // Do I really want to do this?
  c->lMax = lMax;
  c->nelem_sc = qpms_lMax2nelem_sc(lMax);
  c->s1_jMaxes = malloc(c->nelem_sc * sizeof(qpms_l_t));
  c->s1_constfacs = malloc(c->nelem_sc * sizeof(complex double *));
  //if (c->s1_jMaxes == NULL) return NULL;

  // ----- the "xy-plane constants" ------
  // determine sizes
  size_t s1_constfacs_sz = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0)
      s1_constfacs_sz += 1 + (c->s1_jMaxes[y] = (n - abs(m)) / 2);
    else
      c->s1_jMaxes[y] = -1;
  }

  c->s1_constfacs_base = malloc(s1_constfacs_sz * sizeof(complex double));
  size_t s1_constfacs_sz_cumsum = 0;
  for (qpms_y_t y = 0; y < c->nelem_sc; ++y) {
    qpms_l_t n; qpms_m_t m; qpms_y2mn_sc_p(y, &m, &n);
    if ((m + n) % 2 == 0) {
      c->s1_constfacs[y] = c->s1_constfacs_base + s1_constfacs_sz_cumsum;
      // and here comes the actual calculation
      for (qpms_l_t j = 0; j <= c->s1_jMaxes[y]; ++j) {
        switch (type) {
          case EWALD32_CONSTANTS_ORIG: // NOT USED
            c->s1_constfacs[y][j] = -0.5 * ipow(n + 1) * min1pow((n + m) / 2)
              * sqrt((2 * n + 1) * factorial(n - m) * factorial(n + m))
              * min1pow(j) * pow(0.5, n - 2 * j)
              / (factorial(j) * factorial((n - m) / 2 - j) * factorial((n + m) / 2 - j))
              * pow(0.5, 2 * j - 1);
            break;
          case EWALD32_CONSTANTS_AGNOSTIC:
            c->s1_constfacs[y][j] = -2 * ipow(n + 1) * M_SQRTPI
              * factorial((n - m) / 2) * factorial((n + m) / 2)
              * min1pow(j)
              / (factorial(j) * factorial((n - m) / 2 - j) * factorial((n + m) / 2 - j));
            break;
          default:
            abort();
        }
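        /* For reference, the AGNOSTIC factors computed above are (a restatement of the expression
         * above, in the same style as the s1_constfacs_1Dz comment further below):
         *
         *                        -2 * I**(n+1) * sqrt(pi) * ((n-m)/2)! * ((n+m)/2)! * (-1)**j
         *   s1_constfacs[y][j] = ------------------------------------------------------------
         *                                 j! * ((n-m)/2 - j)! * ((n+m)/2 - j)!
         */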
      }
      s1_constfacs_sz_cumsum += 1 + c->s1_jMaxes[y];
    }
    else
      c->s1_constfacs[y] = NULL;
  }

  // ------ the "z-axis constants" -----
  // determine sizes
  size_t s1_constfacs_1Dz_sz;
  {
    const qpms_l_t sz_n_lMax = lMax / 2 + 1; // number of different j's for n = lMax
    s1_constfacs_1Dz_sz = (lMax % 2) ? isq(sz_n_lMax) + sz_n_lMax
                                     : isq(sz_n_lMax);
  }
  c->s1_constfacs_1Dz_base = malloc(s1_constfacs_1Dz_sz * sizeof(complex double));
  c->s1_constfacs_1Dz = malloc((lMax + 1) * sizeof(complex double *));

  size_t s1_constfacs_1Dz_sz_cumsum = 0;
  for (qpms_l_t n = 0; n <= lMax; ++n) {
    c->s1_constfacs_1Dz[n] = c->s1_constfacs_1Dz_base + s1_constfacs_1Dz_sz_cumsum;
    for (qpms_l_t j = 0; j <= n / 2; ++j) {
      switch (type) {
        case EWALD32_CONSTANTS_AGNOSTIC:
          c->s1_constfacs_1Dz[n][j] = -ipow(n + 1) * min1pow(j) * factorial(n)
            / (factorial(j) * pow(2, 2 * j) * factorial(n - 2 * j));
          break;
        default:
          abort(); // wrong type argument or not implemented
      }
    }
    s1_constfacs_1Dz_sz_cumsum += 1 + n / 2;
  }
  assert(s1_constfacs_1Dz_sz == s1_constfacs_1Dz_sz_cumsum);

  c->legendre_csphase = csphase;
  c->legendre0 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_plus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  c->legendre_minus1 = malloc(gsl_sf_legendre_array_n(lMax) * sizeof(double));
  // N.B. here I use GSL_SF_LEGENDRE_NONE, in order to be consistent with translations.c
  c->legendre_normconv = GSL_SF_LEGENDRE_NONE;
  // Moreover, using this approach (i.e. gsl) takes about 64kB extra memory
  if (GSL_SUCCESS != gsl_sf_legendre_array_e(c->legendre_normconv, lMax, 0, csphase, c->legendre0))
    abort();
  if (GSL_SUCCESS != gsl_sf_legendre_array_e(c->legendre_normconv, lMax, +1, csphase, c->legendre_plus1))
    abort();
  if (GSL_SUCCESS != gsl_sf_legendre_array_e(c->legendre_normconv, lMax, -1, csphase, c->legendre_minus1))
    abort();
  return c;
}

void qpms_ewald3_constants_free(qpms_ewald3_constants_t *c) {
  free(c->legendre0);
  free(c->legendre_plus1);
  free(c->legendre_minus1);
  free(c->s1_constfacs);
  free(c->s1_constfacs_base);
  free(c->s1_constfacs_1Dz_base);
  free(c->s1_constfacs_1Dz);
  free(c->s1_jMaxes);
  free(c);
}
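
/* Usage sketch (an illustration of how the pieces in this file fit together, not a normative
 * example): the constants are computed once for a given lMax and csphase (the Condon-Shortley
 * phase factor passed on to GSL, i.e. +1 or -1) and then shared by the sigma functions below:
 *
 *   qpms_ewald3_constants_t *c = qpms_ewald3_constants_init(lMax, +1);
 *   // ... ewald3_sigma0(...), ewald3_sigma_long(...), ewald3_sigma_short(...) ...
 *   qpms_ewald3_constants_free(c);
 */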

int ewald3_sigma0(complex double *result, double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k)
{
  qpms_csf_result gam;
  complex double z = -csq(k / (2 * eta));
  int retval = complex_gamma_inc_e(-0.5, z,
      // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
      QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, &gam);
  if (0 != retval)
    abort();
  *result = gam.val * c->legendre0[gsl_sf_legendre_array_index(0, 0)] / 2 / M_SQRTPI;
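  // (With GSL_SF_LEGENDRE_NONE, legendre0[gsl_sf_legendre_array_index(0, 0)] is just P_0(0) = 1,
  // so this amounts to Gamma(-1/2, -k^2/(4 eta^2)) / (2 sqrt(pi)).)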
  if (err)
    *err = gam.err * fabs(c->legendre0[gsl_sf_legendre_array_index(0, 0)] / 2 / M_SQRTPI);
  return 0;
}

// Wrapper over cpow to correctly handle the k->0 behaviour
static inline complex double cpow_0lim_zi(complex double x, long z)
{
  if (x == 0 && z == 0)
    return 1;
  else if (x == 0 && z > 0) // lol branch cut in cpow
    return 0;
  else
    return cpow(x, z);
}
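// (Hence cpow_0lim_zi(0, 0) == 1 and cpow_0lim_zi(0, z) == 0 for z > 0, instead of whatever
// cpow itself would produce via clog(0) in those corner cases.)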

int ewald3_21_xy_sigma_long(
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0);
  assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
  assert((latdim & LAT1D) || (latdim & LAT2D));
  const qpms_y_t nelem_sc = c->nelem_sc;
  assert(nelem_sc > 0);
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental: stop the evaluation if the relative contribution of the "last layer"
   * is below DBL_EPSILON, to speed up the calculation.
   * This obviously can currently work only for "suitable PGen"s that generate points in
   * layers with increasing distance from the origin.
   * Currently, the "layers" are chunks of 12 * lw + 1 points, where lw is an
   * increasing integer index (starting from 1).
   * TODO: define the layers (optionally) in the PGen metadata and switch this feature on
   * during run time instead of using this EWALD_AUTO_CUTOFF macro.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif

  const complex double commonfac = 1 / (k * k * unitcell_volume); // used in the very end (CFC)
  if (k_is_real)
    assert(creal(commonfac) > 0);

  PGenSphReturnData pgen_retdata;
#ifndef NDEBUG
  double rbeta_pq_prev;
#endif
  // recyclable values if rbeta_pq stays the same:
  complex double gamma_pq;
  complex double z;
  complex double factor1d = 1; // the "additional" factor for the 1D case (then it is not 1)
  // space for Gamma_pq[j]'s
  qpms_csf_result Gamma_pq[lMax / 2 + 1];

  // CHOOSE POINT BEGIN
  // TODO maybe PGen_next_sph is not the best coordinate system choice here
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    cart3_t K_pq_cart;
    sph_t beta_pq_sph;
    if (pgen_generates_shifted_points) {
      beta_pq_sph = pgen_retdata.point_sph;
      const cart3_t beta_pq_cart = sph2cart(beta_pq_sph);
      K_pq_cart = cart3_add(cart3_scale(-1, beta), beta_pq_cart);
    } else { // as in the old _points_and_shift functions
      const sph_t K_pq_sph = pgen_retdata.point_sph;
      K_pq_cart = sph2cart(K_pq_sph);
      const cart3_t beta_pq_cart = cart3_add(K_pq_cart, beta);
      beta_pq_sph = cart2sph(beta_pq_cart);
    }
    const double rbeta_pq = beta_pq_sph.r;
    const double arg_pq = beta_pq_sph.phi;
    //const double beta_pq_theta = beta_pq_sph.theta; //unused
    // CHOOSE POINT END

    const complex double phasefac = cexp(I * cart3_dot(K_pq_cart, particle_shift)); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
    const bool new_rbeta_pq = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_rbeta_pq) assert(rbeta_pq == rbeta_pq_prev);

    // R-DEPENDENT BEGIN
    if (new_rbeta_pq) {
      gamma_pq = clilgamma(rbeta_pq / k);
      z = csq(gamma_pq * k / (2 * eta));
      for (qpms_l_t j = 0; j <= lMax / 2; ++j) {
        int retval = complex_gamma_inc_e(0.5 - j, z,
            // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
            QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq + j);
        if (!(retval == 0 || retval == GSL_EUNDRFLW)) abort();
      }
      if (latdim & LAT1D)
        factor1d = M_SQRT1_2 * .5 * k * gamma_pq;
    }
    // R-DEPENDENT END
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n, m pair
    for (qpms_l_t n = 0; n <= lMax; ++n)
      for (qpms_m_t m = -n; m <= n; ++m) {
        if ((m + n) % 2 != 0) // odd coefficients are zero.
          continue;
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        const complex double e_imalpha_pq = cexp(I * m * arg_pq);
        complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
        double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
        assert((n - abs(m)) / 2 == c->s1_jMaxes[y]);
        for (qpms_l_t j = 0; j <= c->s1_jMaxes[y] /*(n-abs(m))/2*/; ++j) { // FIXME </<= ?
          complex double summand = cpow_0lim_zi(rbeta_pq / k, n - 2 * j)
            * e_imalpha_pq * c->legendre0[gsl_sf_legendre_array_index(n, abs(m))] * min1pow_m_neg(m) // This line can actually go outside the j-loop
            * cpow(gamma_pq, 2 * j - 1) // * Gamma_pq[j] below (GGG) after error computation
            * c->s1_constfacs[y][j];
          if (err) {
            // FIXME include also other errors than Gamma_pq's relative error
            kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
          }
          summand *= Gamma_pq[j].val; // GGG
          ckahanadd(&jsum, &jsum_c, summand);
        }
        jsum *= phasefac * factor1d; // PFC
        ckahanadd(target + y, target_c + y, jsum);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(jsum));
#endif
        if (err) kahanadd(err + y, err_c + y, jsum_err);
      }
#ifndef NDEBUG
    rbeta_pq_prev = rbeta_pq;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on a per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if (lsum < DBL_EPSILON * cursum_min)
        goto ewald3_21_xy_sigma_long_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_21_xy_sigma_long_end_point_loop:
#endif

  free(err_c);
  free(target_c);
  for (qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if (err)
    for (qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}

// 1D chain along the z-axis; not many optimisations here, as the same
// shifted beta radius could be recycled only once anyway
int ewald3_1_z_sigma_long(
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* length (periodicity) in this case */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(LatticeDimensionality_checkflags(latdim, LAT_1D_IN_3D_ZONLY));
  assert(beta.x == 0 && beta.y == 0);
  assert(particle_shift.x == 0 && particle_shift.y == 0);
  const double beta_z = beta.z;
  const double particle_shift_z = particle_shift.z;
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;

  // Manual init of the ewald summation targets
  complex double *target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

  const double commonfac = 1 / (k * unitcell_volume); // multiplied in the very end (CFC)
  assert(commonfac > 0);
  // space for Gamma_pq[j]'s (I rewrite the exp. ints. E_n in terms of Gamma fns., cf. my ewald.lyx notes, (eq:1D_z_LRsum)).
  qpms_csf_result Gamma_pq[lMax / 2 + 1];
  PGenSphReturnData pgen_retdata;

  // CHOOSE POINT BEGIN
  // TODO FIXME USE PGen_next_z
  while ((pgen_retdata = PGen_next_sph(pgen_K)).flags & PGEN_NOTDONE) { // BEGIN POINT LOOP
    assert(pgen_retdata.flags & PGEN_AT_Z);
    double K_z, beta_mu_z;
    if (pgen_generates_shifted_points) {
      beta_mu_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); //!!!CHECKSIGN!!!
      K_z = beta_mu_z - beta_z;
    } else { // as in the old _points_and_shift functions
      K_z = ((pgen_retdata.point_sph.theta == 0) ?
          pgen_retdata.point_sph.r : -pgen_retdata.point_sph.r); // !!!CHECKSIGN!!!
      beta_mu_z = K_z + beta_z;
    }
    double rbeta_mu = fabs(beta_mu_z);
    // CHOOSE POINT END

    const complex double phasefac = cexp(I * K_z * particle_shift_z); // POINT-DEPENDENT (PFC) // !!!CHECKSIGN!!!
    // R-DEPENDENT BEGIN
    complex double gamma_pq = clilgamma(rbeta_mu / k); // For real beta and k this is real or pure imaginary ...
    const complex double z = csq(gamma_pq * k / (2 * eta)); // ... so the square (this) is in fact real.
    for (qpms_l_t j = 0; j <= lMax / 2; ++j) {
      int retval = complex_gamma_inc_e(0.5 - j, z,
          // we take the branch which is principal for the Re z < 0, Im z < 0 quadrant, cf. [Linton, p. 642 in the middle]
          QPMS_LIKELY(creal(z) < 0) && !signbit(cimag(z)) ? -1 : 0, Gamma_pq + j);
      if (!(retval == 0 || retval == GSL_EUNDRFLW)) abort();
    }
    // R-DEPENDENT END
    // TODO optimisations: all the j-dependent powers can be done for each j only once, stored in array
    // and just fetched for each n
    for (qpms_l_t n = 0; n <= lMax; ++n) {
      const qpms_y_t y = qpms_mn2y_sc(0, n);
      complex double jsum, jsum_c; ckahaninit(&jsum, &jsum_c);
      double jsum_err, jsum_err_c; kahaninit(&jsum_err, &jsum_err_c); // TODO do I really need to kahan sum errors?
      for (qpms_l_t j = 0; j <= n / 2; ++j) {
        complex double summand = pow(rbeta_mu / k, n - 2 * j)
          * ((beta_mu_z > 0) ? // TODO this can go outside the j-loop
              c->legendre_plus1[gsl_sf_legendre_array_index(n, 0)] :
              (c->legendre_minus1[gsl_sf_legendre_array_index(n, 0)] * min1pow(n))
            )
          // * min1pow_m_neg(m) // not needed as m == 0
          * cpow(gamma_pq, 2 * j) // * Gamma_pq[j] below (GGG) after error computation
          * c->s1_constfacs_1Dz[n][j];
        /* s1_constfacs_1Dz[n][j] =
         *
         *     -I**(n+1) * (-1)**j * n!
         *   ----------------------------
         *    j! * 2**(2*j) * (n - 2*j)!
         */
        if (err) {
          // FIXME include also other errors than Gamma_pq's relative error
          kahanadd(&jsum_err, &jsum_err_c, Gamma_pq[j].err * cabs(summand));
        }
        summand *= Gamma_pq[j].val; // GGG
        ckahanadd(&jsum, &jsum_c, summand);
      }
      jsum *= phasefac; // PFC
      ckahanadd(target + y, target_c + y, jsum);
      if (err) kahanadd(err + y, err_c + y, jsum_err);
    }
  } // END POINT LOOP

  free(err_c);
  free(target_c);
  for (qpms_y_t y = 0; y < nelem_sc; ++y) // CFC common factor from above
    target[y] *= commonfac;
  if (err)
    for (qpms_y_t y = 0; y < nelem_sc; ++y)
      err[y] *= commonfac;
  return 0;
}

int ewald3_sigma_long(
    complex double *target, // must be c->nelem_sc long
    double *err,
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k,
    const double unitcell_volume /* with the corresponding lattice dimensionality */,
    const LatticeDimensionality latdim,
    PGen *pgen_K, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_long_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted reciprocal Bravais lattice,
     * and adds beta to the generated points before calculations.
     * If true, it assumes that they are already shifted.
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  assert(latdim & SPACE3D);
  if (latdim & LAT_XYONLY)
    return ewald3_21_xy_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  else if (latdim & LAT_ZONLY)
    return ewald3_1_z_sigma_long(target, err, c, eta, k, unitcell_volume,
        latdim, pgen_K, pgen_generates_shifted_points, beta, particle_shift);
  // TODO 3D case and general 2D case
  else QPMS_NOT_IMPLEMENTED("3D or general 2D (outside XY plane) Ewald sum.");
}
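
/* Note on the dispatch above (a summary, not new behaviour): the specialisation is chosen purely
 * from the LAT_XYONLY / LAT_ZONLY bits of latdim, and unitcell_volume is interpreted in the
 * corresponding dimensionality; e.g. for the 1D chain along z it is the period length, cf. the
 * parameter comments of the two functions above.
 */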

struct sigma2_integrand_params {
  int n;
  double k, R;
};

static double sigma2_integrand(double ksi, void *params) {
  struct sigma2_integrand_params *p = (struct sigma2_integrand_params *) params;
  return exp(-sq(p->R * ksi) + sq(p->k / ksi / 2)) * pow(ksi, 2 * p->n);
}

static int ewald32_sr_integral(double r, double k, int n, double eta,
    double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params p;
  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1
  gsl_function F;
  F.function = sigma2_integrand;
  F.params = &p;
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, result, err);
  double normfac = pow(R0, -2 * p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}
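
/* For reference, the integral evaluated above (and in the complex-k version below; cf. the
 * "(4.6)" note at the top of this file) is
 *
 *   \int_eta^\infty exp(-(r xi)^2 + (k/(2 xi))^2) xi^(2n) dxi,
 *
 * computed with the integration variable rescaled by R0 = r (hence eta *= R0, p.k = k*R0,
 * p.R = r/R0, and the final multiplication by R0^(-2n-1)).
 */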

// a version allowing complex k
struct sigma2_integrand_params_ck {
  int n;
  complex double k;
  double R;
};

// TODO there might be some space for optimisation if needed, as now we calculate everything twice
// including the whole complex exponential (throwing the imaginary/real part away)
static double sigma2_integrand_ck_real(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return creal(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2 * p->n);
}
static double sigma2_integrand_ck_imag(double ksi, void *params) {
  struct sigma2_integrand_params_ck *p = (struct sigma2_integrand_params_ck *) params;
  return cimag(cexp(-csq(p->R * ksi) + csq(p->k / ksi / 2))) * pow(ksi, 2 * p->n);
}

static int ewald32_sr_integral_ck(double r, complex double k, int n, double eta,
    complex double *result, double *err, gsl_integration_workspace *workspace)
{
  struct sigma2_integrand_params_ck p;
  const double R0 = r; // Maybe could be chosen otherwise, but fuck it for now.
  p.n = n;
  eta *= R0;
  p.k = k * R0;
  p.R = r / R0; // i.e. p.R = 1
  gsl_function F;
  F.params = &p;
  double result_real, result_imag, err_real, err_imag;

  F.function = sigma2_integrand_ck_real;
  // TODO check return values
  int retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_real, &err_real);

  F.function = sigma2_integrand_ck_imag;
  // TODO check return values
  retval = gsl_integration_qagiu(&F, eta, INTEGRATION_EPSABS,
      INTEGRATION_EPSREL, INTEGRATION_WORKSPACE_LIMIT,
      workspace, &result_imag, &err_imag);

  *result = result_real + I * result_imag;
  *err = sqrt(sq(err_real) + sq(err_imag));
  double normfac = pow(R0, -2 * p.n - 1);
  *result *= normfac;
  *err *= normfac;
  return retval;
}

int ewald3_sigma_short(
    complex double *target, // must be c->nelem_sc long
    double *err, // must be c->nelem_sc long or NULL
    const qpms_ewald3_constants_t *c,
    const double eta, const complex double k /* TODO COMPLEX */,
    const LatticeDimensionality latdim, // apart from asserts and possible optimisations ignored, as the SR formula stays the same
    PGen *pgen_R, const bool pgen_generates_shifted_points
    /* If false, the behaviour corresponds to the old ewald32_sigma_short_points_and_shift,
     * so the function assumes that the generated points correspond to the unshifted Bravais lattice,
     * and adds particle_shift to the generated points before calculations.
     * If true, it assumes that they are already shifted (if calculating interaction between
     * different particles in the unit cell).
     */,
    const cart3_t beta,
    const cart3_t particle_shift
    )
{
  const bool k_is_real = (cimag(k) == 0); // TODO check how the compiler optimises the loops
  const double kreal = creal(k);
  const qpms_y_t nelem_sc = c->nelem_sc;
  const qpms_l_t lMax = c->lMax;
  gsl_integration_workspace *workspace =
    gsl_integration_workspace_alloc(INTEGRATION_WORKSPACE_LIMIT);

  double legendre_buf[gsl_sf_legendre_array_n(c->lMax)]; // work space for the Legendre polynomials (used only in the general case)

  // Manual init of the ewald summation targets
  complex double * const target_c = calloc(nelem_sc, sizeof(complex double));
  memset(target, 0, nelem_sc * sizeof(complex double));
  double *err_c = NULL;
  if (err) {
    err_c = calloc(nelem_sc, sizeof(double));
    memset(err, 0, nelem_sc * sizeof(double));
  }

#ifdef EWALD_AUTO_CUTOFF
  /*
   * Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
   */
  size_t lw = 1; // "Layer" index.
  size_t li = 0; // Counter of points inside the current "layer".
  double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
  kahaninit(&lsum, &lsum_c);
#endif
2018-11-21 19:49:07 +02:00
PGenSphReturnData pgen_retdata ;
# ifndef NDEBUG
double r_pq_shifted_prev ;
# endif
// recyclable variables if r_pq_shifted stays the same:
double intres [ lMax + 1 ] , interr [ lMax + 1 ] ;
2018-12-21 21:05:52 +02:00
complex double cintres [ lMax + 1 ] ;
2018-11-21 19:49:07 +02:00
// CHOOSE POINT BEGIN
2018-12-11 13:09:38 +02:00
// TODO check whether _next_sph is the optimal coordinate system choice here
while ( ( pgen_retdata = PGen_next_sph ( pgen_R ) ) . flags & PGEN_NOTDONE ) { // BEGIN POINT LOOP
2018-11-21 19:49:07 +02:00
// CHOOSE POINT END
cart3_t Rpq_shifted_cart ; // I will need both sph and cart representations anyway...
sph_t Rpq_shifted_sph ;
if ( pgen_generates_shifted_points ) {
2018-11-21 21:41:20 +02:00
Rpq_shifted_sph = pgen_retdata . point_sph ;
Rpq_shifted_cart = sph2cart ( Rpq_shifted_sph ) ;
2018-11-21 19:49:07 +02:00
} else { // as in the old _points_and_shift functions
//const point2d Rpq_shifted = Rpoints_plus_particle_shift[i];
const sph_t bravais_point_sph = pgen_retdata . point_sph ;
const cart3_t bravais_point_cart = sph2cart ( bravais_point_sph ) ;
Rpq_shifted_cart = cart3_add ( bravais_point_cart , cart3_scale ( - 1 , particle_shift ) ) ; // CHECKSIGN!!!
Rpq_shifted_sph = cart2sph ( Rpq_shifted_cart ) ;
}
// TODO eliminate and use the Rpq_shifted_sph members directly (but in compiler optimizations we trust)
const double Rpq_shifted_arg = Rpq_shifted_sph . phi ; //atan2(Rpq_shifted.y, Rpq_shifted.x); // POINT-DEPENDENT
const double Rpq_shifted_theta = Rpq_shifted_sph . theta ; // POINT-DEPENDENT
const double r_pq_shifted = Rpq_shifted_sph . r ;
// if the radius is the same as in previous cycle, most of the calculations can be recycled
    const bool new_r_pq_shifted = (!pgen_generates_shifted_points) || !(pgen_retdata.flags & PGEN_OLD_R);
    if (!new_r_pq_shifted) assert(r_pq_shifted_prev == r_pq_shifted);
    const complex double e_beta_Rpq = cexp(I * cart3_dot(beta, Rpq_shifted_cart)); // POINT-DEPENDENT

    LatticeDimensionality speedup_regime = 0;
    if ((latdim & LAT_2D_IN_3D_XYONLY) == LAT_2D_IN_3D_XYONLY) speedup_regime = LAT_2D_IN_3D_XYONLY;
    if ((latdim & LAT_1D_IN_3D_ZONLY) == LAT_1D_IN_3D_ZONLY) speedup_regime = LAT_1D_IN_3D_ZONLY;

    const double *legendre_array;
    switch (speedup_regime) {
      // speedup checks for special geometries and Legendre polynomials
      case LAT_1D_IN_3D_ZONLY:
        assert((pgen_retdata.flags & PGEN_AT_Z) == PGEN_AT_Z);
        assert(Rpq_shifted_theta == M_PI || Rpq_shifted_theta == 0);
        legendre_array = (Rpq_shifted_theta == 0) ? c->legendre_plus1 : c->legendre_minus1; // CHECKSIGN
        break;
      case LAT_2D_IN_3D_XYONLY:
        assert((pgen_retdata.flags & PGEN_AT_XY) == PGEN_AT_XY);
        assert(fabs(Rpq_shifted_theta - M_PI_2) < DBL_EPSILON * 1024);
        // assert(Rpq_shifted_theta == M_PI_2); // FIXME this should work as well
        legendre_array = c->legendre0;
        break;
      default:
        if (GSL_SUCCESS != gsl_sf_legendre_array_e(c->legendre_normconv, lMax, cos(Rpq_shifted_theta), c->legendre_csphase, legendre_buf))
          abort();
        legendre_array = legendre_buf;
        break;
    }

    for (qpms_l_t n = 0; n <= lMax; ++n) {
      const double complex prefacn = -I * (k_is_real ? pow(2. / creal(k), n + 1) : cpow(2. / k, n + 1)) * M_2_SQRTPI / 2; // profiling TODO put outside the R-loop and multiply in the end?
      const double R_pq_pown = pow(r_pq_shifted, n); // profiling TODO: maybe put this into the new_r_pq_shifted condition as well?
      if (new_r_pq_shifted) {
        int retval;
        if (k_is_real) {
          double intres_real;
          retval = ewald32_sr_integral(r_pq_shifted, kreal, n, eta,
              &intres_real, interr + n, workspace);
          cintres[n] = intres_real;
        } else
          retval = ewald32_sr_integral_ck(r_pq_shifted, k, n, eta,
              cintres + n, interr + n, workspace);
        if (retval) abort();
      } // otherwise recycle the integrals
      for (qpms_m_t m = -n; m <= n; ++m) {
        complex double e_imf;
        // SPEEDUPS for some special geometries
        if (speedup_regime == LAT_2D_IN_3D_XYONLY) { // 2D lattice inside the xy plane
          if ((m + n) % 2 != 0) // odd coefficients are zero.
            continue; // nothing needed, already done by memset
          e_imf = cexp(I * m * Rpq_shifted_arg); // profiling TODO: calculate outside the n-loop?
        } else if (speedup_regime == LAT_1D_IN_3D_ZONLY) { // 1D lattice along the z axis
          if (m != 0) // m-non-zero coefficients are zero
            continue; // nothing needed, already done by memset
          e_imf = 1;
        } else { // general 1D/2D/3D lattice in 3D space
          e_imf = cexp(I * m * Rpq_shifted_arg);
        }
        const double leg = legendre_array[gsl_sf_legendre_array_index(n, abs(m))];
        const qpms_y_t y = qpms_mn2y_sc(m, n);
        if (err)
          kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
              * interr[n])); // TODO include also other errors
        complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
        ckahanadd(target + y, target_c + y, thesummand);
#ifdef EWALD_AUTO_CUTOFF
        kahanadd(&lsum, &lsum_c, cabs(thesummand));
#endif
      }
    }
#ifndef NDEBUG
    r_pq_shifted_prev = r_pq_shifted;
#endif
#ifdef EWALD_AUTO_CUTOFF
    ++li;
    double cursum_min = INFINITY;
    for (qpms_y_t y = 0; y < nelem_sc; ++y) {
      const double a = cabs(target[y]);
      if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in a completely different way, perhaps on a per-y basis
        cursum_min = MIN(cursum_min, a);
    }
    if (li >= 12 * lw + 1) {
      if (lsum < DBL_EPSILON * cursum_min)
        goto ewald3_sigma_short_end_point_loop;
      kahaninit(&lsum, &lsum_c);
      li = 0;
      ++lw;
    }
#endif
  } // END POINT LOOP
#ifdef EWALD_AUTO_CUTOFF
ewald3_sigma_short_end_point_loop:
#endif

  gsl_integration_workspace_free(workspace);
  if (err) free(err_c);
  free(target_c);
  return 0;
}