Experimental speedup by earlier cutoff for Ewald summation.
TODO check whether PGen is properly destroyed Former-commit-id: db2c64292b9c1bd80e97255bf71e570385b409a4
This commit is contained in:
parent
67f93e461c
commit
7e74cb100c
|
@ -3,8 +3,6 @@ find_package(GSL 2.0 REQUIRED)
|
||||||
find_package(BLAS REQUIRED)
|
find_package(BLAS REQUIRED)
|
||||||
find_package(LAPACK REQUIRED)
|
find_package(LAPACK REQUIRED)
|
||||||
|
|
||||||
add_definitions(-DLATTICESUMS32)
|
|
||||||
add_definitions(-DQPMS_VECTORS_NICE_TRANSFORMATIONS)
|
|
||||||
|
|
||||||
#includes
|
#includes
|
||||||
set (DIRS ${GSL_INCLUDE_DIRS} ${GSLCBLAS_INCLUDE_DIRS})
|
set (DIRS ${GSL_INCLUDE_DIRS} ${GSLCBLAS_INCLUDE_DIRS})
|
||||||
|
@ -29,6 +27,9 @@ target_link_libraries (qpms
|
||||||
target_include_directories (qpms PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
target_include_directories (qpms PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
|
||||||
target_compile_options(qpms PRIVATE -Wall -Wno-return-type)
|
target_compile_options(qpms PRIVATE -Wall -Wno-return-type)
|
||||||
|
target_compile_definitions(qpms PRIVATE LATTICESUMS32 QPMS_VECTORS_NICE_TRANSFORMATIONS
|
||||||
|
EWALD_AUTO_CUTOFF
|
||||||
|
)
|
||||||
|
|
||||||
install(TARGETS qpms
|
install(TARGETS qpms
|
||||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||||
|
|
78
qpms/ewald.c
78
qpms/ewald.c
|
@ -29,6 +29,8 @@
|
||||||
#define M_SQRTPI 1.7724538509055160272981674833411452
|
#define M_SQRTPI 1.7724538509055160272981674833411452
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define MAX(x,y) ((x)<(y)?(y):(x))
|
||||||
|
#define MIN(x,y) ((x)<(y)?(x):(y))
|
||||||
|
|
||||||
|
|
||||||
// sloppy implementation of factorial
|
// sloppy implementation of factorial
|
||||||
|
@ -221,6 +223,7 @@ int ewald3_21_xy_sigma_long (
|
||||||
assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
|
assert((latdim & LAT_XYONLY) && (latdim & SPACE3D));
|
||||||
assert((latdim & LAT1D) || (latdim & LAT2D));
|
assert((latdim & LAT1D) || (latdim & LAT2D));
|
||||||
const qpms_y_t nelem_sc = c->nelem_sc;
|
const qpms_y_t nelem_sc = c->nelem_sc;
|
||||||
|
assert(nelem_sc > 0);
|
||||||
const qpms_l_t lMax = c->lMax;
|
const qpms_l_t lMax = c->lMax;
|
||||||
|
|
||||||
// Manual init of the ewald summation targets
|
// Manual init of the ewald summation targets
|
||||||
|
@ -232,6 +235,23 @@ int ewald3_21_xy_sigma_long (
|
||||||
memset(err, 0, nelem_sc * sizeof(double));
|
memset(err, 0, nelem_sc * sizeof(double));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef EWALD_AUTO_CUTOFF
|
||||||
|
/*
|
||||||
|
* Experimental: stop the evaluation if the relative contribution of the "last layer"
|
||||||
|
* is below DBL_EPSILON to speed up the calculation.
|
||||||
|
* This obviously can currently work only for "suitable PGen"s that generate points in
|
||||||
|
* layers with increasing distance from the origin.
|
||||||
|
* Currently, the "layers" are chunks of 12 * lw + 1 points, where w is an
|
||||||
|
* increasing integer index (starting from 1).
|
||||||
|
* TODO: define the layers (optionally) in the PGen metadata and switch this feature on
|
||||||
|
* during run time instead of using this EWALD_AUTO_CUTOFF macro.
|
||||||
|
*/
|
||||||
|
size_t lw = 1; // "Layer" index.
|
||||||
|
size_t li = 0; // Counter of points inside the current "layer".
|
||||||
|
double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
|
||||||
|
kahaninit(&lsum, &lsum_c);
|
||||||
|
#endif
|
||||||
|
|
||||||
const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
|
const complex double commonfac = 1/(k*k*unitcell_volume); // used in the very end (CFC)
|
||||||
if (k_is_real)
|
if (k_is_real)
|
||||||
assert(creal(commonfac) > 0);
|
assert(creal(commonfac) > 0);
|
||||||
|
@ -317,12 +337,34 @@ int ewald3_21_xy_sigma_long (
|
||||||
}
|
}
|
||||||
jsum *= phasefac * factor1d; // PFC
|
jsum *= phasefac * factor1d; // PFC
|
||||||
ckahanadd(target + y, target_c + y, jsum);
|
ckahanadd(target + y, target_c + y, jsum);
|
||||||
|
#ifdef EWALD_AUTO_CUTOFF
|
||||||
|
kahanadd(&lsum, &lsum_c, cabs(jsum));
|
||||||
|
#endif
|
||||||
if(err) kahanadd(err + y, err_c + y, jsum_err);
|
if(err) kahanadd(err + y, err_c + y, jsum_err);
|
||||||
}
|
}
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
rbeta_pq_prev = rbeta_pq;
|
rbeta_pq_prev = rbeta_pq;
|
||||||
|
#endif
|
||||||
|
#ifdef EWALD_AUTO_CUTOFF
|
||||||
|
++li;
|
||||||
|
double cursum_min = INFINITY;
|
||||||
|
for (qpms_y_t y = 0; y < nelem_sc; ++y) {
|
||||||
|
const double a = cabs(target[y]);
|
||||||
|
if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
|
||||||
|
cursum_min = MIN(cursum_min, a);
|
||||||
|
}
|
||||||
|
if (li >= 12 * lw + 1) {
|
||||||
|
if(lsum < DBL_EPSILON * cursum_min)
|
||||||
|
goto ewald3_21_xy_sigma_long_end_point_loop;
|
||||||
|
kahaninit(&lsum, &lsum_c);
|
||||||
|
li = 0;
|
||||||
|
++lw;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
} // END POINT LOOP
|
} // END POINT LOOP
|
||||||
|
#ifdef EWALD_AUTO_CUTOFF
|
||||||
|
ewald3_21_xy_sigma_long_end_point_loop:
|
||||||
|
#endif
|
||||||
|
|
||||||
free(err_c);
|
free(err_c);
|
||||||
free(target_c);
|
free(target_c);
|
||||||
|
@ -607,6 +649,15 @@ int ewald3_sigma_short(
|
||||||
memset(err, 0, nelem_sc * sizeof(double));
|
memset(err, 0, nelem_sc * sizeof(double));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef EWALD_AUTO_CUTOFF
|
||||||
|
/*
|
||||||
|
* Experimental; see the notes at the first #ifdef EWALD_AUTO_CUTOFF in this file.
|
||||||
|
*/
|
||||||
|
size_t lw = 1; // "Layer" index.
|
||||||
|
size_t li = 0; // Counter of points inside the current "layer".
|
||||||
|
double lsum, lsum_c; // Layer contribution magnitude sum + kahan compensation
|
||||||
|
kahaninit(&lsum, &lsum_c);
|
||||||
|
#endif
|
||||||
|
|
||||||
PGenSphReturnData pgen_retdata;
|
PGenSphReturnData pgen_retdata;
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
|
@ -705,15 +756,38 @@ int ewald3_sigma_short(
|
||||||
if(err)
|
if(err)
|
||||||
kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
|
kahanadd(err + y, err_c + y, cabs(leg * (prefacn / I) * R_pq_pown
|
||||||
* interr[n])); // TODO include also other errors
|
* interr[n])); // TODO include also other errors
|
||||||
ckahanadd(target + y, target_c + y,
|
complex double thesummand = prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m);
|
||||||
prefacn * R_pq_pown * leg * cintres[n] * e_beta_Rpq * e_imf * min1pow_m_neg(m));
|
ckahanadd(target + y, target_c + y, thesummand);
|
||||||
|
#ifdef EWALD_AUTO_CUTOFF
|
||||||
|
kahanadd(&lsum, &lsum_c, cabs(thesummand));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
r_pq_shifted_prev = r_pq_shifted;
|
r_pq_shifted_prev = r_pq_shifted;
|
||||||
|
#endif
|
||||||
|
#ifdef EWALD_AUTO_CUTOFF
|
||||||
|
++li;
|
||||||
|
double cursum_min = INFINITY;
|
||||||
|
for (qpms_y_t y = 0; y < nelem_sc; ++y) {
|
||||||
|
const double a = cabs(target[y]);
|
||||||
|
if (a) // Skip possible symmetry-induced zeros; TODO this might need to be done in completely different way, perhaps on per-y basis
|
||||||
|
cursum_min = MIN(cursum_min, a);
|
||||||
|
}
|
||||||
|
if (li >= 12 * lw + 1) {
|
||||||
|
if(lsum < DBL_EPSILON * cursum_min)
|
||||||
|
goto ewald3_sigma_short_end_point_loop;
|
||||||
|
kahaninit(&lsum, &lsum_c);
|
||||||
|
li = 0;
|
||||||
|
++lw;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
//END POINT LOOP
|
||||||
|
#ifdef EWALD_AUTO_CUTOFF
|
||||||
|
ewald3_sigma_short_end_point_loop:
|
||||||
|
#endif
|
||||||
|
|
||||||
gsl_integration_workspace_free(workspace);
|
gsl_integration_workspace_free(workspace);
|
||||||
if(err) free(err_c);
|
if(err) free(err_c);
|
||||||
|
|
Loading…
Reference in New Issue