From 0504f98bc0f215231336f2c4ddeb9697affbaa84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ne=C4=8Dada?= Date: Sat, 10 Aug 2019 08:08:16 +0300 Subject: [PATCH] Make the splitted setup work Former-commit-id: 9c8a7e4d1131c40ceb27a4b1a9c54d80c319394b --- cyquat.pxd | 10 --- qpms/CMakeLists.txt | 3 +- qpms/__init__.py | 4 +- qpms/hexpoints.py | 3 +- qpms/qpms_c.pyx | 4 +- qpms/tmatrices.py | 3 +- qpms/translations.c | 119 -------------------------------- qpms/translations_python.c | 137 +++++++++++++++++++++++++++++++++++++ setup.py | 62 ++++++++++------- 9 files changed, 187 insertions(+), 158 deletions(-) delete mode 100644 cyquat.pxd create mode 100644 qpms/translations_python.c diff --git a/cyquat.pxd b/cyquat.pxd deleted file mode 100644 index f7de57d..0000000 --- a/cyquat.pxd +++ /dev/null @@ -1,10 +0,0 @@ -from qpms_cdefs cimport * - -cdef class CQuat: - cdef readonly qpms_quat_t q - -cdef class IRot3: - cdef readonly qpms_irot3_t qd - cdef void cset(self, qpms_irot3_t qd) - - diff --git a/qpms/CMakeLists.txt b/qpms/CMakeLists.txt index b530588..22e6206 100644 --- a/qpms/CMakeLists.txt +++ b/qpms/CMakeLists.txt @@ -10,7 +10,8 @@ add_definitions(-DQPMS_VECTORS_NICE_TRANSFORMATIONS) set (DIRS ${GSL_INCLUDE_DIRS} ${GSLCBLAS_INCLUDE_DIRS}) include_directories(${DIRS}) -add_library (qpms translations.c tmatrices.c vecprint.c vswf.c wigner.c +add_library (qpms translations.c tmatrices.c vecprint.c vswf.c wigner.c ewald.c + ewaldsf.c pointgroups.c latticegens.c lattices2d.c gaunt.c error.c legendre.c symmetries.c vecprint.c bessel.c own_zgemm.c parsing.c scatsystem.c materials.c drudeparam_data.c) use_c99() diff --git a/qpms/__init__.py b/qpms/__init__.py index 7a423af..27c8a48 100644 --- a/qpms/__init__.py +++ b/qpms/__init__.py @@ -1,8 +1,10 @@ from pkg_resources import get_distribution __version__ = get_distribution('qpms').version -from qpms_c import * +from .qpms_c import * from .qpms_p import * +from .cyquaternions import CQuat, IRot3 +from .cybspec import VSWFNorm, BaseSpec from .lattices2d import * from .hexpoints import * from .tmatrices import * diff --git a/qpms/hexpoints.py b/qpms/hexpoints.py index 063c113..69710a3 100644 --- a/qpms/hexpoints.py +++ b/qpms/hexpoints.py @@ -169,7 +169,8 @@ def generate_trianglepoints_hexcomplement(maxlayer, v3d = True, circular = True, } -from qpms_c import get_mn_y, trans_calculator +from .cycommon import get_mn_y +from .qpms_c import trans_calculator from .qpms_p import cart2sph def hexlattice_precalc_AB_save(file, lMax, k_hexside, maxlayer, circular=True, savepointinfo = False, J_scat=3): diff --git a/qpms/qpms_c.pyx b/qpms/qpms_c.pyx index 4f1e70d..6af2b76 100644 --- a/qpms/qpms_c.pyx +++ b/qpms/qpms_c.pyx @@ -11,9 +11,9 @@ import numpy as np import cmath #from qpms_cdefs cimport * from cyquaternions cimport * -from cyquaternions import * +#from cyquaternions import * from cybspec cimport * -from cybspec import * +#from cybspec import * from cycommon import * cimport cython from cython.parallel cimport parallel, prange diff --git a/qpms/tmatrices.py b/qpms/tmatrices.py index fac40cf..347d4d8 100644 --- a/qpms/tmatrices.py +++ b/qpms/tmatrices.py @@ -9,7 +9,8 @@ except ImportError: import re from scipy import interpolate from scipy.constants import hbar, e as eV, pi, c -from qpms_c import get_mn_y, get_nelem, CQuat +from .cycommon import get_mn_y, get_nelem +from .cyquaternions import CQuat ň = np.newaxis from .types import NormalizationT, TMatrixSpec diff --git a/qpms/translations.c b/qpms/translations.c index 1feeaeb..323309f 100644 --- a/qpms/translations.c +++ b/qpms/translations.c @@ -1834,123 +1834,4 @@ int qpms_trans_calculator_get_AB_arrays_ext(const qpms_trans_calculator *c, return qpms_trans_calculator_get_AB_arrays(c,Adest,Bdest,deststride,srcstride, kdlj, r_ge_d, J); } -#ifdef QPMS_COMPILE_PYTHON_EXTENSIONS -#include - -#ifdef QPMS_USE_OMP -#include -#endif - -int qpms_cython_trans_calculator_get_AB_arrays_loop( - const qpms_trans_calculator *c, const qpms_bessel_t J, const int resnd, - const int daxis, const int saxis, - char *A_data, const npy_intp *A_shape, const npy_intp *A_strides, - char *B_data, const npy_intp *B_shape, const npy_intp *B_strides, - const char *r_data, const npy_intp *r_shape, const npy_intp *r_strides, - const char *theta_data, const npy_intp *theta_shape, const npy_intp *theta_strides, - const char *phi_data, const npy_intp *phi_shape, const npy_intp *phi_strides, - const char *r_ge_d_data, const npy_intp *r_ge_d_shape, const npy_intp *r_ge_d_strides){ - assert(daxis != saxis); - assert(resnd >= 2); - int longest_axis = 0; - int longestshape = 1; - const npy_intp *resultshape = A_shape, *resultstrides = A_strides; - // TODO put some restrict's everywhere? - for (int ax = 0; ax < resnd; ++ax){ - assert(A_shape[ax] == B_shape[ax]); - assert(A_strides[ax] == B_strides[ax]); - if (daxis == ax || saxis == ax) continue; - if (A_shape[ax] > longestshape) { - longest_axis = ax; - longestshape = 1; - } - } - const npy_intp longlen = resultshape[longest_axis]; - - npy_intp innerloop_shape[resnd]; - for (int ax = 0; ax < resnd; ++ax) { - innerloop_shape[ax] = resultshape[ax]; - } - /* longest axis will be iterated in the outer (parallelized) loop. - * Therefore, longest axis, together with saxis and daxis, - * will not be iterated in the inner loop: - */ - innerloop_shape[longest_axis] = 1; - innerloop_shape[daxis] = 1; - innerloop_shape[saxis] = 1; - - // these are the 'strides' passed to the qpms_trans_calculator_get_AB_arrays_ext - // function, which expects 'const double *' strides, not 'char *' ones. - const npy_intp dstride = resultstrides[daxis] / sizeof(complex double); - const npy_intp sstride = resultstrides[saxis] / sizeof(complex double); - - int errval = 0; - // TODO here start parallelisation - //#pragma omp parallel - { - npy_intp local_indices[resnd]; - memset(local_indices, 0, sizeof(local_indices)); - int errval_local = 0; - size_t longi; - //#pragma omp for - for(longi = 0; longi < longlen; ++longi) { - // this might be done also in the inverse order, but this is more - // 'c-contiguous' way of incrementing the indices - int ax = resnd - 1; - while(ax >= 0) { - /* calculate the correct index/pointer for each array used. - * This can be further optimized from O(resnd * total size of - * the result array) to O(total size of the result array), but - * fick that now - */ - const char *r_p = r_data + r_strides[longest_axis] * longi; - const char *theta_p = theta_data + theta_strides[longest_axis] * longi; - const char *phi_p = phi_data + phi_strides[longest_axis] * longi; - const char *r_ge_d_p = r_ge_d_data + r_ge_d_strides[longest_axis] * longi; - char *A_p = A_data + A_strides[longest_axis] * longi; - char *B_p = B_data + B_strides[longest_axis] * longi; - for(int i = 0; i < resnd; ++i) { - // following two lines are probably not needed, as innerloop_shape is there 1 anyway - // so if i == daxis, saxis, or longest_axis, local_indices[i] is zero. - if (i == longest_axis) continue; - if (daxis == i || saxis == i) continue; - r_p += r_strides[i] * local_indices[i]; - theta_p += theta_strides[i] * local_indices[i]; - phi_p += phi_strides[i] * local_indices[i]; - A_p += A_strides[i] * local_indices[i]; - B_p += B_strides[i] * local_indices[i]; - } - - // perform the actual task here - errval_local |= qpms_trans_calculator_get_AB_arrays_ext(c, (complex double *)A_p, - (complex double *)B_p, - dstride, sstride, - // FIXME change all the _ext function types to npy_... so that - // these casts are not needed - *((double *) r_p), *((double *) theta_p), *((double *)phi_p), - (int)(*((npy_bool *) r_ge_d_p)), J); - if (errval_local) abort(); - - // increment the last index 'digit' (ax is now resnd-1; we don't have do-while loop in python) - ++local_indices[ax]; - while(local_indices[ax] == innerloop_shape[ax] && ax >= 0) { - // overflow to the next digit but stop when reached below the last one - local_indices[ax] = 0; - //local_indices[--ax]++; // dekrementace indexu pod nulu a následná inkrementace poruší paměť FIXME - ax--; - if (ax >= 0) local_indices[ax]++; - } - if (ax >= 0) // did not overflow, get back to the lowest index - ax = resnd - 1; - } - } - errval |= errval_local; - } - // FIXME when parallelizing - // TODO Here end parallelisation - return errval; -} - - -#endif // QPMS_COMPILE_PYTHON_EXTENSIONS diff --git a/qpms/translations_python.c b/qpms/translations_python.c new file mode 100644 index 0000000..8386378 --- /dev/null +++ b/qpms/translations_python.c @@ -0,0 +1,137 @@ +#include +#include "qpms_types.h" +#include "qpms_specfunc.h" +#include "gaunt.h" +#include "translations.h" +#include "indexing.h" // TODO replace size_t and int with own index types here +#include +#include +#include +#include "tiny_inlines.h" +#include "assert_cython_workaround.h" +#include "kahansum.h" +#include //abort() +#include +#include "qpms_error.h" +#include "normalisation.h" + +//#ifdef QPMS_COMPILE_PYTHON_EXTENSIONS +#include + +#ifdef QPMS_USE_OMP +#include +#endif + +int qpms_cython_trans_calculator_get_AB_arrays_loop( + const qpms_trans_calculator *c, const qpms_bessel_t J, const int resnd, + const int daxis, const int saxis, + char *A_data, const npy_intp *A_shape, const npy_intp *A_strides, + char *B_data, const npy_intp *B_shape, const npy_intp *B_strides, + const char *r_data, const npy_intp *r_shape, const npy_intp *r_strides, + const char *theta_data, const npy_intp *theta_shape, const npy_intp *theta_strides, + const char *phi_data, const npy_intp *phi_shape, const npy_intp *phi_strides, + const char *r_ge_d_data, const npy_intp *r_ge_d_shape, const npy_intp *r_ge_d_strides){ + assert(daxis != saxis); + assert(resnd >= 2); + int longest_axis = 0; + int longestshape = 1; + const npy_intp *resultshape = A_shape, *resultstrides = A_strides; + // TODO put some restrict's everywhere? + for (int ax = 0; ax < resnd; ++ax){ + assert(A_shape[ax] == B_shape[ax]); + assert(A_strides[ax] == B_strides[ax]); + if (daxis == ax || saxis == ax) continue; + if (A_shape[ax] > longestshape) { + longest_axis = ax; + longestshape = 1; + } + } + const npy_intp longlen = resultshape[longest_axis]; + + npy_intp innerloop_shape[resnd]; + for (int ax = 0; ax < resnd; ++ax) { + innerloop_shape[ax] = resultshape[ax]; + } + /* longest axis will be iterated in the outer (parallelized) loop. + * Therefore, longest axis, together with saxis and daxis, + * will not be iterated in the inner loop: + */ + innerloop_shape[longest_axis] = 1; + innerloop_shape[daxis] = 1; + innerloop_shape[saxis] = 1; + + // these are the 'strides' passed to the qpms_trans_calculator_get_AB_arrays_ext + // function, which expects 'const double *' strides, not 'char *' ones. + const npy_intp dstride = resultstrides[daxis] / sizeof(complex double); + const npy_intp sstride = resultstrides[saxis] / sizeof(complex double); + + int errval = 0; + // TODO here start parallelisation + //#pragma omp parallel + { + npy_intp local_indices[resnd]; + memset(local_indices, 0, sizeof(local_indices)); + int errval_local = 0; + size_t longi; + //#pragma omp for + for(longi = 0; longi < longlen; ++longi) { + // this might be done also in the inverse order, but this is more + // 'c-contiguous' way of incrementing the indices + int ax = resnd - 1; + while(ax >= 0) { + /* calculate the correct index/pointer for each array used. + * This can be further optimized from O(resnd * total size of + * the result array) to O(total size of the result array), but + * fick that now + */ + const char *r_p = r_data + r_strides[longest_axis] * longi; + const char *theta_p = theta_data + theta_strides[longest_axis] * longi; + const char *phi_p = phi_data + phi_strides[longest_axis] * longi; + const char *r_ge_d_p = r_ge_d_data + r_ge_d_strides[longest_axis] * longi; + char *A_p = A_data + A_strides[longest_axis] * longi; + char *B_p = B_data + B_strides[longest_axis] * longi; + for(int i = 0; i < resnd; ++i) { + // following two lines are probably not needed, as innerloop_shape is there 1 anyway + // so if i == daxis, saxis, or longest_axis, local_indices[i] is zero. + if (i == longest_axis) continue; + if (daxis == i || saxis == i) continue; + r_p += r_strides[i] * local_indices[i]; + theta_p += theta_strides[i] * local_indices[i]; + phi_p += phi_strides[i] * local_indices[i]; + A_p += A_strides[i] * local_indices[i]; + B_p += B_strides[i] * local_indices[i]; + } + + // perform the actual task here + errval_local |= qpms_trans_calculator_get_AB_arrays_ext(c, (complex double *)A_p, + (complex double *)B_p, + dstride, sstride, + // FIXME change all the _ext function types to npy_... so that + // these casts are not needed + *((double *) r_p), *((double *) theta_p), *((double *)phi_p), + (int)(*((npy_bool *) r_ge_d_p)), J); + if (errval_local) abort(); + + // increment the last index 'digit' (ax is now resnd-1; we don't have do-while loop in python) + ++local_indices[ax]; + while(local_indices[ax] == innerloop_shape[ax] && ax >= 0) { + // overflow to the next digit but stop when reached below the last one + local_indices[ax] = 0; + //local_indices[--ax]++; // dekrementace indexu pod nulu a následná inkrementace poruší paměť FIXME + ax--; + if (ax >= 0) local_indices[ax]++; + } + if (ax >= 0) // did not overflow, get back to the lowest index + ax = resnd - 1; + } + } + errval |= errval_local; + } + // FIXME when parallelizing + // TODO Here end parallelisation + return errval; +} + + +//#endif // QPMS_COMPILE_PYTHON_EXTENSIONS + diff --git a/setup.py b/setup.py index 3d80b9e..0abe7ae 100755 --- a/setup.py +++ b/setup.py @@ -9,15 +9,18 @@ from distutils.extension import Extension # m.Extension.__dict__ = m._Extension.__dict__ # TODO CHECK THIS OUT http://stackoverflow.com/questions/4056657/what-is-the-easiest-way-to-make-an-optional-c-extension-for-a-python-package +# AND THIS https://groups.google.com/forum/#!topic/cython-users/GAAPYb2X304 # also this: https://docs.python.org/2/extending/building.html import os -print("You might want to add additional library path to LD_LIBRARY_PATH (especially if you are not using" - " GNU GSL in your system library path) and if import fails. ") -if("LD_LIBRARY_PATH" in os.environ): - print(os.environ['LD_LIBRARY_PATH'].split(':')) +#print("You might want to add additional library path to LD_LIBRARY_PATH (especially if you are not using" +# " GNU GSL in your system library path) and if import fails. ") +#if("LD_LIBRARY_PATH" in os.environ): +# print(os.environ['LD_LIBRARY_PATH'].split(':')) + +''' amos_sources = [ 'amos/d1mach.f', 'amos/dgamln.f', @@ -60,13 +63,7 @@ amos_sources = [ 'amos/zwrsk.f', ] - -qpms_c = Extension('qpms_c', - sources = [ - 'qpms/cycommon.pyx', - 'qpms/cyquaternions.pyx', - 'qpms/cybspec.pyx', - 'qpms/qpms_c.pyx', +libqpms_sources = [ #'qpms/hexpoints_c.pyx', 'qpms/gaunt.c',#'qpms/gaunt.h','qpms/vectors.h','qpms/translations.h', # FIXME http://stackoverflow.com/questions/4259170/python-setup-script-extensions-how-do-you-include-a-h-file @@ -82,26 +79,44 @@ qpms_c = Extension('qpms_c', 'qpms/bessel.c', 'qpms/own_zgemm.c', 'qpms/pointgroups.c', + ] +''' + +cycommon = Extension('qpms.cycommon', + sources = ['qpms/cycommon.pyx'], + extra_link_args=['amos/libamos.a', 'qpms/libqpms.a'], + libraries=['gsl', 'lapacke', 'blas', 'gslcblas', 'pthread',] + ) +cybspec = Extension('qpms.cybspec', + sources = ['qpms/cybspec.pyx'], + extra_link_args=['amos/libamos.a', 'qpms/libqpms.a'], + libraries=['gsl', 'lapacke', 'blas', 'gslcblas', 'pthread',] + ) +cyquaternions = Extension('qpms.cyquaternions', + sources = ['qpms/cyquaternions.pyx'], + extra_link_args=['amos/libamos.a', 'qpms/libqpms.a'], + libraries=['gsl', 'lapacke', 'blas', 'gslcblas', 'pthread',] + ) + +qpms_c = Extension('qpms.qpms_c', + sources = [ + 'qpms/qpms_c.pyx', + 'qpms/translations_python.c', ], - extra_compile_args=['-std=c99','-ggdb', '-O0', - '-DQPMS_COMPILE_PYTHON_EXTENSIONS', # this is required - #'-DQPMS_USE_OMP', - '-DQPMS_SCATSYSTEM_USE_OWN_BLAS', - '-DDISABLE_NDEBUG', # uncomment to enable assertions in the modules - #'-fopenmp', + extra_compile_args=['-std=c99', + '-DQPMS_COMPILE_PYTHON_EXTENSIONS', # This is needed to enable it in translations.h ], libraries=['gsl', 'lapacke', 'blas', 'gslcblas', 'pthread', #'omp' - # TODO resolve the problem with openblas (missing gotoblas symbol) and preferable use other blas library #('amos', dict(sources=amos_sources) ), ], - include_dirs=['amos'], - extra_link_args=['amos/libamos.a'], - runtime_library_dirs=os.environ['LD_LIBRARY_PATH'].split(':') if 'LD_LIBRARY_PATH' in os.environ else [], + include_dirs=['amos', 'qpms'], + extra_link_args=[ 'qpms/libqpms.a','amos/libamos.a', ], + #runtime_library_dirs=os.environ['LD_LIBRARY_PATH'].split(':') if 'LD_LIBRARY_PATH' in os.environ else [], #extra_objects = ['amos/libamos.a'], # FIXME apparently, I would like to eliminate the need to cmake/make first ) setup(name='qpms', - version = "0.2.995", + version = "0.2.996", packages=['qpms'], # libraries = [('amos', {'sources': amos_sources} )], setup_requires=['cython>=0.28',], @@ -109,8 +124,9 @@ setup(name='qpms', #'quaternion','spherical_functions', 'scipy>=0.18.0', 'sympy>=1.2'], #dependency_links=['https://github.com/moble/quaternion/archive/v2.0.tar.gz','https://github.com/moble/spherical_functions/archive/master.zip'], - ext_modules=cythonize([qpms_c], include_path=['qpms', 'amos'], gdb_debug=True), + ext_modules=cythonize([qpms_c, cycommon, cyquaternions, cybspec], include_path=['qpms', 'amos'], gdb_debug=True), cmdclass = {'build_ext': build_ext}, + zip_safe=False )