From 38a4dbfcd7d3e9539e334cee02fecab2c7901793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ne=C4=8Dada?= Date: Fri, 3 Jul 2020 15:10:15 +0300 Subject: [PATCH] Set compiler/linker options to make cython parallelisation actually work. --- qpms/qpms_c.pyx | 31 ++++++++++++++++--------------- setup.py | 2 ++ 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/qpms/qpms_c.pyx b/qpms/qpms_c.pyx index fbeafb3..ab76c97 100644 --- a/qpms/qpms_c.pyx +++ b/qpms/qpms_c.pyx @@ -17,8 +17,8 @@ from .cymaterials cimport EpsMuGenerator, EpsMu from libc.stdlib cimport malloc, free, calloc import warnings -from cython.parallel import prange, parallel -from cython import boundscheck, wraparound +from cython.parallel cimport prange, parallel +from cython cimport boundscheck, wraparound # Set custom GSL error handler. N.B. this is obviously not thread-safe. cdef char *pgsl_err_reason @@ -916,7 +916,7 @@ cdef class ScatteringSystem: cdef ccart3_t res cdef cart3_t pos cdef Py_ssize_t i - with nogil, parallel(): + with nogil, parallel(), boundscheck(False), wraparound(False): for i in prange(evalpos_a.shape[0]): pos.x = evalpos_a[i,0] pos.y = evalpos_a[i,1] @@ -957,7 +957,6 @@ def empty_lattice_modes_xy(EpsMu epsmu, reciprocal_basis, wavevector, double max free(omegas_c) return omegas - cdef class _ScatteringSystemAtOmegaK: ''' Wrapper over the C qpms_scatsys_at_omega_k_t structure @@ -1121,6 +1120,7 @@ cdef class _ScatteringSystemAtOmega: def translation_matrix_packed(self, iri, J = QPMS_HANKEL_PLUS): return self.ss_pyref.translation_matrix_packed(wavenumber=self.wavenumber, iri=iri, J=J) + @boundscheck(False) def scattered_E(self, scatcoeffvector_full, evalpos, bint alt=False, btyp=QPMS_HANKEL_PLUS): cdef qpms_bessel_t btyp_c = BesselType(btyp) evalpos = np.array(evalpos, dtype=float, copy=False) @@ -1133,17 +1133,18 @@ cdef class _ScatteringSystemAtOmega: cdef ccart3_t res cdef cart3_t pos cdef Py_ssize_t i - for i in range(evalpos_a.shape[0]): - pos.x = evalpos_a[i,0] - pos.y = evalpos_a[i,1] - pos.z = evalpos_a[i,2] - if alt: - res = qpms_scatsysw_scattered_E__alt(self.ssw, btyp_c, &scv_view[0], pos) - else: - res = qpms_scatsysw_scattered_E(self.ssw, btyp_c, &scv_view[0], pos) - results[i,0] = res.x - results[i,1] = res.y - results[i,2] = res.z + with wraparound(False), nogil, parallel(): + for i in prange(evalpos_a.shape[0]): + pos.x = evalpos_a[i,0] + pos.y = evalpos_a[i,1] + pos.z = evalpos_a[i,2] + if alt: + res = qpms_scatsysw_scattered_E__alt(self.ssw, btyp_c, &scv_view[0], pos) + else: + res = qpms_scatsysw_scattered_E(self.ssw, btyp_c, &scv_view[0], pos) + results[i,0] = res.x + results[i,1] = res.y + results[i,2] = res.z return results.reshape(evalpos.shape) diff --git a/setup.py b/setup.py index 13ee224..29fd31f 100755 --- a/setup.py +++ b/setup.py @@ -134,6 +134,8 @@ qpms_c = Extension('qpms.qpms_c', #('amos', dict(sources=amos_sources) ), ], include_dirs=['amos', 'qpms'], + extra_link_args=['-fopenmp'], + extra_compile_args=['-fopenmp'], #extra_link_args=[ 'qpms/libqpms.a','amos/libamos.a', ], #runtime_library_dirs=os.environ['LD_LIBRARY_PATH'].split(':') if 'LD_LIBRARY_PATH' in os.environ else [], #extra_objects = ['amos/libamos.a'], # FIXME apparently, I would like to eliminate the need to cmake/make first