From 3d9ce29d35f0ba0d903de5020902f6640e9b45bb Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Tue, 14 May 2019 11:50:04 +0200 Subject: [PATCH 1/9] nothing important --- setup.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/setup.py b/setup.py index 96f1b30..5756538 100644 --- a/setup.py +++ b/setup.py @@ -32,14 +32,6 @@ class _deferred_pybind11_include(object): return pybind11.get_include(self.user) -def _strip_inc(input): - res = [] - for i in input: - if "inc.c" not in i: - res.append(i) - return res - - def _get_cc_files(directory): path = directory iterable_sources = (iglob(os.path.join(root, '*.cc')) -- GitLab From 004be60c258d920e285bf410b8b99f4218263479 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Wed, 15 May 2019 21:47:43 +0200 Subject: [PATCH 2/9] first steps --- nifty_gridder.cc | 208 +++++++++++++++++++++-------------------------- 1 file changed, 93 insertions(+), 115 deletions(-) diff --git a/nifty_gridder.cc b/nifty_gridder.cc index aaa3329..00a0e7c 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -99,8 +99,7 @@ uint32_t morton2peano2D_32(uint32_t v, int bits) void myassert(bool cond, const char *msg) { if (cond) return; - cerr << msg << endl; - throw 42; + throw runtime_error(msg); } template class IdxComp__ @@ -185,84 +184,69 @@ a_i_c peanoindex(const a_d_c &uv_, int nu, int nv) class Helper { - private: - int nu, nv, nspread, nbuf; - double r2lamb, fac; - vector kernel; - + protected: + int nu, nv; public: - vector ku, kv; - int iu, iv, idxu0, idxv0; - complex val; + int w; + protected: + double beta; + int nsafe, su; + public: + int sv; - Helper(int nu_, int nv_, int nspread_, double r2lamb_) - : nu(nu_), nv(nv_), nspread(nspread_), nbuf(2*nspread_), r2lamb(r2lamb_), - fac(pi/r2lamb), kernel(nspread+1), ku(nbuf), kv(nbuf) - { - // Precompute gridding kernel - for (size_t i=0; i vis) + vector kernel; + int iu0, iv0; // start index of the current visibility + int bu0, bv0; // start index of the current buffer + + void update(double u_in, double v_in) { auto u = fmodulo(u_in, 1.)*nu; - iu = min(nu-1, int(u)); - auto du = u-iu; - idxu0 = (iu-nspread+1+nu)%nu; + iu0 = int(u-w*0.5 + 1 + nu) - nu; + if (iu0+w>nu+nsafe) iu0 = nu+nsafe-w; auto v = fmodulo(v_in, 1.)*nv; - iv = min(nv-1, int(v)); - auto dv = v-iv; - idxv0 = (iv-nspread+1+nv)%nv; - - val = vis*exp(-fac*(du*du + dv*dv)); - - auto fu0 = exp(2*fac*du); - auto fv0 = exp(2*fac*dv); - auto fu = 1.; - auto fv = 1.; - for (int i=0; inv+nsafe) iv0 = nv+nsafe-w; +// cout << "iuv0 " << iu0 << " " << iv0 << endl; + double xw=2./w; + for (int i=0; ibu0+su) || (iv0+w>bv0+sv); } -class Buffer - { - protected: - int nu, nv, nspread, su; - public: - int sv; - protected: - int u0, v0; - - bool need_to_move(int iu, int iv) const + void update_position() { - return (abs(iu-u0)>su-nspread) || (abs(iv-v0)>sv-nspread); + bu0=max(-nsafe, min(nu+nsafe-su, iu0+nsafe-su/2)); + bv0=max(-nsafe, min(nv+nsafe-sv, iv0+nsafe-sv/2)); +//cout << "buv0 " << bu0 << " " << bv0 << endl; } - void update_position(int iu, int iv) + protected: + Helper(int nu_, int nv_, double epsilon) + : nu(nu_), nv(nv_), + w(int(-log10(epsilon)+1.9999)), beta(2.3*w), nsafe((w+1)/2), + su(min(3*w, nu)), sv(min(3*w, nv)), + kernel(2*w), + bu0(-1000000), bv0(-1000000) { - int safe_u = su-nspread, safe_v = sv-nspread; - u0=max(safe_u, min(nu-1-safe_u, iu)); - v0=max(safe_v, min(nv-1-safe_v, iv)); + if (min(nu,nv)<2*nsafe) throw runtime_error("grid dimensions too small"); +// cout << "beta=" << beta << endl; +// cout << "w=" << w << endl; +// cout << "suv=" << su << " " << sv << endl; } - - public: - Buffer(int nu_, int nv_, int nspread_) - : nu(nu_), nv(nv_), nspread(nspread_), - su(nspread+min(3*nspread, nu)), sv(3*nspread+min(nspread, nv)), - u0(-1000000), v0(-1000000) - {} }; -class WriteBuffer: public Buffer +class WriteHelper: public Helper { private: vector> data; @@ -270,17 +254,17 @@ class WriteBuffer: public Buffer void dump() { - if (u0<0) return; + if (bu0<-nsafe) return; // nothing written into buffer yet #pragma omp critical { - int idxu = (u0-su+1+nu)%nu; - int idxv0 = (v0-sv+1+nv)%nv; - for (int iu=0; iu<2*su; ++iu) + int idxu = (bu0+nu)%nu; + int idxv0 = (bv0+nv)%nv; + for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; @@ -290,24 +274,24 @@ class WriteBuffer: public Buffer public: complex *p0; - WriteBuffer(int nu_, int nv_, int nspread_, complex *grid_) - : Buffer(nu_, nv_, nspread_), data(4*su*sv,0.), grid(grid_) {} - ~WriteBuffer() { dump(); } + WriteHelper(int nu_, int nv_, double epsilon, complex *grid_) + : Helper(nu_, nv_, epsilon), data(su*sv, 0.), grid(grid_) {} + ~WriteHelper() { dump(); } - void prep_write(int iu, int iv) - /* iu = [0; nu-1]; iv = [0; nv-1] */ + void prep_write(double u_in, double v_in) { - if (need_to_move(iu, iv)) + update(u_in, v_in); + if (need_to_move()) { dump(); - update_position(iu, iv); + update_position(); fill(data.begin(), data.end(), 0.); } - p0 = data.data() + 2*sv*(iu-u0+su-nspread) + iv-v0+sv-nspread; + p0 = data.data() + sv*(iu0-bu0) + iv0-bv0; } }; -class ReadBuffer: public Buffer +class ReadHelper: public Helper { private: vector> data; @@ -315,14 +299,14 @@ class ReadBuffer: public Buffer void load() { - int idxu = (u0-su+1+nu)%nu; - int idxv0 = (v0-sv+1+nv)%nv; - for (int iu=0; iu<2*su; ++iu) + int idxu = (bu0+nu)%nu; + int idxv0 = (bv0+nv)%nv; + for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; @@ -331,23 +315,23 @@ class ReadBuffer: public Buffer public: const complex *p0; - ReadBuffer(int nu_, int nv_, int nspread_, const complex *grid_) - : Buffer(nu_, nv_, nspread_), data(4*su*sv,0.), grid(grid_) {} + ReadHelper(int nu_, int nv_, double epsilon, const complex *grid_) + : Helper(nu_, nv_, epsilon), data(su*sv,0.), grid(grid_), p0(nullptr) {} - void prep_read(int iu, int iv) - /* iu = [0; nu-1]; iv = [0; nv-1] */ + void prep_read(double u_in, double v_in) { - if (need_to_move(iu, iv)) + update(u_in, v_in); + if (need_to_move()) { - update_position(iu, iv); + update_position(); load(); } - p0 = data.data() + 2*sv*(iu-u0+su-nspread) + iv-v0+sv-nspread; + p0 = data.data() + sv*(iu0-bu0) + iv0-bv0; } }; a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_, - int nu, int nv, int nspread, double r2lamb) + int nu, int nv, double epsilon) { myassert(uv_.ndim()==2, "uv array must be 2D"); myassert(uv_.shape(1)==2, "uv.shape[1] must be 2"); @@ -364,23 +348,20 @@ a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_, #pragma omp parallel { - Helper hlp(nu, nv, nspread, r2lamb); - WriteBuffer buf(nu, nv, nspread, grid); - int delta = 2*(buf.sv-nspread); + WriteHelper hlp(nu, nv, epsilon, grid); // Loop over sampling points #pragma omp for schedule(dynamic,10000) for (int ipart=0; ipart tmp = hlp.val*hlp.ku[cu]; - for (int cv=0; cv<2*nspread; ++cv) - *ptr++ += tmp*hlp.kv[cv]; - ptr+=delta; + complex tmp = vis[ipart]*hlp.kernel[cu]; + for (int cv=0; cv r = 0.; - buf.prep_read(hlp.iu, hlp.iv); - auto ptr = buf.p0; - for (int cu=0; cu<2*nspread; ++cu) + auto ptr = hlp.p0; + for (int cu=0; cu tmp = 0.; - for (int cv=0; cv<2*nspread; ++cv) - tmp += (*ptr++) * hlp.kv[cv]; - r+=tmp*hlp.ku[cu]; - ptr += delta; + for (int cv=0; cv Date: Thu, 16 May 2019 08:24:29 +0200 Subject: [PATCH 3/9] cleanup --- nifty_gridder.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/nifty_gridder.cc b/nifty_gridder.cc index 00a0e7c..d35be37 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -204,10 +204,8 @@ class Helper iu0 = int(u-w*0.5 + 1 + nu) - nu; if (iu0+w>nu+nsafe) iu0 = nu+nsafe-w; auto v = fmodulo(v_in, 1.)*nv; -// cout << "uv0 " << u << " " << v << endl; iv0 = int(v-w*0.5 + 1 + nv) - nv; if (iv0+w>nv+nsafe) iv0 = nv+nsafe-w; -// cout << "iuv0 " << iu0 << " " << iv0 << endl; double xw=2./w; for (int i=0; i Date: Thu, 16 May 2019 13:13:18 +0200 Subject: [PATCH 4/9] implement correction factors --- nifty_gridder.cc | 83 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/nifty_gridder.cc b/nifty_gridder.cc index d35be37..52d8ecc 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -148,6 +148,63 @@ inline double fmodulo (double v1, double v2) // return (v1>=0) ? ((v10.1) ? (1.+x)*(1.-x) : 1.-x*x; } + +void legendre_prep(int n, vector &x, vector &w) + { + constexpr double pi = 3.141592653589793238462643383279502884197; + constexpr double eps = 3e-14; + int m = (n+1)>>1; + x.resize(m); + w.resize(m); + + double t0 = 1 - (1-1./n) / (8.*n*n); + double t1 = 1./(4.*n+2.); + +#pragma omp parallel +{ + int i; +#pragma omp for schedule(dynamic,100) + for (i=1; i<=m; ++i) + { + double x0 = cos(pi * ((i<<2)-1) * t1) * t0; + + int dobreak=0; + int j=0; + double dpdx; + while(1) + { + double P_1 = 1.0; + double P0 = x0; + double dx, x1; + + for (int k=2; k<=n; k++) + { + double P_2 = P_1; + P_1 = P0; +// P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k; + P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2); + } + + dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0); + + /* Newton step */ + x1 = x0 - P0/dpdx; + dx = x0-x1; + x0 = x1; + if (dobreak) break; + + if (abs(dx)<=eps) dobreak=1; + if (++j>=100) throw runtime_error("convergence problem"); + } + + x[m-i] = x0; + w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx); + } +} // end of parallel region + } + using a_i_c = py::array_t; using a_d_c = py::array_t; using a_c_c = py::array_t, @@ -450,6 +507,31 @@ a_c_c from_grid_pre (const a_d_c &grid_) return res; } +a_d_c correction_factors (size_t n, size_t nval, double epsilon) + { + auto w = int(-log10(epsilon)+1.9999); + auto beta = 2.3*w; + auto p = int(1.5*w+2); + double h = 2*pi/n; + double alpha = pi*w/n; + vector x, wgt; + legendre_prep(2*p,x,wgt); + auto psi = x; + for (auto &v:psi) + v = exp(beta*(sqrt(1-v*v)-1.)); + int odim[] = {nval}; + a_d_c res(odim); + auto val = res.mutable_data(); + for (size_t k=0; k Date: Thu, 16 May 2019 15:39:50 +0200 Subject: [PATCH 5/9] tweaks --- nifty_gridder.cc | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/nifty_gridder.cc b/nifty_gridder.cc index 52d8ecc..5d95c60 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -3,6 +3,12 @@ #include #include +#ifdef __GNUC__ +#define RESTRICT __restrict__ +#else +#define RESTRICT +#endif + using namespace std; namespace py = pybind11; @@ -403,13 +409,17 @@ a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_, for (int ipart=0; ipart tmp = vis[ipart]*hlp.kernel[cu]; - for (int cv=0; cv tmp = v*ku[cu]; + for (int cv=0; cv r = 0.; - auto ptr = hlp.p0; - for (int cu=0; cu tmp = 0.; - for (int cv=0; cv x, wgt; legendre_prep(2*p,x,wgt); auto psi = x; for (auto &v:psi) v = exp(beta*(sqrt(1-v*v)-1.)); - int odim[] = {nval}; + int odim[] = {int(nval)}; a_d_c res(odim); auto val = res.mutable_data(); for (size_t k=0; k Date: Fri, 17 May 2019 21:57:49 +0200 Subject: [PATCH 6/9] streamlining --- nifty_gridder.cc | 105 +++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 49 deletions(-) diff --git a/nifty_gridder.cc b/nifty_gridder.cc index 5d95c60..82e7235 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -5,6 +5,7 @@ #ifdef __GNUC__ #define RESTRICT __restrict__ +#define NOINLINE __attribute__ ((noinline)) #else #define RESTRICT #endif @@ -245,14 +246,21 @@ a_i_c peanoindex(const a_d_c &uv_, int nu, int nv) return res; } +int get_w(double epsilon) + { + int w=int(-log10(epsilon)+2.9999); + if (epsilon<1e-11) ++w; + return w; + } + class Helper { protected: int nu, nv; public: int w; - protected: double beta; + protected: int nsafe, su; public: int sv; @@ -261,7 +269,7 @@ class Helper int iu0, iv0; // start index of the current visibility int bu0, bv0; // start index of the current buffer - void update(double u_in, double v_in) + void NOINLINE update(double u_in, double v_in) { auto u = fmodulo(u_in, 1.)*nu; iu0 = int(u-w*0.5 + 1 + nu) - nu; @@ -275,8 +283,8 @@ class Helper kernel[i ] = xw*(iu0+i-u); kernel[i+w] = xw*(iv0+i-v); } - for (int i=0; i<2*w; ++i) - kernel[i] = exp(beta*(sqrt(1-kernel[i]*kernel[i])-1.)); + for (auto &k : kernel) + k = exp(beta*sqrt(1.-k*k)); } bool need_to_move() const @@ -291,8 +299,8 @@ class Helper protected: Helper(int nu_, int nv_, double epsilon) : nu(nu_), nv(nv_), - w(int(-log10(epsilon)+1.9999)), beta(2.3*w), nsafe((w+1)/2), - su(min(3*w, nu)), sv(min(3*w, nv)), + w(get_w(epsilon)), beta(2.3*w), nsafe((w+1)/2), + su(min(max(2*w,80), nu)), sv(min(max(2*w,80), nv)), kernel(2*w), bu0(-1000000), bv0(-1000000) { @@ -384,7 +392,31 @@ class ReadHelper: public Helper } }; -a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_, +a_d_c complex2hartley (const a_c_c &grid_) + { + myassert(grid_.ndim()==2, "grid array must be 2D"); + int nu = grid_.shape(0), nv = grid_.shape(1); + auto grid = grid_.data(); + + int odim[] = {nu,nv}; + a_d_c res(odim); + auto grid2 = res.mutable_data(); + for (int u=0; u(v1+v2, v1-v2); } } return res; } -a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_, - int nu, int nv, double epsilon) +a_c_c from_grid (const a_d_c &uv_, const a_d_c &grid0_, double epsilon) { myassert(uv_.ndim()==2, "uv array must be 2D"); myassert(uv_.shape(1)==2, "uv.shape[1] must be 2"); - myassert(grid_.ndim()==2, "grid array must be 2D"); + myassert(grid0_.ndim()==2, "grid array must be 2D"); + auto grid_ = hartley2complex(grid0_); int nvis = uv_.shape(0); auto uv = uv_.data(); auto grid = grid_.data(); - myassert(nu==grid_.shape(0), "oops"); - myassert(nv==grid_.shape(1), "oops"); + int nu=grid_.shape(0), + nv=grid_.shape(1); int odim[] = {nvis}; a_c_c res(odim); @@ -471,6 +505,7 @@ a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_, #pragma omp parallel { ReadHelper hlp(nu, nv, epsilon, grid); + double emb = exp(-2*hlp.beta); #pragma omp for schedule(dynamic,10000) for (int ipart=0; ipart(v1+v2, v1-v2); - } - } - return res; - } - a_d_c correction_factors (size_t n, size_t nval, double epsilon) { - auto w = int(-log10(epsilon)+1.9999); + auto w = get_w(epsilon); auto beta = 2.3*w; auto p = int(1.5*w+2); double alpha = pi*w/n; @@ -548,12 +558,9 @@ a_d_c correction_factors (size_t n, size_t nval, double epsilon) PYBIND11_MODULE(nifty_gridder, m) { - using namespace pybind11::literals; - m.def("peanoindex",&peanoindex); + m.def("get_w",&get_w); m.def("to_grid",&to_grid); - m.def("to_grid_post",&to_grid_post); m.def("from_grid",&from_grid); - m.def("from_grid_pre",&from_grid_pre); m.def("correction_factors",&correction_factors); } -- GitLab From 734ea4ddcef122bc16547693df580d3f6272fbaa Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Sat, 18 May 2019 12:32:45 +0200 Subject: [PATCH 7/9] tweak W calculation --- nifty_gridder.cc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/nifty_gridder.cc b/nifty_gridder.cc index 82e7235..42b497c 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -247,11 +247,7 @@ a_i_c peanoindex(const a_d_c &uv_, int nu, int nv) } int get_w(double epsilon) - { - int w=int(-log10(epsilon)+2.9999); - if (epsilon<1e-11) ++w; - return w; - } + { return int(-log10(epsilon)+2.9999); } class Helper { -- GitLab From 736d7e438fa85d9020b2e468a5cda8af5fe406a5 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Mon, 20 May 2019 12:03:50 +0200 Subject: [PATCH 8/9] cleanup and better get_w() --- nifty_gridder.cc | 12 ++++++- setup.py | 86 ++++++++---------------------------------------- 2 files changed, 24 insertions(+), 74 deletions(-) diff --git a/nifty_gridder.cc b/nifty_gridder.cc index 42b497c..3100e9b 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -247,7 +247,17 @@ a_i_c peanoindex(const a_d_c &uv_, int nu, int nv) } int get_w(double epsilon) - { return int(-log10(epsilon)+2.9999); } + { + static const vector maxmaperr { 1e8, 0.32, 0.021, 6.2e-4, + 1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17, + 6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 }; + + double epssq = epsilon*epsilon; + + for (size_t i=1; imaxmaperr[i]) return i; + throw runtime_error("requested epsilon too small - minimum is 2e-13"); + } class Helper { diff --git a/setup.py b/setup.py index 5756538..e06f804 100644 --- a/setup.py +++ b/setup.py @@ -1,26 +1,5 @@ -from setuptools import setup, Extension, Distribution -import setuptools.command.build_ext - +from setuptools import setup, Extension import sys -import sysconfig -import os -import os.path -import distutils.sysconfig -import itertools -from glob import iglob - - -def _get_distutils_build_directory(): - """ - Returns the directory distutils uses to build its files. - We need this directory since we build extensions which have to link - other ones. - """ - pattern = "lib.{platform}-{major}.{minor}" - return os.path.join( - 'build', pattern.format(platform=sysconfig.get_platform(), - major=sys.version_info[0], - minor=sys.version_info[1])) class _deferred_pybind11_include(object): @@ -32,68 +11,29 @@ class _deferred_pybind11_include(object): return pybind11.get_include(self.user) -def _get_cc_files(directory): - path = directory - iterable_sources = (iglob(os.path.join(root, '*.cc')) - for root, dirs, files in os.walk(path)) - source_files = itertools.chain.from_iterable(iterable_sources) - return list(source_files) - - -def _remove_strict_prototype_option_from_distutils_config(): - strict_prototypes = '-Wstrict-prototypes' - config = distutils.sysconfig.get_config_vars() - for key, value in config.items(): - if strict_prototypes in str(value): - config[key] = config[key].replace(strict_prototypes, '') - - -_remove_strict_prototype_option_from_distutils_config() - - extra_compile_args = [] -openmp_libs = [] -extra_cc_compile_args = [] include_dirs = ['./', _deferred_pybind11_include(), _deferred_pybind11_include(True)] -library_dirs = [_get_distutils_build_directory()] python_module_link_args = [] -base_library_link_args = [] if sys.platform == 'darwin': - extra_cc_compile_args.append('--std=c++14') - extra_cc_compile_args.append('--stdlib=libc++') - extra_compile_args.append('-mmacosx-version-min=10.9') - + import distutils.sysconfig + extra_compile_args += ['--std=c++14', '--stdlib=libc++', '-mmacosx-version-min=10.9'] vars = distutils.sysconfig.get_config_vars() vars['LDSHARED'] = vars['LDSHARED'].replace('-bundle', '') - python_module_link_args.append('-bundle') - builder = setuptools.command.build_ext.build_ext(Distribution()) - base_library_link_args.append('-dynamiclib') + python_module_link_args+=['-bundle'] else: - extra_compile_args += ['-fopenmp', '-march=native', '-O3', '-ffast-math'] - python_module_link_args += ['-fopenmp', '-march=native'] - extra_cc_compile_args.append('--std=c++14') - python_module_link_args.append("-Wl,-rpath,$ORIGIN") - -extra_cc_compile_args = extra_compile_args + extra_cc_compile_args + extra_compile_args += ['--std=c++14', '-fopenmp', '-march=native', '-O3', '-ffast-math'] + python_module_link_args += ['-fopenmp', '-march=native', '-Wl,-rpath,$ORIGIN'] def get_extension_modules(): - extension_modules = [] - - gridder_sources = _get_cc_files('.') - gridder_library = Extension('nifty_gridder', - sources=gridder_sources, - include_dirs=include_dirs, - extra_compile_args=extra_cc_compile_args, - extra_link_args=python_module_link_args, - library_dirs=library_dirs) - extension_modules.append(gridder_library) - - return extension_modules - + return [Extension('nifty_gridder', + sources=['nifty_gridder.cc'], + include_dirs=include_dirs, + extra_compile_args=extra_compile_args, + extra_link_args=python_module_link_args)] setup(name='nifty_gridder', version='0.0.1', @@ -102,7 +42,7 @@ setup(name='nifty_gridder', author='Martin Reinecke', author_email='martin@mpa-garching.mpg.de', packages=[], - setup_requires=['numpy>=1.10.4', 'pybind11>=2.2.1'], + setup_requires=['numpy>=1.15.0', 'pybind11>=2.2.4'], ext_modules=get_extension_modules(), - install_requires=['numpy>=1.10.4', 'pybind11>=2.2.1'] + install_requires=['numpy>=1.15.0', 'pybind11>=2.2.4'] ) -- GitLab From 5081ca7e992adbec093c1dccf3a32d4f1fa4454e Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Mon, 20 May 2019 16:45:43 +0200 Subject: [PATCH 9/9] housekeeping --- nifty_gridder.cc | 37 ++++++++++++++++++++++++++++++++++++- setup.py | 7 ++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/nifty_gridder.cc b/nifty_gridder.cc index 3100e9b..2f5c456 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -1,3 +1,21 @@ +/* + * This file is part of nifty_gridder. + * + * nifty_gridder is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * nifty_gridder is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with nifty_fridder; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + #include #include #include @@ -16,7 +34,9 @@ namespace py = pybind11; namespace { -constexpr double pi = 3.141592653589793238462643383279502884197; +// +// Utilities for converting between Cartesian coordinates and Peano index +// static const uint16_t utab[] = { #define Z(a) 0x##a##0, 0x##a##1, 0x##a##4, 0x##a##5 @@ -109,6 +129,10 @@ void myassert(bool cond, const char *msg) throw runtime_error(msg); } +// +// Utilities for indirect sorting (argsort) +// + template class IdxComp__ { private: @@ -155,6 +179,10 @@ inline double fmodulo (double v1, double v2) // return (v1>=0) ? ((v10.1) ? (1.+x)*(1.-x) : 1.-x*x; } @@ -212,6 +240,10 @@ void legendre_prep(int n, vector &x, vector &w) } // end of parallel region } +// +// Start of real gridder functionality +// + using a_i_c = py::array_t; using a_d_c = py::array_t; using a_c_c = py::array_t, @@ -536,8 +568,11 @@ a_c_c from_grid (const a_d_c &uv_, const a_d_c &grid0_, double epsilon) return res; } +/* Compute correction factors for the ES gridding kernel + This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ a_d_c correction_factors (size_t n, size_t nval, double epsilon) { + constexpr double pi = 3.141592653589793238462643383279502884197; auto w = get_w(epsilon); auto beta = 2.3*w; auto p = int(1.5*w+2); diff --git a/setup.py b/setup.py index e06f804..d8bfbd6 100644 --- a/setup.py +++ b/setup.py @@ -11,10 +11,9 @@ class _deferred_pybind11_include(object): return pybind11.get_include(self.user) +include_dirs = ['./', _deferred_pybind11_include(True), + _deferred_pybind11_include()] extra_compile_args = [] -include_dirs = ['./', _deferred_pybind11_include(), - _deferred_pybind11_include(True)] - python_module_link_args = [] if sys.platform == 'darwin': @@ -27,6 +26,8 @@ else: extra_compile_args += ['--std=c++14', '-fopenmp', '-march=native', '-O3', '-ffast-math'] python_module_link_args += ['-fopenmp', '-march=native', '-Wl,-rpath,$ORIGIN'] +# if you don't want debugging info, add "-s" to python_module_link_args + def get_extension_modules(): return [Extension('nifty_gridder', -- GitLab