diff --git a/mr_util/fft.h b/mr_util/fft.h index a73cefe2c74d992e910265aa5878691e781d328a..ddffd4e3e7ad32891910a8645da5e46f8fef8e7c 100644 --- a/mr_util/fft.h +++ b/mr_util/fft.h @@ -2898,8 +2898,7 @@ template void c2r(const cfmav> &in, return c2r(in, out, axes[0], forward, fct, nthreads); util::sanity_check_cr(in, out, axes); if (in.size()==0) return; - aligned_array> tmp(in.size()); - fmav> atmp(tmp.data(), in); + fmav> atmp(in.shape()); auto newaxes = shape_t({axes.begin(), --axes.end()}); c2c(in, atmp, newaxes, forward, T(1), nthreads); c2r(atmp, out, axes.back(), forward, fct, nthreads); @@ -2933,9 +2932,7 @@ template void r2r_genuine_hartley(const cfmav &in, if (in.size()==0) return; shape_t tshp(in.shape()); tshp[axes.back()] = tshp[axes.back()]/2+1; - auto tinfo = fmav_info(tshp); - aligned_array> tdata(tinfo.size()); - fmav> atmp(tdata.data(), tinfo); + fmav> atmp(tshp); r2c(in, atmp, axes, true, fct, nthreads); simple_iter iin(atmp); rev_iter iout(out, axes); diff --git a/mr_util/mav.h b/mr_util/mav.h index 5481dada7ccbc575c219cf16ea31d1c5b5056099..d6bcb66dc37d2d31121603afdab3558803f48513 100644 --- a/mr_util/mav.h +++ b/mr_util/mav.h @@ -25,6 +25,7 @@ #include #include #include +#include #include "mr_util/error_handling.h" namespace mr { @@ -85,10 +86,14 @@ class fmav_info { return shp==other.shp; } }; +template class cmav; + // "mav" stands for "multidimensional array view" template class cfmav: public fmav_info { protected: + using Tsp = shared_ptr>; + Tsp ptr; T *d; public: @@ -96,16 +101,26 @@ template class cfmav: public fmav_info : fmav_info(shp_, str_), d(const_cast(d_)) {} cfmav(const T *d_, const shape_t &shp_) : fmav_info(shp_), d(const_cast(d_)) {} + cfmav(const shape_t &shp_) + : fmav_info(shp_), ptr(make_unique>(size())), + d(const_cast(ptr->data())) {} cfmav(const T* d_, const fmav_info &info) : fmav_info(info), d(const_cast(d_)) {} + cfmav(const cfmav &other) = default; + cfmav(cfmav &&other) = default; +// Not for public use! + cfmav(const T *d_, const Tsp &p, const shape_t &shp_, const stride_t &str_) + : fmav_info(shp_, str_), ptr(p), d(const_cast(d_)) {} template const T &operator[](I i) const { return d[i]; } const T *data() const { return d; } }; + template class fmav: public cfmav { protected: + using Tsp = shared_ptr>; using parent = cfmav; using parent::d; using parent::shp; @@ -116,8 +131,16 @@ template class fmav: public cfmav : parent(d_, shp_, str_) {} fmav(T *d_, const shape_t &shp_) : parent(d_, shp_) {} + fmav(const shape_t &shp_) + : parent(shp_) {} fmav(T* d_, const fmav_info &info) : parent(d_, info) {} + fmav(const fmav &other) = default; + fmav(fmav &&other) = default; +// Not for public use! + fmav(T *d_, const Tsp &p, const shape_t &shp_, const stride_t &str_) + : parent(d_, p, shp_, str_) {} + template T &operator[](I i) const { return d[i]; } using parent::shape; @@ -129,31 +152,40 @@ template class fmav: public cfmav using parent::conformable; }; - - template class cmav { - static_assert((ndim>0) && (ndim<3), "only supports 1D and 2D arrays"); + static_assert((ndim>0) && (ndim<4), "only supports 1D, 2D, and 3D arrays"); protected: - T *d; + using Tsp = shared_ptr>; array shp; array str; + Tsp ptr; + T *d; public: cmav(const T *d_, const array &shp_, const array &str_) - : d(const_cast(d_)), shp(shp_), str(str_) {} + : shp(shp_), str(str_), d(const_cast(d_)) {} cmav(const T *d_, const array &shp_) - : d(const_cast(d_)), shp(shp_) + : shp(shp_), d(const_cast(d_)) + { + str[ndim-1]=1; + for (size_t i=2; i<=ndim; ++i) + str[ndim-i] = str[ndim-i+1]*shp[ndim-i+1]; + } + cmav(const array &shp_) + : shp(shp_), ptr(make_unique>(size())), d(ptr->data()) { str[ndim-1]=1; for (size_t i=2; i<=ndim; ++i) str[ndim-i] = str[ndim-i+1]*shp[ndim-i+1]; } + cmav(const cmav &other) = default; + cmav(cmav &&other) = default; operator cfmav() const { - return cfmav(d, {shp.begin(), shp.end()}, {str.begin(), str.end()}); + return cfmav(d, ptr, {shp.begin(), shp.end()}, {str.begin(), str.end()}); } const T &operator[](size_t i) const { return operator()(i); } @@ -167,6 +199,11 @@ template class cmav static_assert(ndim==2, "ndim must be 2"); return d[str[0]*i + str[1]*j]; } + const T &operator()(size_t i, size_t j, size_t k) const + { + static_assert(ndim==3, "ndim must be 3"); + return d[str[0]*i + str[1]*j + str[2]*k]; + } size_t shape(size_t i) const { return shp[i]; } const array &shape() const { return shp; } size_t size() const @@ -196,8 +233,10 @@ template class cmav template class mav: public cmav { protected: + using Tsp = shared_ptr>; using parent = cmav; using parent::d; + using parent::ptr; using parent::shp; using parent::str; @@ -207,9 +246,13 @@ template class mav: public cmav : parent(d_, shp_, str_) {} mav(T *d_, const array &shp_) : parent(d_, shp_) {} + mav(const array &shp_) + : parent(shp_) {} + mav(const mav &other) = default; + mav(mav &&other) = default; operator fmav() const { - return fmav(d, {shp.begin(), shp.end()}, {str.begin(), str.end()}); + return fmav(d, ptr, {shp.begin(), shp.end()}, {str.begin(), str.end()}); } T &operator[](size_t i) const { return operator()(i); } @@ -223,6 +266,11 @@ template class mav: public cmav static_assert(ndim==2, "ndim must be 2"); return d[str[0]*i + str[1]*j]; } + T &operator()(size_t i, size_t j, size_t k) const + { + static_assert(ndim==3, "ndim must be 3"); + return d[str[0]*i + str[1]*j + str[2]*k]; + } using parent::shape; using parent::stride; using parent::size; @@ -240,6 +288,11 @@ template class mav: public cmav for (size_t i=0; i void checkShape template inline T fmod1 (T v) { return v-floor(v); } -template class tmpStorage - { - private: - vector d; - mav mav_; - - static size_t prod(const array &shp) - { - size_t res=1; - for (auto v: shp) res*=v; - return res; - } - - public: - tmpStorage(const array &shp) - : d(prod(shp)), mav_(d.data(), shp) {} - mav &getMav() { return mav_; } - void fill(const T & val) - { std::fill(d.begin(), d.end(), val); } - }; - // // Start of real gridder functionality // @@ -98,8 +77,8 @@ template void complex2hartley for (size_t v=0; v &dirty) const { checkShape(grid.shape(), {nu,nv}); - tmpStorage tmpdat({nu,nv}); - auto tmav = tmpdat.getMav(); + mav tmav({nu,nv}); hartley2_2D(grid, tmav, nthreads); grid2dirty_post(tmav, dirty); } @@ -1080,8 +1058,7 @@ template void x2dirty( WgridHelper hlp(gconf, srv, verbosity); double dw = hlp.DW(); dirty.fill(0); - tmpStorage,2> grid_({gconf.Nu(),gconf.Nv()}); - auto grid=grid_.getMav(); + mav,2> grid({gconf.Nu(),gconf.Nv()}); while(hlp.advance()) // iterate over w planes { if (hlp.Nvis()==0) continue; @@ -1099,12 +1076,10 @@ template void x2dirty( << " visibilities" << endl; if (verbosity>0) cout << "Using " << gconf.Nthreads() << " threads" << endl; - tmpStorage,2> grid_({gconf.Nu(), gconf.Nv()}); - auto grid=grid_.getMav(); - grid_.fill(0.); + mav,2> grid({gconf.Nu(), gconf.Nv()}); + grid.fill(0.); x2grid_c(gconf, srv, grid); - tmpStorage rgrid_(grid.shape()); - auto rgrid=rgrid_.getMav(); + mav rgrid(grid.shape()); complex2hartley(grid, rgrid, gconf.Nthreads()); gconf.grid2dirty(rgrid, dirty); } @@ -1121,15 +1096,13 @@ template void dirty2x( if (verbosity>0) cout << "Degridding using improved w-stacking" << endl; WgridHelper hlp(gconf, srv, verbosity); double dw = hlp.DW(); - tmpStorage tdirty_({nx_dirty,ny_dirty}); - auto tdirty=tdirty_.getMav(); + mav tdirty({nx_dirty,ny_dirty}); for (size_t i=0; i,2> grid_({gconf.Nu(),gconf.Nv()}); - auto grid=grid_.getMav(); + mav,2> grid({gconf.Nu(),gconf.Nv()}); while(hlp.advance()) // iterate over w planes { if (hlp.Nvis()==0) continue; @@ -1144,11 +1117,9 @@ template void dirty2x( << " visibilities" << endl; if (verbosity>0) cout << "Using " << gconf.Nthreads() << " threads" << endl; - tmpStorage grid_({gconf.Nu(), gconf.Nv()}); - auto grid=grid_.getMav(); + mav grid({gconf.Nu(), gconf.Nv()}); gconf.dirty2grid(dirty, grid); - tmpStorage,2> grid2_(grid.shape()); - auto grid2=grid2_.getMav(); + mav,2> grid2(grid.shape()); hartley2complex(grid, grid2, gconf.Nthreads()); grid2x_c(gconf, grid2, srv); } diff --git a/nifty_gridder/setup.py b/nifty_gridder/setup.py index c3232641bc9091946356c4e42c3a535d062da937..2427ba9e765d807b3aa9f031825963f699a5d4f6 100644 --- a/nifty_gridder/setup.py +++ b/nifty_gridder/setup.py @@ -19,12 +19,12 @@ python_module_link_args = [] if sys.platform == 'darwin': import distutils.sysconfig - extra_compile_args += ['--std=c++11', '--stdlib=libc++', '-mmacosx-version-min=10.9'] + extra_compile_args += ['--std=c++14', '--stdlib=libc++', '-mmacosx-version-min=10.9'] vars = distutils.sysconfig.get_config_vars() vars['LDSHARED'] = vars['LDSHARED'].replace('-bundle', '') python_module_link_args+=['-bundle'] else: - extra_compile_args += ['--std=c++11', '-march=native', '-O3', '-ffast-math'] + extra_compile_args += ['--std=c++14', '-march=native', '-O3', '-ffast-math'] python_module_link_args += ['-march=native', '-Wl,-rpath,$ORIGIN', '-ffast-math'] # if you don't want debugging info, add "-s" to python_module_link_args diff --git a/pypocketfft/setup.py b/pypocketfft/setup.py index 772aaf40bd2b7191e030be4f28b7bd1915b8e696..e92409f70a9e9971cc8625e92a4105260df023a1 100644 --- a/pypocketfft/setup.py +++ b/pypocketfft/setup.py @@ -13,7 +13,7 @@ class _deferred_pybind11_include(object): include_dirs = ['..', _deferred_pybind11_include(True), _deferred_pybind11_include()] -extra_compile_args = ['--std=c++11', '-march=native', '-ffast-math', '-O3'] +extra_compile_args = ['--std=c++14', '-march=native', '-ffast-math', '-O3'] python_module_link_args = [] define_macros = []