diff --git a/nifty_gridder.cc b/nifty_gridder.cc index fd76da8109ffdf093d55e5d236a7decbfc195c07..2c741f56fe2dbb31a5105de1dfa414f4caacfdee 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -659,6 +659,41 @@ template<typename T> class GridderConfig return res; } + pyarr_c<T> grid2dirty_wstacking(const pyarr_c<complex<T>> &grid, double w) const + { + checkArray(grid, "grid", {nu, nv}); + auto tmp = makeArray<complex<T>>({nu, nv}); + auto ptmp = tmp.mutable_data(); + pocketfft::c2c({nu,nv},{grid.strides(0),grid.strides(1)}, + {tmp.strides(0), tmp.strides(1)}, {0,1}, pocketfft::BACKWARD, + grid.data(), tmp.mutable_data(), T(1), nthreads); + auto res = makeArray<T>({nx_dirty, ny_dirty}); + auto pout = res.mutable_data(); + { + py::gil_scoped_release release; +#pragma omp parallel num_threads(nthreads) +{ +#pragma omp for schedule(static) + for (size_t i=0; i<nx_dirty; ++i) + { + double fx = (i-nx_dirty/2.)*Pixsize_x(); + fx *= fx; + for (size_t j=0; j<ny_dirty; ++j) + { + size_t i2 = nu-nx_dirty/2+i; + if (i2>=nu) i2-=nu; + size_t j2 = nv-ny_dirty/2+j; + if (j2>=nv) j2-=nv; + double fy = (j-ny_dirty/2.)*Pixsize_y(); + auto ws = wscreen(fx, fy*fy, w, true); + pout[ny_dirty*i + j] = std::real(ptmp[nv*i2+j2]*cfu[i]*cfv[j]*ws); + } + } + } + } + return res; + } + pyarr_c<T> dirty2grid(const pyarr_c<T> &dirty) const { checkArray(dirty, "dirty", {nx_dirty, ny_dirty}); @@ -707,6 +742,50 @@ template<typename T> class GridderConfig } return tmp; } + inline complex<T> wscreen(double x, double y, double w, bool adjoint) const + { + constexpr double pi = 3.141592653589793238462643383279502884197; + constexpr complex<double> imagunit {0,1}; + double n = cos(sqrt(x+y)); + complex<T> phase = 2*pi*w*imagunit*(n-1); + if (adjoint) phase *= -1; + return exp(phase)/n; + } + pyarr_c<complex<T>> dirty2grid_wstacking(const pyarr_c<T> &dirty, double w) const + { + checkArray(dirty, "dirty", {nx_dirty, ny_dirty}); + auto pdirty = dirty.data(); + auto tmp = makeArray<complex<T>>({nu, nv}); + auto ptmp = tmp.mutable_data(); + pocketfft::stride_t strides{tmp.strides(0),tmp.strides(1)}; + { + py::gil_scoped_release release; + for (size_t i=0; i<nu*nv; ++i) + ptmp[i] = 0.; +#pragma omp parallel num_threads(nthreads) + { +#pragma omp for schedule(static) + for (size_t i=0; i<nx_dirty; ++i) + { + double fx = (i-nx_dirty/2.)*Pixsize_x(); + fx *= fx; + for (size_t j=0; j<ny_dirty; ++j) + { + size_t i2 = nu-nx_dirty/2+i; + if (i2>=nu) i2-=nu; + size_t j2 = nv-ny_dirty/2+j; + if (j2>=nv) j2-=nv; + double fy = (j-ny_dirty/2.)*Pixsize_y(); + auto ws = wscreen(fx, fy*fy, w, false); + ptmp[nv*i2+j2] = std::real(pdirty[ny_dirty*i + j])*ws*cfu[i]*cfv[j]; + } + } + } + pocketfft::c2c({nu,nv}, strides, strides, {0,1}, pocketfft::FORWARD, + ptmp, ptmp, T(1), nthreads); + } + return tmp; + } inline void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const { u=fmodulo(u_in*psx, T(1))*nu, @@ -1481,6 +1560,8 @@ PYBIND11_MODULE(nifty_gridder, m) .def("grid2dirty", &GridderConfig<double>::grid2dirty, grid2dirty_DS, "grid"_a) .def("grid2dirty_c", &GridderConfig<double>::grid2dirty_c, "grid"_a) + .def("grid2dirty_wstacking", &GridderConfig<double>::grid2dirty_wstacking, "grid"_a, "w"_a) + .def("dirty2grid_wstacking", &GridderConfig<double>::dirty2grid_wstacking, "grid"_a, "w"_a) .def("dirty2grid", &GridderConfig<double>::dirty2grid, dirty2grid_DS, "dirty"_a) .def("dirty2grid_c", &GridderConfig<double>::dirty2grid_c, "dirty"_a) diff --git a/profile.py b/profile.py new file mode 100644 index 0000000000000000000000000000000000000000..26a94e725ccbfa4989793506129f4e2e4ced21c1 --- /dev/null +++ b/profile.py @@ -0,0 +1,42 @@ +import nifty_gridder as ng +from time import time +import numpy as np + + +def _wscreen(npix, dst, w): + dc = (np.linspace(start=-npix/2, stop=npix/2 - 1, num=npix)*dst)**2 + ls = np.broadcast_to(dc, (dc.shape[0],)*2) + theta = np.sqrt(ls + ls.T) + n = np.cos(theta) + wscreen = np.exp(2*np.pi*1j*w*(n - 1))/n + return wscreen + + +def time_op(func, x, ntries=5): + t0 = time() + for ii in range(ntries): + print(ii) + func(x) + return (time() - t0)/ntries + + +if __name__ == '__main__': + ng.set_nthreads(4) + ntries = 20 + nx = 2048 + dx = 12 + w = 1000.2 + ny, dy = nx, dx + conf = ng.GridderConfig(nx, ny, 1e-7, dx, dy) + x, y = conf.Nu(), conf.Nv() + + fld = np.random.randn(x, y) + 1j*np.random.randn(x, y) + func0 = lambda x: (_wscreen(nx, dy, w).conjugate()*conf.grid2dirty_c(x) + ).real + func1 = lambda x: conf.grid2dirty_wstacking(x, w) + print(time_op(func0, fld), time_op(func1, fld)) + + fld = np.random.randn(nx, ny) + func0 = lambda x: conf.dirty2grid_c(_wscreen(nx, dx, w)*x.real) + func1 = lambda x: conf.dirty2grid_wstacking(x, w) + print(time_op(func0, fld), time_op(func1, fld)) diff --git a/test.py b/test.py index 62b5da5a6c568b065d5254dd2a7c29bf241ad561..cbb5334b35ae5e0e7af61e09686db2357a59a6cd 100644 --- a/test.py +++ b/test.py @@ -155,3 +155,28 @@ def test_pickling(): assert_(gc.Pixsize_y() == gc2.Pixsize_y()) +def _wscreen(npix, dst, w): + dc = (np.linspace(start=-npix/2, stop=npix/2 - 1, num=npix)*dst)**2 + ls = np.broadcast_to(dc, (dc.shape[0],)*2) + theta = np.sqrt(ls + ls.T) + n = np.cos(theta) + wscreen = np.exp(2*np.pi*1j*w*(n - 1))/n + return wscreen + + +@pmp('nx', [4, 18, 54]) +@pmp('dx', [1., 0.13, 132]) +@pmp('w', [0, 10, 8489]) +def test_wstacking(nx, dx, w): + ny, dy = nx, dx + conf = ng.GridderConfig(nx, ny, 1e-7, dx, dy) + x, y = conf.Nu(), conf.Nv() + fld = np.random.randn(x, y) + 1j*np.random.randn(x, y) + res0 = (_wscreen(nx, dy, w).conjugate()*conf.grid2dirty_c(fld)).real + res1 = conf.grid2dirty_wstacking(fld, w) + np.testing.assert_allclose(res0, res1) + + fld = np.random.randn(nx, ny) + res0 = conf.dirty2grid_c(_wscreen(nx, dx, w)*fld.real) + res1 = conf.dirty2grid_wstacking(fld, w) + np.testing.assert_allclose(res0, res1)