diff --git a/nifty_gridder.cc b/nifty_gridder.cc index 2c741f56fe2dbb31a5105de1dfa414f4caacfdee..d7a1c71baf9c2545fc0d3bc69583b15b5a81f674 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -659,41 +659,6 @@ template<typename T> class GridderConfig return res; } - pyarr_c<T> grid2dirty_wstacking(const pyarr_c<complex<T>> &grid, double w) const - { - checkArray(grid, "grid", {nu, nv}); - auto tmp = makeArray<complex<T>>({nu, nv}); - auto ptmp = tmp.mutable_data(); - pocketfft::c2c({nu,nv},{grid.strides(0),grid.strides(1)}, - {tmp.strides(0), tmp.strides(1)}, {0,1}, pocketfft::BACKWARD, - grid.data(), tmp.mutable_data(), T(1), nthreads); - auto res = makeArray<T>({nx_dirty, ny_dirty}); - auto pout = res.mutable_data(); - { - py::gil_scoped_release release; -#pragma omp parallel num_threads(nthreads) -{ -#pragma omp for schedule(static) - for (size_t i=0; i<nx_dirty; ++i) - { - double fx = (i-nx_dirty/2.)*Pixsize_x(); - fx *= fx; - for (size_t j=0; j<ny_dirty; ++j) - { - size_t i2 = nu-nx_dirty/2+i; - if (i2>=nu) i2-=nu; - size_t j2 = nv-ny_dirty/2+j; - if (j2>=nv) j2-=nv; - double fy = (j-ny_dirty/2.)*Pixsize_y(); - auto ws = wscreen(fx, fy*fy, w, true); - pout[ny_dirty*i + j] = std::real(ptmp[nv*i2+j2]*cfu[i]*cfv[j]*ws); - } - } - } - } - return res; - } - pyarr_c<T> dirty2grid(const pyarr_c<T> &dirty) const { checkArray(dirty, "dirty", {nx_dirty, ny_dirty}); @@ -742,50 +707,14 @@ template<typename T> class GridderConfig } return tmp; } - inline complex<T> wscreen(double x, double y, double w, bool adjoint) const - { + complex<T> wscreen(double x, double y, double w, bool adjoint) const + { constexpr double pi = 3.141592653589793238462643383279502884197; - constexpr complex<double> imagunit {0,1}; - double n = cos(sqrt(x+y)); - complex<T> phase = 2*pi*w*imagunit*(n-1); + double n = cos(sqrt(x+y)), xn = 1./n; + double phase = 2*pi*w*(n-1); if (adjoint) phase *= -1; - return exp(phase)/n; - } - pyarr_c<complex<T>> dirty2grid_wstacking(const pyarr_c<T> &dirty, double w) const - { - checkArray(dirty, "dirty", {nx_dirty, ny_dirty}); - auto pdirty = dirty.data(); - auto tmp = makeArray<complex<T>>({nu, nv}); - auto ptmp = tmp.mutable_data(); - pocketfft::stride_t strides{tmp.strides(0),tmp.strides(1)}; - { - py::gil_scoped_release release; - for (size_t i=0; i<nu*nv; ++i) - ptmp[i] = 0.; -#pragma omp parallel num_threads(nthreads) - { -#pragma omp for schedule(static) - for (size_t i=0; i<nx_dirty; ++i) - { - double fx = (i-nx_dirty/2.)*Pixsize_x(); - fx *= fx; - for (size_t j=0; j<ny_dirty; ++j) - { - size_t i2 = nu-nx_dirty/2+i; - if (i2>=nu) i2-=nu; - size_t j2 = nv-ny_dirty/2+j; - if (j2>=nv) j2-=nv; - double fy = (j-ny_dirty/2.)*Pixsize_y(); - auto ws = wscreen(fx, fy*fy, w, false); - ptmp[nv*i2+j2] = std::real(pdirty[ny_dirty*i + j])*ws*cfu[i]*cfv[j]; - } - } - } - pocketfft::c2c({nu,nv}, strides, strides, {0,1}, pocketfft::FORWARD, - ptmp, ptmp, T(1), nthreads); + return complex<T>(cos(phase)*xn, sin(phase)*xn); } - return tmp; - } inline void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const { u=fmodulo(u_in*psx, T(1))*nu, @@ -795,6 +724,40 @@ template<typename T> class GridderConfig iv0 = int(v-w*0.5 + 1 + nv) - nv; if (iv0+w>nv+nsafe) iv0 = nv+nsafe-w; } + pyarr_c<complex<T>> apply_wscreen(const pyarr_c<complex<T>> &dirty_, double w, bool adjoint) const + { + checkArray(dirty_, "dirty", {nx_dirty, ny_dirty}); + auto dirty = dirty_.data(); + auto res_ = makeArray<complex<T>>({nx_dirty, ny_dirty}); + auto res = res_.mutable_data(); + double x0 = -0.5*nx_dirty*psx, + y0 = -0.5*ny_dirty*psy; + { + py::gil_scoped_release release; +#pragma omp parallel num_threads(nthreads) +{ +#pragma omp for schedule(static) + for (size_t i=0; i<=nx_dirty/2; ++i) + { + double fx = x0+i*psx; + fx *= fx; + for (size_t j=0; j<=ny_dirty/2; ++j) + { + double fy = y0+j*psy; + auto ws = wscreen(fx, fy*fy, w, adjoint); + res[ny_dirty*i+j] = dirty[ny_dirty*i+j]*ws; + if (i>0) + res[ny_dirty*(nx_dirty-i)+j] = dirty[ny_dirty*(nx_dirty-i)+j]*ws; + if (j>0) + res[ny_dirty*i+(ny_dirty-j)] = dirty[ny_dirty*i+(ny_dirty-j)]*ws; + if ((i>0)&&(j>0)) + res[ny_dirty*(nx_dirty-i)+(ny_dirty-j)] = dirty[ny_dirty*(nx_dirty-i)+(ny_dirty-j)]*ws; + } + } +} + } + return res_; + } }; template<typename T, typename T2=complex<T>> class Helper @@ -1560,11 +1523,10 @@ PYBIND11_MODULE(nifty_gridder, m) .def("grid2dirty", &GridderConfig<double>::grid2dirty, grid2dirty_DS, "grid"_a) .def("grid2dirty_c", &GridderConfig<double>::grid2dirty_c, "grid"_a) - .def("grid2dirty_wstacking", &GridderConfig<double>::grid2dirty_wstacking, "grid"_a, "w"_a) - .def("dirty2grid_wstacking", &GridderConfig<double>::dirty2grid_wstacking, "grid"_a, "w"_a) .def("dirty2grid", &GridderConfig<double>::dirty2grid, dirty2grid_DS, "dirty"_a) .def("dirty2grid_c", &GridderConfig<double>::dirty2grid_c, "dirty"_a) + .def("apply_wscreen", &GridderConfig<double>::apply_wscreen, "dirty"_a, "w"_a, "adjoint"_a) // pickle support .def(py::pickle( diff --git a/profile.py b/profile.py index 26a94e725ccbfa4989793506129f4e2e4ced21c1..dcb1d40dfaccdc732ef74798ca25df9e96a9dceb 100644 --- a/profile.py +++ b/profile.py @@ -28,15 +28,14 @@ if __name__ == '__main__': w = 1000.2 ny, dy = nx, dx conf = ng.GridderConfig(nx, ny, 1e-7, dx, dy) - x, y = conf.Nu(), conf.Nv() + x, y = conf.Nxdirty(), conf.Nydirty() fld = np.random.randn(x, y) + 1j*np.random.randn(x, y) - func0 = lambda x: (_wscreen(nx, dy, w).conjugate()*conf.grid2dirty_c(x) - ).real - func1 = lambda x: conf.grid2dirty_wstacking(x, w) + func0 = lambda x: _wscreen(nx, dy, w).conjugate()*x + func1 = lambda x: conf.apply_wscreen(x, w, True) print(time_op(func0, fld), time_op(func1, fld)) fld = np.random.randn(nx, ny) - func0 = lambda x: conf.dirty2grid_c(_wscreen(nx, dx, w)*x.real) - func1 = lambda x: conf.dirty2grid_wstacking(x, w) + func0 = lambda x: _wscreen(nx, dx, w)*x.real + func1 = lambda x: conf.apply_wscreen(x, w, False) print(time_op(func0, fld), time_op(func1, fld)) diff --git a/test.py b/test.py index cbb5334b35ae5e0e7af61e09686db2357a59a6cd..356ff0e4713eb785aa98aae48e377db2a96bab17 100644 --- a/test.py +++ b/test.py @@ -173,10 +173,10 @@ def test_wstacking(nx, dx, w): x, y = conf.Nu(), conf.Nv() fld = np.random.randn(x, y) + 1j*np.random.randn(x, y) res0 = (_wscreen(nx, dy, w).conjugate()*conf.grid2dirty_c(fld)).real - res1 = conf.grid2dirty_wstacking(fld, w) + res1 = conf.apply_wscreen(conf.grid2dirty_c(fld), w, True).real np.testing.assert_allclose(res0, res1) fld = np.random.randn(nx, ny) res0 = conf.dirty2grid_c(_wscreen(nx, dx, w)*fld.real) - res1 = conf.dirty2grid_wstacking(fld, w) + res1 = conf.dirty2grid_c(conf.apply_wscreen(fld, w, False)) np.testing.assert_allclose(res0, res1)