diff --git a/gridder_cxx.h b/gridder_cxx.h new file mode 100644 index 0000000000000000000000000000000000000000..6ddec362398c2660436af5782efdf328da2a50b6 --- /dev/null +++ b/gridder_cxx.h @@ -0,0 +1,778 @@ +#ifndef GRIDDER_CXX_H +#define GRIDDER_CXX_H + +/* + * This file is part of nifty_gridder. + * + * nifty_gridder is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * nifty_gridder is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with nifty_gridder; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Copyright (C) 2019 Max-Planck-Society + Author: Martin Reinecke */ + +#include <iostream> +#include <algorithm> +#include <cstdlib> +#include <cmath> +#include <vector> + +#include "pocketfft_hdronly.h" +#include <array> + +template<typename T, size_t ndim, bool c_contiguous> class mav + { + static_assert((ndim>0) && (ndim<3), "only supports 1D and 2D arrays"); + + private: + T *d; + std::array<size_t, ndim> shp; + std::array<ptrdiff_t, ndim> str; + + public: + mav(T *d_, const std::array<size_t,ndim> &shp_, + const std::array<ptrdiff_t,ndim> &str_) + : d(d_), shp(shp_), str(str_) + { static_assert(!c_contiguous, "this type does not accept strides"); } + mav(T *d_, const std::array<size_t,ndim> &shp_) + : d(d_), shp(shp_) + { + static_assert(c_contiguous, "this type requires strides"); + str[ndim-1]=1; + for (size_t d=2; d<=ndim; ++d) + str[ndim-d] = str[ndim-d+1]*shp[ndim-d+1]; + } + operator mav<const T, ndim, true>() const + { + static_assert(c_contiguous); + return mav<const T, ndim, true>(d,shp); + } + operator mav<const T, ndim, false>() const + { return mav<const T, ndim, false>(d,shp, str); } + T &operator[](size_t i) + { return operator()(i); } + const T &operator[](size_t i) const + { return operator()(i); } + T &operator()(size_t i) + { + static_assert(ndim==1, "ndim must be 1"); + return c_contiguous ? d[i] : d[str[0]*i]; + } + const T &operator()(size_t i) const + { + static_assert(ndim==1, "ndim must be 1"); + return c_contiguous ? d[i] : d[str[0]*i]; + } + T &operator()(size_t i, size_t j) + { + static_assert(ndim==2, "ndim must be 2"); + return c_contiguous ? d[str[0]*i + j] : d[str[0]*i + str[1]*j]; + } + const T &operator()(size_t i, size_t j) const + { + static_assert(ndim==2, "ndim must be 2"); + return c_contiguous ? d[str[0]*i + j] : d[str[0]*i + str[1]*j]; + } + size_t shape(size_t i) const { return shp[i]; } + const std::array<size_t,ndim> &shape() const { return shp; } + size_t size() const + { + size_t res=1; + for (auto v: shp) res*=v; + return res; + } + size_t stride(size_t i) const { return str[i]; } + T *data() + { +// static_assert(c_contiguous, "type is not C contiguous"); + return d; + } + const T *data() const + { +// static_assert(c_contiguous, "type is not C contiguous"); + return d; + } + }; + +template<typename T, size_t ndim> using cmav = mav<T,ndim,true>; +template<typename T, size_t ndim> using smav = mav<T,ndim,false>; + +// +// basic utilities +// + +void myassert(bool cond, const char *msg) + { + if (cond) return; + throw std::runtime_error(msg); + } + +template<size_t ndim> void checkShape + (const std::array<size_t, ndim> &shp1, const std::array<size_t, ndim> &shp2) + { + for (size_t i=0; i<ndim; ++i) + myassert(shp1[i]==shp2[i], "shape mismatch"); + } + +/*! Returns the remainder of the division \a v1/v2. + The result is non-negative. + \a v1 can be positive or negative; \a v2 must be positive. */ +template<typename T> inline T fmodulo (T v1, T v2) + { + if (v1>=0) + return (v1<v2) ? v1 : fmod(v1,v2); + T tmp=fmod(v1,v2)+v2; + return (tmp==v2) ? T(0) : tmp; + } + +static size_t nthreads = 1; + +constexpr auto set_nthreads_DS = R"""( +Specifies the number of threads to be used by the module + +Parameters +========== +nthreads: int + the number of threads to be used. Must be >=1. +)"""; +void set_nthreads(size_t nthreads_) + { + myassert(nthreads_>=1, "nthreads must be >= 1"); + nthreads = nthreads_; + } + +constexpr auto get_nthreads_DS = R"""( +Returns the number of threads used by the module + +Returns +======= +int : the number of threads used by the module +)"""; +size_t get_nthreads() + { return nthreads; } + +// +// Utilities for Gauss-Legendre quadrature +// + +static inline double one_minus_x2 (double x) + { return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; } + +void legendre_prep(int n, std::vector<double> &x, std::vector<double> &w) + { + constexpr double pi = 3.141592653589793238462643383279502884197; + constexpr double eps = 3e-14; + int m = (n+1)>>1; + x.resize(m); + w.resize(m); + + double t0 = 1 - (1-1./n) / (8.*n*n); + double t1 = 1./(4.*n+2.); + +#pragma omp parallel num_threads(nthreads) +{ + int i; +#pragma omp for schedule(dynamic,100) + for (i=1; i<=m; ++i) + { + double x0 = cos(pi * ((i<<2)-1) * t1) * t0; + + int dobreak=0; + int j=0; + double dpdx; + while(1) + { + double P_1 = 1.0; + double P0 = x0; + double dx, x1; + + for (int k=2; k<=n; k++) + { + double P_2 = P_1; + P_1 = P0; +// P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k; + P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2); + } + + dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0); + + /* Newton step */ + x1 = x0 - P0/dpdx; + dx = x0-x1; + x0 = x1; + if (dobreak) break; + + if (abs(dx)<=eps) dobreak=1; + myassert(++j<100, "convergence problem"); + } + + x[m-i] = x0; + w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx); + } +} // end of parallel region + } + +// +// Start of real gridder functionality +// + +size_t get_supp(double epsilon) + { + static const std::vector<double> maxmaperr { 1e8, 0.32, 0.021, 6.2e-4, + 1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17, + 6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 }; + + double epssq = epsilon*epsilon; + + for (size_t i=1; i<maxmaperr.size(); ++i) + if (epssq>maxmaperr[i]) return i; + throw std::runtime_error("requested epsilon too small - minimum is 2e-13"); + } + +template<typename T> void complex2hartley + (const smav<const std::complex<T>, 2> &grid, smav<T,2> &grid2) + { + myassert(grid.shape()==grid2.shape(), "shape mismatch"); + size_t nu=grid.shape(0), nv=grid.shape(1); + +#pragma omp parallel for num_threads(nthreads) + for (size_t u=0; u<nu; ++u) + { + size_t xu = (u==0) ? 0 : nu-u; + for (size_t v=0; v<nv; ++v) + { + size_t xv = (v==0) ? 0 : nv-v; + grid2(u,v) += T(0.5)*(grid( u, v).real()+grid( u, v).imag()+ + grid(xu,xv).real()-grid(xu,xv).imag()); + } + } + } + +template<typename T> void hartley2complex + (const smav<const T,2> &grid, smav<std::complex<T>,2> &grid2) + { + myassert(grid.shape()==grid2.shape(), "shape mismatch"); + size_t nu=grid.shape(0), nv=grid.shape(1); + +#pragma omp parallel for num_threads(nthreads) + for (size_t u=0; u<nu; ++u) + { + size_t xu = (u==0) ? 0 : nu-u; + for (size_t v=0; v<nv; ++v) + { + size_t xv = (v==0) ? 0 : nv-v; + T v1 = T(0.5)*grid( u, v); + T v2 = T(0.5)*grid(xu,xv); + grid2(u,v) = std::complex<T>(v1+v2, v1-v2); + } + } + } + +template<typename T> void hartley2_2D(const smav<const T,2> &in, smav<T,2> &out) + { + myassert(in.shape()==out.shape(), "shape mismatch"); + size_t nu=in.shape(0), nv=in.shape(1), sz=sizeof(T); + pocketfft::stride_t str{ptrdiff_t(sz*nv), ptrdiff_t(sz)}; + auto d_i = in.data(); + auto ptmp = out.data(); + pocketfft::r2r_separable_hartley({nu, nv}, str, str, {0,1}, d_i, ptmp, T(1), + nthreads); +#pragma omp parallel for num_threads(nthreads) + for(size_t i=1; i<(nu+1)/2; ++i) + for(size_t j=1; j<(nv+1)/2; ++j) + { + T a = ptmp[i*nv+j]; + T b = ptmp[(nu-i)*nv+j]; + T c = ptmp[i*nv+nv-j]; + T d = ptmp[(nu-i)*nv+nv-j]; + ptmp[i*nv+j] = T(0.5)*(a+b+c-d); + ptmp[(nu-i)*nv+j] = T(0.5)*(a+b+d-c); + ptmp[i*nv+nv-j] = T(0.5)*(a+c+d-b); + ptmp[(nu-i)*nv+nv-j] = T(0.5)*(b+c+d-a); + } + } + +template<typename T> class EC_Kernel + { + protected: + size_t supp; + T beta, emb_; + + public: + EC_Kernel(size_t supp_) + : supp(supp_), beta(2.3*supp_), emb_(exp(-beta)) {} + T operator()(T v) const { return exp(beta*(sqrt(T(1)-v*v)-T(1))); } + T val_no_emb(T v) const { return exp(beta*sqrt(T(1)-v*v)); } + T emb() const { return emb_; } + }; + +template<typename T> class EC_Kernel_with_correction: public EC_Kernel<T> + { + protected: + static constexpr T pi = T(3.141592653589793238462643383279502884197L); + int p; + std::vector<T> x, wgt, psi; + using EC_Kernel<T>::supp; + + public: + using EC_Kernel<T>::operator(); + EC_Kernel_with_correction(size_t supp_) + : EC_Kernel<T>(supp_), p(int(1.5*supp_+2)) + { + legendre_prep(2*p,x,wgt); + psi=x; + for (auto &v:psi) + v=operator()(v); + } + /* Compute correction factors for the ES gridding kernel + This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ + T corfac(T v) const + { + T tmp=0; + for (int i=0; i<p; ++i) + tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]); + return T(1)/(supp*tmp); + } + }; +/* Compute correction factors for the ES gridding kernel + This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ +std::vector<double> correction_factors (size_t n, size_t nval, size_t supp) + { + EC_Kernel_with_correction<double> kernel(supp); + std::vector<double> res(nval); + double xn = 1./n; +#pragma omp parallel for schedule(static) num_threads(nthreads) + for (size_t k=0; k<nval; ++k) + res[k] = kernel.corfac(k*xn); + return res; + } + +template<typename T> struct UVW + { + T u, v, w; + UVW () {} + UVW (T u_, T v_, T w_) : u(u_), v(v_), w(w_) {} + UVW operator* (T fct) const + { return UVW(u*fct, v*fct, w*fct); } + }; + +template<typename T> class Baselines + { + protected: + std::vector<UVW<T>> coord; + std::vector<T> f_over_c; + size_t nrows, nchan; + + public: + Baselines(const smav<const T,2> &coord_, const smav<const T,1> &freq) + { + constexpr double speedOfLight = 299792458.; + myassert(coord_.shape(1)==3, "dimension mismatch"); + nrows = coord_.shape(0); + nchan = freq.shape(0); + myassert(nrows*nchan<(size_t(1)<<32), "too many entries in MS"); + f_over_c.resize(nchan); + for (size_t i=0; i<nchan; ++i) + f_over_c[i] = freq(i)/speedOfLight; + coord.resize(nrows); + for (size_t i=0; i<coord.size(); ++i) + coord[i] = UVW<T>(coord_(i,0), coord_(i,1), coord_(i,2)); + } + + UVW<T> effectiveCoord(uint32_t index) const + { + size_t irow = index/nchan; + size_t ichan = index-nchan*irow; + return coord[irow]*f_over_c[ichan]; + } + UVW<T> effectiveCoord(size_t irow, size_t ichan) const + { return coord[irow]*f_over_c[ichan]; } + size_t Nrows() const { return nrows; } + size_t Nchannels() const { return nchan; } + + void effectiveUVW(const smav<const uint32_t,1> &idx, smav<T,2> &res) const + { + size_t nvis = idx.shape(0); + myassert(res.shape(0)==nvis, "shape mismatch"); + myassert(res.shape(1)==3, "shape mismatch"); + for (size_t i=0; i<nvis; i++) + { + auto uvw = effectiveCoord(idx(i)); + res(i,0) = uvw.u; + res(i,1) = uvw.v; + res(i,2) = uvw.w; + } + } + + template<typename T2> void ms2vis(const smav<const T2,2> &ms, + const smav<const uint32_t,1> &idx, smav<T2,1> &vis) const + { + myassert(ms.shape(0)==nrows, "shape mismatch"); + myassert(ms.shape(1)==nchan, "shape mismatch"); + size_t nvis = idx.shape(0); + myassert(vis.shape(0)==nvis, "shape mismatch"); +#pragma omp parallel for num_threads(nthreads) + for (size_t i=0; i<nvis; ++i) + { + auto t = idx(i); + auto row = t/nchan; + auto chan = t-row*nchan; + vis[i] = ms(row, chan); + } + } + + template<typename T2> void vis2ms(const smav<const T2,1> &vis, + const smav<const uint32_t,1> &idx, smav<T2,2> &ms) const + { + size_t nvis = vis.shape(0); + myassert(idx.shape(0)==nvis, "shape mismatch"); + myassert(ms.shape(0)==nrows, "shape mismatch"); + myassert(ms.shape(1)==nchan, "shape mismatch"); +#pragma omp parallel for num_threads(nthreads) + for (size_t i=0; i<nvis; ++i) + { + auto t = idx(i); + auto row = t/nchan; + auto chan = t-row*nchan; + ms(row, chan) += vis(i); + } + } + }; + +template<typename T> class GridderConfig + { + protected: + size_t nx_dirty, ny_dirty; + double eps, psx, psy; + size_t supp, nsafe, nu, nv; + T beta; + std::vector<T> cfu, cfv; + + std::complex<T> wscreen(double x, double y, double w, bool adjoint) const + { + using namespace std; + constexpr double pi = 3.141592653589793238462643383279502884197; +#if 1 + double eps = sqrt(x+y); + double s = sin(eps); + double nm1 = -s*s/(1.+cos(eps)); +#else + double nm1 = (-x-y)/(sqrt(1.-x-y)+1); +#endif + double n = nm1+1., xn = 1./n; + double phase = 2*pi*w*nm1; + if (adjoint) phase *= -1; + return complex<T>(cos(phase)*xn, sin(phase)*xn); + } + + public: + GridderConfig(size_t nxdirty, size_t nydirty, double epsilon, + double pixsize_x, double pixsize_y) + : nx_dirty(nxdirty), ny_dirty(nydirty), eps(epsilon), + psx(pixsize_x), psy(pixsize_y), + supp(get_supp(epsilon)), nsafe((supp+1)/2), + nu(std::max(2*nsafe,2*nx_dirty)), nv(std::max(2*nsafe,2*ny_dirty)), + beta(2.3*supp), + cfu(nx_dirty), cfv(ny_dirty) + { + myassert((nx_dirty&1)==0, "nx_dirty must be even"); + myassert((ny_dirty&1)==0, "ny_dirty must be even"); + myassert(epsilon>0, "epsilon must be positive"); + myassert(pixsize_x>0, "pixsize_x must be positive"); + myassert(pixsize_y>0, "pixsize_y must be positive"); + + auto tmp = correction_factors(nu, nx_dirty/2+1, supp); + cfu[nx_dirty/2]=tmp[0]; + cfu[0]=tmp[nx_dirty/2]; + for (size_t i=1; i<nx_dirty/2; ++i) + cfu[nx_dirty/2-i] = cfu[nx_dirty/2+i] = tmp[i]; + tmp = correction_factors(nv, ny_dirty/2+1, supp); + cfv[ny_dirty/2]=tmp[0]; + cfv[0]=tmp[ny_dirty/2]; + for (size_t i=1; i<ny_dirty/2; ++i) + cfv[ny_dirty/2-i] = cfv[ny_dirty/2+i] = tmp[i]; + } + size_t Nxdirty() const { return nx_dirty; } + size_t Nydirty() const { return ny_dirty; } + double Epsilon() const { return eps; } + double Pixsize_x() const { return psx; } + double Pixsize_y() const { return psy; } + size_t Nu() const { return nu; } + size_t Nv() const { return nv; } + size_t Supp() const { return supp; } + size_t Nsafe() const { return nsafe; } + T Beta() const { return beta; } + + void grid2dirty(const smav<const T,2> &grid, smav<T,2> &grid2) const + { + checkShape(grid.shape(), {nu,nv}); + std::vector<T> tmpdat(nu*nv); + auto tmp=smav<T,2>(tmpdat.data(),{nu,nv},{nv,1}); + hartley2_2D<T>(grid, tmp); + checkShape(grid2.shape(), {nx_dirty, ny_dirty}); + for (size_t i=0; i<nx_dirty; ++i) + for (size_t j=0; j<ny_dirty; ++j) + { + size_t i2 = nu-nx_dirty/2+i; + if (i2>=nu) i2-=nu; + size_t j2 = nv-ny_dirty/2+j; + if (j2>=nv) j2-=nv; + grid2(i,j) = tmp(i2,j2)*cfu[i]*cfv[j]; + } + } +#if 0 + pyarr_c<T> apply_taper(const pyarr_c<T> &img, bool divide) const + { + checkArray(img, "img", {nx_dirty, ny_dirty}); + auto pin = img.data(); + auto res = makeArray<T>({nx_dirty, ny_dirty}); + auto pout = res.mutable_data(); + { + py::gil_scoped_release release; + if (divide) + for (size_t i=0; i<nx_dirty; ++i) + for (size_t j=0; j<ny_dirty; ++j) + pout[ny_dirty*i + j] = pin[ny_dirty*i + j]/(cfu[i]*cfv[j]); + else + for (size_t i=0; i<nx_dirty; ++i) + for (size_t j=0; j<ny_dirty; ++j) + pout[ny_dirty*i + j] = pin[ny_dirty*i + j]*cfu[i]*cfv[j]; + } + return res; + } + pyarr_c<complex<T>> grid2dirty_c(const pyarr_c<complex<T>> &grid) const + { + checkArray(grid, "grid", {nu, nv}); + auto tmp = makeArray<complex<T>>({nu, nv}); + auto ptmp = tmp.mutable_data(); + pocketfft::c2c({nu,nv},{grid.strides(0),grid.strides(1)}, + {tmp.strides(0), tmp.strides(1)}, {0,1}, pocketfft::BACKWARD, + grid.data(), tmp.mutable_data(), T(1), nthreads); + auto res = makeArray<complex<T>>({nx_dirty, ny_dirty}); + auto pout = res.mutable_data(); + { + py::gil_scoped_release release; + for (size_t i=0; i<nx_dirty; ++i) + for (size_t j=0; j<ny_dirty; ++j) + { + size_t i2 = nu-nx_dirty/2+i; + if (i2>=nu) i2-=nu; + size_t j2 = nv-ny_dirty/2+j; + if (j2>=nv) j2-=nv; + pout[ny_dirty*i + j] = ptmp[nv*i2+j2]*cfu[i]*cfv[j]; + } + } + return res; + } + + pyarr_c<T> dirty2grid(const pyarr_c<T> &dirty) const + { + checkArray(dirty, "dirty", {nx_dirty, ny_dirty}); + auto pdirty = dirty.data(); + auto tmp = makeArray<T>({nu, nv}); + auto ptmp = tmp.mutable_data(); + { + py::gil_scoped_release release; + for (size_t i=0; i<nu*nv; ++i) + ptmp[i] = 0.; + for (size_t i=0; i<nx_dirty; ++i) + for (size_t j=0; j<ny_dirty; ++j) + { + size_t i2 = nu-nx_dirty/2+i; + if (i2>=nu) i2-=nu; + size_t j2 = nv-ny_dirty/2+j; + if (j2>=nv) j2-=nv; + ptmp[nv*i2+j2] = pdirty[ny_dirty*i + j]*cfu[i]*cfv[j]; + } + } + hartley2_2D<T>(tmp, tmp); + return tmp; + } + pyarr_c<complex<T>> dirty2grid_c(const pyarr_c<complex<T>> &dirty) const + { + checkArray(dirty, "dirty", {nx_dirty, ny_dirty}); + auto pdirty = dirty.data(); + auto tmp = makeArray<complex<T>>({nu, nv}); + auto ptmp = tmp.mutable_data(); + pocketfft::stride_t strides{tmp.strides(0),tmp.strides(1)}; + { + py::gil_scoped_release release; + for (size_t i=0; i<nu*nv; ++i) + ptmp[i] = 0.; + for (size_t i=0; i<nx_dirty; ++i) + for (size_t j=0; j<ny_dirty; ++j) + { + size_t i2 = nu-nx_dirty/2+i; + if (i2>=nu) i2-=nu; + size_t j2 = nv-ny_dirty/2+j; + if (j2>=nv) j2-=nv; + ptmp[nv*i2+j2] = pdirty[ny_dirty*i + j]*cfu[i]*cfv[j]; + } + pocketfft::c2c({nu,nv}, strides, strides, {0,1}, pocketfft::FORWARD, + ptmp, ptmp, T(1), nthreads); + } + return tmp; + } +#endif + inline void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const + { + u=fmodulo(u_in*psx, T(1))*nu, + iu0 = int(u-supp*0.5 + 1 + nu) - nu; + if (iu0+supp>nu+nsafe) iu0 = nu+nsafe-supp; + v=fmodulo(v_in*psy, T(1))*nv; + iv0 = int(v-supp*0.5 + 1 + nv) - nv; + if (iv0+supp>nv+nsafe) iv0 = nv+nsafe-supp; + } +#if 0 + pyarr_c<complex<T>> apply_wscreen(const pyarr_c<complex<T>> &dirty_, double w, bool adjoint) const + { + checkArray(dirty_, "dirty", {nx_dirty, ny_dirty}); + auto dirty = dirty_.data(); + auto res_ = makeArray<complex<T>>({nx_dirty, ny_dirty}); + auto res = res_.mutable_data(); + double x0 = -0.5*nx_dirty*psx, + y0 = -0.5*ny_dirty*psy; + { + py::gil_scoped_release release; +#pragma omp parallel num_threads(nthreads) +{ +#pragma omp for schedule(static) + for (size_t i=0; i<=nx_dirty/2; ++i) + { + double fx = x0+i*psx; + fx *= fx; + for (size_t j=0; j<=ny_dirty/2; ++j) + { + double fy = y0+j*psy; + auto ws = wscreen(fx, fy*fy, w, adjoint); + res[ny_dirty*i+j] = dirty[ny_dirty*i+j]*ws; // lower left + size_t i2 = nx_dirty-i, j2 = ny_dirty-j; + if ((i>0)&&(i<i2)) + { + res[ny_dirty*i2+j] = dirty[ny_dirty*i2+j]*ws; // lower right + if ((j>0)&&(j<j2)) + res[ny_dirty*i2+j2] = dirty[ny_dirty*i2+j2]*ws; // upper right + } + if ((j>0)&&(j<j2)) + res[ny_dirty*i+j2] = dirty[ny_dirty*i+j2]*ws; // upper left + } + } +} + } + return res_; + } +#endif + }; + +constexpr int logsquare=4; + +template<typename T, typename T2=std::complex<T>> class Helper + { + private: + const GridderConfig<T> &gconf; + int nu, nv, nsafe, supp; + T beta; + const T2 *grid_r; + T2 *grid_w; + int su, sv; + int iu0, iv0; // start index of the current visibility + int bu0, bv0; // start index of the current buffer + + std::vector<T2> rbuf, wbuf; + + void dump() const + { + if (bu0<-nsafe) return; // nothing written into buffer yet + +#pragma omp critical (gridder_writing_to_grid) +{ + int idxu = (bu0+nu)%nu; + int idxv0 = (bv0+nv)%nv; + for (int iu=0; iu<su; ++iu) + { + int idxv = idxv0; + for (int iv=0; iv<sv; ++iv) + { + grid_w[idxu*nv + idxv] += wbuf[iu*sv + iv]; + if (++idxv>=nv) idxv=0; + } + if (++idxu>=nu) idxu=0; + } +} + } + + void load() + { + int idxu = (bu0+nu)%nu; + int idxv0 = (bv0+nv)%nv; + for (int iu=0; iu<su; ++iu) + { + int idxv = idxv0; + for (int iv=0; iv<sv; ++iv) + { + rbuf[iu*sv + iv] = grid_r[idxu*nv + idxv]; + if (++idxv>=nv) idxv=0; + } + if (++idxu>=nu) idxu=0; + } + } + + public: + const T2 *p0r; + T2 *p0w; + std::vector<T> kernel; + + Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_) + : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()), + supp(gconf.Supp()), beta(gconf.Beta()), grid_r(grid_r_), + grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)), + bu0(-1000000), bv0(-1000000), + rbuf(su*sv*(grid_r!=nullptr),T(0)), + wbuf(su*sv*(grid_w!=nullptr),T(0)), + kernel(2*supp) + {} + ~Helper() { if (grid_w) dump(); } + + int lineJump() const { return sv; } + + void prep(T u_in, T v_in) + { + T u, v; + gconf.getpix(u_in, v_in, u, v, iu0, iv0); + T xsupp=T(2)/supp; + auto x0 = xsupp*(iu0-u); + auto y0 = xsupp*(iv0-v); + for (int i=0; i<supp; ++i) + { + auto x = x0+i*xsupp; + kernel[i ] = beta*sqrt(T(1)-x*x); + auto y = y0+i*xsupp; + kernel[i+supp] = beta*sqrt(T(1)-y*y); + } + for (auto &k : kernel) + k = exp(k); + + if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv)) + { + if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); } + bu0=((((iu0+nsafe)>>logsquare)<<logsquare))-nsafe; + bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe; + if (grid_r) load(); + } + p0r = grid_r ? rbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr; + p0w = grid_w ? wbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr; + } + }; + +#endif diff --git a/nifty_gridder.cc b/nifty_gridder.cc index f23b5a31741d97023ffcb46d6ef0958488a8ae09..fccb64f2b3cf19c5aa7082e3072b3e3f7c6af274 100644 --- a/nifty_gridder.cc +++ b/nifty_gridder.cc @@ -21,12 +21,8 @@ #include <pybind11/pybind11.h> #include <pybind11/numpy.h> -#include <iostream> -#include <algorithm> -#include <cstdlib> -#include <cmath> -#include "pocketfft_hdronly.h" +#include "gridder_cxx.h" using namespace std; @@ -40,110 +36,6 @@ auto None = py::none(); // basic utilities // -void myassert(bool cond, const char *msg) - { - if (cond) return; - throw runtime_error(msg); - } - -/*! Returns the remainder of the division \a v1/v2. - The result is non-negative. - \a v1 can be positive or negative; \a v2 must be positive. */ -template<typename T> inline T fmodulo (T v1, T v2) - { - if (v1>=0) - return (v1<v2) ? v1 : fmod(v1,v2); - T tmp=fmod(v1,v2)+v2; - return (tmp==v2) ? T(0) : tmp; - } - -static size_t nthreads = 1; - -constexpr auto set_nthreads_DS = R"""( -Specifies the number of threads to be used by the module - -Parameters -========== -nthreads: int - the number of threads to be used. Must be >=1. -)"""; -void set_nthreads(size_t nthreads_) - { - myassert(nthreads_>=1, "nthreads must be >= 1"); - nthreads = nthreads_; - } - -constexpr auto get_nthreads_DS = R"""( -Returns the number of threads used by the module - -Returns -======= -int : the number of threads used by the module -)"""; -size_t get_nthreads() - { return nthreads; } - -// -// Utilities for Gauss-Legendre quadrature -// - -static inline double one_minus_x2 (double x) - { return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; } - -void legendre_prep(int n, vector<double> &x, vector<double> &w) - { - constexpr double pi = 3.141592653589793238462643383279502884197; - constexpr double eps = 3e-14; - int m = (n+1)>>1; - x.resize(m); - w.resize(m); - - double t0 = 1 - (1-1./n) / (8.*n*n); - double t1 = 1./(4.*n+2.); - -#pragma omp parallel num_threads(nthreads) -{ - int i; -#pragma omp for schedule(dynamic,100) - for (i=1; i<=m; ++i) - { - double x0 = cos(pi * ((i<<2)-1) * t1) * t0; - - int dobreak=0; - int j=0; - double dpdx; - while(1) - { - double P_1 = 1.0; - double P0 = x0; - double dx, x1; - - for (int k=2; k<=n; k++) - { - double P_2 = P_1; - P_1 = P0; -// P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k; - P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2); - } - - dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0); - - /* Newton step */ - x1 = x0 - P0/dpdx; - dx = x0-x1; - x0 = x1; - if (dobreak) break; - - if (abs(dx)<=eps) dobreak=1; - myassert(++j<100, "convergence problem"); - } - - x[m-i] = x0; - w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx); - } -} // end of parallel region - } - // // Start of real gridder functionality // @@ -157,19 +49,6 @@ template<typename T> template<typename T> pyarr_c<T> makeArray(const vector<size_t> &shape) { return pyarr_c<T>(shape); } -size_t get_supp(double epsilon) - { - static const vector<double> maxmaperr { 1e8, 0.32, 0.021, 6.2e-4, - 1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17, - 6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 }; - - double epssq = epsilon*epsilon; - - for (size_t i=1; i<maxmaperr.size(); ++i) - if (epssq>maxmaperr[i]) return i; - throw runtime_error("requested epsilon too small - minimum is 2e-13"); - } - void checkArray(const py::array &arr, const char *aname, const vector<size_t> &shape) { @@ -232,153 +111,111 @@ template<typename T> pyarr_c<T> provideCArray(py::object &in, return tmp_; } -template<typename T> pyarr_c<T> complex2hartley - (const pyarr_c<complex<T>> &grid_, py::object &grid_in) +template<size_t ndim, typename T> cmav<T,ndim> make_cmav(pyarr_c<T> &in) { - checkArray(grid_, "grid", {0,0}); - size_t nu = size_t(grid_.shape(0)), nv = size_t(grid_.shape(1)); - auto grid = grid_.data(); - - auto res = provideCArray<T>(grid_in, {nu, nv}); - auto grid2 = res.mutable_data(); + myassert(ndim==in.ndim(), "dimension mismatch"); + array<size_t,ndim> dims; + for (size_t i=0; i<ndim; ++i) dims[i]=in.shape(i); + return cmav<T, ndim>(in.mutable_data(),dims); + } +template<size_t ndim, typename T> smav<T,ndim> make_smav(pyarr<T> &in) { - py::gil_scoped_release release; -#pragma omp parallel for num_threads(nthreads) - for (size_t u=0; u<nu; ++u) + myassert(ndim==in.ndim(), "dimension mismatch"); + array<size_t,ndim> dims; + array<ptrdiff_t,ndim> str; + for (size_t i=0; i<ndim; ++i) { - size_t xu = (u==0) ? 0 : nu-u; - for (size_t v=0; v<nv; ++v) - { - size_t xv = (v==0) ? 0 : nv-v; - size_t i1 = u*nv+v; - size_t i2 = xu*nv+xv; - grid2[i1] += T(0.5)*(grid[i1].real()+grid[i1].imag()+ - grid[i2].real()-grid[i2].imag()); - } + dims[i]=in.shape(i); + str[i]=in.strides(i)/sizeof(T); + myassert(str[i]*ptrdiff_t(sizeof(T))==in.strides(i), "weird strides"); } + return smav<T, ndim>(in.mutable_data(),dims,str); } - return res; +template<size_t ndim, typename T> smav<T,ndim> make_smav(pyarr_c<T> &in) + { + myassert(ndim==in.ndim(), "dimension mismatch"); + array<size_t,ndim> dims; + array<ptrdiff_t,ndim> str; + for (size_t i=0; i<ndim; ++i) + { + dims[i]=in.shape(i); + str[i]=in.strides(i)/sizeof(T); + myassert(str[i]*ptrdiff_t(sizeof(T))==in.strides(i), "weird strides"); + } + return smav<T, ndim>(in.mutable_data(),dims,str); } - -template<typename T> pyarr_c<complex<T>> hartley2complex - (const pyarr_c<T> &grid_) +template<size_t ndim, typename T> cmav<const T,ndim> make_const_cmav(const pyarr_c<T> &in) { - checkArray(grid_, "grid", {0, 0}); - size_t nu = size_t(grid_.shape(0)), nv = size_t(grid_.shape(1)); - auto grid = grid_.data(); - - auto res=makeArray<complex<T>>({nu, nv}); - auto grid2 = res.mutable_data(); + myassert(ndim==in.ndim(), "dimension mismatch"); + array<size_t,ndim> dims; + for (size_t i=0; i<ndim; ++i) dims[i]=in.shape(i); + return cmav<const T, ndim>(in.data(),dims); + } +template<size_t ndim, typename T> smav<const T,ndim> make_const_smav(const pyarr<T> &in) { - py::gil_scoped_release release; -#pragma omp parallel for num_threads(nthreads) - for (size_t u=0; u<nu; ++u) + myassert(ndim==in.ndim(), "dimension mismatch"); + array<size_t,ndim> dims; + array<ptrdiff_t,ndim> str; + for (size_t i=0; i<ndim; ++i) { - size_t xu = (u==0) ? 0 : nu-u; - for (size_t v=0; v<nv; ++v) - { - size_t xv = (v==0) ? 0 : nv-v; - size_t i1 = u*nv+v; - size_t i2 = xu*nv+xv; - T v1 = T(0.5)*grid[i1]; - T v2 = T(0.5)*grid[i2]; - grid2[i1] = complex<T>(v1+v2, v1-v2); - } + dims[i]=in.shape(i); + str[i]=in.strides(i)/sizeof(T); + myassert(str[i]*ptrdiff_t(sizeof(T))==in.strides(i), "weird strides"); } + return smav<const T, ndim>(in.data(),dims,str); } - return res; +template<size_t ndim, typename T> smav<const T,ndim> make_const_smav(const pyarr_c<T> &in) + { + myassert(ndim==in.ndim(), "dimension mismatch"); + array<size_t,ndim> dims; + array<ptrdiff_t,ndim> str; + for (size_t i=0; i<ndim; ++i) + { + dims[i]=in.shape(i); + str[i]=in.strides(i)/sizeof(T); + myassert(str[i]*ptrdiff_t(sizeof(T))==in.strides(i), "weird strides"); + } + return smav<const T, ndim>(in.data(),dims,str); } - -template<typename T> void hartley2_2D(const pyarr_c<T> &in, pyarr_c<T> &out) +template<typename T> pyarr_c<T> complex2hartley + (const pyarr_c<complex<T>> &grid_, py::object &grid_in) { - size_t nu=in.shape(0), nv=in.shape(1); - pocketfft::stride_t s_i{in.strides(0), in.strides(1)}, - s_o{out.strides(0), out.strides(1)}; - auto d_i = in.data(); - auto ptmp = out.mutable_data(); + auto grid = make_const_smav<2>(grid_); + size_t nu=grid.shape(0), nv=grid.shape(1); + + auto res = provideCArray<T>(grid_in, {nu, nv}); + auto grid2 = make_smav<2>(res); { py::gil_scoped_release release; - pocketfft::r2r_separable_hartley({nu, nv}, s_i, s_o, {0,1}, d_i, ptmp, T(1), - nthreads); -#pragma omp parallel for num_threads(nthreads) - for(size_t i=1; i<(nu+1)/2; ++i) - for(size_t j=1; j<(nv+1)/2; ++j) - { - T a = ptmp[i*nv+j]; - T b = ptmp[(nu-i)*nv+j]; - T c = ptmp[i*nv+nv-j]; - T d = ptmp[(nu-i)*nv+nv-j]; - ptmp[i*nv+j] = T(0.5)*(a+b+c-d); - ptmp[(nu-i)*nv+j] = T(0.5)*(a+b+d-c); - ptmp[i*nv+nv-j] = T(0.5)*(a+c+d-b); - ptmp[(nu-i)*nv+nv-j] = T(0.5)*(b+c+d-a); - } + complex2hartley(grid, grid2); } + return res; } -template<typename T> class EC_Kernel - { - protected: - size_t supp; - T beta, emb_; - - public: - EC_Kernel(size_t supp_) - : supp(supp_), beta(2.3*supp_), emb_(exp(-beta)) {} - T operator()(T v) const { return exp(beta*(sqrt(T(1)-v*v)-T(1))); } - T val_no_emb(T v) const { return exp(beta*sqrt(T(1)-v*v)); } - T emb() const { return emb_; } - }; - -template<typename T> class EC_Kernel_with_correction: public EC_Kernel<T> +template<typename T> pyarr_c<complex<T>> hartley2complex + (const pyarr_c<T> &grid_) { - protected: - static constexpr T pi = T(3.141592653589793238462643383279502884197L); - int p; - vector<T> x, wgt, psi; - using EC_Kernel<T>::supp; + auto grid = make_const_smav<2>(grid_); + size_t nu=grid.shape(0), nv=grid.shape(1); - public: - using EC_Kernel<T>::operator(); - EC_Kernel_with_correction(size_t supp_) - : EC_Kernel<T>(supp_), p(int(1.5*supp_+2)) - { - legendre_prep(2*p,x,wgt); - psi=x; - for (auto &v:psi) - v=operator()(v); - } - T corfac(T v) const - { - T tmp=0; - for (int i=0; i<p; ++i) - tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]); - return T(1)/(supp*tmp); - } - }; -/* Compute correction factors for the ES gridding kernel - This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ -vector<double> correction_factors (size_t n, size_t nval, size_t supp) + auto res = makeArray<complex<T>>({nu, nv}); + auto grid2 = make_smav<2>(res); { - EC_Kernel_with_correction<double> kernel(supp); - vector<double> res(nval); - double xn = 1./n; -#pragma omp parallel for schedule(static) num_threads(nthreads) - for (size_t k=0; k<nval; ++k) - res[k] = kernel.corfac(k*xn); + py::gil_scoped_release release; + hartley2complex(grid, grid2); + } return res; } -template<typename T> struct UVW +template<typename T> void hartley2_2D(const pyarr_c<T> &in, pyarr_c<T> &out) { - T u, v, w; - UVW () {} - UVW (T u_, T v_, T w_) : u(u_), v(v_), w(w_) {} - UVW operator* (T fct) const - { return UVW(u*fct, v*fct, w*fct); } - }; + auto grid = make_const_smav<2>(in); + auto grid2 = make_smav<2>(out); + py::gil_scoped_release release; + hartley2_2D(grid, grid2); + } -constexpr auto Baselines_DS = R"""( +constexpr auto PyBaselines_DS = R"""( Class storing UVW coordinates and channel information. Parameters @@ -388,45 +225,23 @@ coord: np.array((nrows, 3), dtype=np.float) freq: np.array((nchannels,), dtype=np.float) frequency for each individual channel (in Hz) )"""; -template<typename T> class Baselines +template<typename T> class PyBaselines: public Baselines<T> { - private: - vector<UVW<T>> coord; - vector<T> f_over_c; - size_t nrows, nchan; + protected: + using Baselines<T>::coord; + using Baselines<T>::f_over_c; + using Baselines<T>::nrows; + using Baselines<T>::nchan; public: - Baselines(const pyarr<T> &coord_, const pyarr<T> &freq_) - { - constexpr double speedOfLight = 299792458.; - checkArray(coord_, "coord", {0, 3}); - checkArray(freq_, "freq", {0}); - nrows = coord_.shape(0); - nchan = freq_.shape(0); - myassert(nrows*nchan<(size_t(1)<<32), "too many entries in MS"); - auto freq = freq_.template unchecked<1>(); - auto cood = coord_.template unchecked<2>(); - { - py::gil_scoped_release release; - f_over_c.resize(nchan); - for (size_t i=0; i<nchan; ++i) - f_over_c[i] = freq(i)/speedOfLight; - coord.resize(nrows); - for (size_t i=0; i<coord.size(); ++i) - coord[i] = UVW<T>(cood(i,0), cood(i,1), cood(i,2)); - } - } + using Baselines<T>::Baselines; + PyBaselines(const pyarr<T> &coord, const pyarr<T> &freq) + : Baselines<T>(make_const_smav<2>(coord), make_const_smav<1>(freq)) + {} - UVW<T> effectiveCoord(uint32_t index) const - { - size_t irow = index/nchan; - size_t ichan = index-nchan*irow; - return coord[irow]*f_over_c[ichan]; - } - UVW<T> effectiveCoord(size_t irow, size_t ichan) const - { return coord[irow]*f_over_c[ichan]; } - size_t Nrows() const { return nrows; } - size_t Nchannels() const { return nchan; } + using Baselines<T>::effectiveCoord; + using Baselines<T>::Nrows; + using Baselines<T>::Nchannels; static constexpr auto ms2vis_DS = R"""( Extracts visibility data from a measurement for the provided indices. @@ -444,44 +259,31 @@ template<typename T> class Baselines The visibility data for the index array )"""; - pyarr_c<T> effectiveuvw(const pyarr_c<uint32_t> &idx_) const - { - checkArray(idx_, "idx", {0}); + // using Baselines<T>::effectiveUVW; + pyarr_c<T> effectiveuvw(const pyarr<uint32_t> &idx_) const + { size_t nvis = size_t(idx_.shape(0)); - auto idx = idx_.template unchecked<1>(); + auto idx=make_const_smav<1>(idx_); auto res_=makeArray<T>({nvis, 3}); - auto res = res_.template mutable_unchecked<2>(); - for (size_t i=0; i<nvis; i++) - { - auto uvw = effectiveCoord(idx(i)); - res(i,0) = uvw.u; - res(i,1) = uvw.v; - res(i,2) = uvw.w; - } + auto res=make_smav<2>(res_); + { + py::gil_scoped_release release; + Baselines<T>::effectiveUVW(idx,res); + } return res_; - } + } template<typename T2> pyarr_c<T2> ms2vis(const pyarr<T2> &ms_, const pyarr_c<uint32_t> &idx_) const { - checkArray(idx_, "idx", {0}); - checkArray(ms_, "ms", {nrows, nchan}); - size_t nvis = size_t(idx_.shape(0)); - auto idx = idx_.template unchecked<1>(); - auto ms = ms_.template unchecked<2>(); - + auto idx=make_const_smav<1>(idx_); + size_t nvis = size_t(idx.shape(0)); + auto ms = make_const_smav<2>(ms_); auto res=makeArray<T2>({nvis}); - auto vis = res.mutable_data(); + auto vis = make_smav<1>(res); { py::gil_scoped_release release; -#pragma omp parallel for num_threads(nthreads) - for (size_t i=0; i<nvis; ++i) - { - auto t = idx(i); - auto row = t/nchan; - auto chan = t-row*nchan; - vis[i] = ms(row, chan); - } + Baselines<T>::ms2vis(ms, idx, vis); } return res; } @@ -506,31 +308,18 @@ template<typename T> class Baselines template<typename T2> pyarr_c<T2> vis2ms(const pyarr<T2> &vis_, const pyarr<uint32_t> &idx_, py::object &ms_in) const { - checkArray(vis_, "vis", {0}); - size_t nvis = size_t(vis_.shape(0)); - checkArray(idx_, "idx", {nvis}); - auto idx = idx_.template unchecked<1>(); - auto vis = vis_.template unchecked<1>(); - + auto vis=make_const_smav<1>(vis_); + auto idx=make_const_smav<1>(idx_); auto res = provideArray<T2>(ms_in, {nrows, nchan}); - auto ms = res.template mutable_unchecked<2>(); + auto ms = make_smav<2>(res); { py::gil_scoped_release release; -#pragma omp parallel for num_threads(nthreads) - for (size_t i=0; i<nvis; ++i) - { - auto t = idx(i); - auto row = t/nchan; - auto chan = t-row*nchan; - ms(row, chan) += vis(i); - } + Baselines<T>::vis2ms(vis, idx, ms); } return res; } }; -constexpr int logsquare=4; - constexpr auto grid2dirty_DS = R"""( Converts from UV grid to dirty image (FFT, cropping, correction) @@ -610,91 +399,48 @@ pixsize_x: float pixsize_y: float Pixel size in y direction (radians) )"""; -template<typename T> class GridderConfig +template<typename T> class PyGridderConfig: public GridderConfig<T> { - private: - size_t nx_dirty, ny_dirty; - double eps, psx, psy; - size_t supp, nsafe, nu, nv; - T beta; - vector<T> cfu, cfv; - - complex<T> wscreen(double x, double y, double w, bool adjoint) const - { - constexpr double pi = 3.141592653589793238462643383279502884197; -#if 1 - double eps = sqrt(x+y); - double s = sin(eps); - double nm1 = -s*s/(1.+cos(eps)); -#else - double nm1 = (-x-y)/(sqrt(1.-x-y)+1); -#endif - double n = nm1+1., xn = 1./n; - double phase = 2*pi*w*nm1; - if (adjoint) phase *= -1; - return complex<T>(cos(phase)*xn, sin(phase)*xn); - } + protected: + using GridderConfig<T>::nx_dirty; + using GridderConfig<T>::ny_dirty; + using GridderConfig<T>::eps; + using GridderConfig<T>::psx; + using GridderConfig<T>::psy; + using GridderConfig<T>::supp; + using GridderConfig<T>::nsafe; + using GridderConfig<T>::nu; + using GridderConfig<T>::nv; + using GridderConfig<T>::beta; + using GridderConfig<T>::cfu; + using GridderConfig<T>::cfv; + using GridderConfig<T>::wscreen; public: - GridderConfig(size_t nxdirty, size_t nydirty, double epsilon, + using GridderConfig<T>::GridderConfig; + PyGridderConfig(size_t nxdirty, size_t nydirty, double epsilon, double pixsize_x, double pixsize_y) - : nx_dirty(nxdirty), ny_dirty(nydirty), eps(epsilon), - psx(pixsize_x), psy(pixsize_y), - supp(get_supp(epsilon)), nsafe((supp+1)/2), - nu(max(2*nsafe,2*nx_dirty)), nv(max(2*nsafe,2*ny_dirty)), - beta(2.3*supp), - cfu(nx_dirty), cfv(ny_dirty) - { - { - py::gil_scoped_release release; - myassert((nx_dirty&1)==0, "nx_dirty must be even"); - myassert((ny_dirty&1)==0, "ny_dirty must be even"); - myassert(epsilon>0, "epsilon must be positive"); - myassert(pixsize_x>0, "pixsize_x must be positive"); - myassert(pixsize_y>0, "pixsize_y must be positive"); - - auto tmp = correction_factors(nu, nx_dirty/2+1, supp); - cfu[nx_dirty/2]=tmp[0]; - cfu[0]=tmp[nx_dirty/2]; - for (size_t i=1; i<nx_dirty/2; ++i) - cfu[nx_dirty/2-i] = cfu[nx_dirty/2+i] = tmp[i]; - tmp = correction_factors(nv, ny_dirty/2+1, supp); - cfv[ny_dirty/2]=tmp[0]; - cfv[0]=tmp[ny_dirty/2]; - for (size_t i=1; i<ny_dirty/2; ++i) - cfv[ny_dirty/2-i] = cfv[ny_dirty/2+i] = tmp[i]; - } - } - size_t Nxdirty() const { return nx_dirty; } - size_t Nydirty() const { return ny_dirty; } - double Epsilon() const { return eps; } - double Pixsize_x() const { return psx; } - double Pixsize_y() const { return psy; } - size_t Nu() const { return nu; } - size_t Nv() const { return nv; } - size_t Supp() const { return supp; } - size_t Nsafe() const { return nsafe; } - T Beta() const { return beta; } + : GridderConfig<T>(nxdirty, nydirty, epsilon, pixsize_x, pixsize_y) {} + using GridderConfig<T>::Nxdirty; + using GridderConfig<T>::Nydirty; + using GridderConfig<T>::Epsilon; + using GridderConfig<T>::Pixsize_x; + using GridderConfig<T>::Pixsize_y; + using GridderConfig<T>::Nu; + using GridderConfig<T>::Nv; + using GridderConfig<T>::Supp; + using GridderConfig<T>::Nsafe; + using GridderConfig<T>::Beta; pyarr_c<T> grid2dirty(const pyarr_c<T> &grid) const { checkArray(grid, "grid", {nu, nv}); - auto tmp = makeArray<T>({nu, nv}); - auto ptmp = tmp.mutable_data(); - hartley2_2D<T>(grid, tmp); auto res = makeArray<T>({nx_dirty, ny_dirty}); - auto pout = res.mutable_data(); + auto grid2=make_const_smav<2>(grid); + auto res2=make_smav<2>(res); { py::gil_scoped_release release; - for (size_t i=0; i<nx_dirty; ++i) - for (size_t j=0; j<ny_dirty; ++j) - { - size_t i2 = nu-nx_dirty/2+i; - if (i2>=nu) i2-=nu; - size_t j2 = nv-ny_dirty/2+j; - if (j2>=nv) j2-=nv; - pout[ny_dirty*i + j] = ptmp[nv*i2+j2]*cfu[i]*cfv[j]; - } + GridderConfig<T>::grid2dirty(grid2,res2); } return res; } @@ -790,15 +536,6 @@ template<typename T> class GridderConfig } return tmp; } - inline void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const - { - u=fmodulo(u_in*psx, T(1))*nu, - iu0 = int(u-supp*0.5 + 1 + nu) - nu; - if (iu0+supp>nu+nsafe) iu0 = nu+nsafe-supp; - v=fmodulo(v_in*psy, T(1))*nv; - iv0 = int(v-supp*0.5 + 1 + nv) - nv; - if (iv0+supp>nv+nsafe) iv0 = nv+nsafe-supp; - } pyarr_c<complex<T>> apply_wscreen(const pyarr_c<complex<T>> &dirty_, double w, bool adjoint) const { checkArray(dirty_, "dirty", {nx_dirty, ny_dirty}); @@ -838,103 +575,6 @@ template<typename T> class GridderConfig } }; -template<typename T, typename T2=complex<T>> class Helper - { - private: - const GridderConfig<T> &gconf; - int nu, nv, nsafe, supp; - T beta; - const T2 *grid_r; - T2 *grid_w; - int su, sv; - int iu0, iv0; // start index of the current visibility - int bu0, bv0; // start index of the current buffer - - vector<T2> rbuf, wbuf; - - void dump() const - { - if (bu0<-nsafe) return; // nothing written into buffer yet - -#pragma omp critical (gridder_writing_to_grid) -{ - int idxu = (bu0+nu)%nu; - int idxv0 = (bv0+nv)%nv; - for (int iu=0; iu<su; ++iu) - { - int idxv = idxv0; - for (int iv=0; iv<sv; ++iv) - { - grid_w[idxu*nv + idxv] += wbuf[iu*sv + iv]; - if (++idxv>=nv) idxv=0; - } - if (++idxu>=nu) idxu=0; - } -} - } - - void load() - { - int idxu = (bu0+nu)%nu; - int idxv0 = (bv0+nv)%nv; - for (int iu=0; iu<su; ++iu) - { - int idxv = idxv0; - for (int iv=0; iv<sv; ++iv) - { - rbuf[iu*sv + iv] = grid_r[idxu*nv + idxv]; - if (++idxv>=nv) idxv=0; - } - if (++idxu>=nu) idxu=0; - } - } - - public: - const T2 *p0r; - T2 *p0w; - vector<T> kernel; - - Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_) - : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()), - supp(gconf.Supp()), beta(gconf.Beta()), grid_r(grid_r_), - grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)), - bu0(-1000000), bv0(-1000000), - rbuf(su*sv*(grid_r!=nullptr),T(0)), - wbuf(su*sv*(grid_w!=nullptr),T(0)), - kernel(2*supp) - {} - ~Helper() { if (grid_w) dump(); } - - int lineJump() const { return sv; } - - void prep(T u_in, T v_in) - { - T u, v; - gconf.getpix(u_in, v_in, u, v, iu0, iv0); - T xsupp=T(2)/supp; - auto x0 = xsupp*(iu0-u); - auto y0 = xsupp*(iv0-v); - for (int i=0; i<supp; ++i) - { - auto x = x0+i*xsupp; - kernel[i ] = beta*sqrt(T(1)-x*x); - auto y = y0+i*xsupp; - kernel[i+supp] = beta*sqrt(T(1)-y*y); - } - for (auto &k : kernel) - k = exp(k); - - if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv)) - { - if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); } - bu0=((((iu0+nsafe)>>logsquare)<<logsquare))-nsafe; - bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe; - if (grid_r) load(); - } - p0r = grid_r ? rbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr; - p0w = grid_w ? wbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr; - } - }; constexpr auto vis2grid_c_DS = R"""( Grids visibilities onto a UV grid @@ -961,7 +601,7 @@ np.array((nu,nv), dtype=np.complex128): the gridded visibilities )"""; template<typename T> pyarr_c<complex<T>> vis2grid_c( - const Baselines<T> &baselines, const GridderConfig<T> &gconf, + const PyBaselines<T> &baselines, const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr<complex<T>> &vis_, py::object &grid_in, const py::object &wgt_) { @@ -1037,8 +677,8 @@ Returns np.array((nu,nv), dtype=np.float64): the gridded visibilities (made real by making use of Hermitian symmetry) )"""; -template<typename T> pyarr_c<T> vis2grid(const Baselines<T> &baselines, - const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, +template<typename T> pyarr_c<T> vis2grid(const PyBaselines<T> &baselines, + const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr<complex<T>> &vis_, py::object &grid_in, const py::object &wgt_) { return complex2hartley(vis2grid_c(baselines, gconf, idx_, vis_, None, wgt_), grid_in); } @@ -1067,7 +707,7 @@ np.array((nu,nv), dtype=np.complex128): the gridded visibilities )"""; template<typename T> pyarr_c<complex<T>> ms2grid_c( - const Baselines<T> &baselines, const GridderConfig<T> &gconf, + const PyBaselines<T> &baselines, const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr<complex<T>> &ms_, py::object &grid_in, const py::object &wgt_) { @@ -1124,13 +764,13 @@ template<typename T> pyarr_c<complex<T>> ms2grid_c( return res; } -template<typename T> pyarr_c<T> ms2grid(const Baselines<T> &baselines, - const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, +template<typename T> pyarr_c<T> ms2grid(const PyBaselines<T> &baselines, + const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr<complex<T>> &ms_, py::object &grid_in, const py::object &wgt_) { return complex2hartley(ms2grid_c(baselines, gconf, idx_, ms_, None, wgt_), grid_in); } template<typename T> pyarr_c<complex<T>> grid2vis_c( - const Baselines<T> &baselines, const GridderConfig<T> &gconf, + const PyBaselines<T> &baselines, const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr_c<complex<T>> &grid_, const py::object &wgt_) { @@ -1205,13 +845,13 @@ Returns np.array((nvis,), dtype=np.complex) The degridded visibility data )"""; -template<typename T> pyarr_c<complex<T>> grid2vis(const Baselines<T> &baselines, - const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, +template<typename T> pyarr_c<complex<T>> grid2vis(const PyBaselines<T> &baselines, + const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr_c<T> &grid_, const py::object &wgt_) { return grid2vis_c(baselines, gconf, idx_, hartley2complex(grid_), wgt_); } -template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines, - const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, +template<typename T> pyarr_c<complex<T>> grid2ms_c(const PyBaselines<T> &baselines, + const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr_c<complex<T>> &grid_, py::object &ms_in, const py::object &wgt_) { auto nrows = baselines.Nrows(); @@ -1270,13 +910,13 @@ template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines return res; } -template<typename T> pyarr_c<complex<T>> grid2ms(const Baselines<T> &baselines, - const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, +template<typename T> pyarr_c<complex<T>> grid2ms(const PyBaselines<T> &baselines, + const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr_c<T> &grid_, py::object &ms_in, const py::object &wgt_) { return grid2ms_c(baselines, gconf, idx_, hartley2complex(grid_), ms_in, wgt_); } template<typename T> pyarr_c<complex<T>> apply_holo( - const Baselines<T> &baselines, const GridderConfig<T> &gconf, + const PyBaselines<T> &baselines, const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr_c<complex<T>> &grid_, const py::object &wgt_) { @@ -1345,7 +985,7 @@ template<typename T> pyarr_c<complex<T>> apply_holo( } template<typename T> pyarr_c<T> get_correlations( - const Baselines<T> &baselines, const GridderConfig<T> &gconf, + const PyBaselines<T> &baselines, const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, int du, int dv, const py::object &wgt_) { size_t nu=gconf.Nu(), nv=gconf.Nv(); @@ -1437,8 +1077,8 @@ np.array((nvis,), dtype=np.uint32) the compressed indices for all entries which match the selected criteria and are not flagged. )"""; -template<typename T> pyarr_c<uint32_t> getIndices(const Baselines<T> &baselines, - const GridderConfig<T> &gconf, const pyarr_c<bool> &flags_, int chbegin, +template<typename T> pyarr_c<uint32_t> getIndices(const PyBaselines<T> &baselines, + const PyGridderConfig<T> &gconf, const pyarr_c<bool> &flags_, int chbegin, int chend, T wmin, T wmax) { size_t nrow=baselines.Nrows(), @@ -1494,8 +1134,8 @@ template<typename T> pyarr_c<uint32_t> getIndices(const Baselines<T> &baselines, return res; } -template<typename T> pyarr_c<complex<T>> vis2dirty_wstack(const Baselines<T> &baselines, - const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, +template<typename T> pyarr_c<complex<T>> vis2dirty_wstack(const PyBaselines<T> &baselines, + const PyGridderConfig<T> &gconf, const pyarr<uint32_t> &idx_, const pyarr<complex<T>> &vis_) { auto nx_dirty=gconf.Nxdirty(); @@ -1617,42 +1257,42 @@ PYBIND11_MODULE(nifty_gridder, m) { using namespace pybind11::literals; - py::class_<Baselines<double>> (m, "Baselines", Baselines_DS) + py::class_<PyBaselines<double>> (m, "Baselines", PyBaselines_DS) .def(py::init<const pyarr_c<double> &, const pyarr_c<double> &>(), "coord"_a, "freq"_a) - .def ("Nrows",&Baselines<double>::Nrows) - .def ("Nchannels",&Baselines<double>::Nchannels) - .def ("ms2vis",&Baselines<double>::ms2vis<complex<double>>, - Baselines<double>::ms2vis_DS, "ms"_a, "idx"_a) - .def ("effectiveuvw",&Baselines<double>::effectiveuvw, "idx"_a) - .def ("vis2ms",&Baselines<double>::vis2ms<complex<double>>, - Baselines<double>::vis2ms_DS, "vis"_a, "idx"_a, "ms_in"_a=None); - py::class_<GridderConfig<double>> (m, "GridderConfig", GridderConfig_DS) + .def ("Nrows",&PyBaselines<double>::Nrows) + .def ("Nchannels",&PyBaselines<double>::Nchannels) + .def ("ms2vis",&PyBaselines<double>::ms2vis<complex<double>>, + PyBaselines<double>::ms2vis_DS, "ms"_a, "idx"_a) + .def ("effectiveuvw",&PyBaselines<double>::effectiveuvw, "idx"_a) + .def ("vis2ms",&PyBaselines<double>::vis2ms<complex<double>>, + PyBaselines<double>::vis2ms_DS, "vis"_a, "idx"_a, "ms_in"_a=None); + py::class_<PyGridderConfig<double>> (m, "GridderConfig", GridderConfig_DS) .def(py::init<size_t, size_t, double, double, double>(),"nxdirty"_a, "nydirty"_a, "epsilon"_a, "pixsize_x"_a, "pixsize_y"_a) - .def("Nxdirty", &GridderConfig<double>::Nxdirty) - .def("Nydirty", &GridderConfig<double>::Nydirty) - .def("Epsilon", &GridderConfig<double>::Epsilon) - .def("Pixsize_x", &GridderConfig<double>::Pixsize_x) - .def("Pixsize_y", &GridderConfig<double>::Pixsize_y) - .def("Nu", &GridderConfig<double>::Nu) - .def("Nv", &GridderConfig<double>::Nv) - .def("Supp", &GridderConfig<double>::Supp) - .def("apply_taper", &GridderConfig<double>::apply_taper, apply_taper_DS, + .def("Nxdirty", &PyGridderConfig<double>::Nxdirty) + .def("Nydirty", &PyGridderConfig<double>::Nydirty) + .def("Epsilon", &PyGridderConfig<double>::Epsilon) + .def("Pixsize_x", &PyGridderConfig<double>::Pixsize_x) + .def("Pixsize_y", &PyGridderConfig<double>::Pixsize_y) + .def("Nu", &PyGridderConfig<double>::Nu) + .def("Nv", &PyGridderConfig<double>::Nv) + .def("Supp", &PyGridderConfig<double>::Supp) + .def("apply_taper", &PyGridderConfig<double>::apply_taper, apply_taper_DS, "img"_a, "divide"_a=false) - .def("grid2dirty", &GridderConfig<double>::grid2dirty, + .def("grid2dirty", &PyGridderConfig<double>::grid2dirty, grid2dirty_DS, "grid"_a) - .def("grid2dirty_c", &GridderConfig<double>::grid2dirty_c, "grid"_a) - .def("dirty2grid", &GridderConfig<double>::dirty2grid, + .def("grid2dirty_c", &PyGridderConfig<double>::grid2dirty_c, "grid"_a) + .def("dirty2grid", &PyGridderConfig<double>::dirty2grid, dirty2grid_DS, "dirty"_a) - .def("dirty2grid_c", &GridderConfig<double>::dirty2grid_c, "dirty"_a) - .def("apply_wscreen", &GridderConfig<double>::apply_wscreen, + .def("dirty2grid_c", &PyGridderConfig<double>::dirty2grid_c, "dirty"_a) + .def("apply_wscreen", &PyGridderConfig<double>::apply_wscreen, apply_wscreen_DS, "dirty"_a, "w"_a, "adjoint"_a) // pickle support .def(py::pickle( // __getstate__ - [](const GridderConfig<double> & gc) { + [](const PyGridderConfig<double> & gc) { // Encode object state in tuple return py::make_tuple(gc.Nxdirty(), gc.Nydirty(), gc.Epsilon(), gc.Pixsize_x(), gc.Pixsize_y()); @@ -1662,7 +1302,7 @@ PYBIND11_MODULE(nifty_gridder, m) myassert(t.size()==5,"Invalid state"); // Reconstruct from tuple - return GridderConfig<double>(t[0].cast<size_t>(), t[1].cast<size_t>(), + return PyGridderConfig<double>(t[0].cast<size_t>(), t[1].cast<size_t>(), t[2].cast<double>(), t[3].cast<double>(), t[4].cast<double>());