/* * This file is part of nifty_gridder. * * nifty_gridder is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * nifty_gridder is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with nifty_gridder; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* Copyright (C) 2019 Max-Planck-Society Author: Martin Reinecke */ #include #include #include #include #include #include #include "pocketfft_hdronly.h" #ifdef __GNUC__ #define RESTRICT __restrict__ #define NOINLINE __attribute__ ((noinline)) #else #define RESTRICT #endif using namespace std; namespace py = pybind11; namespace { auto None = py::none(); // // basic utilities // void myassert(bool cond, const char *msg) { if (cond) return; throw runtime_error(msg); } /*! Returns the remainder of the division \a v1/v2. The result is non-negative. \a v1 can be positive or negative; \a v2 must be positive. */ template inline T fmodulo (T v1, T v2) { if (v1>=0) return (v1=1. )"""; void set_nthreads(size_t nthreads_) { myassert(nthreads_>=1, "nthreads must be >= 1"); nthreads = nthreads_; } constexpr auto get_nthreads_DS = R"""( Returns the number of threads used by the module Returns ======= int : the number of threads used by the module )"""; size_t get_nthreads() { return nthreads; } // // Utilities for Gauss-Legendre quadrature // static inline double one_minus_x2 (double x) { return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; } void legendre_prep(int n, vector &x, vector &w) { constexpr double pi = 3.141592653589793238462643383279502884197; constexpr double eps = 3e-14; int m = (n+1)>>1; x.resize(m); w.resize(m); double t0 = 1 - (1-1./n) / (8.*n*n); double t1 = 1./(4.*n+2.); #pragma omp parallel num_threads(nthreads) { int i; #pragma omp for schedule(dynamic,100) for (i=1; i<=m; ++i) { double x0 = cos(pi * ((i<<2)-1) * t1) * t0; int dobreak=0; int j=0; double dpdx; while(1) { double P_1 = 1.0; double P0 = x0; double dx, x1; for (int k=2; k<=n; k++) { double P_2 = P_1; P_1 = P0; // P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k; P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2); } dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0); /* Newton step */ x1 = x0 - P0/dpdx; dx = x0-x1; x0 = x1; if (dobreak) break; if (abs(dx)<=eps) dobreak=1; myassert(++j<100, "convergence problem"); } x[m-i] = x0; w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx); } } // end of parallel region } // // Start of real gridder functionality // template using pyarr = py::array_t; // The "_c" suffix here stands for "C memory order, contiguous" template using pyarr_c = py::array_t; template pyarr_c makeArray(const vector &shape) { return pyarr_c(shape); } size_t get_w(double epsilon) { static const vector maxmaperr { 1e8, 0.32, 0.021, 6.2e-4, 1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17, 6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 }; double epssq = epsilon*epsilon; for (size_t i=1; imaxmaperr[i]) return i; throw runtime_error("requested epsilon too small - minimum is 2e-13"); } void checkArray(const py::array &arr, const char *aname, const vector &shape) { if (size_t(arr.ndim())!=shape.size()) { cerr << "Array '" << aname << "' has " << arr.ndim() << " dimensions; " "expected " << shape.size() << endl; throw runtime_error("bad dimensionality"); } for (size_t i=0; i pyarr provideArray(py::object &in, const vector &shape) { if (in.is_none()) { auto tmp_ = makeArray(shape); size_t sz = size_t(tmp_.size()); auto tmp = tmp_.mutable_data(); for (size_t i=0; i>(); checkArray(tmp_, "temporary", shape); return tmp_; } template pyarr_c provideCArray(py::object &in, const vector &shape) { if (in.is_none()) { auto tmp_ = makeArray(shape); size_t sz = size_t(tmp_.size()); auto tmp = tmp_.mutable_data(); for (size_t i=0; i>(); checkArray(tmp_, "temporary", shape); return tmp_; } template pyarr_c complex2hartley (const pyarr_c> &grid_, py::object &grid_in) { checkArray(grid_, "grid", {0,0}); size_t nu = size_t(grid_.shape(0)), nv = size_t(grid_.shape(1)); auto grid = grid_.data(); auto res = provideCArray(grid_in, {nu, nv}); auto grid2 = res.mutable_data(); { py::gil_scoped_release release; #pragma omp parallel for num_threads(nthreads) for (size_t u=0; u pyarr_c> hartley2complex (const pyarr_c &grid_) { checkArray(grid_, "grid", {0, 0}); size_t nu = size_t(grid_.shape(0)), nv = size_t(grid_.shape(1)); auto grid = grid_.data(); auto res=makeArray>({nu, nv}); auto grid2 = res.mutable_data(); { py::gil_scoped_release release; #pragma omp parallel for num_threads(nthreads) for (size_t u=0; u(v1+v2, v1-v2); } } } return res; } template void hartley2_2D(const pyarr_c &in, pyarr_c &out) { size_t nu=in.shape(0), nv=in.shape(1); pocketfft::stride_t s_i{in.strides(0), in.strides(1)}, s_o{out.strides(0), out.strides(1)}; auto d_i = in.data(); auto ptmp = out.mutable_data(); { py::gil_scoped_release release; pocketfft::r2r_separable_hartley({nu, nv}, s_i, s_o, {0,1}, d_i, ptmp, T(1), nthreads); #pragma omp parallel for num_threads(nthreads) for(size_t i=1; i<(nu+1)/2; ++i) for(size_t j=1; j<(nv+1)/2; ++j) { T a = ptmp[i*nv+j]; T b = ptmp[(nu-i)*nv+j]; T c = ptmp[i*nv+nv-j]; T d = ptmp[(nu-i)*nv+nv-j]; ptmp[i*nv+j] = T(0.5)*(a+b+c-d); ptmp[(nu-i)*nv+j] = T(0.5)*(a+b+d-c); ptmp[i*nv+nv-j] = T(0.5)*(a+c+d-b); ptmp[(nu-i)*nv+nv-j] = T(0.5)*(b+c+d-a); } } } /* Compute correction factors for the ES gridding kernel This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ vector correction_factors (size_t n, size_t nval, size_t w) { constexpr double pi = 3.141592653589793238462643383279502884197; auto beta = 2.3*w; auto p = int(1.5*w+2); double alpha = pi*w/n; vector x, wgt; legendre_prep(2*p,x,wgt); auto psi = x; for (auto &v:psi) v = exp(beta*(sqrt(1-v*v)-1.)); vector res(nval); #pragma omp parallel for schedule(static) num_threads(nthreads) for (size_t k=0; k struct UVW { T u, v, w; UVW () {} UVW (T u_, T v_, T w_) : u(u_), v(v_), w(w_) {} UVW operator* (T fct) const { return UVW(u*fct, v*fct, w*fct); } }; constexpr auto Baselines_DS = R"""( Class storing UVW coordinates and channel information. Parameters ========== coord: np.array((nrows, 3), dtype=np.float) u, v and w coordinates for each row freq: np.array((nchannels,), dtype=np.float) frequency for each individual channel (in Hz) )"""; template class Baselines { private: vector> coord; vector f_over_c; size_t nrows, nchan; public: Baselines(const pyarr &coord_, const pyarr &freq_) { constexpr double speedOfLight = 299792458.; checkArray(coord_, "coord", {0, 3}); checkArray(freq_, "freq", {0}); nrows = coord_.shape(0); nchan = freq_.shape(0); myassert(nrows*nchan<(size_t(1)<<32), "too many entries in MS"); auto freq = freq_.template unchecked<1>(); auto cood = coord_.template unchecked<2>(); { py::gil_scoped_release release; f_over_c.resize(nchan); for (size_t i=0; i(cood(i,0), cood(i,1), cood(i,2)); } } UVW effectiveCoord(uint32_t index) const { size_t irow = index/nchan; size_t ichan = index-nchan*irow; return coord[irow]*f_over_c[ichan]; } UVW effectiveCoord(size_t irow, size_t ichan) const { return coord[irow]*f_over_c[ichan]; } size_t Nrows() const { return nrows; } size_t Nchannels() const { return nchan; } static constexpr auto ms2vis_DS = R"""( Extracts visibility data from a measurement for the provided indices. Parameters ========== ms: np.array((nrows, nchannels), dtype=np.complex) the measurement set's visibility data idx: np.array((nvis,), dtype=np.uint32) the indices to be extracted Returns ======= np.array((nvis,), dtype=np.complex) The visibility data for the index array )"""; template pyarr_c ms2vis(const pyarr &ms_, const pyarr_c &idx_) const { checkArray(idx_, "idx", {0}); checkArray(ms_, "ms", {nrows, nchan}); size_t nvis = size_t(idx_.shape(0)); auto idx = idx_.template unchecked<1>(); auto ms = ms_.template unchecked<2>(); auto res=makeArray({nvis}); auto vis = res.mutable_data(); { py::gil_scoped_release release; #pragma omp parallel for num_threads(nthreads) for (size_t i=0; i pyarr_c vis2ms(const pyarr &vis_, const pyarr &idx_, py::object &ms_in) const { checkArray(vis_, "vis", {0}); size_t nvis = size_t(vis_.shape(0)); checkArray(idx_, "idx", {nvis}); auto idx = idx_.template unchecked<1>(); auto vis = vis_.template unchecked<1>(); auto res = provideArray(ms_in, {nrows, nchan}); auto ms = res.template mutable_unchecked<2>(); { py::gil_scoped_release release; #pragma omp parallel for num_threads(nthreads) for (size_t i=0; i= 2e-13. pixsize_x: float Pixel size in x direction (radians) pixsize_y: float Pixel size in y direction (radians) )"""; template class GridderConfig { private: size_t nx_dirty, ny_dirty; double eps, psx, psy; size_t w, nsafe, nu, nv; T beta; vector cfu, cfv; complex wscreen(double x, double y, double w, bool adjoint) const { constexpr double pi = 3.141592653589793238462643383279502884197; double n = cos(sqrt(x+y)), xn = 1./n; double phase = 2*pi*w*(n-1); if (adjoint) phase *= -1; return complex(cos(phase)*xn, sin(phase)*xn); } public: GridderConfig(size_t nxdirty, size_t nydirty, double epsilon, double pixsize_x, double pixsize_y) : nx_dirty(nxdirty), ny_dirty(nydirty), eps(epsilon), psx(pixsize_x), psy(pixsize_y), w(get_w(epsilon)), nsafe((w+1)/2), nu(max(2*nsafe,2*nx_dirty)), nv(max(2*nsafe,2*ny_dirty)), beta(2.3*w), cfu(nx_dirty), cfv(ny_dirty) { { py::gil_scoped_release release; myassert((nx_dirty&1)==0, "nx_dirty must be even"); myassert((ny_dirty&1)==0, "ny_dirty must be even"); myassert(epsilon>0, "epsilon must be positive"); myassert(pixsize_x>0, "pixsize_x must be positive"); myassert(pixsize_y>0, "pixsize_y must be positive"); auto tmp = correction_factors(nu, nx_dirty/2+1, w); cfu[nx_dirty/2]=tmp[0]; cfu[0]=tmp[nx_dirty/2]; for (size_t i=1; i grid2dirty(const pyarr_c &grid) const { checkArray(grid, "grid", {nu, nv}); auto tmp = makeArray({nu, nv}); auto ptmp = tmp.mutable_data(); hartley2_2D(grid, tmp); auto res = makeArray({nx_dirty, ny_dirty}); auto pout = res.mutable_data(); { py::gil_scoped_release release; for (size_t i=0; i=nu) i2-=nu; size_t j2 = nv-ny_dirty/2+j; if (j2>=nv) j2-=nv; pout[ny_dirty*i + j] = ptmp[nv*i2+j2]*cfu[i]*cfv[j]; } } return res; } pyarr_c apply_taper(const pyarr_c &img, bool divide) const { checkArray(img, "img", {nx_dirty, ny_dirty}); auto pin = img.data(); auto res = makeArray({nx_dirty, ny_dirty}); auto pout = res.mutable_data(); { py::gil_scoped_release release; if (divide) for (size_t i=0; i> grid2dirty_c(const pyarr_c> &grid) const { checkArray(grid, "grid", {nu, nv}); auto tmp = makeArray>({nu, nv}); auto ptmp = tmp.mutable_data(); pocketfft::c2c({nu,nv},{grid.strides(0),grid.strides(1)}, {tmp.strides(0), tmp.strides(1)}, {0,1}, pocketfft::BACKWARD, grid.data(), tmp.mutable_data(), T(1), nthreads); auto res = makeArray>({nx_dirty, ny_dirty}); auto pout = res.mutable_data(); { py::gil_scoped_release release; for (size_t i=0; i=nu) i2-=nu; size_t j2 = nv-ny_dirty/2+j; if (j2>=nv) j2-=nv; pout[ny_dirty*i + j] = ptmp[nv*i2+j2]*cfu[i]*cfv[j]; } } return res; } pyarr_c dirty2grid(const pyarr_c &dirty) const { checkArray(dirty, "dirty", {nx_dirty, ny_dirty}); auto pdirty = dirty.data(); auto tmp = makeArray({nu, nv}); auto ptmp = tmp.mutable_data(); { py::gil_scoped_release release; for (size_t i=0; i=nu) i2-=nu; size_t j2 = nv-ny_dirty/2+j; if (j2>=nv) j2-=nv; ptmp[nv*i2+j2] = pdirty[ny_dirty*i + j]*cfu[i]*cfv[j]; } } hartley2_2D(tmp, tmp); return tmp; } pyarr_c> dirty2grid_c(const pyarr_c> &dirty) const { checkArray(dirty, "dirty", {nx_dirty, ny_dirty}); auto pdirty = dirty.data(); auto tmp = makeArray>({nu, nv}); auto ptmp = tmp.mutable_data(); pocketfft::stride_t strides{tmp.strides(0),tmp.strides(1)}; { py::gil_scoped_release release; for (size_t i=0; i=nu) i2-=nu; size_t j2 = nv-ny_dirty/2+j; if (j2>=nv) j2-=nv; ptmp[nv*i2+j2] = pdirty[ny_dirty*i + j]*cfu[i]*cfv[j]; } pocketfft::c2c({nu,nv}, strides, strides, {0,1}, pocketfft::FORWARD, ptmp, ptmp, T(1), nthreads); } return tmp; } inline void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const { u=fmodulo(u_in*psx, T(1))*nu, iu0 = int(u-w*0.5 + 1 + nu) - nu; if (iu0+w>nu+nsafe) iu0 = nu+nsafe-w; v=fmodulo(v_in*psy, T(1))*nv; iv0 = int(v-w*0.5 + 1 + nv) - nv; if (iv0+w>nv+nsafe) iv0 = nv+nsafe-w; } pyarr_c> apply_wscreen(const pyarr_c> &dirty_, double w, bool adjoint) const { checkArray(dirty_, "dirty", {nx_dirty, ny_dirty}); auto dirty = dirty_.data(); auto res_ = makeArray>({nx_dirty, ny_dirty}); auto res = res_.mutable_data(); double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy; { py::gil_scoped_release release; #pragma omp parallel num_threads(nthreads) { #pragma omp for schedule(static) for (size_t i=0; i<=nx_dirty/2; ++i) { double fx = x0+i*psx; fx *= fx; for (size_t j=0; j<=ny_dirty/2; ++j) { double fy = y0+j*psy; auto ws = wscreen(fx, fy*fy, w, adjoint); res[ny_dirty*i+j] = dirty[ny_dirty*i+j]*ws; // lower left size_t i2 = nx_dirty-i, j2 = ny_dirty-j; if ((i>0)&&(i0)&&(j0)&&(j> class Helper { private: const GridderConfig &gconf; int nu, nv, nsafe, w; T beta; const T2 *grid_r; T2 *grid_w; int su, sv; int iu0, iv0; // start index of the current visibility int bu0, bv0; // start index of the current buffer vector rbuf, wbuf; void dump() const { if (bu0<-nsafe) return; // nothing written into buffer yet #pragma omp critical (gridder_writing_to_grid) { int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; } } } void load() { int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; } } public: const T2 *p0r; T2 *p0w; vector kernel; Helper(const GridderConfig &gconf_, const T2 *grid_r_, T2 *grid_w_) : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()), w(gconf.W()), beta(gconf.Beta()), grid_r(grid_r_), grid_w(grid_w_), su(2*nsafe+(1<bu0+su) || (iv0+w>bv0+sv)) { if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); } bu0=((((iu0+nsafe)>>logsquare)<>logsquare)< pyarr_c> vis2grid_c( const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr> &vis_, py::object &grid_in) { checkArray(vis_, "vis", {0}); size_t nvis = size_t(vis_.shape(0)); checkArray(idx_, "idx", {nvis}); auto vis=vis_.template unchecked<1>(); auto idx = idx_.template unchecked<1>(); size_t nu=gconf.Nu(), nv=gconf.Nv(); auto res = provideCArray>(grid_in, {nu, nv}); auto grid = res.mutable_data(); { py::gil_scoped_release release; T beta = gconf.Beta(); size_t w = gconf.W(); #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, nullptr, grid); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * RESTRICT ku = hlp.kernel.data(); const T * RESTRICT kv = hlp.kernel.data()+w; // Loop over sampling points #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(idx(ipart)); hlp.prep(coord.u, coord.v); auto * RESTRICT ptr = hlp.p0w; auto v(vis(ipart)*emb); for (size_t cu=0; cu tmp(v*ku[cu]); for (size_t cv=0; cv pyarr_c vis2grid(const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr> &vis_, py::object &grid_in) { return complex2hartley(vis2grid_c(baselines, gconf, idx_, vis_, None), grid_in); } constexpr auto ms2grid_c_DS = R"""( Grids measurement set data onto a UV grid Parameters ========== baselines: Baselines the Baselines object gconf: GridderConf the GridderConf object to be used (used to optimize the ordering of the indices) idx: np.array((nvis,), dtype=np.uint32) the indices for the entries to be gridded ms: np.array((nrows, nchannels), dtype=np.complex128) the measurement set. grid_in: np.array((nu,nv), dtype=np.complex128), optional If present, the result is added to this array. Returns ======= np.array((nu,nv), dtype=np.complex128): the gridded visibilities )"""; template pyarr_c> ms2grid_c( const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr> &ms_, py::object &grid_in) { auto nrows = baselines.Nrows(); auto nchan = baselines.Nchannels(); checkArray(ms_, "ms", {nrows, nchan}); checkArray(idx_, "idx", {0}); size_t nvis = size_t(idx_.shape(0)); auto ms = ms_.template unchecked<2>(); auto idx = idx_.template unchecked<1>(); size_t nu=gconf.Nu(), nv=gconf.Nv(); auto res = provideCArray>(grid_in, {nu, nv}); auto grid = res.mutable_data(); { py::gil_scoped_release release; T beta = gconf.Beta(); size_t w = gconf.W(); #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, nullptr, grid); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * RESTRICT ku = hlp.kernel.data(); const T * RESTRICT kv = hlp.kernel.data()+w; // Loop over sampling points #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(tidx); hlp.prep(coord.u, coord.v); auto * RESTRICT ptr = hlp.p0w; auto v(ms(row,chan)*emb); for (size_t cu=0; cu tmp(v*ku[cu]); for (size_t cv=0; cv pyarr_c ms2grid(const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr> &ms_, py::object &grid_in) { return complex2hartley(ms2grid_c(baselines, gconf, idx_, ms_, None), grid_in); } template pyarr_c> ms2grid_c_wgt( const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr> &ms_, const pyarr &wgt_, py::object &grid_in) { auto nrows = baselines.Nrows(); auto nchan = baselines.Nchannels(); checkArray(wgt_, "wgt", {nrows, nchan}); checkArray(ms_, "ms", {nrows, nchan}); checkArray(idx_, "idx", {0}); size_t nvis = size_t(idx_.shape(0)); auto ms = ms_.template unchecked<2>(); auto wgt = wgt_.template unchecked<2>(); auto idx = idx_.template unchecked<1>(); size_t nu=gconf.Nu(), nv=gconf.Nv(); auto res = provideArray>(grid_in, {nu, nv}); auto grid = res.mutable_data(); { py::gil_scoped_release release; T beta = gconf.Beta(); size_t w = gconf.W(); #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, nullptr, grid); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * RESTRICT ku = hlp.kernel.data(); const T * RESTRICT kv = hlp.kernel.data()+w; // Loop over sampling points #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(tidx); hlp.prep(coord.u, coord.v); auto * RESTRICT ptr = hlp.p0w; auto v(ms(row,chan)*(emb*wgt(row, chan))); for (size_t cu=0; cu tmp(v*ku[cu]); for (size_t cv=0; cv pyarr_c ms2grid_wgt(const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr> &ms_, const pyarr &wgt_, py::object &grid_in) { return complex2hartley(ms2grid_c_wgt(baselines, gconf, idx_, ms_, wgt_, None), grid_in); } template pyarr_c> grid2vis_c( const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr_c> &grid_) { size_t nu=gconf.Nu(), nv=gconf.Nv(); checkArray(idx_, "idx", {0}); auto grid = grid_.data(); checkArray(grid_, "grid", {nu, nv}); size_t nvis = size_t(idx_.shape(0)); auto idx = idx_.template unchecked<1>(); auto res = makeArray>({nvis}); auto vis = res.mutable_data(); { py::gil_scoped_release release; T beta = gconf.Beta(); size_t w = gconf.W(); // Loop over sampling points #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, grid, nullptr); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * RESTRICT ku = hlp.kernel.data(); const T * RESTRICT kv = hlp.kernel.data()+w; #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(idx(ipart)); hlp.prep(coord.u, coord.v); complex r = 0; const auto * RESTRICT ptr = hlp.p0r; for (size_t cu=0; cu tmp(0); for (size_t cv=0; cv pyarr_c> grid2vis(const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr_c &grid_) { return grid2vis_c(baselines, gconf, idx_, hartley2complex(grid_)); } template pyarr_c> grid2ms_c(const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr_c> &grid_, py::object &ms_in) { auto nrows = baselines.Nrows(); auto nchan = baselines.Nchannels(); size_t nu=gconf.Nu(), nv=gconf.Nv(); checkArray(idx_, "idx", {0}); auto grid = grid_.data(); checkArray(grid_, "grid", {nu, nv}); size_t nvis = size_t(idx_.shape(0)); auto idx = idx_.template unchecked<1>(); auto res = provideArray>(ms_in, {nrows, nchan}); auto ms = res.template mutable_unchecked<2>(); { py::gil_scoped_release release; T beta = gconf.Beta(); size_t w = gconf.W(); // Loop over sampling points #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, grid, nullptr); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * RESTRICT ku = hlp.kernel.data(); const T * RESTRICT kv = hlp.kernel.data()+w; #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(tidx); hlp.prep(coord.u, coord.v); complex r = 0; const auto * RESTRICT ptr = hlp.p0r; for (size_t cu=0; cu tmp(0); for (size_t cv=0; cv pyarr_c> grid2ms(const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr_c &grid_, py::object &ms_in) { return grid2ms_c(baselines, gconf, idx_, hartley2complex(grid_), ms_in); } template pyarr_c> grid2ms_c_wgt( const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr_c> &grid_, const pyarr &wgt_, py::object &ms_in) { auto nrows = baselines.Nrows(); auto nchan = baselines.Nchannels(); checkArray(wgt_, "wgt", {nrows, nchan}); auto wgt = wgt_.template unchecked<2>(); size_t nu=gconf.Nu(), nv=gconf.Nv(); checkArray(idx_, "idx", {0}); auto grid = grid_.data(); checkArray(grid_, "grid", {nu, nv}); size_t nvis = size_t(idx_.shape(0)); auto idx = idx_.template unchecked<1>(); auto res = provideArray>(ms_in, {nrows, nchan}); auto ms = res.template mutable_unchecked<2>(); { py::gil_scoped_release release; T beta = gconf.Beta(); size_t w = gconf.W(); // Loop over sampling points #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, grid, nullptr); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * RESTRICT ku = hlp.kernel.data(); const T * RESTRICT kv = hlp.kernel.data()+w; #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(tidx); hlp.prep(coord.u, coord.v); complex r = 0; const auto * RESTRICT ptr = hlp.p0r; for (size_t cu=0; cu tmp(0); for (size_t cv=0; cv pyarr_c> grid2ms_wgt( const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr_c &grid_, const pyarr &wgt_, py::object &ms_in) { return grid2ms_c_wgt(baselines, gconf, idx_, hartley2complex(grid_), wgt_, ms_in); } template pyarr_c> apply_holo( const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, const pyarr_c> &grid_) { size_t nu=gconf.Nu(), nv=gconf.Nv(); checkArray(idx_, "idx", {0}); auto grid = grid_.data(); checkArray(grid_, "grid", {nu, nv}); size_t nvis = size_t(idx_.shape(0)); auto idx = idx_.template unchecked<1>(); auto res = makeArray>({nu, nv}); auto ogrid = res.mutable_data(); { py::gil_scoped_release release; T beta = gconf.Beta(); size_t w = gconf.W(); // Loop over sampling points #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, grid, ogrid); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * RESTRICT ku = hlp.kernel.data(); const T * RESTRICT kv = hlp.kernel.data()+w; #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(idx(ipart)); hlp.prep(coord.u, coord.v); complex r = 0; const auto * RESTRICT ptr = hlp.p0r; for (size_t cu=0; cu tmp(0); for (size_t cv=0; cv tmp(r*ku[cu]); for (size_t cv=0; cv pyarr_c get_correlations( const Baselines &baselines, const GridderConfig &gconf, const pyarr &idx_, int du, int dv) { size_t nu=gconf.Nu(), nv=gconf.Nv(); size_t w = gconf.W(); myassert(size_t(abs(du))(); auto res = makeArray({nu, nv}); auto ogrid = res.mutable_data(); { py::gil_scoped_release release; T beta = gconf.Beta(); for (size_t i=0; i=0) { u0=0; u1=w-du; } else { u0=-du; u1=w; } if (dv>=0) { v0=0; v1=w-dv; } else { v0=-dv; v1=w; } // Loop over sampling points #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, nullptr, ogrid); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * RESTRICT ku = hlp.kernel.data(); const T * RESTRICT kv = hlp.kernel.data()+w; #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(idx(ipart)); hlp.prep(coord.u, coord.v); auto * RESTRICT wptr = hlp.p0w + u0*jump; for (size_t cu=u0; cu=wmin wmax: float only select entries with w pyarr_c getIndices(const Baselines &baselines, const GridderConfig &gconf, const pyarr_c &flags_, int chbegin, int chend, T wmin, T wmax) { size_t nrow=baselines.Nrows(), nchan=baselines.Nchannels(), nsafe=gconf.Nsafe(); if (chbegin<0) chbegin=0; if (chend<0) chend=nchan; myassert(chend>chbegin, "empty channel range selected"); myassert(chend<=int(nchan), "chend too large"); myassert(wmax>wmin, "empty w range selected"); checkArray(flags_, "flags", {nrow, nchan}); auto flags = flags_.data(); constexpr int side=1<> logsquare, nbv = (gconf.Nv()+1+side-1) >> logsquare; vector acc(nbu*nbv+1, 0); vector tmp(nrow*(chend-chbegin)); { py::gil_scoped_release release; for (size_t irow=0, idx=0; irow=wmin) && (uvw.w>logsquare; iv0 = (iv0+nsafe)>>logsquare; ++acc[nbv*iu0 + iv0 + 1]; tmp[idx++] = nbv*iu0 + iv0; } } for (size_t i=1; i({acc.back()}); auto iout = res.mutable_data(); { py::gil_scoped_release release; for (size_t irow=0, idx=0; irow=wmin) && (uvw.w> (m, "Baselines", Baselines_DS) .def(py::init &, const pyarr_c &>(), "coord"_a, "freq"_a) .def ("Nrows",&Baselines::Nrows) .def ("Nchannels",&Baselines::Nchannels) .def ("ms2vis",&Baselines::ms2vis>, Baselines::ms2vis_DS, "ms"_a, "idx"_a) .def ("vis2ms",&Baselines::vis2ms>, Baselines::vis2ms_DS, "vis"_a, "idx"_a, "ms_in"_a=None); py::class_> (m, "GridderConfig", GridderConfig_DS) .def(py::init(),"nxdirty"_a, "nydirty"_a, "epsilon"_a, "pixsize_x"_a, "pixsize_y"_a) .def("Nxdirty", &GridderConfig::Nxdirty) .def("Nydirty", &GridderConfig::Nydirty) .def("Epsilon", &GridderConfig::Epsilon) .def("Pixsize_x", &GridderConfig::Pixsize_x) .def("Pixsize_y", &GridderConfig::Pixsize_y) .def("Nu", &GridderConfig::Nu) .def("Nv", &GridderConfig::Nv) .def("apply_taper", &GridderConfig::apply_taper, apply_taper_DS, "img"_a, "divide"_a=false) .def("grid2dirty", &GridderConfig::grid2dirty, grid2dirty_DS, "grid"_a) .def("grid2dirty_c", &GridderConfig::grid2dirty_c, "grid"_a) .def("dirty2grid", &GridderConfig::dirty2grid, dirty2grid_DS, "dirty"_a) .def("dirty2grid_c", &GridderConfig::dirty2grid_c, "dirty"_a) .def("apply_wscreen", &GridderConfig::apply_wscreen, apply_wscreen_DS, "dirty"_a, "w"_a, "adjoint"_a) // pickle support .def(py::pickle( // __getstate__ [](const GridderConfig & gc) { // Encode object state in tuple return py::make_tuple(gc.Nxdirty(), gc.Nydirty(), gc.Epsilon(), gc.Pixsize_x(), gc.Pixsize_y()); }, // __setstate__ [](py::tuple t) { myassert(t.size()==5,"Invalid state"); // Reconstruct from tuple return GridderConfig(t[0].cast(), t[1].cast(), t[2].cast(), t[3].cast(), t[4].cast()); })); m.def("getIndices", getIndices, getIndices_DS, "baselines"_a, "gconf"_a, "flags"_a, "chbegin"_a=-1, "chend"_a=-1, "wmin"_a=-1e30, "wmax"_a=1e30); m.def("vis2grid",&vis2grid, vis2grid_DS, "baselines"_a, "gconf"_a, "idx"_a, "vis"_a, "grid_in"_a=None); m.def("ms2grid",&ms2grid, "baselines"_a, "gconf"_a, "idx"_a, "ms"_a, "grid_in"_a=None); m.def("ms2grid_wgt",&ms2grid_wgt, "baselines"_a, "gconf"_a, "idx"_a, "ms"_a, "wgt"_a, "grid_in"_a=None); m.def("grid2vis",&grid2vis, grid2vis_DS, "baselines"_a, "gconf"_a, "idx"_a, "grid"_a); m.def("grid2ms",&grid2ms, "baselines"_a, "gconf"_a, "idx"_a, "grid"_a, "ms_in"_a=None); m.def("grid2ms_wgt",&grid2ms_wgt, "baselines"_a, "gconf"_a, "idx"_a, "grid"_a, "wgt"_a, "ms_in"_a=None); m.def("vis2grid_c",&vis2grid_c, vis2grid_c_DS, "baselines"_a, "gconf"_a, "idx"_a, "vis"_a, "grid_in"_a=None); m.def("ms2grid_c",&ms2grid_c, ms2grid_c_DS, "baselines"_a, "gconf"_a, "idx"_a, "ms"_a, "grid_in"_a=None); m.def("ms2grid_c_wgt",&ms2grid_c_wgt, "baselines"_a, "gconf"_a, "idx"_a, "ms"_a, "wgt"_a, "grid_in"_a=None); m.def("grid2vis_c",&grid2vis_c, "baselines"_a, "gconf"_a, "idx"_a, "grid"_a); m.def("grid2ms_c",&grid2ms_c, "baselines"_a, "gconf"_a, "idx"_a, "grid"_a, "ms_in"_a=None); m.def("grid2ms_c_wgt",&grid2ms_c_wgt, "baselines"_a, "gconf"_a, "idx"_a, "grid"_a, "wgt"_a, "ms_in"_a=None); m.def("apply_holo",&apply_holo, "baselines"_a, "gconf"_a, "idx"_a, "grid"_a); m.def("get_correlations", &get_correlations, "baselines"_a, "gconf"_a, "idx"_a, "du"_a, "dv"_a); m.def("set_nthreads",&set_nthreads, set_nthreads_DS, "nthreads"_a); m.def("get_nthreads",&get_nthreads, get_nthreads_DS); }