#ifndef GRIDDER_CXX_H #define GRIDDER_CXX_H /* * This file is part of nifty_gridder. * * nifty_gridder is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * nifty_gridder is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with nifty_gridder; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* Copyright (C) 2019 Max-Planck-Society Author: Martin Reinecke */ #include #include #include #include #include #include "pocketfft_hdronly.h" #include #define NOINLINE __attribute__((noinline)) namespace gridder { namespace detail { using namespace std; template class mav { static_assert((ndim>0) && (ndim<3), "only supports 1D and 2D arrays"); private: T *d; array shp; array str; public: mav(T *d_, const array &shp_, const array &str_) : d(d_), shp(shp_), str(str_) {} mav(T *d_, const array &shp_) : d(d_), shp(shp_) { str[ndim-1]=1; for (size_t d=2; d<=ndim; ++d) str[ndim-d] = str[ndim-d+1]*shp[ndim-d+1]; } operator mav() const { return mav(d,shp,str); } T &operator[](size_t i) { return operator()(i); } const T &operator[](size_t i) const { return operator()(i); } T &operator()(size_t i) const { static_assert(ndim==1, "ndim must be 1"); return d[str[0]*i]; } T &operator()(size_t i, size_t j) const { static_assert(ndim==2, "ndim must be 2"); return d[str[0]*i + str[1]*j]; } size_t shape(size_t i) const { return shp[i]; } const array &shape() const { return shp; } size_t size() const { size_t res=1; for (auto v: shp) res*=v; return res; } ptrdiff_t stride(size_t i) const { return str[i]; } T *data() { return d; } const T *data() const { return d; } bool contiguous() const { ptrdiff_t stride=1; for (size_t i=0; i using const_mav = mav; template class tmpStorage { private: vector d; mav mav_; static size_t prod(const array &shp) { size_t res=1; for (auto v: shp) res*=v; return res; } public: tmpStorage(const array &shp) : d(prod(shp)), mav_(d.data(), shp) {} mav &getMav() { return mav_; }; void fill(const T & val) { std::fill(d.begin(), d.end(), val); } }; // // basic utilities // void myassert(bool cond, const char *msg) { if (cond) return; throw runtime_error(msg); } template void checkShape (const array &shp1, const array &shp2) { for (size_t i=0; i inline T fmodulo (T v1, T v2) { if (v1>=0) return (v10.1) ? (1.+x)*(1.-x) : 1.-x*x; } void legendre_prep(int n, vector &x, vector &w, size_t nthreads) { constexpr double pi = 3.141592653589793238462643383279502884197; constexpr double eps = 3e-14; int m = (n+1)>>1; x.resize(m); w.resize(m); double t0 = 1 - (1-1./n) / (8.*n*n); double t1 = 1./(4.*n+2.); #pragma omp parallel num_threads(nthreads) { int i; #pragma omp for schedule(dynamic,100) for (i=1; i<=m; ++i) { double x0 = cos(pi * ((i<<2)-1) * t1) * t0; int dobreak=0; int j=0; double dpdx; while(1) { double P_1 = 1.0; double P0 = x0; double dx, x1; for (int k=2; k<=n; k++) { double P_2 = P_1; P_1 = P0; // P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k; P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2); } dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0); /* Newton step */ x1 = x0 - P0/dpdx; dx = x0-x1; x0 = x1; if (dobreak) break; if (abs(dx)<=eps) dobreak=1; myassert(++j<100, "convergence problem"); } x[m-i] = x0; w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx); } } // end of parallel region } // // Start of real gridder functionality // size_t get_supp(double epsilon) { static const vector maxmaperr { 1e8, 0.32, 0.021, 6.2e-4, 1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17, 6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 }; double epssq = epsilon*epsilon; for (size_t i=1; imaxmaperr[i]) return i; throw runtime_error("requested epsilon too small - minimum is 2e-13"); } struct RowChan { size_t row, chan; }; template class EC_Kernel { protected: size_t supp; T beta, emb_; public: EC_Kernel(size_t supp_) : supp(supp_), beta(2.3*supp_), emb_(exp(-beta)) {} T operator()(T v) const { return exp(beta*(sqrt(T(1)-v*v)-T(1))); } T val_no_emb(T v) const { return exp(beta*sqrt(T(1)-v*v)); } T emb() const { return emb_; } }; template class EC_Kernel_with_correction: public EC_Kernel { protected: static constexpr T pi = T(3.141592653589793238462643383279502884197L); int p; vector x, wgt, psi; using EC_Kernel::supp; public: using EC_Kernel::operator(); EC_Kernel_with_correction(size_t supp_, size_t nthreads) : EC_Kernel(supp_), p(int(1.5*supp_+2)) { legendre_prep(2*p,x,wgt,nthreads); psi=x; for (auto &v:psi) v=operator()(v); } /* Compute correction factors for the ES gridding kernel This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ T corfac(T v) const { T tmp=0; for (int i=0; i correction_factors (size_t n, size_t nval, size_t supp, size_t nthreads) { EC_Kernel_with_correction kernel(supp, nthreads); vector res(nval); double xn = 1./n; #pragma omp parallel for schedule(static) num_threads(nthreads) for (size_t k=0; k struct UVW { T u, v, w; UVW () {} UVW (T u_, T v_, T w_) : u(u_), v(v_), w(w_) {} UVW operator* (T fct) const { return UVW(u*fct, v*fct, w*fct); } void Flip() { u=-u; v=-v; w=-w; } bool FixW() { bool flip = w<0; if (flip) Flip(); return flip; } }; template class Baselines { protected: vector> coord; vector f_over_c; size_t nrows, nchan; public: Baselines(const const_mav &coord_, const const_mav &freq) { constexpr double speedOfLight = 299792458.; myassert(coord_.shape(1)==3, "dimension mismatch"); nrows = coord_.shape(0); nchan = freq.shape(0); myassert(nrows*nchan<(size_t(1)<<32), "too many entries in MS"); f_over_c.resize(nchan); for (size_t i=0; i0, "negative channel frequency encountered"); f_over_c[i] = freq(i)/speedOfLight; } coord.resize(nrows); for (size_t i=0; i(coord_(i,0), coord_(i,1), coord_(i,2)); } RowChan getRowChan(uint32_t index) const { size_t irow = index/nchan; size_t ichan = index-nchan*irow; return RowChan{irow, ichan}; } UVW effectiveCoord(const RowChan &rc) const { return coord[rc.row]*f_over_c[rc.chan]; } UVW effectiveCoord(uint32_t index) const { return effectiveCoord(getRowChan(index)); } size_t Nrows() const { return nrows; } size_t Nchannels() const { return nchan; } void effectiveUVW(const mav &idx, mav &res) const { size_t nvis = idx.shape(0); myassert(res.shape(0)==nvis, "shape mismatch"); myassert(res.shape(1)==3, "shape mismatch"); for (size_t i=0; i void ms2vis(const mav &ms, const mav &idx, mav &vis, size_t nthreads) const { myassert(ms.shape(0)==nrows, "shape mismatch"); myassert(ms.shape(1)==nchan, "shape mismatch"); size_t nvis = idx.shape(0); myassert(vis.shape(0)==nvis, "shape mismatch"); #pragma omp parallel for num_threads(nthreads) for (size_t i=0; i void vis2ms(const mav &vis, const mav &idx, mav &ms, size_t nthreads) const { size_t nvis = vis.shape(0); myassert(idx.shape(0)==nvis, "shape mismatch"); myassert(ms.shape(0)==nrows, "shape mismatch"); myassert(ms.shape(1)==nchan, "shape mismatch"); #pragma omp parallel for num_threads(nthreads) for (size_t i=0; i class GridderConfig { protected: size_t nx_dirty, ny_dirty; T eps, psx, psy; size_t supp, nsafe, nu, nv; T beta; vector cfu, cfv; size_t nthreads; complex wscreen(double x, double y, double w, bool adjoint) const { constexpr double pi = 3.141592653589793238462643383279502884197; double tmp = 1-x-y; if (tmp<0) return 0.; double nm1 = (-x-y)/(sqrt(tmp)+1); // more accurate form of sqrt(1-x-y) double n = nm1+1., xn = 1./n; double phase = 2*pi*w*nm1; if (adjoint) phase *= -1; return complex(cos(phase)*xn, sin(phase)*xn); } public: GridderConfig(size_t nxdirty, size_t nydirty, double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_) : nx_dirty(nxdirty), ny_dirty(nydirty), eps(epsilon), psx(pixsize_x), psy(pixsize_y), supp(get_supp(epsilon)), nsafe((supp+1)/2), nu(max(2*nsafe,2*nx_dirty)), nv(max(2*nsafe,2*ny_dirty)), beta(2.3*supp), cfu(nx_dirty), cfv(ny_dirty), nthreads(nthreads_) { myassert((nx_dirty&1)==0, "nx_dirty must be even"); myassert((ny_dirty&1)==0, "ny_dirty must be even"); myassert(epsilon>0, "epsilon must be positive"); myassert(pixsize_x>0, "pixsize_x must be positive"); myassert(pixsize_y>0, "pixsize_y must be positive"); auto tmp = correction_factors(nu, nx_dirty/2+1, supp, nthreads); cfu[nx_dirty/2]=tmp[0]; cfu[0]=tmp[nx_dirty/2]; for (size_t i=1; i &img, mav &img2, bool divide) const { checkShape(img.shape(), {nx_dirty, ny_dirty}); checkShape(img2.shape(), {nx_dirty, ny_dirty}); if (divide) for (size_t i=0; i,2> &grid, mav,2> &dirty) const { checkShape(grid.shape(), {nu,nv}); checkShape(dirty.shape(), {nx_dirty,ny_dirty}); vector> tmpdat(nu*nv); auto tmp=mav,2>(tmpdat.data(),{nu,nv},{ptrdiff_t(nv),ptrdiff_t(1)}); constexpr auto sc = ptrdiff_t(sizeof(complex)); pocketfft::c2c({nu,nv},{grid.stride(0)*sc,grid.stride(1)*sc}, {tmp.stride(0)*sc, tmp.stride(1)*sc}, {0,1}, pocketfft::BACKWARD, grid.data(), tmp.data(), T(1), nthreads); for (size_t i=0; i=nu) i2-=nu; size_t j2 = nv-ny_dirty/2+j; if (j2>=nv) j2-=nv; dirty(i,j) = tmp(i2,j2)*cfu[i]*cfv[j]; } } void dirty2grid_c(const const_mav,2> &dirty, mav,2> &grid) const { checkShape(dirty.shape(), {nx_dirty, ny_dirty}); checkShape(grid.shape(), {nu, nv}); for (size_t i=0; i=nu) i2-=nu; size_t j2 = nv-ny_dirty/2+j; if (j2>=nv) j2-=nv; grid(i2,j2) = dirty(i,j)*cfu[i]*cfv[j]; } constexpr auto sc = ptrdiff_t(sizeof(complex)); pocketfft::stride_t strides{grid.stride(0)*sc,grid.stride(1)*sc}; pocketfft::c2c({nu,nv}, strides, strides, {0,1}, pocketfft::FORWARD, grid.data(), grid.data(), T(1), nthreads); } inline void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const { u=fmodulo(u_in*psx, T(1))*nu, iu0 = int(u-supp*0.5 + 1 + nu) - nu; if (iu0+supp>nu+nsafe) iu0 = nu+nsafe-supp; v=fmodulo(v_in*psy, T(1))*nv; iv0 = int(v-supp*0.5 + 1 + nv) - nv; if (iv0+supp>nv+nsafe) iv0 = nv+nsafe-supp; } void apply_wscreen(const const_mav,2> &dirty, mav,2> &dirty2, double w, bool adjoint) const { checkShape(dirty.shape(), {nx_dirty, ny_dirty}); checkShape(dirty2.shape(), {nx_dirty, ny_dirty}); double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy; #pragma omp parallel for num_threads(nthreads) schedule(static) for (size_t i=0; i<=nx_dirty/2; ++i) { double fx = x0+i*psx; fx *= fx; for (size_t j=0; j<=ny_dirty/2; ++j) { double fy = y0+j*psy; auto ws = wscreen(fx, fy*fy, w, adjoint); dirty2(i,j) = dirty(i,j)*ws; // lower left size_t i2 = nx_dirty-i, j2 = ny_dirty-j; if ((i>0)&&(i0)&&(j0)&&(j> class Helper { private: const GridderConfig &gconf; int nu, nv, nsafe, supp; T beta; const T2 *grid_r; T2 *grid_w; int su, sv; int iu0, iv0; // start index of the current visibility int bu0, bv0; // start index of the current buffer vector rbuf, wbuf; void dump() const { if (bu0<-nsafe) return; // nothing written into buffer yet #pragma omp critical (gridder_writing_to_grid) { int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; } } } void load() { int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; } } public: const T2 *p0r; T2 *p0w; vector kernel; Helper(const GridderConfig &gconf_, const T2 *grid_r_, T2 *grid_w_) : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()), supp(gconf.Supp()), beta(gconf.Beta()), grid_r(grid_r_), grid_w(grid_w_), su(2*nsafe+(1<bu0+su) || (iv0+supp>bv0+sv)) { if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); } bu0=((((iu0+nsafe)>>logsquare)<>logsquare)< class VisServ { private: const Baselines &baselines; const const_mav &idx; T2 vis; const T3 &wgt; size_t nvis; bool have_wgt; public: VisServ(const Baselines &baselines_, const const_mav &idx_, T2 vis_, const T3 &wgt_) : baselines(baselines_), idx(idx_), vis(vis_), wgt(wgt_) { nvis = vis.shape(0); checkShape(idx.shape(), {nvis}); have_wgt = wgt.size()!=0; if (have_wgt) checkShape(wgt.shape(), {nvis}); } size_t Nvis() const { return nvis; } const Baselines &getBaselines() const { return baselines; } UVW getCoord(size_t i) const { return baselines.effectiveCoord(idx[i]); } complex getVis(size_t i) const { return have_wgt ? vis(i)*wgt(i) : vis(i); } uint32_t getIdx(size_t i) const { return idx[i]; } void setVis (size_t i, const complex &v) const { vis(i) = have_wgt ? v*wgt(i) : v; } void addVis (size_t i, const complex &v) const { vis(i) += have_wgt ? v*wgt(i) : v; } }; template class MsServ { private: const Baselines &baselines; const const_mav &idx; T2 ms; const T3 &wgt; size_t nvis; bool have_wgt; public: MsServ(const Baselines &baselines_, const const_mav &idx_, T2 ms_, const T3 &wgt_) : baselines(baselines_), idx(idx_), ms(ms_), wgt(wgt_) { auto nrows = baselines.Nrows(); auto nchan = baselines.Nchannels(); nvis = idx.shape(0); checkShape(ms.shape(), {nrows, nchan}); have_wgt = wgt.size()!=0; if (have_wgt) checkShape(wgt.shape(), {nrows, nchan}); } size_t Nvis() const { return nvis; } const Baselines &getBaselines() const { return baselines; } UVW getCoord(size_t i) const { return baselines.effectiveCoord(idx(i)); } complex getVis(size_t i) const { auto rc = baselines.getRowChan(idx(i)); return have_wgt ? ms(rc.row, rc.chan)*wgt(rc.row, rc.chan) : ms(rc.row, rc.chan); } uint32_t getIdx(size_t i) const { return idx[i]; } void setVis (size_t i, const complex &v) const { auto rc = baselines.getRowChan(idx(i)); ms(rc.row, rc.chan) = have_wgt ? v*wgt(rc.row, rc.chan) : v; } void addVis (size_t i, const complex &v) const { auto rc = baselines.getRowChan(idx(i)); ms(rc.row, rc.chan) += have_wgt ? v*wgt(rc.row, rc.chan) : v; } }; template void x2grid_c (const GridderConfig &gconf, const Serv &srv, mav,2> &grid) { size_t nu=gconf.Nu(), nv=gconf.Nv(); checkShape(grid.shape(), {nu, nv}); myassert(grid.contiguous(), "grid is not contiguous"); T beta = gconf.Beta(); size_t supp = gconf.Supp(); size_t nthreads = gconf.Nthreads(); #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, nullptr, grid.data()); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * ku = hlp.kernel.data(); const T * kv = hlp.kernel.data()+supp; size_t np = srv.Nvis(); // Loop over sampling points #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = srv.getCoord(ipart); auto flip = coord.FixW(); hlp.prep(coord.u, coord.v); auto * ptr = hlp.p0w; auto v(srv.getVis(ipart)*emb); if (flip) v=conj(v); for (size_t cu=0; cu tmp(v*ku[cu]); for (size_t cv=0; cv void vis2grid_c (const Baselines &baselines, const GridderConfig &gconf, const const_mav &idx, const const_mav,1> &vis, mav,2> &grid, const const_mav &wgt) { x2grid_c(gconf, VisServ,1>,const_mav> (baselines, idx, vis, wgt), grid); } template void ms2grid_c (const Baselines &baselines, const GridderConfig &gconf, const const_mav &idx, const const_mav,2> &ms, mav,2> &grid, const const_mav &wgt) { x2grid_c(gconf, MsServ,2>,const_mav> (baselines, idx, ms, wgt), grid); } template void grid2x_c (const GridderConfig &gconf, const const_mav,2> &grid, const Serv &srv) { size_t nu=gconf.Nu(), nv=gconf.Nv(); checkShape(grid.shape(), {nu, nv}); myassert(grid.contiguous(), "grid is not contiguous"); T beta = gconf.Beta(); size_t supp = gconf.Supp(); size_t nthreads = gconf.Nthreads(); // Loop over sampling points #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, grid.data(), nullptr); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * ku = hlp.kernel.data(); const T * kv = hlp.kernel.data()+supp; size_t np = srv.Nvis(); #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = srv.getCoord(ipart); auto flip = coord.FixW(); hlp.prep(coord.u, coord.v); complex r = 0; const auto * ptr = hlp.p0r; for (size_t cu=0; cu tmp(0); for (size_t cv=0; cv void grid2vis_c (const Baselines &baselines, const GridderConfig &gconf, const const_mav &idx, const const_mav,2> &grid, mav,1> &vis, const const_mav &wgt) { grid2x_c(gconf, grid, VisServ,1>,const_mav> (baselines, idx, vis, wgt)); } template void grid2ms_c (const Baselines &baselines, const GridderConfig &gconf, const const_mav &idx, const const_mav,2> &grid, mav,2> &ms, const const_mav &wgt) { grid2x_c(gconf, grid, MsServ,2>,const_mav> (baselines, idx, ms, wgt)); } template void apply_holo (const Baselines &baselines, const GridderConfig &gconf, const const_mav &idx, const const_mav,2> &grid, mav,2> &ogrid, const const_mav &wgt) { size_t nu=gconf.Nu(), nv=gconf.Nv(); checkShape(grid.shape(), {nu, nv}); size_t nvis = idx.shape(0); size_t nthreads = gconf.Nthreads(); bool have_wgt = wgt.size()!=0; if (have_wgt) checkShape(wgt.shape(), {nvis}); checkShape(ogrid.shape(), {nu, nv}); for (size_t i=0; i hlp(gconf, grid.data(), ogrid.data()); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * ku = hlp.kernel.data(); const T * kv = hlp.kernel.data()+supp; #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(idx(ipart)); coord.FixW(); hlp.prep(coord.u, coord.v); complex r = 0; const auto * ptr = hlp.p0r; for (size_t cu=0; cu tmp(0); for (size_t cv=0; cv tmp(r*ku[cu]); for (size_t cv=0; cv void get_correlations (const Baselines &baselines, const GridderConfig &gconf, const const_mav &idx, int du, int dv, mav &ogrid, const const_mav &wgt) { size_t nu=gconf.Nu(), nv=gconf.Nv(); size_t nvis = idx.shape(0); bool have_wgt = wgt.size()!=0; if (have_wgt) checkShape(wgt.shape(), {nvis}); checkShape(ogrid.shape(), {nu, nv}); size_t supp = gconf.Supp(); myassert(size_t(abs(du))=0) { u0=0; u1=supp-du; } else { u0=-du; u1=supp; } if (dv>=0) { v0=0; v1=supp-dv; } else { v0=-dv; v1=supp; } // Loop over sampling points #pragma omp parallel num_threads(nthreads) { Helper hlp(gconf, nullptr, ogrid.data()); T emb = exp(-2*beta); int jump = hlp.lineJump(); const T * ku = hlp.kernel.data(); const T * kv = hlp.kernel.data()+supp; #pragma omp for schedule(guided,100) for (size_t ipart=0; ipart coord = baselines.effectiveCoord(idx(ipart)); coord.FixW(); hlp.prep(coord.u, coord.v); auto * wptr = hlp.p0w + u0*jump; auto f0 = emb*emb; if (have_wgt) { auto twgt = wgt(ipart); f0*=twgt*twgt; } for (size_t cu=u0; cu void apply_wcorr(const GridderConfig &gconf, const mav &dirty, const EC_Kernel_with_correction &kernel, double dw) { auto nx_dirty=gconf.Nxdirty(); auto ny_dirty=gconf.Nydirty(); size_t nthreads = gconf.Nthreads(); auto psx=gconf.Pixsize_x(); auto psy=gconf.Pixsize_y(); double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy; #pragma omp parallel for schedule(static) num_threads(nthreads) for (size_t i=0; i<=nx_dirty/2; ++i) { double fx = x0+i*psx; fx *= fx; for (size_t j=0; j<=ny_dirty/2; ++j) { double fy = y0+j*psy; fy*=fy; T fct = 0; double tmp = 1.-fx-fy; if (tmp>=0) { auto nm1 = (-fx-fy)/(sqrt(1.-fx-fy)+1.); // accurate form of sqrt(1-x-y) fct = (nm1<=-1) ? 0. : kernel.corfac(nm1*dw); } size_t i2 = nx_dirty-i, j2 = ny_dirty-j; dirty(i,j)*=fct; if ((i>0)&&(i0)&&(j0)&&(j void x2dirty( const GridderConfig &gconf, const Serv &srv, const mav &dirty, size_t verbosity=0) { auto nx_dirty=gconf.Nxdirty(); auto ny_dirty=gconf.Nydirty(); auto nu=gconf.Nu(); auto nv=gconf.Nv(); if (verbosity>0) cout << "Gridding without w-stacking: " << srv.Nvis() << " visibilities" << endl; if (verbosity>0) cout << "Using " << gconf.Nthreads() << " threads" << endl; tmpStorage,2> grid_({nu,nv}); auto grid=grid_.getMav(); grid_.fill(0.); x2grid_c(gconf, srv, grid); tmpStorage,2> cdirty_({nx_dirty,ny_dirty}); auto cdirty=cdirty_.getMav(); // FIXME: this could use symmetries to accelerate the FFT gconf.grid2dirty_c(grid, cdirty); for (size_t i=0; i void dirty2x( const GridderConfig &gconf, const const_mav &dirty, const Serv &srv, size_t verbosity=0) { auto nx_dirty=gconf.Nxdirty(); auto ny_dirty=gconf.Nydirty(); auto nu=gconf.Nu(); auto nv=gconf.Nv(); size_t nvis = srv.Nvis(); if (verbosity>0) cout << "Degridding without w-stacking: " << srv.Nvis() << " visibilities" << endl; if (verbosity>0) cout << "Using " << gconf.Nthreads() << " threads" << endl; for (size_t i=0; i,2> cdirty_({nx_dirty,ny_dirty}); auto cdirty=cdirty_.getMav(); for (size_t i=0; i,2> grid_({nu,nv}); auto grid=grid_.getMav(); // FIXME: this could use symmetries to accelerate the FFT gconf.dirty2grid_c(cdirty, grid); grid2x_c(gconf, const_mav,2>(grid.data(),{nu,nv}), srv); } template void wstack_common( const GridderConfig &gconf, const Serv &srv, T &wmin, vector &wval, double &dw, size_t &nplanes, vector &nvis_plane, vector &minplane, size_t verbosity) { auto nx_dirty=gconf.Nxdirty(); auto ny_dirty=gconf.Nydirty(); auto psx=gconf.Pixsize_x(); auto psy=gconf.Pixsize_y(); size_t nvis = srv.Nvis(); size_t nthreads = gconf.Nthreads(); // determine w values for every visibility, and min/max w; wmin=T(1e38); T wmax=T(-1e38); wval.resize(nvis); #pragma omp parallel for num_threads(nthreads) reduction(min:wmin) reduction(max:wmax) for (size_t ipart=0; ipart0) cout << "Using " << nthreads << " threads" << endl; if (verbosity>0) cout << "W range: " << wmin << " to " << wmax << endl; double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy; double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.; dw = 0.25/abs(nmin); nplanes = max(2,((wmax-wmin)/dw+2)); dw=(wmax-wmin)/(nplanes-1); auto w_supp = gconf.Supp(); wmin -= (0.5*w_supp-1)*dw; wmax += (0.5*w_supp-1)*dw; nplanes += w_supp-2; if (verbosity>0) cout << "Kernel support: " << w_supp << endl; if (verbosity>0) cout << "nplanes: " << nplanes << endl; nvis_plane.resize(nplanes,0); minplane.resize(nvis); #pragma omp parallel num_threads(nthreads) { vector nvploc(nplanes,0); #pragma omp for for (size_t ipart=0; ipart(0,plane0); i(nplanes,plane0+w_supp); ++i) ++nvploc[i]; } #pragma omp critical for (size_t i=0; i void x2dirty_wstack( const GridderConfig &gconf, const Serv &srv, const mav &dirty, size_t verbosity=0) { auto nx_dirty=gconf.Nxdirty(); auto ny_dirty=gconf.Nydirty(); auto nu=gconf.Nu(); auto nv=gconf.Nv(); size_t nvis = srv.Nvis(); auto w_supp = gconf.Supp(); size_t nthreads = gconf.Nthreads(); EC_Kernel_with_correction kernel(w_supp, nthreads); T wmin; vector wval; double dw; size_t nplanes; vector nvis_plane; vector minplane; if (verbosity>0) cout << "Gridding using improved w-stacking" << endl; wstack_common(gconf, srv, wmin, wval, dw, nplanes, nvis_plane, minplane, verbosity); for (size_t i=0; i> vis_loc_; vector idx_loc_; vector mapper; tmpStorage,2> grid_({nu,nv}); auto grid=grid_.getMav(); tmpStorage,2> tdirty_({nx_dirty,ny_dirty}); auto tdirty=tdirty_.getMav(); for (size_t iw=0; iw1) cout << "Working on plane " << iw << " containing " << nvis_plane[iw] << " visibilities" << endl; T wcur = wmin+iw*dw; size_t cnt=0; vis_loc_.resize(nvis_plane[iw]); auto vis_loc = mav,1>(vis_loc_.data(), {nvis_plane[iw]}); idx_loc_.resize(nvis_plane[iw]); auto idx_loc = mav(idx_loc_.data(), {nvis_plane[iw]}); mapper.resize(nvis_plane[iw]); for (size_t ipart=0; ipart=minplane[ipart]) && (iw(idx_loc), const_mav,1>(vis_loc), grid, const_mav(nullptr,{0})); gconf.grid2dirty_c(grid, tdirty); gconf.apply_wscreen(tdirty, tdirty, wcur, false); #pragma omp parallel for num_threads(nthreads) for (size_t i=0; i void vis2dirty_wstack(const Baselines &baselines, const GridderConfig &gconf, const const_mav &idx, const const_mav,1> &vis, mav &dirty) { x2dirty_wstack(gconf, VisServ,1>,const_mav>(baselines, idx, vis, const_mav(nullptr,{0})), dirty); } template void dirty2x_wstack( const GridderConfig &gconf, const const_mav &dirty, const Serv &srv, size_t verbosity=0) { auto nx_dirty=gconf.Nxdirty(); auto ny_dirty=gconf.Nydirty(); auto nu=gconf.Nu(); auto nv=gconf.Nv(); size_t nvis = srv.Nvis(); auto w_supp = gconf.Supp(); size_t nthreads = gconf.Nthreads(); EC_Kernel_with_correction kernel(w_supp, nthreads); T wmin; vector wval; double dw; size_t nplanes; vector nvis_plane; vector minplane; if (verbosity>0) cout << "Degridding using improved w-stacking" << endl; wstack_common(gconf, srv, wmin, wval, dw, nplanes, nvis_plane, minplane, verbosity); for (size_t i=0; i tdirty_({nx_dirty,ny_dirty}); auto tdirty=tdirty_.getMav(); for (size_t i=0; i> vis_loc_; vector idx_loc_; vector mapper; tmpStorage,2> grid_({nu,nv}); auto grid=grid_.getMav(); tmpStorage,2> tdirty2_({nx_dirty,ny_dirty}); auto tdirty2=tdirty2_.getMav(); for (size_t iw=0; iw1) cout << "Working on plane " << iw << " containing " << nvis_plane[iw] << " visibilities" << endl; T wcur = wmin+iw*dw; size_t cnt=0; vis_loc_.resize(nvis_plane[iw]); auto vis_loc = mav,1>(vis_loc_.data(), {nvis_plane[iw]}); idx_loc_.resize(nvis_plane[iw]); auto idx_loc = mav(idx_loc_.data(), {nvis_plane[iw]}); mapper.resize(nvis_plane[iw]); for (size_t ipart=0; ipart=minplane[ipart]) && (iw(idx_loc), const_mav,2>(grid), vis_loc, const_mav(nullptr,{0})); T tmpval=T(2)/(w_supp*dw); #pragma omp parallel for num_threads(nthreads) for (size_t i=0; i void dirty2vis_wstack(const Baselines &baselines, const GridderConfig &gconf, const const_mav &idx, const const_mav &dirty, mav,1> &vis) { dirty2x_wstack(gconf, dirty, VisServ,1>,const_mav>(baselines, idx, vis, const_mav(nullptr,{0}))); } template size_t getIdxSize(const Baselines &baselines, const const_mav &flags, int chbegin, int chend, T wmin, T wmax, size_t nthreads) { size_t nrow=baselines.Nrows(), nchan=baselines.Nchannels(); if (chbegin<0) chbegin=0; if (chend<0) chend=nchan; myassert(chend>chbegin, "empty channel range selected"); myassert(chend<=int(nchan), "chend too large"); myassert(wmax>wmin, "empty w range selected"); checkShape(flags.shape(), {nrow, nchan}); size_t res=0; #pragma omp parallel for num_threads(nthreads) reduction(+:res) for (size_t irow=0; irow=wmin) && (w void fillIdx(const Baselines &baselines, const GridderConfig &gconf, const const_mav &flags, int chbegin, int chend, T wmin, T wmax, mav &res) { size_t nrow=baselines.Nrows(), nchan=baselines.Nchannels(), nsafe=gconf.Nsafe(); if (chbegin<0) chbegin=0; if (chend<0) chend=nchan; myassert(chend>chbegin, "empty channel range selected"); myassert(chend<=int(nchan), "chend too large"); myassert(wmax>wmin, "empty w range selected"); checkShape(flags.shape(), {nrow, nchan}); constexpr int side=1<> logsquare, nbv = (gconf.Nv()+1+side-1) >> logsquare; vector acc(nbu*nbv+1, 0); vector tmp(nrow*(chend-chbegin)); for (size_t irow=0, idx=0; irow=wmin) && (uvw.w>logsquare; iv0 = (iv0+nsafe)>>logsquare; ++acc[nbv*iu0 + iv0 + 1]; tmp[idx++] = nbv*iu0 + iv0; } } for (size_t i=1; i=wmin) && (w vector getWgtIndices(const Baselines &baselines, const GridderConfig &gconf, const const_mav &wgt) { size_t nrow=baselines.Nrows(), nchan=baselines.Nchannels(), nsafe=gconf.Nsafe(); bool have_wgt=wgt.size()!=0; if (have_wgt) checkShape(wgt.shape(),{nrow,nchan}); constexpr int side=1<> logsquare, nbv = (gconf.Nv()+1+side-1) >> logsquare; vector acc(nbu*nbv+1, 0); vector tmp(nrow*nchan); for (size_t irow=0, idx=0; irow>logsquare; iv0 = (iv0+nsafe)>>logsquare; ++acc[nbv*iu0 + iv0 + 1]; tmp[idx++] = nbv*iu0 + iv0; } for (size_t i=1; i res(acc.back()); for (size_t irow=0, idx=0; irow void gridding(const const_mav &uvw, const const_mav &freq, const const_mav,2> &ms, const const_mav &wgt, T pixsize_x, T pixsize_y, double epsilon, size_t nthreads, const mav &dirty, size_t verbosity) { Baselines baselines(uvw, freq); GridderConfig gconf(dirty.shape(0), dirty.shape(1), epsilon, pixsize_x, pixsize_y, nthreads); auto idx = getWgtIndices(baselines, gconf, wgt); auto idx2 = const_mav(idx.data(),{idx.size()}); auto srv = MsServ,2>,const_mav>(baselines,idx2,ms,wgt); x2dirty(gconf, srv, dirty, verbosity); } template void degridding(const const_mav &uvw, const const_mav &freq, const const_mav &dirty, const const_mav &wgt, T pixsize_x, T pixsize_y, double epsilon, size_t nthreads, const mav,2> &ms, size_t verbosity) { Baselines baselines(uvw, freq); GridderConfig gconf(dirty.shape(0), dirty.shape(1), epsilon, pixsize_x, pixsize_y, nthreads); auto idx = getWgtIndices(baselines, gconf, wgt); auto idx2 = const_mav(idx.data(),{idx.size()}); for (size_t i=0; i,2>,const_mav>(baselines,idx2,ms,wgt); dirty2x(gconf, dirty, srv, verbosity); } template void full_gridding(const const_mav &uvw, const const_mav &freq, const const_mav,2> &ms, const const_mav &wgt, T pixsize_x, T pixsize_y, double epsilon, size_t nthreads, const mav &dirty, size_t verbosity) { Baselines baselines(uvw, freq); GridderConfig gconf(dirty.shape(0), dirty.shape(1), epsilon, pixsize_x, pixsize_y, nthreads); auto idx = getWgtIndices(baselines, gconf, wgt); auto idx2 = const_mav(idx.data(),{idx.size()}); auto srv = MsServ,2>,const_mav>(baselines,idx2,ms,wgt); x2dirty_wstack(gconf, srv, dirty, verbosity); } template void full_degridding(const const_mav &uvw, const const_mav &freq, const const_mav &dirty, const const_mav &wgt, T pixsize_x, T pixsize_y, double epsilon, size_t nthreads, const mav,2> &ms, size_t verbosity) { Baselines baselines(uvw, freq); GridderConfig gconf(dirty.shape(0), dirty.shape(1), epsilon, pixsize_x, pixsize_y, nthreads); auto idx = getWgtIndices(baselines, gconf, wgt); auto idx2 = const_mav(idx.data(),{idx.size()}); for (size_t i=0; i,2>,const_mav>(baselines,idx2,ms,wgt); dirty2x_wstack(gconf, dirty, srv, verbosity); } } // namespace detail // public names using detail::mav; using detail::const_mav; using detail::myassert; using detail::Baselines; using detail::GridderConfig; using detail::ms2grid_c; using detail::grid2ms_c; using detail::vis2grid_c; using detail::grid2vis_c; using detail::vis2dirty_wstack; using detail::dirty2vis_wstack; using detail::full_gridding; using detail::full_degridding; } // namespace gridder #endif