#ifndef GRIDDER_CXX_H #define GRIDDER_CXX_H /* * This file is part of nifty_gridder. * * nifty_gridder is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * nifty_gridder is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with nifty_gridder; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* Copyright (C) 2019 Max-Planck-Society Author: Martin Reinecke */ #include #include #include #include #include #include #include "mr_util/error_handling.h" #include "mr_util/fft.h" #include "mr_util/threading.h" #include "mr_util/useful_macros.h" #include "mr_util/mav.h" #include "mr_util/gl_integrator.h" #if defined(__GNUC__) #define ALIGNED(align) __attribute__ ((aligned(align))) #else #define ALIGNED(align) #endif namespace gridder { namespace detail { using namespace std; using namespace mr; template void checkShape (const array &shp1, const array &shp2) { for (size_t i=0; i inline T fmod1 (T v) { return v-floor(v); } // // Start of real gridder functionality // template void complex2hartley (const cmav, 2> &grid, const mav &grid2, size_t nthreads) { checkShape(grid.shape(), grid2.shape()); size_t nu=grid.shape(0), nv=grid.shape(1); execStatic(nu, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto u=rng.lo; u void hartley2complex (const cmav &grid, const mav,2> &grid2, size_t nthreads) { checkShape(grid.shape(), grid2.shape()); size_t nu=grid.shape(0), nv=grid.shape(1); execStatic(nu, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto u=rng.lo; u(v1+v2, v1-v2); } } }); } template void hartley2_2D(const cmav &in, const mav &out, size_t nthreads) { checkShape(in.shape(), out.shape()); size_t nu=in.shape(0), nv=in.shape(1); auto ptmp = out.data(); r2r_separable_hartley(cfmav(in), fmav(out), {0,1}, T(1), nthreads); execStatic((nu+1)/2-1, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto i=rng.lo+1; i x, wgt, psi; size_t supp; public: ES_Kernel(size_t supp_, double ofactor, size_t nthreads) : beta(get_beta(supp_,ofactor)*supp_), p(int(1.5*supp_+2)), supp(supp_) { GL_Integrator integ(2*p,nthreads); x = integ.coordsSymmetric(); wgt = integ.weightsSymmetric(); psi=x; for (auto &v:psi) v=operator()(v); } ES_Kernel(size_t supp_, size_t nthreads) : ES_Kernel(supp_, 2., nthreads){} double operator()(double v) const { return (v*v>1.) ? 0. : exp(beta*(sqrt(1.-v*v)-1.)); } /* Compute correction factors for the ES gridding kernel This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ double corfac(double v) const { double tmp=0; for (int i=0; i=2) && (supp<=15), "unsupported support size"); if (ofactor>=2) { static const vector opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26, 2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317}; MR_assert(supp=1.175) { // empirical, but pretty accurate approximation static const array betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0}; auto x0 = 1./(2*ofactor); auto bcstrength=1.+(x0-0.25)*2.5; return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1; } MR_fail("oversampling factor is too small"); } static size_t get_supp(double epsilon, double ofactor=2) { double epssq = epsilon*epsilon; if (ofactor>=2) { static const vector maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5, 1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17, 2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 }; for (size_t i=2; imaxmaperr[i]) return i; MR_fail("requested epsilon too small - minimum is 1e-13"); } if (ofactor>=1.175) { for (size_t w=2; w<16; ++w) { auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation if (epssq>estimate) return w; } MR_fail("requested epsilon too small"); } MR_fail("oversampling factor is too small"); } }; /* Compute correction factors for the ES gridding kernel This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ vector correction_factors(size_t n, double ofactor, size_t nval, size_t supp, size_t nthreads) { ES_Kernel kernel(supp, ofactor, nthreads); vector res(nval); double xn = 1./n; execStatic(nval, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto i=rng.lo; i coord; vector f_over_c; idx_t nrows, nchan; idx_t shift, mask; public: template Baselines(const cmav &coord_, const cmav &freq, bool negate_v=false) { constexpr double speedOfLight = 299792458.; MR_assert(coord_.shape(1)==3, "dimension mismatch"); auto hugeval = size_t(~(idx_t(0))); MR_assert(coord_.shape(0)0, "negative channel frequency encountered"); f_over_c[i] = freq(i)/speedOfLight; } coord.resize(nrows); if (negate_v) for (size_t i=0; i>shift, index&mask}; } UVW effectiveCoord(const RowChan &rc) const { return coord[rc.row]*f_over_c[rc.chan]; } UVW effectiveCoord(idx_t index) const { return effectiveCoord(getRowChan(index)); } size_t Nrows() const { return nrows; } size_t Nchannels() const { return nchan; } idx_t getIdx(idx_t irow, idx_t ichan) const { return ichan+(irow< complex wscreen(T x, T y, T w, bool adjoint) const { constexpr T pi = T(3.141592653589793238462643383279502884197); T tmp = 1-x-y; if (tmp<=0) return 1; // no phase factor beyond the horizon T nm1 = (-x-y)/(sqrt(tmp)+1); // more accurate form of sqrt(1-x-y)-1 T phase = 2*pi*w*nm1; if (adjoint) phase *= -1; return complex(cos(phase), sin(phase)); } public: GridderConfig(size_t nxdirty, size_t nydirty, size_t nu_, size_t nv_, double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_) : nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_), ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)), eps(epsilon), psx(pixsize_x), psy(pixsize_y), supp(ES_Kernel::get_supp(epsilon, ofactor)), nsafe((supp+1)/2), beta(ES_Kernel::get_beta(supp, ofactor)*supp), nthreads(nthreads_), ushift(supp*(-0.5)+1+nu), vshift(supp*(-0.5)+1+nv), maxiu0((nu+nsafe)-supp), maxiv0((nv+nsafe)-supp) { MR_assert(nu>=2*nsafe, "nu too small"); MR_assert(nv>=2*nsafe, "nv too small"); MR_assert((nx_dirty&1)==0, "nx_dirty must be even"); MR_assert((ny_dirty&1)==0, "ny_dirty must be even"); MR_assert((nu&1)==0, "nu must be even"); MR_assert((nv&1)==0, "nv must be even"); MR_assert(epsilon>0, "epsilon must be positive"); MR_assert(pixsize_x>0, "pixsize_x must be positive"); MR_assert(pixsize_y>0, "pixsize_y must be positive"); MR_assert(ofactor>=1.175, "oversampling factor too small (>=1.2 recommended)"); } GridderConfig(size_t nxdirty, size_t nydirty, double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_) : GridderConfig(nxdirty, nydirty, max(30,2*nxdirty), max(30,2*nydirty), epsilon, pixsize_x, pixsize_y, nthreads_) {} size_t Nxdirty() const { return nx_dirty; } size_t Nydirty() const { return ny_dirty; } double Epsilon() const { return eps; } double Pixsize_x() const { return psx; } double Pixsize_y() const { return psy; } size_t Nu() const { return nu; } size_t Nv() const { return nv; } size_t Supp() const { return supp; } size_t Nsafe() const { return nsafe; } double Beta() const { return beta; } size_t Nthreads() const { return nthreads; } double Ofactor() const{ return ofactor; } template void grid2dirty_post(const mav &tmav, const mav &dirty) const { checkShape(dirty.shape(), {nx_dirty,ny_dirty}); auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads); auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads); execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto i=rng.lo; i=nu) i2-=nu; size_t j2 = nv-ny_dirty/2+j; if (j2>=nv) j2-=nv; // FIXME: for some reason g++ warns about double-to-float conversion // here, even though there is an explicit cast... dirty(i,j) = tmav(i2,j2)*T(cfu[icfu]*cfv[icfv]); } } }); } template void grid2dirty_post2( const mav,2> &tmav, const mav &dirty, T w) const { checkShape(dirty.shape(), {nx_dirty,ny_dirty}); double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy; execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto i=rng.lo; i=nu) ix-=nu; size_t jx = nv-ny_dirty/2+j; if (jx>=nv) jx-=nv; dirty(i,j) += (tmav(ix,jx)*ws).real(); // lower left size_t i2 = nx_dirty-i, j2 = ny_dirty-j; size_t ix2 = nu-nx_dirty/2+i2; if (ix2>=nu) ix2-=nu; size_t jx2 = nv-ny_dirty/2+j2; if (jx2>=nv) jx2-=nv; if ((i>0)&&(i0)&&(j0)&&(j void grid2dirty(const cmav &grid, const mav &dirty) const { checkShape(grid.shape(), {nu,nv}); mav tmav({nu,nv}); hartley2_2D(grid, tmav, nthreads); grid2dirty_post(tmav, dirty); } template void grid2dirty_c_overwrite_wscreen_add (const mav,2> &grid, const mav &dirty, T w) const { checkShape(grid.shape(), {nu,nv}); fmav> inout(grid); c2c(inout, inout, {0,1}, BACKWARD, T(1), nthreads); grid2dirty_post2(grid, dirty, w); } template void dirty2grid_pre(const cmav &dirty, const mav &grid) const { checkShape(dirty.shape(), {nx_dirty, ny_dirty}); checkShape(grid.shape(), {nu, nv}); auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads); auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads); grid.fill(0); execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto i=rng.lo; i=nu) i2-=nu; size_t j2 = nv-ny_dirty/2+j; if (j2>=nv) j2-=nv; grid(i2,j2) = dirty(i,j)*T(cfu[icfu]*cfv[icfv]); } } }); } template void dirty2grid_pre2(const cmav &dirty, const mav,2> &grid, T w) const { checkShape(dirty.shape(), {nx_dirty, ny_dirty}); checkShape(grid.shape(), {nu, nv}); grid.fill(0); double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy; execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto i=rng.lo; i=nu) ix-=nu; size_t jx = nv-ny_dirty/2+j; if (jx>=nv) jx-=nv; grid(ix,jx) = dirty(i,j)*ws; // lower left size_t i2 = nx_dirty-i, j2 = ny_dirty-j; size_t ix2 = nu-nx_dirty/2+i2; if (ix2>=nu) ix2-=nu; size_t jx2 = nv-ny_dirty/2+j2; if (jx2>=nv) jx2-=nv; if ((i>0)&&(i0)&&(j0)&&(j void dirty2grid(const cmav &dirty, const mav &grid) const { dirty2grid_pre(dirty, grid); hartley2_2D(grid, grid, nthreads); } template void dirty2grid_c_wscreen(const cmav &dirty, const mav,2> &grid, T w) const { dirty2grid_pre2(dirty, grid, w); fmav> inout(grid); c2c(inout, inout, {0,1}, FORWARD, T(1), nthreads); } void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const { u=fmod1(u_in*psx)*nu; iu0 = min(int(u+ushift)-int(nu), maxiu0); v=fmod1(v_in*psy)*nv; iv0 = min(int(v+vshift)-int(nv), maxiv0); } template void apply_wscreen(const cmav,2> &dirty, mav,2> &dirty2, double w, bool adjoint) const { checkShape(dirty.shape(), {nx_dirty, ny_dirty}); checkShape(dirty2.shape(), {nx_dirty, ny_dirty}); double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy; execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto i=rng.lo; i0)&&(i0)&&(j0)&&(j> class Helper { private: const GridderConfig &gconf; int nu, nv, nsafe, supp; T beta; const T2 *grid_r; T2 *grid_w; int su, sv; int iu0, iv0; // start index of the current visibility int bu0, bv0; // start index of the current buffer vector rbuf, wbuf; bool do_w_gridding; double w0, xdw; size_t nexp; size_t nvecs; vector &locks; void dump() const { if (bu0<-nsafe) return; // nothing written into buffer yet int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; for (int iu=0; iu lock(locks[idxu]); for (int iv=0; iv=nv) idxv=0; } } if (++idxu>=nu) idxu=0; } } void load() { int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; } } public: const T2 *p0r; T2 *p0w; T kernel[64] ALIGNED(64); static constexpr size_t vlen=native_simd::size(); Helper(const GridderConfig &gconf_, const T2 *grid_r_, T2 *grid_w_, vector &locks_, double w0_=-1, double dw_=-1) : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()), supp(gconf.Supp()), beta(T(gconf.Beta())), grid_r(grid_r_), grid_w(grid_w_), su(2*nsafe+(1<0), w0(w0_), xdw(T(1)/dw_), nexp(2*supp + do_w_gridding), nvecs(vlen*((nexp+vlen-1)/vlen)), locks(locks_) {} ~Helper() { if (grid_w) dump(); } int lineJump() const { return sv; } T Wfac() const { return kernel[2*supp]; } void prep(const UVW &in) { double u, v; gconf.getpix(in.u, in.v, u, v, iu0, iv0); double xsupp=2./supp; double x0 = xsupp*(iu0-u); double y0 = xsupp*(iv0-v); for (int i=0; ibu0+su) || (iv0+supp>bv0+sv)) { if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); } bu0=((((iu0+nsafe)>>logsquare)<>logsquare)< class SubServ { private: const Serv &srv; cmav subidx; public: SubServ(const Serv &orig, const cmav &subidx_) : srv(orig), subidx(subidx_){} size_t Nvis() const { return subidx.size(); } const Baselines &getBaselines() const { return srv.getBaselines(); } UVW getCoord(size_t i) const { return srv.getCoord(subidx[i]); } complex getVis(size_t i) const { return srv.getVis(subidx[i]); } idx_t getIdx(size_t i) const { return srv.getIdx(subidx[i]); } void setVis (size_t i, const complex &v) const { srv.setVis(subidx[i], v); } void addVis (size_t i, const complex &v) const { srv.addVis(subidx[i], v); } }; template class MsServ { private: const Baselines &baselines; cmav idx; T2 ms; cmav wgt; size_t nvis; bool have_wgt; public: using Tsub = SubServ; MsServ(const Baselines &baselines_, const cmav &idx_, T2 ms_, const cmav &wgt_) : baselines(baselines_), idx(idx_), ms(ms_), wgt(wgt_), nvis(idx.shape(0)), have_wgt(wgt.size()!=0) { checkShape(ms.shape(), {baselines.Nrows(), baselines.Nchannels()}); if (have_wgt) checkShape(wgt.shape(), ms.shape()); } Tsub getSubserv(const cmav &subidx) const { return Tsub(*this, subidx); } size_t Nvis() const { return nvis; } const Baselines &getBaselines() const { return baselines; } UVW getCoord(size_t i) const { return baselines.effectiveCoord(idx(i)); } complex getVis(size_t i) const { auto rc = baselines.getRowChan(idx(i)); return have_wgt ? ms(rc.row, rc.chan)*wgt(rc.row, rc.chan) : ms(rc.row, rc.chan); } idx_t getIdx(size_t i) const { return idx[i]; } void setVis (size_t i, const complex &v) const { auto rc = baselines.getRowChan(idx(i)); ms(rc.row, rc.chan) = have_wgt ? v*wgt(rc.row, rc.chan) : v; } void addVis (size_t i, const complex &v) const { auto rc = baselines.getRowChan(idx(i)); ms(rc.row, rc.chan) += have_wgt ? v*wgt(rc.row, rc.chan) : v; } }; template MsServ makeMsServ (const Baselines &baselines, const cmav &idx, const T2 &ms, const cmav &wgt) { return MsServ(baselines, idx, ms, wgt); } template void x2grid_c (const GridderConfig &gconf, const Serv &srv, mav,2> &grid, double w0=-1, double dw=-1) { checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()}); MR_assert(grid.contiguous(), "grid is not contiguous"); size_t supp = gconf.Supp(); size_t nthreads = gconf.Nthreads(); bool do_w_gridding = dw>0; vector locks(gconf.Nu()); size_t np = srv.Nvis(); execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched) { Helper hlp(gconf, nullptr, grid.data(), locks, w0, dw); int jump = hlp.lineJump(); const T * MRUTIL_RESTRICT ku = hlp.kernel; const T * MRUTIL_RESTRICT kv = hlp.kernel+supp; while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart tmp(v*ku[cu]); size_t cv=0; for (; cv+3 void grid2x_c (const GridderConfig &gconf, const cmav,2> &grid, const Serv &srv, double w0=-1, double dw=-1) { checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()}); MR_assert(grid.contiguous(), "grid is not contiguous"); size_t supp = gconf.Supp(); size_t nthreads = gconf.Nthreads(); bool do_w_gridding = dw>0; vector locks(gconf.Nu()); // Loop over sampling points size_t np = srv.Nvis(); execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched) { Helper hlp(gconf, grid.data(), nullptr, locks, w0, dw); int jump = hlp.lineJump(); const T * MRUTIL_RESTRICT ku = hlp.kernel; const T * MRUTIL_RESTRICT kv = hlp.kernel+supp; while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart r = 0; const auto * MRUTIL_RESTRICT ptr = hlp.p0r; for (size_t cu=0; cu tmp(0); size_t cv=0; for (; cv+3 void apply_global_corrections(const GridderConfig &gconf, const mav &dirty, const ES_Kernel &kernel, double dw, bool divide_by_n) { auto nx_dirty=gconf.Nxdirty(); auto ny_dirty=gconf.Nydirty(); size_t nthreads = gconf.Nthreads(); auto psx=gconf.Pixsize_x(); auto psy=gconf.Pixsize_y(); double x0 = -0.5*nx_dirty*psx, y0 = -0.5*ny_dirty*psy; auto cfu = correction_factors(gconf.Nu(), gconf.Ofactor(), nx_dirty/2+1, gconf.Supp(), nthreads); auto cfv = correction_factors(gconf.Nv(), gconf.Ofactor(), ny_dirty/2+1, gconf.Supp(), nthreads); execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched) { while (auto rng=sched.getNext()) for(auto i=rng.lo; i=0) { auto nm1 = (-fx-fy)/(sqrt(tmp)+1); // accurate form of sqrt(1-x-y)-1 fct = T(kernel.corfac(nm1*dw)); if (divide_by_n) fct /= nm1+1; } else // beyond the horizon, don't really know what to do here { if (divide_by_n) fct=0; else { auto nm1 = sqrt(-tmp)-1; fct = T(kernel.corfac(nm1*dw)); } } fct *= T(cfu[nx_dirty/2-i]*cfv[ny_dirty/2-j]); size_t i2 = nx_dirty-i, j2 = ny_dirty-j; dirty(i,j)*=fct; if ((i>0)&&(i0)&&(j0)&&(j class WgridHelper { private: const Serv &srv; double wmin, dw; size_t nplanes, supp; vector> minplane; size_t verbosity; int curplane; vector subidx; static void wminmax(const Serv &srv, double &wmin, double &wmax) { size_t nvis = srv.Nvis(); wmin= 1e38; wmax=-1e38; // FIXME maybe this can be done more intelligently for (size_t ipart=0; ipart static void update_idx(vector &v, const vector &add, const vector &del) { MR_assert(v.size()>=del.size(), "must not happen"); vector res; res.reserve((v.size()+add.size())-del.size()); auto iin=v.begin(), ein=v.end(); auto iadd=add.begin(), eadd=add.end(); auto irem=del.begin(), erem=del.end(); while(iin!=ein) { if ((irem!=erem) && (*iin==*irem)) { ++irem; ++iin; } // skip removed entry else if ((iadd!=eadd) && (*iadd<*iin)) res.push_back(*(iadd++)); // add new entry else res.push_back(*(iin++)); } MR_assert(irem==erem, "must not happen"); while(iadd!=eadd) res.push_back(*(iadd++)); MR_assert(res.size()==(v.size()+add.size())-del.size(), "must not happen"); v.swap(res); } public: WgridHelper(const GridderConfig &gconf, const Serv &srv_, size_t verbosity_) : srv(srv_), verbosity(verbosity_), curplane(-1) { size_t nvis = srv.Nvis(); size_t nthreads = gconf.Nthreads(); double wmax; wminmax(srv, wmin, wmax); if (verbosity>0) cout << "Using " << nthreads << " thread" << ((nthreads!=1) ? "s" : "") << endl; if (verbosity>0) cout << "W range: " << wmin << " to " << wmax << endl; double x0 = -0.5*gconf.Nxdirty()*gconf.Pixsize_x(), y0 = -0.5*gconf.Nydirty()*gconf.Pixsize_y(); double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.; if (x0*x0+y0*y0>1.) nmin = -sqrt(abs(1.-x0*x0-y0*y0))-1.; dw = 0.25/abs(nmin); nplanes = size_t((wmax-wmin)/dw+2); dw = (1.+1e-13)*(wmax-wmin)/(nplanes-1); supp = gconf.Supp(); wmin -= (0.5*supp-1)*dw; wmax += (0.5*supp-1)*dw; nplanes += supp-2; if (verbosity>0) cout << "Kernel support: " << supp << endl; if (verbosity>0) cout << "nplanes: " << nplanes << endl; minplane.resize(nplanes); #if 0 // extra short, but potentially inefficient version: for (size_t ipart=0; ipart cnt(nplanes,0); for(size_t ipart=0; ipart ofs(nplanes, 0); for (size_t ipart=0; ipart(subidx.data(), {subidx.size()}); return srv.getSubserv(subidx2); } double W() const { return wmin+curplane*dw; } size_t Nvis() const { return subidx.size(); } double DW() const { return dw; } bool advance() { if (++curplane>=int(nplanes)) return false; update_idx(subidx, minplane[curplane], curplane>=int(supp) ? minplane[curplane-supp] : vector()); if (verbosity>1) cout << "Working on plane " << curplane << " containing " << subidx.size() << " visibilities" << endl; return true; } }; template void x2dirty( const GridderConfig &gconf, const Serv &srv, const mav &dirty, bool do_wstacking, size_t verbosity) { if (do_wstacking) { size_t nthreads = gconf.Nthreads(); if (verbosity>0) cout << "Gridding using improved w-stacking" << endl; WgridHelper hlp(gconf, srv, verbosity); double dw = hlp.DW(); dirty.fill(0); mav,2> grid({gconf.Nu(),gconf.Nv()}); while(hlp.advance()) // iterate over w planes { if (hlp.Nvis()==0) continue; grid.fill(0); x2grid_c(gconf, hlp.getSubserv(), grid, hlp.W(), dw); gconf.grid2dirty_c_overwrite_wscreen_add(grid, dirty, T(hlp.W())); } // correct for w gridding etc. apply_global_corrections(gconf, dirty, ES_Kernel(gconf.Supp(), gconf.Ofactor(), nthreads), dw, true); } else { if (verbosity>0) cout << "Gridding without w-stacking: " << srv.Nvis() << " visibilities" << endl; if (verbosity>0) cout << "Using " << gconf.Nthreads() << " threads" << endl; mav,2> grid({gconf.Nu(), gconf.Nv()}); grid.fill(0.); x2grid_c(gconf, srv, grid); mav rgrid(grid.shape()); complex2hartley(grid, rgrid, gconf.Nthreads()); gconf.grid2dirty(rgrid, dirty); } } template void dirty2x( const GridderConfig &gconf, const cmav &dirty, const Serv &srv, bool do_wstacking, size_t verbosity) { if (do_wstacking) { size_t nx_dirty=gconf.Nxdirty(), ny_dirty=gconf.Nydirty(); size_t nthreads = gconf.Nthreads(); if (verbosity>0) cout << "Degridding using improved w-stacking" << endl; WgridHelper hlp(gconf, srv, verbosity); double dw = hlp.DW(); mav tdirty({nx_dirty,ny_dirty}); for (size_t i=0; i,2> grid({gconf.Nu(),gconf.Nv()}); while(hlp.advance()) // iterate over w planes { if (hlp.Nvis()==0) continue; gconf.dirty2grid_c_wscreen(tdirty, grid, T(hlp.W())); grid2x_c(gconf, grid, hlp.getSubserv(), hlp.W(), dw); } } else { if (verbosity>0) cout << "Degridding without w-stacking: " << srv.Nvis() << " visibilities" << endl; if (verbosity>0) cout << "Using " << gconf.Nthreads() << " threads" << endl; mav grid({gconf.Nu(), gconf.Nv()}); gconf.dirty2grid(dirty, grid); mav,2> grid2(grid.shape()); hartley2complex(grid, grid2, gconf.Nthreads()); grid2x_c(gconf, grid2, srv); } } void calc_share(size_t nshares, size_t myshare, size_t nwork, size_t &lo, size_t &hi) { size_t nbase = nwork/nshares; size_t additional = nwork%nshares; lo = myshare*nbase + ((myshare vector getWgtIndices(const Baselines &baselines, const GridderConfig &gconf, const cmav &wgt, const cmav,2> &ms) { size_t nrow=baselines.Nrows(), nchan=baselines.Nchannels(), nsafe=gconf.Nsafe(); bool have_wgt=wgt.size()!=0; if (have_wgt) checkShape(wgt.shape(),{nrow,nchan}); bool have_ms=ms.size()!=0; if (have_ms) checkShape(ms.shape(), {nrow,nchan}); constexpr int side=1<> logsquare, nbv = (gconf.Nv()+1+side-1) >> logsquare; vector acc(nbu*nbv+1,0); vector tmp(nrow*nchan); for (idx_t irow=0, idx=0; irow>logsquare; iv0 = (iv0+nsafe)>>logsquare; ++acc[nbv*iu0 + iv0 + 1]; tmp[idx] = nbv*iu0 + iv0; } else tmp[idx] = ~idx_t(0); for (size_t i=1; i res(acc.back()); for (size_t irow=0, idx=0; irow void ms2dirty_general(const cmav &uvw, const cmav &freq, const cmav,2> &ms, const cmav &wgt, double pixsize_x, double pixsize_y, size_t nu, size_t nv, double epsilon, bool do_wstacking, size_t nthreads, const mav &dirty, size_t verbosity, bool negate_v=false) { Baselines baselines(uvw, freq, negate_v); GridderConfig gconf(dirty.shape(0), dirty.shape(1), nu, nv, epsilon, pixsize_x, pixsize_y, nthreads); auto idx = getWgtIndices(baselines, gconf, wgt, ms); auto idx2 = cmav(idx.data(),{idx.size()}); x2dirty(gconf, makeMsServ(baselines,idx2,ms,wgt), dirty, do_wstacking, verbosity); } template void ms2dirty(const cmav &uvw, const cmav &freq, const cmav,2> &ms, const cmav &wgt, double pixsize_x, double pixsize_y, double epsilon, bool do_wstacking, size_t nthreads, const mav &dirty, size_t verbosity) { ms2dirty_general(uvw, freq, ms, wgt, pixsize_x, pixsize_y, 2*dirty.shape(0), 2*dirty.shape(1), epsilon, do_wstacking, nthreads, dirty, verbosity); } template void dirty2ms_general(const cmav &uvw, const cmav &freq, const cmav &dirty, const cmav &wgt, double pixsize_x, double pixsize_y, size_t nu, size_t nv,double epsilon, bool do_wstacking, size_t nthreads, const mav,2> &ms, size_t verbosity, bool negate_v=false) { Baselines baselines(uvw, freq, negate_v); GridderConfig gconf(dirty.shape(0), dirty.shape(1), nu, nv, epsilon, pixsize_x, pixsize_y, nthreads); cmav,2> null_ms(nullptr, {0,0}); auto idx = getWgtIndices(baselines, gconf, wgt, null_ms); auto idx2 = cmav(idx.data(),{idx.size()}); ms.fill(0); dirty2x(gconf, dirty, makeMsServ(baselines,idx2,ms,wgt), do_wstacking, verbosity); } template void dirty2ms(const cmav &uvw, const cmav &freq, const cmav &dirty, const cmav &wgt, double pixsize_x, double pixsize_y, double epsilon, bool do_wstacking, size_t nthreads, const mav,2> &ms, size_t verbosity) { dirty2ms_general(uvw, freq, dirty, wgt, pixsize_x, pixsize_y, 2*dirty.shape(0), 2*dirty.shape(1), epsilon, do_wstacking, nthreads, ms, verbosity); } } // namespace detail // public names using detail::ms2dirty_general; using detail::dirty2ms_general; using detail::ms2dirty; using detail::dirty2ms; } // namespace gridder #endif