/* * This file is part of nifty_gridder. * * nifty_gridder is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * nifty_gridder is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with nifty_fridder; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include #include #ifdef __GNUC__ #define RESTRICT __restrict__ #define NOINLINE __attribute__ ((noinline)) #else #define RESTRICT #endif using namespace std; namespace py = pybind11; namespace { // // basic utilities // void myassert(bool cond, const char *msg) { if (cond) return; throw runtime_error(msg); } /*! Returns the largest integer \a n that fulfills \a 2^n<=arg. */ template inline int ilog2 (I arg) { #ifdef __GNUC__ if (arg==0) return 0; if (sizeof(I)==sizeof(int)) return 8*sizeof(int)-1-__builtin_clz(arg); if (sizeof(I)==sizeof(long)) return 8*sizeof(long)-1-__builtin_clzl(arg); if (sizeof(I)==sizeof(long long)) return 8*sizeof(long long)-1-__builtin_clzll(arg); #endif int res=0; while (arg > 0xFFFF) { res+=16; arg>>=16; } if (arg > 0x00FF) { res|=8; arg>>=8; } if (arg > 0x000F) { res|=4; arg>>=4; } if (arg > 0x0003) { res|=2; arg>>=2; } if (arg > 0x0001) { res|=1; } return res; } /*! Returns the number of bits needed to represent \a arg different values. \a arg must be >=1. */ template inline int bits_needed (I arg) { myassert(arg>=1, "argument must be >=1"); if (arg==1) return 0; return ilog2(arg-1)+1; } /*! Returns the remainder of the division \a v1/v2. The result is non-negative. \a v1 can be positive or negative; \a v2 must be positive. */ template inline T fmodulo (T v1, T v2) { if (v1>=0) return (v1>8)&0xff])<<16) | ((I)(utab[y&0xff])<<1) | ((I)(utab[(y>>8)&0xff])<<17); } static const uint8_t m2p2D_1[4][4] = { { 4, 1, 11, 2},{0,15, 5, 6},{10,9,3,12},{14,7,13,8}}; static uint8_t m2p2D_3[4][64]; static const uint8_t p2m2D_1[4][4] = { { 4, 1, 3, 10},{0,6,7,13},{15,9,8,2},{11,14,12,5}}; static uint8_t p2m2D_3[4][64]; static int peano2d_done=0; static void init_peano2d (void) { peano2d_done=1; for (int d=0; d<4; ++d) for (uint32_t p=0; p<64; ++p) { unsigned rot = d; uint32_t v = p<<26; uint32_t res = 0; for (int i=0; i<3; ++i) { unsigned tab=m2p2D_1[rot][v>>30]; v<<=2; res = (res<<2) | (tab&0x3); rot = tab>>2; } m2p2D_3[d][p]=res|(rot<<6); } for (int d=0; d<4; ++d) for (uint32_t p=0; p<64; ++p) { unsigned rot = d; uint32_t v = p<<26; uint32_t res = 0; for (int i=0; i<3; ++i) { unsigned tab=p2m2D_1[rot][v>>30]; v<<=2; res = (res<<2) | (tab&0x3); rot = tab>>2; } p2m2D_3[d][p]=res|(rot<<6); } } uint32_t morton2peano2D_32(uint32_t v, int bits) { if (!peano2d_done) init_peano2d(); unsigned rot = 0; uint32_t res = 0; v<<=32-(bits<<1); int i=0; for (; i>26]; v<<=6; res = (res<<6) | (tab&0x3fu); rot = tab>>6; } for (; i>30]; v<<=2; res = (res<<2) | (tab&0x3); rot = tab>>2; } return res; } // // Utilities for indirect sorting (argsort) // template class IdxComp__ { private: It begin; Comp comp; public: IdxComp__ (It begin_, Comp comp_): begin(begin_), comp(comp_) {} bool operator() (std::size_t a, std::size_t b) const { return comp(*(begin+a),*(begin+b)); } }; /*! Performs an indirect sort on the supplied iterator range and returns in \a idx a \a vector containing the indices of the smallest, second smallest, third smallest, etc. element, according to \a comp. */ template inline void buildIndex (It begin, It end, std::vector &idx, Comp comp) { using namespace std; T2 num=end-begin; idx.resize(num); for (T2 i=0; i(begin,comp)); } /*! Performs an indirect sort on the supplied iterator range and returns in \a idx a \a vector containing the indices of the smallest, second smallest, third smallest, etc. element. */ template inline void buildIndex (It begin, It end, std::vector &idx) { using namespace std; typedef typename iterator_traits::value_type T; buildIndex(begin,end,idx,less()); } // // Utilities for Gauss-Legendre quadrature // static inline double one_minus_x2 (double x) { return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; } void legendre_prep(int n, vector &x, vector &w) { constexpr double pi = 3.141592653589793238462643383279502884197; constexpr double eps = 3e-14; int m = (n+1)>>1; x.resize(m); w.resize(m); double t0 = 1 - (1-1./n) / (8.*n*n); double t1 = 1./(4.*n+2.); #pragma omp parallel { int i; #pragma omp for schedule(dynamic,100) for (i=1; i<=m; ++i) { double x0 = cos(pi * ((i<<2)-1) * t1) * t0; int dobreak=0; int j=0; double dpdx; while(1) { double P_1 = 1.0; double P0 = x0; double dx, x1; for (int k=2; k<=n; k++) { double P_2 = P_1; P_1 = P0; // P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k; P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2); } dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0); /* Newton step */ x1 = x0 - P0/dpdx; dx = x0-x1; x0 = x1; if (dobreak) break; if (abs(dx)<=eps) dobreak=1; if (++j>=100) throw runtime_error("convergence problem"); } x[m-i] = x0; w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx); } } // end of parallel region } // // Start of real gridder functionality // template using pyarr = py::array_t; template using pyarr_c = py::array_t; size_t get_w(double epsilon) { static const vector maxmaperr { 1e8, 0.32, 0.021, 6.2e-4, 1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17, 6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 }; double epssq = epsilon*epsilon; for (size_t i=1; imaxmaperr[i]) return i; throw runtime_error("requested epsilon too small - minimum is 2e-13"); } pyarr_c complex2hartley (const pyarr_c> &grid_) { myassert(grid_.ndim()==2, "grid array must be 2D"); int nu = grid_.shape(0), nv = grid_.shape(1); auto grid = grid_.data(); int odim[] = {nu,nv}; pyarr_c res(odim); auto grid2 = res.mutable_data(); for (int u=0; u> hartley2complex (const pyarr_c &grid_) { myassert(grid_.ndim()==2, "grid array must be 2D"); int nu = grid_.shape(0), nv = grid_.shape(1); auto grid = grid_.data(); int odim[] = {nu,nv}; pyarr_c> res(odim); auto grid2 = res.mutable_data(); for (int u=0; u(v1+v2, v1-v2); } } return res; } /* Compute correction factors for the ES gridding kernel This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */ pyarr_c correction_factors (size_t n, size_t nval, size_t w) { constexpr double pi = 3.141592653589793238462643383279502884197; auto beta = 2.3*w; auto p = int(1.5*w+2); double alpha = pi*w/n; vector x, wgt; legendre_prep(2*p,x,wgt); auto psi = x; for (auto &v:psi) v = exp(beta*(sqrt(1-v*v)-1.)); int odim[] = {int(nval)}; pyarr_c res(odim); auto val = res.mutable_data(); for (size_t k=0; k coord; vector scaling; size_t nrows; size_t channelbits, channelmask; public: Baselines(const vector &coord_, const vector &scaling_) : coord(coord_), scaling(scaling_), nrows(coord.size()/scaling.size()) { myassert(nrows*scaling.size()==coord.size(), "bad array dimensions"); channelbits = bits_needed(scaling.size()); channelmask = (size_t(1)< &coord_, const pyarr_c &scaling_) { myassert(coord_.ndim()==2, "coord array must be 2D"); myassert(coord_.shape(1)==2, "coord.shape[1] must be 2"); myassert(scaling_.ndim()==1, "scaling array must be 1D"); nrows = coord_.shape(0)/scaling_.shape(0); myassert(nrows*size_t(scaling_.shape(0))==size_t(coord_.shape(0)), "bad array dimensions"); scaling.resize(scaling_.shape(0)); for (size_t i=0; i getIndices() const { int odim[] = {int(nrows*scaling.size())}; pyarr_c res(odim); auto odata = res.mutable_data(); for (size_t i=0; i>channelbits]*scaling[index&channelmask]; } size_t Nrows() const { return nrows; } size_t Nchannels() const { return scaling.size(); } size_t irow(uint32_t index) const { return index>>channelbits; } size_t ichannel(uint32_t index) const { return index&channelmask; } size_t offset(uint32_t index) const { return (index>>channelbits)*scaling.size() + (index&channelmask); } }; class GridderConfig { private: size_t nx_dirty, ny_dirty; double ucorr, vcorr; size_t w, nsafe, nu, nv; size_t peano_level; public: GridderConfig(size_t nxdirty, size_t nydirty, double epsilon, double urange, double vrange) : nx_dirty(nxdirty), ny_dirty(nydirty), ucorr(1./urange), vcorr(1./vrange), w(get_w(epsilon)), nsafe((w+1)/2), nu(max(2*nsafe,2*nx_dirty)), nv(max(2*nsafe,2*ny_dirty)), peano_level(bits_needed(max(nu, nv))) { myassert((nx_dirty&1)==0, "nx_dirty must be even"); myassert((ny_dirty&1)==0, "ny_dirty must be even"); myassert(epsilon>0, "epsilon must be positive"); myassert(urange>0, "urange must be positive"); myassert(vrange>0, "vrange must be positive"); } size_t Nu() const { return nu; } size_t Nv() const { return nv; } size_t W() const { return w; } size_t coord2peano(const UV &coord) const { double u=fmodulo(coord.u*ucorr, 1.)*nu, v=fmodulo(coord.v*vcorr, 1.)*nv; auto iu = min(nu-1, size_t(u)); auto iv = min(nv-1, size_t(v)); return morton2peano2D_32(coord2morton2D_32(iu,iv),peano_level); } pyarr_c reorderIndices (const pyarr_c &idx, const Baselines &baselines) const { myassert(idx.ndim()==1, "need 1D index array"); vector peano(idx.shape(0)); auto pidx = idx.data(); for (size_t i=0; i newind; buildIndex(peano.begin(), peano.end(), newind); peano=vector(); // deallocate int odim[] = {int(idx.shape(0))}; pyarr_c res(odim); auto iout = res.mutable_data(); for (int i=0; i U_corrections() const { return correction_factors(nu, nx_dirty/2+1, w); } pyarr_c V_corrections() const { return correction_factors(nv, ny_dirty/2+1, w); } }; class Helper2 { protected: int nu, nv; public: int w; double beta; protected: int nsafe, su; public: int sv; vector kernel; int iu0, iv0; // start index of the current visibility int bu0, bv0; // start index of the current buffer void NOINLINE update(double u_in, double v_in) { auto u = fmodulo(u_in, 1.)*nu; iu0 = int(u-w*0.5 + 1 + nu) - nu; if (iu0+w>nu+nsafe) iu0 = nu+nsafe-w; auto v = fmodulo(v_in, 1.)*nv; iv0 = int(v-w*0.5 + 1 + nv) - nv; if (iv0+w>nv+nsafe) iv0 = nv+nsafe-w; double xw=2./w; for (int i=0; ibu0+su) || (iv0+w>bv0+sv); } void update_position() { bu0=max(-nsafe, min(nu+nsafe-su, iu0+nsafe-su/2)); bv0=max(-nsafe, min(nv+nsafe-sv, iv0+nsafe-sv/2)); } protected: Helper2(int nu_, int nv_, int w_) : nu(nu_), nv(nv_), w(w_), beta(2.3*w), nsafe((w+1)/2), su(min(max(2*nsafe,80), nu)), sv(min(max(2*nsafe,80), nv)), kernel(2*w), bu0(-1000000), bv0(-1000000) { if (min(nu,nv)<2*nsafe) throw runtime_error("grid dimensions too small"); } }; class WriteHelper2: public Helper2 { private: vector> data; complex *grid; void dump() { if (bu0<-nsafe) return; // nothing written into buffer yet #pragma omp critical { int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; } } } public: complex *p0; WriteHelper2(int nu_, int nv_, int w, complex *grid_) : Helper2(nu_, nv_, w), data(su*sv, 0.), grid(grid_) {} ~WriteHelper2() { dump(); } void prep_write(double u_in, double v_in) { update(u_in, v_in); if (need_to_move()) { dump(); update_position(); fill(data.begin(), data.end(), 0.); } p0 = data.data() + sv*(iu0-bu0) + iv0-bv0; } }; class ReadHelper2: public Helper2 { private: vector> data; const complex *grid; void load() { int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; for (int iu=0; iu=nv) idxv=0; } if (++idxu>=nu) idxu=0; } } public: const complex *p0; ReadHelper2(int nu_, int nv_, int w_, const complex *grid_) : Helper2(nu_, nv_, w_), data(su*sv,0.), grid(grid_), p0(nullptr) {} void prep_read(double u_in, double v_in) { update(u_in, v_in); if (need_to_move()) { update_position(); load(); } p0 = data.data() + sv*(iu0-bu0) + iv0-bv0; } }; pyarr_c ms2grid(const Baselines &baselines, const GridderConfig &gconf, const pyarr_c &idx_, const pyarr_c> &data_) { myassert(idx_.ndim()==1, "idx array must be 1D"); myassert(data_.ndim()==2, "data must be 2D"); auto data=data_.data(); myassert(data_.shape(0)==int(baselines.Nrows()), "bad data dimension"); myassert(data_.shape(1)==int(baselines.Nchannels()), "bad data dimension"); int nvis = idx_.shape(0); auto idx = idx_.data(); size_t nu=gconf.Nu(), nv=gconf.Nv(); int odim[] = {int(nu), int(nv)}; pyarr_c> res(odim); auto grid = res.mutable_data(); for (size_t i=0; i tmp(v*ku[cu]); for (int cv=0; cv> grid2ms(const Baselines &baselines, const GridderConfig &gconf, const pyarr_c &idx_, const pyarr_c &grid0_) { size_t nu=gconf.Nu(), nv=gconf.Nv(); myassert(idx_.ndim()==1, "idx array must be 1D"); auto grid_ = hartley2complex(grid0_); auto grid = grid_.data(); myassert(grid_.ndim()==2, "data must be 2D"); myassert(grid_.shape(0)==int(nu), "bad grid dimension"); myassert(grid_.shape(1)==int(nv), "bad grid dimension"); int nvis = idx_.shape(0); auto idx = idx_.data(); int odim[] = {nvis}; pyarr_c> res(odim); auto vis = res.mutable_data(); // Loop over sampling points #pragma omp parallel { ReadHelper2 hlp(nu, nv, gconf.W(), grid); double emb = exp(-2*hlp.beta); const double * RESTRICT ku = hlp.kernel.data(); const double * RESTRICT kv = hlp.kernel.data()+hlp.w; #pragma omp for schedule(dynamic,10000) for (int ipart=0; ipart r = 0.; auto * RESTRICT ptr = hlp.p0; int w = hlp.w; for (int cu=0; cu tmp(0.); for (int cv=0; cv (m, "Baselines") .def(py::init, pyarr_c>(), "coord"_a, "scaling"_a) .def("getIndices", &Baselines::getIndices); py::class_ (m, "GridderConfig") .def(py::init(),"nxdirty"_a, "nydirty"_a, "epsilon"_a, "urange"_a, "vrange"_a) .def("reorderIndices", &GridderConfig::reorderIndices, "idx"_a, "baselines"_a) .def("Nu", &GridderConfig::Nu) .def("Nv", &GridderConfig::Nv) .def("U_corrections", &GridderConfig::U_corrections) .def("V_corrections", &GridderConfig::V_corrections); m.def ("ms2grid",&ms2grid); m.def ("grid2ms",&grid2ms); }