Commit d7d34c48 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

Merge branch 'kernels' into 'ducc0'

More flexible gridding kernel support

See merge request !22
parents 758f5d3e e1417dac
Pipeline #78940 passed with stages
in 16 minutes and 52 seconds
......@@ -20,12 +20,12 @@ include src/ducc0/bindings/pybind_utils.h
include src/ducc0/math/cmplx.h
include src/ducc0/math/constants.h
include src/ducc0/math/es_kernel.h
include src/ducc0/math/fft1d.h
include src/ducc0/math/fft.h
include src/ducc0/math/geom_utils.cc
include src/ducc0/math/geom_utils.h
include src/ducc0/math/gl_integrator.h
include src/ducc0/math/gridding_kernel.h
include src/ducc0/math/math_utils.h
include src/ducc0/math/pointing.cc
include src/ducc0/math/pointing.h
......
......@@ -28,6 +28,7 @@
#include <cmath>
#include <vector>
#include <array>
#include <memory>
#include "ducc0/infra/error_handling.h"
#include "ducc0/math/fft.h"
......@@ -35,7 +36,7 @@
#include "ducc0/infra/useful_macros.h"
#include "ducc0/infra/mav.h"
#include "ducc0/infra/simd.h"
#include "ducc0/math/es_kernel.h"
#include "ducc0/math/gridding_kernel.h"
namespace ducc0 {
......@@ -121,15 +122,6 @@ template<typename T> void hartley2_2D(const mav<T,2> &in,
});
}
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
vector<double> correction_factors(size_t n, double ofactor, size_t nval, size_t supp,
size_t nthreads)
{
ES_Kernel kernel(supp, ofactor, nthreads);
return kernel.correction_factors(n, nval, nthreads);
}
using idx_t = uint32_t;
struct RowChan
......@@ -205,18 +197,25 @@ class Baselines
{ return ichan+(irow<<shift); }
};
class GridderConfig
template<typename T> class GridderConfig
{
protected:
size_t nx_dirty, ny_dirty, nu, nv;
double ofactor, eps, psx, psy;
double ofactor;
// FIXME: this should probably be done more cleanly
public:
shared_ptr<GriddingKernel<T>> krn;
protected:
double psx, psy;
size_t supp, nsafe;
double beta;
size_t nthreads;
double ushift, vshift;
int maxiu0, maxiv0;
template<typename T> complex<T> wscreen(T x, T y, T w, bool adjoint) const
complex<T> wscreen(T x, T y, T w, bool adjoint) const
{
constexpr T pi = T(3.141592653589793238462643383279502884197);
T tmp = 1-x-y;
......@@ -232,10 +231,9 @@ class GridderConfig
double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
: nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
eps(epsilon),
krn(selectKernel<T>(ofactor, epsilon)),
psx(pixsize_x), psy(pixsize_y),
supp(ES_Kernel::get_supp(epsilon, ofactor)), nsafe((supp+1)/2),
beta(ES_Kernel::get_beta(supp, ofactor)*supp),
supp(krn->support()), nsafe((supp+1)/2),
nthreads(nthreads_),
ushift(supp*(-0.5)+1+nu), vshift(supp*(-0.5)+1+nv),
maxiu0((nu+nsafe)-supp), maxiv0((nv+nsafe)-supp)
......@@ -249,8 +247,6 @@ class GridderConfig
MR_assert(epsilon>0, "epsilon must be positive");
MR_assert(pixsize_x>0, "pixsize_x must be positive");
MR_assert(pixsize_y>0, "pixsize_y must be positive");
MR_assert(ofactor>=1.175,
"oversampling factor too small (>=1.2 recommended)");
}
GridderConfig(size_t nxdirty, size_t nydirty,
double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
......@@ -259,23 +255,20 @@ class GridderConfig
pixsize_y, nthreads_) {}
size_t Nxdirty() const { return nx_dirty; }
size_t Nydirty() const { return ny_dirty; }
double Epsilon() const { return eps; }
double Pixsize_x() const { return psx; }
double Pixsize_y() const { return psy; }
size_t Nu() const { return nu; }
size_t Nv() const { return nv; }
size_t Supp() const { return supp; }
size_t Nsafe() const { return nsafe; }
double Beta() const { return beta; }
size_t Nthreads() const { return nthreads; }
double Ofactor() const{ return ofactor; }
template<typename T> void grid2dirty_post(mav<T,2> &tmav,
void grid2dirty_post(mav<T,2> &tmav,
mav<T,2> &dirty) const
{
checkShape(dirty.shape(), {nx_dirty, ny_dirty});
auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
auto cfu = krn->corfunc(nx_dirty/2+1, 1./nu, nthreads);
auto cfv = krn->corfunc(ny_dirty/2+1, 1./nv, nthreads);
execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
......@@ -295,7 +288,7 @@ class GridderConfig
}
});
}
template<typename T> void grid2dirty_post2(
void grid2dirty_post2(
mav<complex<T>,2> &tmav, mav<T,2> &dirty, T w) const
{
checkShape(dirty.shape(), {nx_dirty,ny_dirty});
......@@ -334,7 +327,7 @@ class GridderConfig
});
}
template<typename T> void grid2dirty(const mav<T,2> &grid,
void grid2dirty(const mav<T,2> &grid,
mav<T,2> &dirty) const
{
checkShape(grid.shape(), {nu,nv});
......@@ -343,7 +336,7 @@ class GridderConfig
grid2dirty_post(tmav, dirty);
}
template<typename T> void grid2dirty_c_overwrite_wscreen_add
void grid2dirty_c_overwrite_wscreen_add
(mav<complex<T>,2> &grid, mav<T,2> &dirty, T w) const
{
checkShape(grid.shape(), {nu,nv});
......@@ -352,13 +345,13 @@ class GridderConfig
grid2dirty_post2(grid, dirty, w);
}
template<typename T> void dirty2grid_pre(const mav<T,2> &dirty,
void dirty2grid_pre(const mav<T,2> &dirty,
mav<T,2> &grid) const
{
checkShape(dirty.shape(), {nx_dirty, ny_dirty});
checkShape(grid.shape(), {nu, nv});
auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
auto cfu = krn->corfunc(nx_dirty/2+1, 1./nu, nthreads);
auto cfv = krn->corfunc(ny_dirty/2+1, 1./nv, nthreads);
grid.fill(0);
execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
{
......@@ -377,7 +370,7 @@ class GridderConfig
}
});
}
template<typename T> void dirty2grid_pre2(const mav<T,2> &dirty,
void dirty2grid_pre2(const mav<T,2> &dirty,
mav<complex<T>,2> &grid, T w) const
{
checkShape(dirty.shape(), {nx_dirty, ny_dirty});
......@@ -419,14 +412,14 @@ class GridderConfig
});
}
template<typename T> void dirty2grid(const mav<T,2> &dirty,
void dirty2grid(const mav<T,2> &dirty,
mav<T,2> &grid) const
{
dirty2grid_pre(dirty, grid);
hartley2_2D<T>(grid, grid, nthreads);
}
template<typename T> void dirty2grid_c_wscreen(const mav<T,2> &dirty,
void dirty2grid_c_wscreen(const mav<T,2> &dirty,
mav<complex<T>,2> &grid, T w) const
{
dirty2grid_pre2(dirty, grid, w);
......@@ -442,7 +435,7 @@ class GridderConfig
iv0 = min(int(v+vshift)-int(nv), maxiv0);
}
template<typename T> void apply_wscreen(const mav<complex<T>,2> &dirty,
void apply_wscreen(const mav<complex<T>,2> &dirty,
mav<complex<T>,2> &dirty2, double w, bool adjoint) const
{
checkShape(dirty.shape(), {nx_dirty, ny_dirty});
......@@ -481,20 +474,18 @@ constexpr int logsquare=4;
template<typename T, typename T2=complex<T>> class Helper
{
private:
const GridderConfig &gconf;
const GridderConfig<T> &gconf;
int nu, nv, nsafe, supp;
T beta;
const T2 *grid_r;
T2 *grid_w;
int su, sv;
int iu0, iv0; // start index of the current visibility
int bu0, bv0; // start index of the current buffer
T wfac;
vector<T2> rbuf, wbuf;
bool do_w_gridding;
double w0, xdw;
size_t nexp;
size_t nvecs;
vector<std::mutex> &locks;
void dump() const
......@@ -535,18 +526,20 @@ template<typename T, typename T2=complex<T>> class Helper
}
public:
size_t nvec;
const T2 *p0r;
T2 *p0w;
static constexpr size_t vlen=native_simd<T>::size();
union {
union kbuf {
T scalar[64];
native_simd<T> simd[64/vlen];
} kernel;
};
kbuf buf;
Helper(const GridderConfig &gconf_, const T2 *grid_r_, T2 *grid_w_,
Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_,
vector<std::mutex> &locks_, double w0_=-1, double dw_=-1)
: gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
supp(gconf.Supp()), beta(T(gconf.Beta())), grid_r(grid_r_),
supp(gconf.Supp()), grid_r(grid_r_),
grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)),
bu0(-1000000), bv0(-1000000),
rbuf(su*sv*(grid_r!=nullptr),T(0)),
......@@ -554,33 +547,27 @@ template<typename T, typename T2=complex<T>> class Helper
do_w_gridding(dw_>0),
w0(w0_),
xdw(T(1)/dw_),
nexp(2*supp + do_w_gridding),
nvecs((nexp+vlen-1)/vlen),
locks(locks_)
locks(locks_),
nvec((supp+vlen-1)/vlen)
{
MR_assert(2*supp+1<=64, "support too large");
for (auto &v: kernel.simd) v=0;
MR_assert(supp<=32, "support too large");
}
~Helper() { if (grid_w) dump(); }
int lineJump() const { return sv; }
T Wfac() const { return kernel.scalar[2*supp]; }
T Wfac() const { return wfac; }
void prep(const UVW &in)
{
const auto &krn(*(gconf.krn));
double u, v;
gconf.getpix(in.u, in.v, u, v, iu0, iv0);
double xsupp=2./supp;
double x0 = xsupp*(iu0-u);
double y0 = xsupp*(iv0-v);
for (int i=0; i<supp; ++i)
{
kernel.scalar[i ] = T(x0+i*xsupp);
kernel.scalar[i+supp] = T(y0+i*xsupp);
}
krn.eval(T(x0), &buf.simd[0]);
krn.eval(T(y0), &buf.simd[nvec]);
if (do_w_gridding)
kernel.scalar[2*supp] = T(xdw*xsupp*abs(w0-in.w));
for (size_t i=0; i<nvecs; ++i)
kernel.simd[i] = esk(kernel.simd[i], beta);
wfac = krn.eval_single(T(xdw*xsupp*abs(w0-in.w)));
if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv))
{
if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); }
......@@ -666,7 +653,7 @@ template<class T, class T2> MsServ<T, T2> makeMsServ
{ return MsServ<T, T2>(baselines, idx, ms, wgt); }
template<typename T, typename Serv> void x2grid_c
(const GridderConfig &gconf, Serv &srv, mav<complex<T>,2> &grid,
(const GridderConfig<T> &gconf, Serv &srv, mav<complex<T>,2> &grid,
double w0=-1, double dw=-1)
{
checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
......@@ -681,8 +668,8 @@ template<typename T, typename Serv> void x2grid_c
{
Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
int jump = hlp.lineJump();
const T * DUCC0_RESTRICT ku = hlp.kernel.scalar;
const T * DUCC0_RESTRICT kv = hlp.kernel.scalar+supp;
const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
const T * DUCC0_RESTRICT kv = hlp.buf.scalar+hlp.vlen*hlp.nvec;
while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
{
......@@ -713,7 +700,7 @@ template<typename T, typename Serv> void x2grid_c
}
template<typename T, typename Serv> void grid2x_c
(const GridderConfig &gconf, const mav<complex<T>,2> &grid,
(const GridderConfig<T> &gconf, const mav<complex<T>,2> &grid,
Serv &srv, double w0=-1, double dw=-1)
{
checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
......@@ -729,8 +716,8 @@ template<typename T, typename Serv> void grid2x_c
{
Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
int jump = hlp.lineJump();
const T * DUCC0_RESTRICT ku = hlp.kernel.scalar;
const T * DUCC0_RESTRICT kv = hlp.kernel.scalar+supp;
const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
const T * DUCC0_RESTRICT kv = hlp.buf.scalar+hlp.vlen*hlp.nvec;
while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
{
......@@ -760,8 +747,8 @@ template<typename T, typename Serv> void grid2x_c
});
}
template<typename T> void apply_global_corrections(const GridderConfig &gconf,
mav<T,2> &dirty, const ES_Kernel &kernel, double dw, bool divide_by_n)
template<typename T> void apply_global_corrections(const GridderConfig<T> &gconf,
mav<T,2> &dirty, double dw, bool divide_by_n)
{
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
......@@ -770,10 +757,8 @@ template<typename T> void apply_global_corrections(const GridderConfig &gconf,
auto psy=gconf.Pixsize_y();
double x0 = -0.5*nx_dirty*psx,
y0 = -0.5*ny_dirty*psy;
auto cfu = correction_factors(gconf.Nu(), gconf.Ofactor(),
nx_dirty/2+1, gconf.Supp(), nthreads);
auto cfv = correction_factors(gconf.Nv(), gconf.Ofactor(),
ny_dirty/2+1, gconf.Supp(), nthreads);
auto cfu = gconf.krn->corfunc(nx_dirty/2+1, 1./gconf.Nu(), nthreads);
auto cfv = gconf.krn->corfunc(ny_dirty/2+1, 1./gconf.Nv(), nthreads);
execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
......@@ -789,7 +774,7 @@ template<typename T> void apply_global_corrections(const GridderConfig &gconf,
if (tmp>=0)
{
auto nm1 = (-fx-fy)/(sqrt(tmp)+1); // accurate form of sqrt(1-x-y)-1
fct = T(kernel.corfac(nm1*dw));
fct = T(gconf.krn->corfunc(nm1*dw));
if (divide_by_n)
fct /= nm1+1;
}
......@@ -800,7 +785,7 @@ template<typename T> void apply_global_corrections(const GridderConfig &gconf,
else
{
auto nm1 = sqrt(-tmp)-1;
fct = T(kernel.corfac(nm1*dw));
fct = T(gconf.krn->corfunc(nm1*dw));
}
}
fct *= T(cfu[nx_dirty/2-i]*cfv[ny_dirty/2-j]);
......@@ -819,7 +804,7 @@ template<typename T> void apply_global_corrections(const GridderConfig &gconf,
});
}
template<typename Serv> class WgridHelper
template<typename T, typename Serv> class WgridHelper
{
private:
Serv &srv;
......@@ -846,11 +831,11 @@ template<typename Serv> class WgridHelper
}
}
template<typename T> static void update_idx(vector<T> &v, const vector<T> &add,
const vector<T> &del)
template<typename T2> static void update_idx(vector<T2> &v, const vector<T2> &add,
const vector<T2> &del)
{
MR_assert(v.size()>=del.size(), "must not happen");
vector<T> res;
vector<T2> res;
res.reserve((v.size()+add.size())-del.size());
auto iin=v.begin(), ein=v.end();
auto iadd=add.begin(), eadd=add.end();
......@@ -873,8 +858,8 @@ template<typename Serv> class WgridHelper
}
public:
WgridHelper(const GridderConfig &gconf, Serv &srv_, size_t verbosity_)
: srv(srv_), verbosity(verbosity_), curplane(-1)
WgridHelper(const GridderConfig<T> &gconf, Serv &srv_, size_t verbosity_)
: srv(srv_), supp(gconf.Supp()), verbosity(verbosity_), curplane(-1)
{
size_t nvis = srv.Nvis();
size_t nthreads = gconf.Nthreads();
......@@ -891,13 +876,8 @@ template<typename Serv> class WgridHelper
if (x0*x0+y0*y0>1.)
nmin = -sqrt(abs(1.-x0*x0-y0*y0))-1.;
dw = 0.25/abs(nmin);
nplanes = size_t((wmax-wmin)/dw+2);
dw = (1.+1e-13)*(wmax-wmin)/(nplanes-1);
supp = gconf.Supp();
wmin -= (0.5*supp-1)*dw;
wmax += (0.5*supp-1)*dw;
nplanes += supp-2;
nplanes = size_t((wmax-wmin)/dw+supp);
wmin = (wmin+wmax)*0.5 - 0.5*(nplanes-1)*dw;
if (verbosity>0) cout << "Kernel support: " << supp << endl;
if (verbosity>0) cout << "nplanes: " << nplanes << endl;
......@@ -950,14 +930,13 @@ template<typename Serv> class WgridHelper
};
template<typename T, typename Serv> void x2dirty(
const GridderConfig &gconf, Serv &srv, mav<T,2> &dirty,
const GridderConfig<T> &gconf, Serv &srv, mav<T,2> &dirty,
bool do_wstacking, size_t verbosity)
{
if (do_wstacking)
{
size_t nthreads = gconf.Nthreads();
if (verbosity>0) cout << "Gridding using improved w-stacking" << endl;
WgridHelper<Serv> hlp(gconf, srv, verbosity);
WgridHelper<T, Serv> hlp(gconf, srv, verbosity);
double dw = hlp.DW();
dirty.fill(0);
mav<complex<T>,2> grid({gconf.Nu(),gconf.Nv()});
......@@ -970,7 +949,7 @@ template<typename T, typename Serv> void x2dirty(
gconf.grid2dirty_c_overwrite_wscreen_add(grid, dirty, T(hlp.W()));
}
// correct for w gridding etc.
apply_global_corrections(gconf, dirty, ES_Kernel(gconf.Supp(), gconf.Ofactor(), nthreads), dw, true);
apply_global_corrections(gconf, dirty, dw, true);
}
else
{
......@@ -989,22 +968,21 @@ template<typename T, typename Serv> void x2dirty(
}
template<typename T, typename Serv> void dirty2x(
const GridderConfig &gconf, const mav<T,2> &dirty,
const GridderConfig<T> &gconf, const mav<T,2> &dirty,
Serv &srv, bool do_wstacking, size_t verbosity)
{
if (do_wstacking)
{
size_t nx_dirty=gconf.Nxdirty(), ny_dirty=gconf.Nydirty();
size_t nthreads = gconf.Nthreads();
if (verbosity>0) cout << "Degridding using improved w-stacking" << endl;
WgridHelper<Serv> hlp(gconf, srv, verbosity);
WgridHelper<T, Serv> hlp(gconf, srv, verbosity);
double dw = hlp.DW();
mav<T,2> tdirty({nx_dirty,ny_dirty});
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
tdirty.v(i,j) = dirty(i,j);
// correct for w gridding etc.
apply_global_corrections(gconf, tdirty, ES_Kernel(gconf.Supp(), gconf.Ofactor(), nthreads), dw, true);
apply_global_corrections(gconf, tdirty, dw, true);
mav<complex<T>,2> grid({gconf.Nu(),gconf.Nv()});
while(hlp.advance()) // iterate over w planes
{
......@@ -1040,7 +1018,7 @@ void calc_share(size_t nshares, size_t myshare, size_t nwork, size_t &lo,
template<typename T> vector<idx_t> getWgtIndices(const Baselines &baselines,
const GridderConfig &gconf, const mav<T,2> &wgt,
const GridderConfig<T> &gconf, const mav<T,2> &wgt,
const mav<complex<T>,2> &ms)
{
size_t nrow=baselines.Nrows(),
......@@ -1092,7 +1070,9 @@ template<typename T> void ms2dirty(const mav<double,2> &uvw,
bool negate_v=false)
{
Baselines baselines(uvw, freq, negate_v);
GridderConfig gconf(dirty.shape(0), dirty.shape(1), nu, nv, epsilon, pixsize_x, pixsize_y, nthreads);
// adjust for increased error when gridding in 2 or 3 dimensions
epsilon /= do_wstacking ? 3 : 2;
GridderConfig<T> gconf(dirty.shape(0), dirty.shape(1), nu, nv, epsilon, pixsize_x, pixsize_y, nthreads);
auto idx = getWgtIndices(baselines, gconf, wgt, ms);
auto idx2 = mav<idx_t,1>(idx.data(),{idx.size()});
auto serv = makeMsServ(baselines,idx2,ms,wgt);
......@@ -1106,7 +1086,9 @@ template<typename T> void dirty2ms(const mav<double,2> &uvw,
size_t verbosity, bool negate_v=false)
{
Baselines baselines(uvw, freq, negate_v);
GridderConfig gconf(dirty.shape(0), dirty.shape(1), nu, nv, epsilon, pixsize_x, pixsize_y, nthreads);
// adjust for increased error when gridding in 2 or 3 dimensions
epsilon /= do_wstacking ? 3 : 2;
GridderConfig<T> gconf(dirty.shape(0), dirty.shape(1), nu, nv, epsilon, pixsize_x, pixsize_y, nthreads);
mav<complex<T>,2> null_ms(nullptr, {0,0}, true);
auto idx = getWgtIndices(baselines, gconf, wgt, null_ms);
auto idx2 = mav<idx_t,1>(idx.data(),{idx.size()});
......
......@@ -98,11 +98,11 @@ def test_adjointness_ms2dirty(nxdirty, nydirty, ofactor, nrow, nchan, epsilon,
@pmp('nxdirty', [16, 64])
@pmp('nydirty', [64])
@pmp('ofactor', [1.2, 1.4, 1.7, 2])
@pmp("nrow", (2, 27))
@pmp("nrow", (1, 2, 27))
@pmp("nchan", (1, 5))
@pmp("epsilon", (1e-2, 1e-3, 1e-4, 1e-7))
@pmp("singleprec", (False,))
@pmp("wstacking", (True,))
@pmp("wstacking", (False, True))
@pmp("use_wgt", (True,))
@pmp("nthreads", (1, 2))
@pmp("fov", (1., 20.))
......
......@@ -264,7 +264,6 @@ void add_totalconvolve(py::module &msup)
.def ("deinterpol", &inter_f::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
.def ("getSlm", &inter_f::pygetSlm, getSlm_DS, "beam"_a)
.def ("support", &inter_f::support);
m.def("epsilon_guess", &epsilon_guess, "support"_a, "ofactor"_a);
}
}
......
......@@ -30,7 +30,7 @@
#include <cmath>
#include "ducc0/math/constants.h"
#include "ducc0/math/gl_integrator.h"
#include "ducc0/math/es_kernel.h"
#include "ducc0/math/gridding_kernel.h"
#include "ducc0/infra/mav.h"
#include "ducc0/infra/simd.h"
#include "ducc0/sharp/sharp.h"
......@@ -142,8 +142,8 @@ template<typename T> class Interpolator
size_t lmax, kmax, nphi0, ntheta0, nphi, ntheta;
int nthreads;
T ofactor;
shared_ptr<GriddingKernel<T>> kernel;
size_t supp;
ES_Kernel kernel;
size_t ncomp;
#ifdef SIMD_INTERPOL
mav<native_simd<T>,4> scube;
......@@ -166,7 +166,7 @@ template<typename T> class Interpolator
}
for (size_t j=0; j<nphi0; ++j)
tmp.v(ntheta0-1,j) = arr(ntheta0-1,j);
auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
auto fct = kernel->corfunc(nphi0/2+1, 1./nphi, nthreads);
vector<T> k2(fct.size());
for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
fmav<T> ftmp(tmp);
......@@ -180,7 +180,7 @@ template<typename T> class Interpolator
{
T sfct = (spin&1) ? -1 : 1;
mav<T,2> tmp({nphi,nphi0});
auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
auto fct = kernel->corfunc(nphi0/2+1, 1./nphi, nthreads);
vector<T> k2(fct.size());
for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
fmav<T> farr(arr);
......@@ -207,7 +207,6 @@ template<typename T> class Interpolator
vector<size_t> getIdx(const mav<T,2> &ptg) const
{
vector<size_t> idx(ptg.shape(0));
constexpr size_t cellsize=16;
size_t nct = ntheta/cellsize+1,
ncp = nphi/cellsize+1;
......@@ -219,6 +218,7 @@ template<typename T> class Interpolator
// MR_assert((itheta<nct)&&(iphi<ncp), "oops");
mapper[itheta*ncp+iphi].push_back(i);
}
vector<size_t> idx(ptg.shape(0));
size_t cnt=0;
for (auto &vec: mapper)
{
......@@ -243,8 +243,8 @@ template<typename T> class Interpolator
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(T(nphi)/(2*lmax+1)),
supp(ES_Kernel::get_supp(epsilon, ofactor)),
kernel(supp, ofactor, nthreads),
kernel(selectKernel<T>(ofactor, 0.5*epsilon)),
supp(kernel->support()),
ncomp(separate ? slm.size() : 1),
#ifdef SIMD_INTERPOL
scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
......@@ -359,8 +359,8 @@ template<typename T> class Interpolator
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(T(nphi)/(2*lmax+1)),
supp(ES_Kernel::get_supp(epsilon, ofactor)),
kernel(supp, ofactor, nthreads),
kernel(selectKernel<T>(ofactor, 0.5*epsilon)),
supp(kernel->support()),
ncomp(ncomp_),