Commit 06123dc7 authored by Martin Reinecke

polishing

parent e06ca06f
Pipeline #78278 passed with stages in 16 minutes and 9 seconds
@@ -37,7 +37,6 @@
 #include "ducc0/infra/mav.h"
 #include "ducc0/infra/simd.h"
 #include "ducc0/math/gridding_kernel.h"
-#include "ducc0/math/least_misfit.h"
 namespace ducc0 {
@@ -231,7 +230,8 @@ template<typename T> class GridderConfig
       double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
       : nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
         ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
-        krn(make_shared<HornerKernel<T>>(selectLeastMisfitKernel(ofactor, epsilon))),
+        // krn(make_shared<HornerKernel<T>>(selectLeastMisfitKernel(ofactor, epsilon))),
+        krn(selectESKernel<T>(ofactor, epsilon)),
         psx(pixsize_x), psy(pixsize_y),
         supp(krn->support()), nsafe((supp+1)/2),
         nthreads(nthreads_),
@@ -880,6 +880,8 @@ template<typename T, typename Serv> class WgridHelper
       dw = 0.25/abs(nmin);
       nplanes = size_t((wmax-wmin)/dw+2);
       dw = (1.+1e-13)*(wmax-wmin)/(nplanes-1);
+      // paranoid fix
+      if (dw==0.) dw=1e-3;
       supp = gconf.Supp();
       wmin -= (0.5*supp-1)*dw;
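Note on the guarded quantity: if all visibilities share a single w value, then wmax == wmin, nplanes evaluates to 2 and dw = (1.+1e-13)*(wmax-wmin)/(nplanes-1) is exactly zero; the added check presumably keeps later arithmetic that divides by dw well defined.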
...
@@ -52,60 +52,60 @@ def explicit_gridder(uvw, freq, ms, wgt, nxdirty, nydirty, xpixsize, ypixsize,
                 res += (ms[row, chan]*wgt[row, chan]
                         * np.exp(2j*np.pi*phase)).real
     return res/n
-#
-#
-# @pmp("nxdirty", (30, 128))
-# @pmp("nydirty", (128, 250))
-# @pmp("ofactor", (1.2, 1.5, 1.7, 2.0))
-# @pmp("nrow", (2, 27))
-# @pmp("nchan", (1, 5))
-# @pmp("epsilon", (1e-1, 1e-3, 1e-5))
-# @pmp("singleprec", (True, False))
-# @pmp("wstacking", (True, False))
-# @pmp("use_wgt", (True, False))
-# @pmp("nthreads", (1, 2))
-# def test_adjointness_ms2dirty(nxdirty, nydirty, ofactor, nrow, nchan, epsilon,
-#                               singleprec, wstacking, use_wgt, nthreads):
-#     if singleprec and epsilon < 5e-5:
-#         return
-#     rng = np.random.default_rng(42)
-#     pixsizex = np.pi/180/60/nxdirty*0.2398
-#     pixsizey = np.pi/180/60/nxdirty
-#     speedoflight, f0 = 299792458., 1e9
-#     freq = f0 + np.arange(nchan)*(f0/nchan)
-#     uvw = (rng.random((nrow, 3))-0.5)/(pixsizey*f0/speedoflight)
-#     ms = rng.random((nrow, nchan))-0.5 + 1j*(rng.random((nrow, nchan))-0.5)
-#     wgt = rng.random((nrow, nchan)) if use_wgt else None
-#     dirty = rng.random((nxdirty, nydirty))-0.5
-#     nu, nv = int(nxdirty*ofactor)+1, int(nydirty*ofactor)+1
-#     if nu & 1:
-#         nu += 1
-#     if nv & 1:
-#         nv += 1
-#     if singleprec:
-#         ms = ms.astype("c8")
-#         dirty = dirty.astype("f4")
-#         if wgt is not None:
-#             wgt = wgt.astype("f4")
-#     dirty2 = ng.ms2dirty(uvw, freq, ms, wgt, nxdirty, nydirty, pixsizex,
-#                          pixsizey, nu, nv, epsilon, wstacking, nthreads, 0).astype("f8")
-#     ms2 = ng.dirty2ms(uvw, freq, dirty, wgt, pixsizex, pixsizey, nu, nv, epsilon,
-#                       wstacking, nthreads, 0).astype("c16")
-#     tol = 5e-4 if singleprec else 5e-11
-#     assert_allclose(np.vdot(ms, ms2).real, np.vdot(dirty2, dirty), rtol=tol)
-@pmp('nxdirty', [16])
+@pmp("nxdirty", (30, 128))
@pmp("nydirty", (128, 250))
@pmp("ofactor", (1.2, 1.5, 1.7, 2.0))
@pmp("nrow", (2, 27))
@pmp("nchan", (1, 5))
@pmp("epsilon", (1e-1, 1e-3, 1e-5))
@pmp("singleprec", (True, False))
@pmp("wstacking", (True, False))
@pmp("use_wgt", (True, False))
@pmp("nthreads", (1, 2))
def test_adjointness_ms2dirty(nxdirty, nydirty, ofactor, nrow, nchan, epsilon,
singleprec, wstacking, use_wgt, nthreads):
if singleprec and epsilon < 5e-5:
return
rng = np.random.default_rng(42)
pixsizex = np.pi/180/60/nxdirty*0.2398
pixsizey = np.pi/180/60/nxdirty
speedoflight, f0 = 299792458., 1e9
freq = f0 + np.arange(nchan)*(f0/nchan)
uvw = (rng.random((nrow, 3))-0.5)/(pixsizey*f0/speedoflight)
ms = rng.random((nrow, nchan))-0.5 + 1j*(rng.random((nrow, nchan))-0.5)
wgt = rng.random((nrow, nchan)) if use_wgt else None
dirty = rng.random((nxdirty, nydirty))-0.5
nu, nv = int(nxdirty*ofactor)+1, int(nydirty*ofactor)+1
if nu & 1:
nu += 1
if nv & 1:
nv += 1
if singleprec:
ms = ms.astype("c8")
dirty = dirty.astype("f4")
if wgt is not None:
wgt = wgt.astype("f4")
dirty2 = ng.ms2dirty(uvw, freq, ms, wgt, nxdirty, nydirty, pixsizex,
pixsizey, nu, nv, epsilon, wstacking, nthreads, 0).astype("f8")
ms2 = ng.dirty2ms(uvw, freq, dirty, wgt, pixsizex, pixsizey, nu, nv, epsilon,
wstacking, nthreads, 0).astype("c16")
tol = 5e-4 if singleprec else 5e-11
assert_allclose(np.vdot(ms, ms2).real, np.vdot(dirty2, dirty), rtol=tol)
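For reference, the final assertion is the adjointness relation between the two operators under test: writing A for the linear map implemented by dirty2ms and A^H for the one implemented by ms2dirty, the test checks Re<ms, A*dirty> == <A^H*ms, dirty>, with <a, b> = sum_k conj(a_k)*b_k as computed by np.vdot. This is just a restatement of the assertion above, not additional test logic.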
+@pmp('nxdirty', [16, 64])
 @pmp('nydirty', [64])
-@pmp('ofactor', [1.2, 2])
-@pmp("nrow", (2,))
-@pmp("nchan", (1,))
-@pmp("epsilon", ( 1e-2, 1e-4, 1e-7, 1e-9, 1e-12))
+@pmp('ofactor', [1.2, 1.4, 1.7, 2])
+@pmp("nrow", (2, 27))
+@pmp("nchan", (1, 5))
+@pmp("epsilon", (1e-2, 1e-3, 1e-4, 1e-7))
 @pmp("singleprec", (False,))
-@pmp("wstacking", (False,))
-@pmp("use_wgt", (False,))
-@pmp("nthreads", (1, ))
-@pmp("fov", (1., ))
+@pmp("wstacking", (True,))
+@pmp("use_wgt", (True,))
+@pmp("nthreads", (1, 2))
+@pmp("fov", (1., 20.))
 def test_ms2dirty_against_wdft2(nxdirty, nydirty, ofactor, nrow, nchan, epsilon, singleprec, wstacking, use_wgt, fov, nthreads):
     if singleprec and epsilon < 5e-5:
         return
...
@@ -30,7 +30,7 @@
 #include <cmath>
 #include "ducc0/math/constants.h"
 #include "ducc0/math/gl_integrator.h"
-#include "ducc0/math/es_kernel.h"
+#include "ducc0/math/gridding_kernel.h"
 #include "ducc0/infra/mav.h"
 #include "ducc0/infra/simd.h"
 #include "ducc0/sharp/sharp.h"
@@ -143,7 +143,7 @@ template<typename T> class Interpolator
     int nthreads;
     T ofactor;
     size_t supp;
-    ES_Kernel kernel;
+    ES_Kernel<T> kernel;
     size_t ncomp;
 #ifdef SIMD_INTERPOL
     mav<native_simd<T>,4> scube;
@@ -166,7 +166,7 @@ template<typename T> class Interpolator
         }
       for (size_t j=0; j<nphi0; ++j)
         tmp.v(ntheta0-1,j) = arr(ntheta0-1,j);
-      auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
+      auto fct = kernel.corfunc(nphi0/2+1, 1./nphi, nthreads);
       vector<T> k2(fct.size());
       for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
       fmav<T> ftmp(tmp);
@@ -180,7 +180,7 @@ template<typename T> class Interpolator
       {
       T sfct = (spin&1) ? -1 : 1;
       mav<T,2> tmp({nphi,nphi0});
-      auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
+      auto fct = kernel.corfunc(nphi0/2+1, 1./nphi, nthreads);
       vector<T> k2(fct.size());
       for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
       fmav<T> farr(arr);
@@ -243,8 +243,8 @@ template<typename T> class Interpolator
       ntheta(nphi/2+1),
       nthreads(nthreads_),
       ofactor(T(nphi)/(2*lmax+1)),
-      supp(ES_Kernel::get_supp(epsilon, ofactor)),
-      kernel(supp, ofactor, nthreads),
+      supp(esk_support(epsilon, ofactor)),
+      kernel(supp, esk_beta(supp, ofactor)*supp),
       ncomp(separate ? slm.size() : 1),
 #ifdef SIMD_INTERPOL
       scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
@@ -359,8 +359,8 @@ template<typename T> class Interpolator
       ntheta(nphi/2+1),
       nthreads(nthreads_),
       ofactor(T(nphi)/(2*lmax+1)),
-      supp(ES_Kernel::get_supp(epsilon, ofactor)),
-      kernel(supp, ofactor, nthreads),
+      supp(esk_support(epsilon, ofactor)),
+      kernel(supp, esk_beta(supp, ofactor)*supp),
       ncomp(ncomp_),
 #ifdef SIMD_INTERPOL
       scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
@@ -441,11 +441,11 @@ template<typename T> class Interpolator
       union {
         native_simd<T> simd[64/vl];
         T scalar[64];
-        } kdata;
-      T *wt(kdata.scalar), *wp(kdata.scalar+supp);
-      size_t nvec = (2*supp+vl-1)/vl;
-      for (auto &v: kdata.simd) v=0;
-      MR_assert(2*supp<=64, "support too large");
+        } tbuf, pbuf;
+      T *wt(tbuf.scalar), *wp(pbuf.scalar);
+      for (auto &v: tbuf.simd) v=0;
+      for (auto &v: pbuf.simd) v=0;
+      MR_assert(supp<=64, "support too large");
       vector<T> psiarr(2*kmax+1);
 #ifdef SIMD_INTERPOL
       vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
@@ -456,14 +456,10 @@ template<typename T> class Interpolator
         size_t i=idx[ind];
         T f0=T(0.5*supp+ptg(i,0)*xdtheta);
         size_t i0 = size_t(f0+T(1));
-        for (size_t t=0; t<supp; ++t)
-          wt[t] = (t+i0-f0)*delta - 1;
+        kernel.eval((i0-f0)*delta-1, tbuf.simd);
         T f1=T(0.5)*supp+ptg(i,1)*xdphi;
         size_t i1 = size_t(f1+1.);
-        for (size_t t=0; t<supp; ++t)
-          wp[t] = (t+i1-f1)*delta - 1;
-        for (size_t t=0; t<nvec; ++t)
-          kdata.simd[t] = kernel(kdata.simd[t]);
+        kernel.eval((i1-f1)*delta-1, pbuf.simd);
         psiarr[0]=1.;
         double psi=ptg(i,2);
         double cpsi=cos(psi), spsi=sin(psi);
@@ -645,11 +641,11 @@ template<typename T> class Interpolator
       union {
         native_simd<T> simd[64/vl];
         T scalar[64];
-        } kdata;
-      T *wt(kdata.scalar), *wp(kdata.scalar+supp);
-      size_t nvec = (2*supp+vl-1)/vl;
-      for (auto &v: kdata.simd) v=0;
-      MR_assert(2*supp<=64, "support too large");
+        } tbuf, pbuf;
+      T *wt(tbuf.scalar), *wp(pbuf.scalar);
+      for (auto &v: tbuf.simd) v=0;
+      for (auto &v: pbuf.simd) v=0;
+      MR_assert(supp<=64, "support too large");
       vector<T> psiarr(2*kmax+1);
 #ifdef SIMD_INTERPOL
       vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
@@ -658,16 +654,12 @@ template<typename T> class Interpolator
       while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
         {
         size_t i=idx[ind];
-        T f0=T(0.5)*supp+ptg(i,0)*xdtheta;
-        size_t i0 = size_t(f0+1.);
-        for (size_t t=0; t<supp; ++t)
-          wt[t] = (t+i0-f0)*delta - 1;
+        T f0=T(0.5*supp+ptg(i,0)*xdtheta);
+        size_t i0 = size_t(f0+T(1));
+        kernel.eval((i0-f0)*delta-1, tbuf.simd);
         T f1=T(0.5)*supp+ptg(i,1)*xdphi;
         size_t i1 = size_t(f1+1.);
-        for (size_t t=0; t<supp; ++t)
-          wp[t] = (t+i1-f1)*delta - 1;
-        for (size_t t=0; t<nvec; ++t)
-          kdata.simd[t] = kernel(kdata.simd[t]);
+        kernel.eval((i1-f1)*delta-1, pbuf.simd);
         psiarr[0]=1.;
         double psi=ptg(i,2);
         double cpsi=cos(psi), spsi=sin(psi);
...
@@ -49,6 +49,7 @@ template<typename T> class exponator
   public:
     T operator()(T val) { return exp(val); }
   };
 template<typename T> native_simd<T> esk (native_simd<T> v, T beta)
   {
   auto tmp = (T(1)-v)*(T(1)+v);
@@ -59,110 +60,57 @@ template<typename T> native_simd<T> esk (native_simd<T> v, T beta)
   return res;
   }
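For context: esk() evaluates the "exponential of semicircle" kernel of Barnett et al. 2018 (arXiv:1808.06736, cited in the comment of the removed ES_Kernel class below), psi_beta(v) = exp(beta*(sqrt(1-v^2) - 1)) for |v| <= 1 and 0 otherwise; the (T(1)-v)*(T(1)+v) expression above prepares the 1-v^2 argument.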
-/* This class implements the "exponential of semicircle" gridding kernel
-   described in https://arxiv.org/abs/1808.06736 */
-class ES_Kernel
-  {
-  private:
-    double beta;
-    int p;
-    vector<double> x, wgt, psi;
-    size_t supp;
-  public:
-    ES_Kernel(size_t supp_, double ofactor, size_t nthreads)
-      : beta(get_beta(supp_,ofactor)*supp_),
-        p(int(1.5*supp_+2)), supp(supp_)
-      {
-      GL_Integrator integ(2*p,nthreads);
-      x = integ.coordsSymmetric();
-      wgt = integ.weightsSymmetric();
-      psi=x;
-      for (auto &v:psi)
-        v=operator()(v);
-      }
-    ES_Kernel(size_t supp_, size_t nthreads)
-      : ES_Kernel(supp_, 2., nthreads){}
-    template<typename T> T operator()(T v) const
-      { return esk(v, T(beta)); }
-    template<typename T> native_simd<T> operator()(native_simd<T> v) const
-      { return esk(v, T(beta)); }
-    /* Compute correction factors for the ES gridding kernel
-       This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
-    double corfac(double v) const
-      {
-      double tmp=0;
-      for (int i=0; i<p; ++i)
-        tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]);
-      return 2./(supp*tmp);
-      }
-    /* Compute correction factors for the ES gridding kernel
-       This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
-    vector<double> correction_factors(size_t n, size_t nval, size_t nthreads)
-      {
-      vector<double> res(nval);
-      double xn = 1./n;
-      execStatic(nval, nthreads, 0, [&](Scheduler &sched)
-        {
-        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
-          res[i] = corfac(i*xn);
-        });
-      return res;
-      }
-    static double get_beta(size_t supp, double ofactor=2)
-      {
-      MR_assert((supp>=2) && (supp<=15), "unsupported support size");
-      if (ofactor>=2)
-        {
-        static const array<double,16> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
-          2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
-        MR_assert(supp<opt_beta.size(), "bad support size");
-        return opt_beta[supp];
-        }
-      if (ofactor>=1.175)
-        {
-        // empirical, but pretty accurate approximation
-        static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
-        auto x0 = 1./(2*ofactor);
-        auto bcstrength=1.+(x0-0.25)*2.5;
-        return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
-        }
-      MR_fail("oversampling factor is too small");
-      }
-    static size_t get_supp(double epsilon, double ofactor=2)
-      {
-      double epssq = epsilon*epsilon;
-      if (ofactor>=2)
-        {
-        static const array<double,16> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
-          1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
-          2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
-        for (size_t i=2; i<maxmaperr.size(); ++i)
-          if (epssq>maxmaperr[i]) return i;
-        MR_fail("requested epsilon too small - minimum is 1e-13");
-        }
-      if (ofactor>=1.175)
-        {
-        for (size_t w=2; w<16; ++w)
-          {
-          auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
-          if (epssq>estimate) return w;
-          }
-        MR_fail("requested epsilon too small");
-        }
-      MR_fail("oversampling factor is too small");
-      }
-  };
+double esk_beta(size_t supp, double ofactor)
+  {
+  MR_assert((supp>=2) && (supp<=15), "unsupported support size");
+  if (ofactor>=2)
+    {
+    static const array<double,16> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
+      2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
+    MR_assert(supp<opt_beta.size(), "bad support size");
+    return opt_beta[supp];
+    }
+  if (ofactor>=1.175)
+    {
+    // empirical, but pretty accurate approximation
+    static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
+    auto x0 = 1./(2*ofactor);
+    auto bcstrength=1.+(x0-0.25)*2.5;
+    return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
+    }
+  MR_fail("oversampling factor is too small");
+  }
+size_t esk_support(double epsilon, double ofactor)
+  {
+  double epssq = epsilon*epsilon;
+  if (ofactor>=2)
+    {
+    static const array<double,16> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
+      1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
+      2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
+    for (size_t i=2; i<maxmaperr.size(); ++i)
+      if (epssq>maxmaperr[i]) return i;
+    MR_fail("requested epsilon too small - minimum is 1e-13");
+    }
+  if (ofactor>=1.175)
+    {
+    for (size_t w=2; w<16; ++w)
+      {
+      auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
+      if (epssq>estimate) return w;
+      }
+    MR_fail("requested epsilon too small");
+    }
+  MR_fail("oversampling factor is too small");
  }
 }
 using detail_es_kernel::esk;
-using detail_es_kernel::ES_Kernel;
+using detail_es_kernel::esk_beta;
+using detail_es_kernel::esk_support;
 }
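As a rough usage sketch (not part of the commit; it assumes the two helpers are reachable as ducc0::esk_support and ducc0::esk_beta, as the using-declarations above suggest, and the epsilon/ofactor values are illustrative), the pattern visible in the Interpolator constructors combines them like this:

#include <cstdio>
#include "ducc0/math/es_kernel.h"

int main()
  {
  double epsilon = 1e-7, ofactor = 1.5;                  // illustrative accuracy/oversampling
  auto supp = ducc0::esk_support(epsilon, ofactor);      // kernel support in grid cells
  double beta = ducc0::esk_beta(supp, ofactor) * supp;   // shape parameter handed to esk()
  printf("support = %zu, beta = %.4f\n", supp, beta);
  return 0;
  }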
...
@@ -26,6 +26,7 @@
 #include "ducc0/infra/simd.h"
 #include "ducc0/math/gl_integrator.h"
 #include "ducc0/math/least_misfit.h"
+#include "ducc0/math/es_kernel.h"
 namespace ducc0 {
@@ -109,22 +110,73 @@ template<typename T> class GriddingKernel
     virtual vector<double> corfunc(size_t n, double dx, int nthreads=1) const = 0;
   };
-class KernelCorrection
+class PiecewiseKernelCorrection
   {
   private:
+    static constexpr size_t ideg=10; // integration points per interval
+    static_assert((ideg&1)==0, "ideg must be even");
     vector<double> x, wgtpsi;
     size_t supp;
   public:
-    KernelCorrection(size_t W, const function<double(double)> &func)
+    PiecewiseKernelCorrection(size_t W, const function<double(double)> &func)
       : supp(W)
       {
-      size_t p = size_t(1.5*supp+2); // estimate; may need to be higher for arbitrary kernels
-      GL_Integrator integ(2*p, 1);
+      // we know that the kernel is piecewise smooth in all W sections but not
+      // necessarily at borders between sections. Therefore we integrate piece
+      // by piece.
+      GL_Integrator integ(ideg, 1);
+      auto xgl = integ.coords();
+      auto wgl = integ.weights();
+      x.resize((supp*ideg)/2);
+      wgtpsi.resize((supp*ideg)/2);
+      for (size_t i=0; i<x.size(); ++i)
+        {
+        size_t iiv = i/ideg;
+        x[i] = -1. + iiv*2./supp + (1.+xgl[i%ideg])/supp;
+        wgtpsi[i] = wgl[i%ideg]*func(x[i]);
+        }
+      }
+    /* Compute correction factors for gridding kernel
+       This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
+    double corfunc(double v) const
+      {
+      double tmp=0;
+      for (size_t i=0; i<x.size(); ++i)
+        tmp += wgtpsi[i]*cos(pi*supp*v*x[i]);
+      return 1./tmp;
+      }
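In plainer terms (a restatement of the two members above, not new functionality): the constructor tabulates ideg Gauss-Legendre nodes per kernel section, mapping the j-th section to x = -1 + 2*j/W + (1+xgl)/W and storing wgtpsi = wgl*psi(x); only the sections in [-1, 0] are tabulated, which suffices as long as psi is even, as the ES kernel is. The scalar corfunc(v) then returns 1 / sum_i wgtpsi[i]*cos(pi*W*v*x[i]), i.e. the reciprocal of a quadrature approximation proportional to the kernel's cosine transform, integral of psi(x)*cos(pi*W*v*x) over [-1, 1] (cf. Barnett et al. 2018, eqs. 3.8-3.10); any remaining constant factor appears to be absorbed by the callers, e.g. the fct[i]/nphi0 scaling in the Interpolator code above.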
+    /* Compute correction factors for gridding kernel
+       This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
+    vector<double> corfunc(size_t n, double dx, int nthreads=1) const
+      {
+      vector<double> res(n);
+      execStatic(n, nthreads, 0, [&](auto &sched)
+        {
+        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
+          res[i] = corfunc(i*dx);
+        });
+      return res;
+      }
+  };
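A minimal usage sketch for the class above (illustrative only: the ducc0:: namespace qualification, the support W = 8 and the shape parameter are assumptions, and the lambda simply mirrors the esk() formula from es_kernel.h):

#include <cmath>
#include <cstdio>
#include <functional>
#include "ducc0/math/gridding_kernel.h"   // assumed to provide PiecewiseKernelCorrection

int main()
  {
  std::size_t W = 8;        // kernel support in grid cells (illustrative)
  double beta = 2.3*W;      // ES shape parameter (illustrative value)
  // scalar "exponential of semicircle" kernel on [-1, 1]
  auto psi = [beta](double x) { return std::exp(beta*(std::sqrt((1.-x)*(1.+x))-1.)); };
  ducc0::PiecewiseKernelCorrection corr(W, psi);
  for (int k=0; k<4; ++k)   // correction factors at a few frequencies
    printf("corfunc(%d*0.01) = %e\n", k, corr.corfunc(k*0.01));
  return 0;
  }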