Commit 06123dc7 authored by Martin Reinecke

polishing

parent e06ca06f
Pipeline #78278 passed with stages in 16 minutes and 9 seconds
......@@ -37,7 +37,6 @@
#include "ducc0/infra/mav.h"
#include "ducc0/infra/simd.h"
#include "ducc0/math/gridding_kernel.h"
#include "ducc0/math/least_misfit.h"
namespace ducc0 {
......@@ -231,7 +230,8 @@ template<typename T> class GridderConfig
double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
: nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
krn(make_shared<HornerKernel<T>>(selectLeastMisfitKernel(ofactor, epsilon))),
// krn(make_shared<HornerKernel<T>>(selectLeastMisfitKernel(ofactor, epsilon))),
krn(selectESKernel<T>(ofactor, epsilon)),
psx(pixsize_x), psy(pixsize_y),
supp(krn->support()), nsafe((supp+1)/2),
nthreads(nthreads_),
......@@ -880,6 +880,8 @@ template<typename T, typename Serv> class WgridHelper
dw = 0.25/abs(nmin);
nplanes = size_t((wmax-wmin)/dw+2);
dw = (1.+1e-13)*(wmax-wmin)/(nplanes-1);
// paranoid fix
if (dw==0.) dw=1e-3;
supp = gconf.Supp();
wmin -= (0.5*supp-1)*dw;
......
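A minimal Python sketch of the w-plane spacing logic from the hunk above (wmin, wmax, nmin and supp are assumed inputs; this is an illustration, not the ducc0 implementation):

def wplane_spacing(wmin, wmax, nmin, supp):
    # initial plane spacing derived from the extreme n value, as in the hunk above
    dw = 0.25 / abs(nmin)
    # number of w planes needed to cover [wmin, wmax]
    nplanes = int((wmax - wmin) / dw + 2)
    # stretch the spacing minimally so that wmax falls safely inside the last plane
    dw = (1. + 1e-13) * (wmax - wmin) / (nplanes - 1)
    if dw == 0.:       # "paranoid fix": all w values coincide, avoid a zero spacing
        dw = 1e-3
    # shift the first plane down so the kernel support covers every w value
    wmin -= (0.5 * supp - 1) * dw
    return wmin, dw, nplanes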
......@@ -52,60 +52,60 @@ def explicit_gridder(uvw, freq, ms, wgt, nxdirty, nydirty, xpixsize, ypixsize,
res += (ms[row, chan]*wgt[row, chan]
* np.exp(2j*np.pi*phase)).real
return res/n
#
#
# @pmp("nxdirty", (30, 128))
# @pmp("nydirty", (128, 250))
# @pmp("ofactor", (1.2, 1.5, 1.7, 2.0))
# @pmp("nrow", (2, 27))
# @pmp("nchan", (1, 5))
# @pmp("epsilon", (1e-1, 1e-3, 1e-5))
# @pmp("singleprec", (True, False))
# @pmp("wstacking", (True, False))
# @pmp("use_wgt", (True, False))
# @pmp("nthreads", (1, 2))
# def test_adjointness_ms2dirty(nxdirty, nydirty, ofactor, nrow, nchan, epsilon,
# singleprec, wstacking, use_wgt, nthreads):
# if singleprec and epsilon < 5e-5:
# return
# rng = np.random.default_rng(42)
# pixsizex = np.pi/180/60/nxdirty*0.2398
# pixsizey = np.pi/180/60/nxdirty
# speedoflight, f0 = 299792458., 1e9
# freq = f0 + np.arange(nchan)*(f0/nchan)
# uvw = (rng.random((nrow, 3))-0.5)/(pixsizey*f0/speedoflight)
# ms = rng.random((nrow, nchan))-0.5 + 1j*(rng.random((nrow, nchan))-0.5)
# wgt = rng.random((nrow, nchan)) if use_wgt else None
# dirty = rng.random((nxdirty, nydirty))-0.5
# nu, nv = int(nxdirty*ofactor)+1, int(nydirty*ofactor)+1
# if nu & 1:
# nu += 1
# if nv & 1:
# nv += 1
# if singleprec:
# ms = ms.astype("c8")
# dirty = dirty.astype("f4")
# if wgt is not None:
# wgt = wgt.astype("f4")
# dirty2 = ng.ms2dirty(uvw, freq, ms, wgt, nxdirty, nydirty, pixsizex,
# pixsizey, nu, nv, epsilon, wstacking, nthreads, 0).astype("f8")
# ms2 = ng.dirty2ms(uvw, freq, dirty, wgt, pixsizex, pixsizey, nu, nv, epsilon,
# wstacking, nthreads, 0).astype("c16")
# tol = 5e-4 if singleprec else 5e-11
# assert_allclose(np.vdot(ms, ms2).real, np.vdot(dirty2, dirty), rtol=tol)
@pmp('nxdirty', [16])
@pmp("nxdirty", (30, 128))
@pmp("nydirty", (128, 250))
@pmp("ofactor", (1.2, 1.5, 1.7, 2.0))
@pmp("nrow", (2, 27))
@pmp("nchan", (1, 5))
@pmp("epsilon", (1e-1, 1e-3, 1e-5))
@pmp("singleprec", (True, False))
@pmp("wstacking", (True, False))
@pmp("use_wgt", (True, False))
@pmp("nthreads", (1, 2))
def test_adjointness_ms2dirty(nxdirty, nydirty, ofactor, nrow, nchan, epsilon,
singleprec, wstacking, use_wgt, nthreads):
if singleprec and epsilon < 5e-5:
return
rng = np.random.default_rng(42)
pixsizex = np.pi/180/60/nxdirty*0.2398
pixsizey = np.pi/180/60/nxdirty
speedoflight, f0 = 299792458., 1e9
freq = f0 + np.arange(nchan)*(f0/nchan)
uvw = (rng.random((nrow, 3))-0.5)/(pixsizey*f0/speedoflight)
ms = rng.random((nrow, nchan))-0.5 + 1j*(rng.random((nrow, nchan))-0.5)
wgt = rng.random((nrow, nchan)) if use_wgt else None
dirty = rng.random((nxdirty, nydirty))-0.5
nu, nv = int(nxdirty*ofactor)+1, int(nydirty*ofactor)+1
if nu & 1:
nu += 1
if nv & 1:
nv += 1
if singleprec:
ms = ms.astype("c8")
dirty = dirty.astype("f4")
if wgt is not None:
wgt = wgt.astype("f4")
dirty2 = ng.ms2dirty(uvw, freq, ms, wgt, nxdirty, nydirty, pixsizex,
pixsizey, nu, nv, epsilon, wstacking, nthreads, 0).astype("f8")
ms2 = ng.dirty2ms(uvw, freq, dirty, wgt, pixsizex, pixsizey, nu, nv, epsilon,
wstacking, nthreads, 0).astype("c16")
tol = 5e-4 if singleprec else 5e-11
assert_allclose(np.vdot(ms, ms2).real, np.vdot(dirty2, dirty), rtol=tol)
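The final assertion above checks that dirty2ms and ms2dirty behave as adjoints of each other. A small self-contained numpy illustration of the identity being tested, vdot(v, A d) == vdot(A^H v, d) for any linear map A:

import numpy as np

rng = np.random.default_rng(0)
A = rng.random((5, 7)) + 1j*rng.random((5, 7))      # stands in for dirty2ms
d = rng.random(7) - 0.5                             # the (real) dirty image
v = rng.random(5) - 0.5 + 1j*(rng.random(5) - 0.5)  # the visibilities
lhs = np.vdot(v, A @ d)             # <v, A d>
rhs = np.vdot(A.conj().T @ v, d)    # <A^H v, d>, A^H standing in for ms2dirty
np.testing.assert_allclose(lhs.real, rhs.real, rtol=1e-12)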
@pmp('nxdirty', [16, 64])
@pmp('nydirty', [64])
@pmp('ofactor', [1.2, 2])
@pmp("nrow", (2,))
@pmp("nchan", (1,))
@pmp("epsilon", ( 1e-2, 1e-4, 1e-7, 1e-9, 1e-12))
@pmp('ofactor', [1.2, 1.4, 1.7, 2])
@pmp("nrow", (2, 27))
@pmp("nchan", (1, 5))
@pmp("epsilon", (1e-2, 1e-3, 1e-4, 1e-7))
@pmp("singleprec", (False,))
@pmp("wstacking", (False,))
@pmp("use_wgt", (False,))
@pmp("nthreads", (1, ))
@pmp("fov", (1., ))
@pmp("wstacking", (True,))
@pmp("use_wgt", (True,))
@pmp("nthreads", (1, 2))
@pmp("fov", (1., 20.))
def test_ms2dirty_against_wdft2(nxdirty, nydirty, ofactor, nrow, nchan, epsilon, singleprec, wstacking, use_wgt, fov, nthreads):
if singleprec and epsilon < 5e-5:
return
......
......@@ -30,7 +30,7 @@
#include <cmath>
#include "ducc0/math/constants.h"
#include "ducc0/math/gl_integrator.h"
#include "ducc0/math/es_kernel.h"
#include "ducc0/math/gridding_kernel.h"
#include "ducc0/infra/mav.h"
#include "ducc0/infra/simd.h"
#include "ducc0/sharp/sharp.h"
......@@ -143,7 +143,7 @@ template<typename T> class Interpolator
int nthreads;
T ofactor;
size_t supp;
ES_Kernel kernel;
ES_Kernel<T> kernel;
size_t ncomp;
#ifdef SIMD_INTERPOL
mav<native_simd<T>,4> scube;
......@@ -166,7 +166,7 @@ template<typename T> class Interpolator
}
for (size_t j=0; j<nphi0; ++j)
tmp.v(ntheta0-1,j) = arr(ntheta0-1,j);
auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
auto fct = kernel.corfunc(nphi0/2+1, 1./nphi, nthreads);
vector<T> k2(fct.size());
for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
fmav<T> ftmp(tmp);
......@@ -180,7 +180,7 @@ template<typename T> class Interpolator
{
T sfct = (spin&1) ? -1 : 1;
mav<T,2> tmp({nphi,nphi0});
auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
auto fct = kernel.corfunc(nphi0/2+1, 1./nphi, nthreads);
vector<T> k2(fct.size());
for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
fmav<T> farr(arr);
......@@ -243,8 +243,8 @@ template<typename T> class Interpolator
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(T(nphi)/(2*lmax+1)),
supp(ES_Kernel::get_supp(epsilon, ofactor)),
kernel(supp, ofactor, nthreads),
supp(esk_support(epsilon, ofactor)),
kernel(supp, esk_beta(supp, ofactor)*supp),
ncomp(separate ? slm.size() : 1),
#ifdef SIMD_INTERPOL
scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
......@@ -359,8 +359,8 @@ template<typename T> class Interpolator
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(T(nphi)/(2*lmax+1)),
supp(ES_Kernel::get_supp(epsilon, ofactor)),
kernel(supp, ofactor, nthreads),
supp(esk_support(epsilon, ofactor)),
kernel(supp, esk_beta(supp, ofactor)*supp),
ncomp(ncomp_),
#ifdef SIMD_INTERPOL
scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
......@@ -441,11 +441,11 @@ template<typename T> class Interpolator
union {
native_simd<T> simd[64/vl];
T scalar[64];
} kdata;
T *wt(kdata.scalar), *wp(kdata.scalar+supp);
size_t nvec = (2*supp+vl-1)/vl;
for (auto &v: kdata.simd) v=0;
MR_assert(2*supp<=64, "support too large");
} tbuf, pbuf;
T *wt(tbuf.scalar), *wp(pbuf.scalar);
for (auto &v: tbuf.simd) v=0;
for (auto &v: pbuf.simd) v=0;
MR_assert(supp<=64, "support too large");
vector<T> psiarr(2*kmax+1);
#ifdef SIMD_INTERPOL
vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
......@@ -456,14 +456,10 @@ template<typename T> class Interpolator
size_t i=idx[ind];
T f0=T(0.5*supp+ptg(i,0)*xdtheta);
size_t i0 = size_t(f0+T(1));
for (size_t t=0; t<supp; ++t)
wt[t] = (t+i0-f0)*delta - 1;
kernel.eval((i0-f0)*delta-1, tbuf.simd);
T f1=T(0.5)*supp+ptg(i,1)*xdphi;
size_t i1 = size_t(f1+1.);
for (size_t t=0; t<supp; ++t)
wp[t] = (t+i1-f1)*delta - 1;
for (size_t t=0; t<nvec; ++t)
kdata.simd[t] = kernel(kdata.simd[t]);
kernel.eval((i1-f1)*delta-1, pbuf.simd);
psiarr[0]=1.;
double psi=ptg(i,2);
double cpsi=cos(psi), spsi=sin(psi);
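In the hunk above (and its twin further down) the explicit per-tap weight loops are replaced by a single kernel.eval() call. A short Python check of why the two formulations produce the same abscissas, assuming delta == 2/supp as in the ES_Kernel::eval code later in this commit:

import numpy as np

supp = 8                      # illustrative support size
delta = 2. / supp
f0, i0 = 3.7, 4               # example fractional position and first grid index

# old form: one coordinate per tap
wt_old = np.array([(t + i0 - f0) * delta - 1 for t in range(supp)])
# new form: kernel.eval starts at (i0-f0)*delta-1 and steps by 2/supp
wt_new = (i0 - f0) * delta - 1 + np.arange(supp) * delta
np.testing.assert_allclose(wt_old, wt_new)   # identical points, hence identical weights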
......@@ -645,11 +641,11 @@ template<typename T> class Interpolator
union {
native_simd<T> simd[64/vl];
T scalar[64];
} kdata;
T *wt(kdata.scalar), *wp(kdata.scalar+supp);
size_t nvec = (2*supp+vl-1)/vl;
for (auto &v: kdata.simd) v=0;
MR_assert(2*supp<=64, "support too large");
} tbuf, pbuf;
T *wt(tbuf.scalar), *wp(pbuf.scalar);
for (auto &v: tbuf.simd) v=0;
for (auto &v: pbuf.simd) v=0;
MR_assert(supp<=64, "support too large");
vector<T> psiarr(2*kmax+1);
#ifdef SIMD_INTERPOL
vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
......@@ -658,16 +654,12 @@ template<typename T> class Interpolator
while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
{
size_t i=idx[ind];
T f0=T(0.5)*supp+ptg(i,0)*xdtheta;
size_t i0 = size_t(f0+1.);
for (size_t t=0; t<supp; ++t)
wt[t] = (t+i0-f0)*delta - 1;
T f0=T(0.5*supp+ptg(i,0)*xdtheta);
size_t i0 = size_t(f0+T(1));
kernel.eval((i0-f0)*delta-1, tbuf.simd);
T f1=T(0.5)*supp+ptg(i,1)*xdphi;
size_t i1 = size_t(f1+1.);
for (size_t t=0; t<supp; ++t)
wp[t] = (t+i1-f1)*delta - 1;
for (size_t t=0; t<nvec; ++t)
kdata.simd[t] = kernel(kdata.simd[t]);
kernel.eval((i1-f1)*delta-1, pbuf.simd);
psiarr[0]=1.;
double psi=ptg(i,2);
double cpsi=cos(psi), spsi=sin(psi);
......
......@@ -49,6 +49,7 @@ template<typename T> class exponator
public:
T operator()(T val) { return exp(val); }
};
template<typename T> native_simd<T> esk (native_simd<T> v, T beta)
{
auto tmp = (T(1)-v)*(T(1)+v);
......@@ -59,110 +60,57 @@ template<typename T> native_simd<T> esk (native_simd<T> v, T beta)
return res;
}
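The elided body of esk presumably evaluates the exponential-of-semicircle form exp(beta*(sqrt(1-v^2)-1)) described in arXiv:1808.06736. A scalar numpy sketch of that formula, with a purely illustrative beta:

import numpy as np

def esk(v, beta):
    # exp(beta*(sqrt(1-v^2)-1)) inside (-1, 1), zero outside
    v = np.asarray(v, dtype=float)
    tmp = (1. - v) * (1. + v)
    out = np.zeros_like(v)
    ok = tmp > 0.
    out[ok] = np.exp(beta * (np.sqrt(tmp[ok]) - 1.))
    return out

print(esk(np.linspace(-1., 1., 9), beta=2.3 * 8))   # 1 at v=0, ~exp(-beta) towards the edges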
/* This class implements the "exponential of semicircle" gridding kernel
described in https://arxiv.org/abs/1808.06736 */
class ES_Kernel
double esk_beta(size_t supp, double ofactor)
{
private:
double beta;
int p;
vector<double> x, wgt, psi;
size_t supp;
public:
ES_Kernel(size_t supp_, double ofactor, size_t nthreads)
: beta(get_beta(supp_,ofactor)*supp_),
p(int(1.5*supp_+2)), supp(supp_)
{
GL_Integrator integ(2*p,nthreads);
x = integ.coordsSymmetric();
wgt = integ.weightsSymmetric();
psi=x;
for (auto &v:psi)
v=operator()(v);
}
ES_Kernel(size_t supp_, size_t nthreads)
: ES_Kernel(supp_, 2., nthreads){}
template<typename T> T operator()(T v) const
{ return esk(v, T(beta)); }
template<typename T> native_simd<T> operator()(native_simd<T> v) const
{ return esk(v, T(beta)); }
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
double corfac(double v) const
{
double tmp=0;
for (int i=0; i<p; ++i)
tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]);
return 2./(supp*tmp);
}
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
vector<double> correction_factors(size_t n, size_t nval, size_t nthreads)
{
vector<double> res(nval);
double xn = 1./n;
execStatic(nval, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
res[i] = corfac(i*xn);
});
return res;
}
static double get_beta(size_t supp, double ofactor=2)
{
MR_assert((supp>=2) && (supp<=15), "unsupported support size");
if (ofactor>=2)
{
static const array<double,16> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
MR_assert(supp<opt_beta.size(), "bad support size");
return opt_beta[supp];
}
if (ofactor>=1.175)
{
// empirical, but pretty accurate approximation
static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
auto x0 = 1./(2*ofactor);
auto bcstrength=1.+(x0-0.25)*2.5;
return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
}
MR_fail("oversampling factor is too small");
}
MR_assert((supp>=2) && (supp<=15), "unsupported support size");
if (ofactor>=2)
{
static const array<double,16> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
MR_assert(supp<opt_beta.size(), "bad support size");
return opt_beta[supp];
}
if (ofactor>=1.175)
{
// empirical, but pretty accurate approximation
static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
auto x0 = 1./(2*ofactor);
auto bcstrength=1.+(x0-0.25)*2.5;
return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
}
MR_fail("oversampling factor is too small");
}
static size_t get_supp(double epsilon, double ofactor=2)
size_t esk_support(double epsilon, double ofactor)
{
double epssq = epsilon*epsilon;
if (ofactor>=2)
{
static const array<double,16> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
for (size_t i=2; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
MR_fail("requested epsilon too small - minimum is 1e-13");
}
if (ofactor>=1.175)
{
for (size_t w=2; w<16; ++w)
{
double epssq = epsilon*epsilon;
if (ofactor>=2)
{
static const array<double,16> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
for (size_t i=2; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
MR_fail("requested epsilon too small - minimum is 1e-13");
}
if (ofactor>=1.175)
{
for (size_t w=2; w<16; ++w)
{
auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
if (epssq>estimate) return w;
}
MR_fail("requested epsilon too small");
}
MR_fail("oversampling factor is too small");
auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
if (epssq>estimate) return w;
}
};
MR_fail("requested epsilon too small");
}
MR_fail("oversampling factor is too small");
}
}
using detail_es_kernel::esk;
using detail_es_kernel::ES_Kernel;
using detail_es_kernel::esk_beta;
using detail_es_kernel::esk_support;
}
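A condensed Python transcription of the parameter selection above, restricted to the ofactor >= 2 branch and using the tables from this file; it shows how a requested accuracy maps to a kernel support and the beta value actually passed to the kernel:

maxmaperr = [1e8, 0.19, 2.98e-3, 5.98e-5, 1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12,
             8.81e-14, 1.34e-15, 2.17e-17, 2.12e-19, 2.88e-21, 3.92e-23,
             8.21e-25, 7.13e-27]
opt_beta = [-1, 0.14, 1.70, 2.08, 2.205, 2.26, 2.29, 2.307, 2.316, 2.3265,
            2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317]

def esk_support(epsilon, ofactor=2.):
    # smallest support whose maximum map error stays below epsilon^2
    assert ofactor >= 2., "only the ofactor >= 2 branch is sketched here"
    epssq = epsilon * epsilon
    for i in range(2, len(maxmaperr)):
        if epssq > maxmaperr[i]:
            return i
    raise ValueError("requested epsilon too small - minimum is 1e-13")

def esk_beta(supp, ofactor=2.):
    return opt_beta[supp]          # ofactor >= 2 branch only

supp = esk_support(1e-7)           # -> 9
beta = esk_beta(supp) * supp       # shape parameter handed to the kernel, as in selectESKernel
print(supp, beta)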
......
......@@ -26,6 +26,7 @@
#include "ducc0/infra/simd.h"
#include "ducc0/math/gl_integrator.h"
#include "ducc0/math/least_misfit.h"
#include "ducc0/math/es_kernel.h"
namespace ducc0 {
......@@ -109,22 +110,73 @@ template<typename T> class GriddingKernel
virtual vector<double> corfunc(size_t n, double dx, int nthreads=1) const = 0;
};
class KernelCorrection
class PiecewiseKernelCorrection
{
private:
static constexpr size_t ideg=10; // integration points per interval
static_assert((ideg&1)==0, "ideg must be even");
vector<double> x, wgtpsi;
size_t supp;
public:
KernelCorrection(size_t W, const function<double(double)> &func)
PiecewiseKernelCorrection(size_t W, const function<double(double)> &func)
: supp(W)
{
size_t p = size_t(1.5*supp+2); // estimate; may need to be higher for arbitrary kernels
GL_Integrator integ(2*p, 1);
// we know that the kernel is piecewise smooth in all W sections but not
// necessarily at borders between sections. Therefore we integrate piece
// by piece.
GL_Integrator integ(ideg, 1);
auto xgl = integ.coords();
auto wgl = integ.weights();
x.resize((supp*ideg)/2);
wgtpsi.resize((supp*ideg)/2);
for (size_t i=0; i<x.size(); ++i)
{
size_t iiv = i/ideg;
x[i] = -1. + iiv*2./supp + (1.+xgl[i%ideg])/supp;
wgtpsi[i] = wgl[i%ideg]*func(x[i]);
}
}
/* Compute correction factors for gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
double corfunc(double v) const
{
double tmp=0;
for (size_t i=0; i<x.size(); ++i)
tmp += wgtpsi[i]*cos(pi*supp*v*x[i]);
return 1./tmp;
}
/* Compute correction factors for gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
vector<double> corfunc(size_t n, double dx, int nthreads=1) const
{
vector<double> res(n);
execStatic(n, nthreads, 0, [&](auto &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
res[i] = corfunc(i*dx);
});
return res;
}
};
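A numpy sketch of the piecewise Gauss-Legendre idea behind PiecewiseKernelCorrection: the kernel is only piecewise smooth over its W sections, so the Fourier integral of eqs. (3.8) to (3.10) in Barnett et al. 2018 is accumulated section by section. The exact normalisation and the symmetry trick of the C++ class are not reproduced here:

import numpy as np

def corfunc_piecewise(psi, W, v, ideg=10):
    xgl, wgl = np.polynomial.legendre.leggauss(ideg)   # nodes/weights on [-1, 1]
    rho = 0.0
    for sec in range(W):                 # one smooth section at a time
        a = -1. + sec * 2. / W           # the section covers [a, a + 2/W]
        x = a + (1. + xgl) / W           # map the GL nodes into the section
        rho += np.sum(wgl / W * psi(x) * np.cos(np.pi * W * v * x))
    return 1. / rho

beta = 2.307 * 8
psi = lambda x: np.exp(beta * (np.sqrt(np.clip(1. - x*x, 0., None)) - 1.))
print(corfunc_piecewise(psi, W=8, v=0.25))   # grid correction at normalised frequency 0.25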
class GLFullCorrection
{
private:
vector<double> x, wgtpsi;
size_t supp;
public:
GLFullCorrection(size_t W, const function<double(double)> &func)
: supp(W)
{
size_t p = size_t(1.5*W)+2;
GL_Integrator integ(2*p,1);
x = integ.coordsSymmetric();
wgtpsi = integ.weightsSymmetric();
for (size_t i=0; i<x.size(); ++i)
wgtpsi[i] *= func(x[i]);
wgtpsi[i] *= func(x[i])*supp*0.5;
}
/* Compute correction factors for gridding kernel
......@@ -134,7 +186,7 @@ class KernelCorrection
double tmp=0;
for (size_t i=0; i<x.size(); ++i)
tmp += wgtpsi[i]*cos(pi*supp*v*x[i]);
return 2./(supp*tmp);
return 1./tmp;
}
/* Compute correction factors for gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
......@@ -161,7 +213,7 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
vector<Tsimd> coeff;
void (HornerKernel<T>::* evalfunc) (T, native_simd<T> *) const;
KernelCorrection corr;
PiecewiseKernelCorrection corr;
template<size_t NV, size_t DEG> void eval_intern(T x, native_simd<T> *res) const
{
......@@ -221,10 +273,10 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
static vector<Tsimd> coeffFromPolyKernel(const PolyKernel &krn)
{
auto W = krn.W;
auto D = krn.coeff.size()/((W+1)/2)-1;
auto D = krn.D;
auto nvec = ((W+vlen-1)/vlen);
vector<Tsimd> coeff(nvec*(D+1), 0);
vector<double> coeff_raw(W*(D+1), 0);
vector<Tsimd> coeff(nvec*(D+1), 42.);
vector<double> coeff_raw(W*(D+1), 42.);
size_t Whalf = W/2;
for (size_t j=0; j<=D; ++j)
{
......@@ -290,10 +342,65 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
{ return corr.corfunc(n, dx, nthreads); }
};
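For reference, plain Horner's rule, the evaluation scheme HornerKernel is built on: a degree-D polynomial costs D multiply-adds, highest-order coefficient first. Judging from the W*(D+1) coefficient layout above, the class keeps one such polynomial per kernel section and vectorises across sections; that layout is not reproduced in this sketch.

def horner(coeff, x):
    # coeff[0] is the highest-order coefficient
    res = coeff[0]
    for c in coeff[1:]:
        res = res * x + c
    return res

print(horner([2., -3., 0.5], 1.5))   # 2*1.5**2 - 3*1.5 + 0.5 = 0.5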
template<typename T> class ES_Kernel: public GriddingKernel<T>
{
private:
using Tsimd = native_simd<T>;
static constexpr auto vlen = Tsimd::size();
size_t W, nvec;
double beta;
GLFullCorrection corr;
public:
using GriddingKernel<T>::eval;
using GriddingKernel<T>::eval_single;
using GriddingKernel<T>::corfunc;
ES_Kernel(size_t W_, double beta_)
: W(W_), nvec((W+vlen-1)/vlen), beta(beta_),
corr(W, [this](T v){return eval_single(v);}) {}
virtual size_t support() const { return W; }
virtual void eval(T x, native_simd<T> *res) const
{
T dist = T(2./W);
for (size_t i=0; i<W; ++i)
res[i/vlen][i%vlen] = x+i*dist;
for (size_t i=W; i<nvec*vlen; ++i)
res[i/vlen][i%vlen] = 0;
for (size_t i=0; i<nvec; ++i)
res[i] = esk(res[i], T(beta));
}
/*! Returns the function approximation at location x.
x must lie in [-1; 1]. */
virtual T eval_single(T x) const
{ return esk(x, T(beta)); }
virtual double corfunc(double x) const {return corr.corfunc(x); }
/* Computes the correction function values at a coordinates
[0, dx, 2*dx, ..., (n-1)*dx] */
virtual vector<double> corfunc(size_t n, double dx, int nthreads=1) const
{ return corr.corfunc(n, dx, nthreads); }
};
template<typename T> auto selectESKernel(double ofactor, double epsilon)
{
auto supp = esk_support(epsilon, ofactor);
auto beta = esk_beta(supp, ofactor)*supp;
// return make_shared<HornerKernel<T>>(supp, supp+3, [beta](double v){return esk(v, beta);});
return make_shared<ES_Kernel<T>>(supp, beta);
}
}
using detail_gridding_kernel::GriddingKernel;
using detail_gridding_kernel::HornerKernel;
using detail_gridding_kernel::ES_Kernel;
using detail_gridding_kernel::selectESKernel;
}
......