Commit 5ef1c033 authored by Martin Reinecke

first try: template the gridding helpers on the kernel support and build the compile-time TemplateKernel from the runtime HornerKernel

parent 653c994b
Pipeline #80005 failed in 11 minutes and 16 seconds
@@ -226,7 +226,7 @@ template<typename T> class GridderConfig
// FIXME: this should probably be done more cleanly
public:
shared_ptr<GriddingKernel<T>> krn;
shared_ptr<HornerKernel<T>> krn;
protected:
double psx, psy;
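// Note on the change above: GridderConfig now stores the concrete HornerKernel
// instead of the abstract GriddingKernel interface, which is what lets the
// support-templated helpers further down copy its coefficients into a fixed-size
// TemplateKernel (see the krn(*gconf.krn) initializers below). A minimal,
// self-contained sketch of that pattern; all *Sketch names are illustrative and
// do not exist in the ducc0 sources.
#include <cassert>
#include <cstddef>
#include <memory>

struct RuntimeKernelSketch            // stands in for HornerKernel<T>
  {
  std::size_t support() const { return 8; }
  // ... polynomial coefficients would live here ...
  };

template<std::size_t W> struct FixedKernelSketch   // stands in for TemplateKernel<W,T>
  {
  explicit FixedKernelSketch(const RuntimeKernelSketch &k)
    { assert(W==k.support()); /* copy coefficients into fixed-size storage */ }
  };

struct ConfigSketch                   // stands in for GridderConfig<T>
  {
  std::shared_ptr<RuntimeKernelSketch> krn;   // concrete type, not the interface
  };

int main()
  {
  ConfigSketch cfg{std::make_shared<RuntimeKernelSketch>()};
  FixedKernelSketch<8> fixed(*cfg.krn);
  return 0;
  }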
@@ -513,12 +513,13 @@ template<typename T> class GridderConfig
constexpr int logsquare=4;
template<typename T> class HelperX2g
template<size_t supp, typename T> class HelperX2g2
{
private:
const GridderConfig<T> &gconf;
TemplateKernel<supp, T> krn;
mav<complex<T>,2> &grid;
int nu, nv, supp;
int nu, nv;
int su, sv, svvec;
int iu0, iv0; // start index of the current visibility
int bu0, bv0; // start index of the current buffer
@@ -563,10 +564,9 @@ template<typename T> class HelperX2g
};
kbuf buf;
HelperX2g(const GridderConfig<T> &gconf_, mav<complex<T>,2> &grid_,
HelperX2g2(const GridderConfig<T> &gconf_, mav<complex<T>,2> &grid_,
vector<std::mutex> &locks_, double w0_=-1, double dw_=-1)
: gconf(gconf_), grid(grid_),
supp(gconf.Supp()),
: gconf(gconf_), krn(*gconf.krn), grid(grid_),
su(2*gconf.Nsafe()+(1<<logsquare)),
sv(2*gconf.Nsafe()+(1<<logsquare)),
svvec(((sv+vlen-1)/vlen)*vlen),
@@ -579,26 +579,25 @@ template<typename T> class HelperX2g
locks(locks_),
nvec((supp+vlen-1)/vlen)
{
MR_assert(supp<=32, "support too large");
static_assert(supp<=32, "support too large");
checkShape(grid.shape(), {gconf.Nu(),gconf.Nv()});
}
~HelperX2g() { dump(); }
~HelperX2g2() { dump(); }
int lineJump() const { return svvec; }
T Wfac() const { return wfac; }
void prep(const UVW &in)
[[gnu::hot]] void prep(const UVW &in)
{
const auto &krn(*(gconf.krn));
double u, v;
gconf.getpix(in.u, in.v, u, v, iu0, iv0);
double xsupp=2./supp;
constexpr double xsupp=2./supp;
double x0 = xsupp*(iu0-u);
double y0 = xsupp*(iv0-v);
krn.eval(T(x0), &buf.simd[0]);
krn.eval(T(y0), &buf.simd[nvec]);
if (do_w_gridding)
wfac = krn.eval_single(T(xdw*xsupp*abs(w0-in.w)));
if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv))
if ((iu0<bu0) || (iv0<bv0) || (iu0+int(supp)>bu0+su) || (iv0+int(supp)>bv0+sv))
{
dump();
bu0=((((iu0+gconf.Nsafe())>>logsquare)<<logsquare))-gconf.Nsafe();
@@ -609,12 +608,12 @@ template<typename T> class HelperX2g
}
};
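// The change above turns the kernel support into a compile-time template
// parameter (HelperX2g -> HelperX2g2<supp,T>): the size check becomes a
// static_assert and the per-visibility scale factor 2./supp becomes constexpr.
// A minimal, self-contained sketch of that idea; SupportScaled and demo_supp
// are illustrative names only.
#include <cstddef>
#include <cstdio>

template<std::size_t supp> struct SupportScaled
  {
  static_assert(supp<=32, "support too large");   // checked at compile time
  static constexpr double xsupp = 2./supp;        // no runtime division per visibility
  static double x0(double u, int iu0) { return xsupp*(iu0-u); }
  };

int main()
  {
  constexpr std::size_t demo_supp = 8;
  std::printf("%f\n", SupportScaled<demo_supp>::x0(3.4, 1));
  return 0;
  }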
template<typename T> class HelperG2x
template<size_t supp, typename T> class HelperG2x2
{
private:
const GridderConfig<T> &gconf;
TemplateKernel<supp, T> krn;
const mav<complex<T>,2> &grid;
int supp;
int su, sv, svvec;
int iu0, iv0; // start index of the current visibility
int bu0, bv0; // start index of the current buffer
@@ -653,10 +652,9 @@ template<typename T> class HelperG2x
};
kbuf buf;
HelperG2x(const GridderConfig<T> &gconf_, const mav<complex<T>,2> &grid_,
HelperG2x2(const GridderConfig<T> &gconf_, const mav<complex<T>,2> &grid_,
double w0_=-1, double dw_=-1)
: gconf(gconf_), grid(grid_),
supp(gconf.Supp()),
: gconf(gconf_), krn(*gconf.krn), grid(grid_),
su(2*gconf.Nsafe()+(1<<logsquare)),
sv(2*gconf.Nsafe()+(1<<logsquare)),
svvec(((sv+vlen-1)/vlen)*vlen),
@@ -674,19 +672,18 @@ template<typename T> class HelperG2x
int lineJump() const { return svvec; }
T Wfac() const { return wfac; }
void prep(const UVW &in)
[[gnu::hot]] void prep(const UVW &in)
{
const auto &krn(*(gconf.krn));
double u, v;
gconf.getpix(in.u, in.v, u, v, iu0, iv0);
double xsupp=2./supp;
constexpr double xsupp=2./supp;
double x0 = xsupp*(iu0-u);
double y0 = xsupp*(iv0-v);
krn.eval(T(x0), &buf.simd[0]);
krn.eval(T(y0), &buf.simd[nvec]);
if (do_w_gridding)
wfac = krn.eval_single(T(xdw*xsupp*abs(w0-in.w)));
if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv))
if ((iu0<bu0) || (iv0<bv0) || (iu0+int(supp)>bu0+su) || (iv0+int(supp)>bv0+sv))
{
bu0=((((iu0+gconf.Nsafe())>>logsquare)<<logsquare))-gconf.Nsafe();
bv0=((((iv0+gconf.Nsafe())>>logsquare)<<logsquare))-gconf.Nsafe();
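// Aside on the bu0/bv0 arithmetic above: the buffer origin is shifted by the
// safety margin and then rounded down to a multiple of 2^logsquare (16 with
// logsquare=4), presumably so that nearby visibilities land in the same buffered
// sub-grid before it is flushed by dump(). A tiny self-contained illustration of
// that rounding; buffer_start is an illustrative name only.
#include <cstdio>

constexpr int logsquare = 4;            // same value as in the code above

int buffer_start(int i0, int nsafe)
  {
  // round (i0+nsafe) down to a multiple of 2^logsquare, then undo the shift
  return (((i0+nsafe)>>logsquare)<<logsquare) - nsafe;
  }

int main()
  {
  for (int i0=0; i0<40; i0+=7)
    std::printf("i0=%2d -> b0=%3d\n", i0, buffer_start(i0, 6));
  return 0;
  }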
@@ -782,7 +779,7 @@ template<size_t SUPP, typename T, typename Serv> [[gnu::hot]] void x2grid_c_help
size_t np = srv.Nvis();
execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
{
HelperX2g<T> hlp(gconf, grid, locks, w0, dw);
HelperX2g2<SUPP,T> hlp(gconf, grid, locks, w0, dw);
int jump = hlp.lineJump();
const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
const auto * DUCC0_RESTRICT kv = hlp.buf.simd+NVEC;
@@ -857,7 +854,7 @@ template<size_t SUPP, typename T, typename Serv> [[gnu::hot]] void grid2x_c_help
size_t np = srv.Nvis();
execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
{
HelperG2x<T> hlp(gconf, grid, w0, dw);
HelperG2x2<SUPP,T> hlp(gconf, grid, w0, dw);
int jump = hlp.lineJump();
const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
const auto * DUCC0_RESTRICT kv = hlp.buf.simd+NVEC;
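// The two helpers above are already templated on SUPP and now instantiate
// HelperX2g2<SUPP,T> / HelperG2x2<SUPP,T> directly. The mapping from the runtime
// kernel support to these compile-time instantiations is not visible in this
// diff; the switch below is only a hypothetical sketch of such a dispatch, and
// x2grid_dispatch / x2grid_c_helper_stub are illustrative names.
#include <cstddef>
#include <stdexcept>

template<std::size_t SUPP> void x2grid_c_helper_stub(/* gconf, grid, srv, ... */)
  { /* would run the SUPP-specialised gridding loop */ }

void x2grid_dispatch(std::size_t supp)
  {
  switch (supp)
    {
    case 4: x2grid_c_helper_stub<4>(); break;
    case 6: x2grid_c_helper_stub<6>(); break;
    case 8: x2grid_c_helper_stub<8>(); break;
    default: throw std::runtime_error("unsupported kernel support");
    }
  }

int main()
  { x2grid_dispatch(8); return 0; }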
@@ -276,7 +276,7 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
: W(W_), D(D_), nvec((W+vlen-1)/vlen),
coeff(makeCoeff(W_, D_, func)), scoeff(reinterpret_cast<T *>(&coeff[0])),
sstride(vlen*nvec), corr(corr_)
{ wire_eval(); }
{}
virtual size_t support() const { return W; }
@@ -292,45 +292,9 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
[0, dx, 2*dx, ..., (n-1)*dx] */
virtual vector<double> corfunc(size_t n, double dx, int nthreads=1) const
{ return corr.corfunc(n, dx, nthreads); }
};
template<size_t W> class TemplateCorrection
{
protected:
static constexpr auto N = W+W/2+2;
vector<double> x, wgtpsi;
public:
TemplateCorrection(const function<double(double)> &func)
{
GL_Integrator integ(2*N,1);
x = integ.coordsSymmetric();
wgtpsi = integ.weightsSymmetric();
for (size_t i=0; i<x.size(); ++i)
wgtpsi[i] *= func(x[i])*W*0.5;
}
/* Compute correction factors for gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
double corfunc(double v) const
{
double tmp=0;
for (size_t i=0; i<N; ++i)
tmp += wgtpsi[i]*cos(pi*W*v*x[i]);
return 1./tmp;
}
/* Compute correction factors for gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
vector<double> corfunc(size_t n, double dx, int nthreads=1) const
{
vector<double> res(n);
execStatic(n, nthreads, 0, [&](auto &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
res[i] = corfunc(i*dx);
});
return res;
}
const vector<Tsimd> &Coeff() const { return coeff; }
size_t degree() const { return D; }
};
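// The block deleted above (TemplateCorrection) computed the grid-correction
// factors of eqs. (3.8)-(3.10) of Barnett et al. 2018 with Gauss-Legendre
// quadrature; after this commit that job appears to be left entirely to the
// KernelCorrection held by HornerKernel. The sketch below reproduces the same
// formula, 1 / (W/2 * integral over [-1,1] of psi(x)*cos(pi*W*v*x) dx), with a
// plain midpoint rule instead of Gauss-Legendre and without the symmetry folding
// of the original; correction_factor and the Gaussian stand-in kernel are
// illustrative only.
#include <cmath>
#include <cstdio>
#include <functional>

double correction_factor(const std::function<double(double)> &psi,
                         double v, double W, int n=512)
  {
  const double pi = 3.141592653589793;
  double h = 2.0/n, sum = 0.0;
  for (int i=0; i<n; ++i)
    {
    double x = -1.0 + (i+0.5)*h;                    // midpoint of subinterval i
    sum += h * psi(x) * std::cos(pi*W*v*x) * W*0.5;
    }
  return 1.0/sum;
  }

int main()
  {
  auto psi = [](double x) { return std::exp(-4.0*x*x); };   // stand-in kernel shape
  std::printf("%g\n", correction_factor(psi, 0.1, 8.0));
  return 0;
  }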
template<size_t W, typename T> class TemplateKernel
@@ -345,8 +309,6 @@ template<size_t W, typename T> class TemplateKernel
const T *scoeff;
static constexpr auto sstride = nvec*vlen;
KernelCorrection corr;
template<size_t NV, size_t DEG> void eval_intern(T x, native_simd<T> *res) const
{
x = (x+1)*W-1;
@@ -384,11 +346,13 @@ template<size_t W, typename T> class TemplateKernel
}
public:
TemplateKernel(const function<double(double)> &func,
const KernelCorrection &corr_)
: scoeff(reinterpret_cast<T *>(&coeff[0])),
corr(corr_)
{ makeCoeff(func, coeff); }
TemplateKernel(const HornerKernel<T> &krn)
: scoeff(reinterpret_cast<T *>(&coeff[0]))
{
MR_assert(W==krn.support(), "support mismatch");
MR_assert(D==krn.degree(), "degree mismatch");
for (size_t i=0; i<coeff.size(); ++i) coeff[i] = krn.Coeff()[i];
}
constexpr size_t support() const { return W; }
@@ -414,13 +378,6 @@ template<size_t W, typename T> class TemplateKernel
tval = tval*x + ptr[j*sstride];
return tval;
}
double corfunc(double x) const {return corr.corfunc(x); }
/* Computes the correction function values at the coordinates
[0, dx, 2*dx, ..., (n-1)*dx] */
vector<double> corfunc(size_t n, double dx, int nthreads=1) const
{ return corr.corfunc(n, dx, nthreads); }
};
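// With this commit a TemplateKernel is built from a runtime HornerKernel by
// checking that support and degree match and copying the coefficients, and its
// eval routines apply Horner's rule with the degree fixed at compile time. A
// minimal stand-alone illustration of that evaluation recurrence; horner_eval is
// an illustrative name, not part of the library.
#include <array>
#include <cstddef>
#include <cstdio>

template<std::size_t DEG>
double horner_eval(const std::array<double, DEG+1> &c, double x)
  {
  double tval = c[0];
  for (std::size_t j=1; j<=DEG; ++j)
    tval = tval*x + c[j];          // same recurrence as eval_single() above
  return tval;
  }

int main()
  {
  std::array<double,4> c{1.0, -0.5, 0.25, 0.125};    // arbitrary demo coefficients
  std::printf("%f\n", horner_eval<3>(c, 0.3));
  return 0;
  }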
struct NESdata
@@ -695,6 +652,8 @@ template<typename T> auto selectKernel(double ofactor, double epsilon)
using detail_gridding_kernel::GriddingKernel;
using detail_gridding_kernel::selectKernel;
using detail_gridding_kernel::HornerKernel;
using detail_gridding_kernel::TemplateKernel;
}