Commit 93c5c74b authored by Martin Reinecke

cleanup

parent 5f4f22fc
Pipeline #78781 passed with stages
in 16 minutes and 37 seconds
......@@ -486,8 +486,6 @@ template<typename T, typename T2=complex<T>> class Helper
vector<T2> rbuf, wbuf;
bool do_w_gridding;
double w0, xdw;
size_t nexp;
size_t nvecs;
vector<std::mutex> &locks;
void dump() const
......@@ -528,6 +526,7 @@ template<typename T, typename T2=complex<T>> class Helper
}
public:
size_t nvec;
const T2 *p0r;
T2 *p0w;
static constexpr size_t vlen=native_simd<T>::size();
......@@ -535,7 +534,7 @@ template<typename T, typename T2=complex<T>> class Helper
T scalar[64];
native_simd<T> simd[64/vlen];
};
kbuf bufx, bufy;
kbuf buf;
Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_,
vector<std::mutex> &locks_, double w0_=-1, double dw_=-1)
......@@ -548,11 +547,10 @@ template<typename T, typename T2=complex<T>> class Helper
do_w_gridding(dw_>0),
w0(w0_),
xdw(T(1)/dw_),
nexp(2*supp + do_w_gridding),
nvecs((nexp+vlen-1)/vlen),
locks(locks_)
locks(locks_),
nvec((supp+vlen-1)/vlen)
{
MR_assert(supp<=64, "support too large");
MR_assert(supp<=32, "support too large");
}
~Helper() { if (grid_w) dump(); }
......@@ -566,8 +564,8 @@ template<typename T, typename T2=complex<T>> class Helper
double xsupp=2./supp;
double x0 = xsupp*(iu0-u);
double y0 = xsupp*(iv0-v);
krn.eval(T(x0), bufx.simd);
krn.eval(T(y0), bufy.simd);
krn.eval(T(x0), &buf.simd[0]);
krn.eval(T(y0), &buf.simd[nvec]);
if (do_w_gridding)
wfac = krn.eval_single(T(xdw*xsupp*abs(w0-in.w)));
if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv))
......@@ -670,8 +668,8 @@ template<typename T, typename Serv> void x2grid_c
{
Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
int jump = hlp.lineJump();
const T * DUCC0_RESTRICT ku = hlp.bufx.scalar;
const T * DUCC0_RESTRICT kv = hlp.bufy.scalar;
const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
const T * DUCC0_RESTRICT kv = hlp.buf.scalar+hlp.vlen*hlp.nvec;
while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
{
......@@ -718,8 +716,8 @@ template<typename T, typename Serv> void grid2x_c
{
Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
int jump = hlp.lineJump();
const T * DUCC0_RESTRICT ku = hlp.bufx.scalar;
const T * DUCC0_RESTRICT kv = hlp.bufy.scalar;
const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
const T * DUCC0_RESTRICT kv = hlp.buf.scalar+hlp.vlen*hlp.nvec;
while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
{
......
......@@ -394,6 +394,19 @@ template<typename T> class fmav: public fmav_info, public membuf<T>
}
};
// template<typename Func, typename T0, typename Ts...> void fmav_pointwise_op(Func func, T0 & arg0, Ts&... args)
// {
// MR_assert(multiequal(arg0.shape()==args.shape()...), "fmav shape mismatch");
// if (multiequal(true, arg0.stride()==args.stride()...)) // equal strides, we can make simplifications
// {
// if (arg0.compact()) // even better, we can go through everything in a single loop
// {
// for (size_t i=0; i<arg0.size(); ++i)
// func(arg0.ptr[i], args.ptr[i]...);
// }
// else
// }
template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membuf<T>
{
// static_assert((ndim>0) && (ndim<4), "only supports 1D, 2D, and 3D arrays");
......
......@@ -241,29 +241,40 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
return tval;
}
template<size_t NV, size_t DEG> auto evfhelper2() const
template<size_t NV, size_t DEG> void evfhelper2()
{
if (DEG==D)
return &HornerKernel::eval_intern<NV,DEG>;
if (DEG>MAXDEG)
return &HornerKernel::eval_intern_general;
return evfhelper2<NV, ((DEG>MAXDEG) ? DEG : DEG+1)>();
evalfunc = &HornerKernel::eval_intern<NV,DEG>;
else if (DEG>MAXDEG)
evalfunc = &HornerKernel::eval_intern_general;
else
evfhelper2<NV, ((DEG>MAXDEG) ? DEG : DEG+1)>();
}
template<size_t NV> auto evfhelper1() const
template<size_t NV> void evfhelper1()
{
if (nvec==NV) return evfhelper2<NV,0>();
if (nvec*vlen>MAXW) return &HornerKernel::eval_intern_general;
return evfhelper1<((NV*vlen>MAXW) ? NV : NV+1)>();
if (nvec==NV)
evfhelper2<NV,0>();
else if (nvec*vlen>MAXW)
evalfunc = &HornerKernel::eval_intern_general;
else
evfhelper1<((NV*vlen>MAXW) ? NV : NV+1)>();
}
template<size_t DEG> auto evsfhelper1() const
template<size_t DEG> void evsfhelper1()
{
if (DEG==D)
return &HornerKernel::eval_single_intern<DEG>;
if (DEG>MAXDEG)
return &HornerKernel::eval_single_intern_general;
return evsfhelper1<((DEG>MAXDEG) ? DEG : DEG+1)>();
evalsinglefunc = &HornerKernel::eval_single_intern<DEG>;
else if (DEG>MAXDEG)
evalsinglefunc = &HornerKernel::eval_single_intern_general;
else
evsfhelper1<((DEG>MAXDEG) ? DEG : DEG+1)>();
}
// Binds the kernel's evaluation member-function pointers by kicking off the
// two helper chains: evfhelper1 starting at NV=1 and evsfhelper1 starting at
// DEG=0. The helpers compare against nvec and D, so this is invoked from the
// constructor body, after the member-initializer list has run.
void wire_eval()
{
// ends by assigning evalfunc (a specialised eval_intern or the general fallback)
evfhelper1<1>();
// ends by assigning evalsinglefunc (a specialised eval_single_intern or the general fallback)
evsfhelper1<0>();
}
static vector<Tsimd> makeCoeff(size_t W, size_t D,
......@@ -290,10 +301,8 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
HornerKernel(size_t W_, size_t D_, const function<double(double)> &func,
const KernelCorrection &corr_)
: W(W_), D(D_), nvec((W+vlen-1)/vlen),
coeff(makeCoeff(W_, D_, func)), evalfunc(evfhelper1<1>()),
evalsinglefunc(evsfhelper1<0>()),
corr(corr_)
{}
coeff(makeCoeff(W_, D_, func)), corr(corr_)
{ wire_eval(); }
// HornerKernel(size_t W_, size_t D_, const function<double(double)> &func)
// : W(W_), D(D_), nvec((W+vlen-1)/vlen),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment