Commit 37e65c6a authored by Martin Reinecke

Merge branch 'auto_choice' of gitlab.mpcdf.mpg.de:mtr/ducc into auto_choice

parents 70fd71cd bf212934
Pipeline #80283 passed with stages in 14 minutes and 58 seconds
@@ -503,7 +503,7 @@ template<typename T> class GridderConfig
c2c(inout, inout, {0,1}, FORWARD, T(1), nthreads);
}
-void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
+[[gnu::always_inline]] void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
{
u=fmod1(u_in*psx)*nu;
iu0 = min(int(u+ushift)-int(nu), maxiu0);
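
For orientation, getpix scales the incoming (u_in, v_in) coordinate by the pixel size (psx), wraps it into the periodic unit interval, and converts it to a fractional grid position plus the first grid index touched by the kernel. The sketch below only mirrors that wrap-and-index step; fmod1 is assumed to return the positive fractional part of its argument, and nu/ushift/maxiu0 stand in for the members used above without reproducing their exact definitions.

    #include <algorithm>
    #include <cmath>

    // Hedged sketch only, not the library's code.
    inline double fmod1_sketch(double v)
      { return v - std::floor(v); }               // wraps v into [0,1)

    inline int first_index(double u_scaled, int nu, double ushift, int maxiu0)
      {
      double u = fmod1_sketch(u_scaled)*nu;        // fractional grid coordinate
      return std::min(int(u+ushift)-nu, maxiu0);   // start of the support window
      }
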
@@ -540,7 +540,7 @@ template<size_t supp, typename T> class HelperX2g2
double w0, xdw;
vector<std::mutex> &locks;
-void dump()
+DUCC0_NOINLINE void dump()
{
int nu = int(gconf.Nu());
int nv = int(gconf.Nv());
@@ -585,9 +585,9 @@ template<size_t supp, typename T> class HelperX2g2
{ checkShape(grid.shape(), {gconf.Nu(),gconf.Nv()}); }
~HelperX2g2() { dump(); }
-int lineJump() const { return svvec; }
+constexpr int lineJump() const { return svvec; }
T Wfac() const { return wfac; }
-[[gnu::hot]] void prep(const UVW &in)
+[[gnu::always_inline]] [[gnu::hot]] void prep(const UVW &in)
{
double u, v;
gconf.getpix(in.u, in.v, u, v, iu0, iv0);
@@ -632,7 +632,7 @@ template<size_t supp, typename T> class HelperG2x2
bool do_w_gridding;
double w0, xdw;
-void load()
+DUCC0_NOINLINE void load()
{
int nu = int(gconf.Nu());
int nv = int(gconf.Nv());
@@ -670,9 +670,9 @@ template<size_t supp, typename T> class HelperG2x2
xdw(T(1)/dw_)
{ checkShape(grid.shape(), {gconf.Nu(),gconf.Nv()}); }
-int lineJump() const { return svvec; }
+constexpr int lineJump() const { return svvec; }
T Wfac() const { return wfac; }
-[[gnu::hot]] void prep(const UVW &in)
+[[gnu::always_inline]] [[gnu::hot]] void prep(const UVW &in)
{
double u, v;
gconf.getpix(in.u, in.v, u, v, iu0, iv0);
@@ -779,7 +779,7 @@ template<size_t SUPP, typename T, typename Serv> [[gnu::hot]] void x2grid_c_help
execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
{
HelperX2g2<SUPP,T> hlp(gconf, grid, locks, w0, dw);
-int jump = hlp.lineJump();
+constexpr int jump = hlp.lineJump();
const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
const auto * DUCC0_RESTRICT kv = hlp.buf.simd+NVEC;
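
The change from "int jump" to "constexpr int jump" works because lineJump() is now a constexpr member function whose result (presumably a static compile-time constant in the real class) does not depend on the runtime state of the object, so the call can be evaluated at compile time even though hlp itself is an ordinary runtime object, making the inner-loop stride a compile-time constant. A self-contained illustration of that language rule, with hypothetical names:

    // Sketch: constexpr member call on a runtime object.
    struct HelperSketch
      {
      static constexpr int svvec = 4;            // assumed compile-time constant
      constexpr int lineJump() const { return svvec; }
      };

    void user()
      {
      HelperSketch hlp;                          // ordinary runtime object
      constexpr int jump = hlp.lineJump();       // evaluated at compile time
      static_assert(jump == 4, "stride known at compile time");
      }
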
@@ -855,7 +855,7 @@ template<size_t SUPP, typename T, typename Serv> [[gnu::hot]] void grid2x_c_help
execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
{
HelperG2x2<SUPP,T> hlp(gconf, grid, w0, dw);
-int jump = hlp.lineJump();
+constexpr int jump = hlp.lineJump();
const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
const auto * DUCC0_RESTRICT kv = hlp.buf.simd+NVEC;
@@ -1264,7 +1264,7 @@ template<typename T> auto getNuNv(const Baselines &baselines,
double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
if (x0*x0+y0*y0>1.)
nmin = -sqrt(abs(1.-x0*x0-y0*y0))-1.;
-auto [supp0, ofactors] = getAvailableKernels(epsilon);
+auto [supp0, ofactors] = getAvailableKernels(epsilon, sizeof(T)<8);
double mincost = 1e300;
constexpr double nref_fft=2048;
constexpr double costref_fft=0.0693;
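
For context, the getNuNv hunk above belongs to the automatic parameter selection: for every kernel support returned by getAvailableKernels together with its tabulated oversampling factor, the routine evaluates a cost model (an FFT term growing with the oversampled grid size, apparently anchored by the nref_fft/costref_fft reference constants, plus a gridding term growing with the number of visibilities and the squared support) and keeps the cheapest combination. The sketch below only mirrors that structure; the calibrated weights and the extra terms of the real model (w-stacking, rounding to FFT-friendly sizes) are omitted, and all constants are placeholders.

    #include <cmath>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Hedged sketch of the selection loop, not the model used in getNuNv.
    std::pair<size_t,double> pick_kernel(const std::vector<double> &ofactors,
      size_t supp0, size_t nxdirty, size_t nvis)
      {
      double mincost = 1e300;
      size_t best_supp = supp0;
      double best_ofactor = ofactors.empty() ? 2.0 : ofactors[0];
      for (size_t i = 0; i < ofactors.size(); ++i)
        {
        size_t supp = supp0 + i;                        // candidate support width
        double nu = ofactors[i]*nxdirty;                // oversampled grid size
        double cost_fft  = nu*nu*std::log2(nu);         // FFT-like term
        double cost_grid = double(nvis)*supp*supp;      // gridding term
        double cost = 1e-9*cost_fft + 1e-9*cost_grid;   // placeholder weights
        if (cost < mincost)
          { mincost = cost; best_supp = supp; best_ofactor = ofactors[i]; }
        }
      return {best_supp, best_ofactor};
      }
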
@@ -93,7 +93,7 @@ nu, nv: int
oversampling values lie between 1.5 and 2.
Increasing the oversampling factor decreases the kernel support width
required for the desired accuracy, so it typically reduces run-time; on the
-other hand, this will increase memory consumption.
+other hand, this will increase memory consumption.
If at least one of these two values is 0, the library will automatically
pick values that result in a fast computation.
epsilon: float
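
The docstring above treats the oversampling factor as the ratio between the grid dimensions (nu, nv) and the corresponding image dimensions, so a concrete number may help: under that reading, a 1024-pixel image axis with an oversampling factor of 1.75 needs a grid axis of at least ceil(1.75*1024) = 1792. The snippet below is illustrative only; the library presumably rounds the result up further to an FFT-friendly length, which is not shown here.

    #include <cmath>
    #include <cstddef>

    // Illustrative relation between image size, oversampling factor and grid size.
    size_t min_grid_axis(size_t npix, double ofactor /* typically 1.5 .. 2 */)
      { return size_t(std::ceil(ofactor*npix)); }      // 1024, 1.75 -> 1792
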
@@ -170,7 +170,7 @@ nu, nv: int
oversampling values lie between 1.5 and 2.
Increasing the oversampling factor decreases the kernel support width
required for the desired accuracy, so it typically reduces run-time; on the
-other hand, this will increase memory consumption.
+other hand, this will increase memory consumption.
If at least one of these two values is 0, the library will automatically
pick values that result in a fast computation.
epsilon: float
@@ -320,7 +320,7 @@ template<size_t W, typename T> class TemplateKernel
constexpr size_t support() const { return W; }
-void eval(T x, native_simd<T> *res) const
+[[gnu::always_inline]] void eval(T x, native_simd<T> *res) const
{
x = (x+1)*W-1;
for (size_t i=0; i<nvec; ++i)
@@ -332,7 +332,7 @@ template<size_t W, typename T> class TemplateKernel
}
}
-T eval_single(T x) const
+[[gnu::always_inline]] T eval_single(T x) const
{
auto nth = min(W-1, size_t(max(T(0), (x+1)*W*T(0.5))));
x = (x+1)*W-2*nth-1;
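
The piece-selection arithmetic kept in eval_single can be checked with a small worked example, assuming (as the surrounding code suggests) that the kernel is split into W polynomial pieces, each evaluated on a local coordinate in [-1,1]: for W=8 and x=0.3, (x+1)*W*0.5 = 5.2, so piece nth=5 is selected and the local coordinate (x+1)*W - 2*nth - 1 is about -0.6.

    #include <algorithm>
    #include <cstddef>

    // Worked example of the index math in eval_single; the comments give the
    // values obtained for W=8, x=0.3.
    void eval_single_index_demo()
      {
      constexpr size_t W = 8;
      double x = 0.3;
      size_t nth = std::min(W-1, size_t(std::max(0.0, (x+1)*W*0.5)));  // 5
      double xloc = (x+1)*W - 2*nth - 1;                               // about -0.6
      (void)nth; (void)xloc;
      }
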
@@ -624,11 +624,11 @@ size_t getMinSupport(double epsilon)
return Wmin;
}
-auto getAvailableKernels(double epsilon)
+auto getAvailableKernels(double epsilon, bool single_precision)
{
size_t supp0 = getMinSupport(epsilon);
vector<double> ofactors;
-for (size_t supp=supp0; supp<15; ++supp)
+for (size_t supp=supp0; supp<(single_precision ? 9 : 17); ++supp)
{
double ofac=3;
size_t idx = NEScache.size();