...
 
Commits (5)
0.2.0:
- wgridder:
- kernels are now evaluated via polynomial approximation, allowing much
more freedom in the choice of kernel function
- switch to 2-parameter ES kernels for better accuracy
- unnecessary FFT calculations are skipped
- totalconvolve:
- improved accuracy by making use of the new wgridder kernels
- *INTERFACE CHANGE* removed method "epsilon_guess()"
- pointingprovider:
new, experimental module for computing detector pointings from a time stream
of satellite pointings. To be used by litebird_sim initially.
......@@ -478,7 +478,7 @@ template<typename T> class GridderConfig
c2c(inout, inout, {0,1}, FORWARD, T(1), nthreads);
}
void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
void DUCC0_NOINLINE getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
{
u=fmod1(u_in*psx)*nu;
iu0 = min(int(u+ushift)-int(nu), maxiu0);
......@@ -526,7 +526,9 @@ template<typename T, typename T2=complex<T>> class Helper
{
private:
const GridderConfig<T> &gconf;
const GriddingKernel<T> &krn;
int nu, nv, nsafe, supp;
double xsupp;
const T2 *grid_r;
T2 *grid_w;
int su, sv;
......@@ -539,7 +541,7 @@ template<typename T, typename T2=complex<T>> class Helper
double w0, xdw;
vector<std::mutex> &locks;
void dump() const
void DUCC0_NOINLINE dump() const
{
if (bu0<-nsafe) return; // nothing written into buffer yet
......@@ -560,7 +562,7 @@ template<typename T, typename T2=complex<T>> class Helper
}
}
void load()
void DUCC0_NOINLINE load()
{
int idxu = (bu0+nu)%nu;
int idxv0 = (bv0+nv)%nv;
......@@ -589,8 +591,8 @@ template<typename T, typename T2=complex<T>> class Helper
Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_,
vector<std::mutex> &locks_, double w0_=-1, double dw_=-1)
: gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
supp(gconf.Supp()), grid_r(grid_r_),
: gconf(gconf_), krn(*gconf.krn), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
supp(gconf.Supp()), xsupp(2./supp), grid_r(grid_r_),
grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)),
bu0(-1000000), bv0(-1000000),
rbuf(su*sv*(grid_r!=nullptr),T(0)),
......@@ -607,12 +609,12 @@ template<typename T, typename T2=complex<T>> class Helper
int lineJump() const { return sv; }
T Wfac() const { return wfac; }
void prep(const UVW &in)
[[gnu::noinline]] void prep(const UVW &in)
{
const auto &krn(*(gconf.krn));
// const auto &krn(*(gconf.krn));
double u, v;
gconf.getpix(in.u, in.v, u, v, iu0, iv0);
double xsupp=2./supp;
// double xsupp=2./supp;
double x0 = xsupp*(iu0-u);
double y0 = xsupp*(iv0-v);
krn.eval(T(x0), &buf.simd[0]);
......@@ -626,8 +628,11 @@ template<typename T, typename T2=complex<T>> class Helper
bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe;
if (grid_r) load();
}
p0r = grid_r ? rbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr;
p0w = grid_w ? wbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr;
// p0r = grid_r ? rbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr;
// p0w = grid_w ? wbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr;
auto ofs = sv*(iu0-bu0) + iv0-bv0;
if (grid_r) p0r = rbuf.data() + ofs;
if (grid_w) p0w = wbuf.data() + ofs;
}
};
......@@ -703,7 +708,7 @@ template<class T, class T2> MsServ<T, T2> makeMsServ
const mav<idx_t,1> &idx, T2 &ms, const mav<T,2> &wgt)
{ return MsServ<T, T2>(baselines, idx, ms, wgt); }
template<typename T, typename Serv> void x2grid_c
template<typename T, typename Serv> [[gnu::hot]] void x2grid_c
(const GridderConfig<T> &gconf, Serv &srv, mav<complex<T>,2> &grid,
double w0=-1, double dw=-1)
{
......@@ -715,7 +720,7 @@ template<typename T, typename Serv> void x2grid_c
vector<std::mutex> locks(gconf.Nu());
size_t np = srv.Nvis();
execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
execGuided(np, nthreads, 100, 0.2, [&][[gnu::hot]] (Scheduler &sched)
{
Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
int jump = hlp.lineJump();
......@@ -750,7 +755,7 @@ template<typename T, typename Serv> void x2grid_c
});
}
template<typename T, typename Serv> void grid2x_c
template<typename T, typename Serv> [[gnu::hot]] void grid2x_c
(const GridderConfig<T> &gconf, const mav<complex<T>,2> &grid,
Serv &srv, double w0=-1, double dw=-1)
{
......@@ -763,7 +768,7 @@ template<typename T, typename Serv> void grid2x_c
// Loop over sampling points
size_t np = srv.Nvis();
execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
execGuided(np, nthreads, 1000, 0.5, [&][[gnu::hot]] (Scheduler &sched)
{
Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
int jump = hlp.lineJump();
......
......@@ -7,7 +7,7 @@ from setuptools import setup, Extension
import pybind11
pkgname = 'ducc0'
version = '0.1.9'
version = '0.2.0'
def _get_files_by_suffix(directory, suffix):
......
......@@ -166,7 +166,7 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
KernelCorrection corr;
template<size_t NV, size_t DEG> void eval_intern(T x, native_simd<T> *res) const
template<size_t NV, size_t DEG> [[gnu::hot]] void eval_intern(T x, native_simd<T> *res) const
{
x = (x+1)*W-1;
for (size_t i=0; i<NV; ++i)
......@@ -178,7 +178,8 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
}
}
template<size_t DEG> T eval_single_intern(T x) const
#if 1
template<size_t DEG> [[gnu::hot]] T eval_single_intern(T x) const
{
auto nth = min(W-1, size_t(max(T(0), (x+1)*W*T(0.5))));
x = (x+1)*W-2*nth-1;
......@@ -189,6 +190,19 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
tval = tval*x + coeff[j*nvec+i][imod];
return tval;
}
#else
template<size_t DEG> [[gnu::hot]] T eval_single_intern(T x) const
{
auto nth = min(W-1, size_t(max(T(0), (x+1)*W*T(0.5))));
x = (x+1)*W-2*nth-1;
auto i = nth/vlen;
auto imod = nth%vlen;
auto tval = coeff[i];
for (size_t j=1; j<=DEG; ++j)
tval = tval*x + coeff[j*nvec+i];
return tval[imod];
}
#endif
void eval_intern_general(T x, native_simd<T> *res) const
{
......
......@@ -8,53 +8,35 @@ def _l2error(a, b):
def reference(arr):
return ducc0.fft.c2c(arr)
return ducc0.fft.c2c(arr, axes=(0,))
#def get_tw(f1,f2):
# a1 =
# works
def fourstep_1w(arr, f1):
assert(arr.ndim==1)
f2 = arr.shape[0] // f1
assert(arr.shape[0] == f1*f2)
t1 = arr.copy().reshape((f1, f2))
tx = time.time()
t1 = ducc0.fft.c2c(t1, out=t1, axes=(0,))
print("xx",time.time()-tx)
tx = time.time()
t1 = ducc0.misc.twiddle(t1,True)
print("xx",time.time()-tx)
# rngj = np.arange(f2).astype(np.float64)
# tw0 = -2*np.pi*1j/(f1*f2)
# for i in range(f1):
# t1[i] *= np.exp((tw0*i)*rngj)
tx = time.time()
t1 = t1.T
t1 = ducc0.fft.c2c(t1, axes=(0,))
print("xx",time.time()-tx)
return t1.reshape((-1,))
def fourstep_1(arr, f1):
assert(arr.ndim==1)
assert(arr.ndim==2)
f2 = arr.shape[0] // f1
assert(arr.shape[0] == f1*f2)
t1 = arr.reshape((f1, f2))
t1 = arr.reshape((f1, f2, 4))
tx = time.time()
t1 = ducc0.fft.c2c(t1, axes=(0,))
print("xx",time.time()-tx)
tx = time.time()
t1 = ducc0.misc.twiddle(t1,True)
ducc0.misc.twiddle(t1[:,:,0],True)
ducc0.misc.twiddle(t1[:,:,1],True)
ducc0.misc.twiddle(t1[:,:,2],True)
ducc0.misc.twiddle(t1[:,:,3],True)
print("xx",time.time()-tx)
# rngj = np.arange(f2).astype(np.float64)
# tw0 = -2*np.pi*1j/(f1*f2)
# for i in range(f1):
# t1[i] *= np.exp((tw0*i)*rngj)
tx = time.time()
t2 = np.empty((f2,f1),dtype=arr.dtype)
t2 = ducc0.fft.c2c(t1, out=t2.T, axes=(1,))
t2 = np.empty((f2,f1,4),dtype=arr.dtype)
t2 = ducc0.fft.c2c(t1, out=t2.transpose((1,0,2)), axes=(1,))
print("xx",time.time()-tx)
print(t2.strides)
return t2.T.reshape((-1,))
return t2.transpose((1,0,2)).reshape((-1,4))
def fourstep_2(arr, f1):
assert(arr.ndim==1)
......@@ -113,10 +95,11 @@ def sixstep_1(arr, f1):
return t1.reshape((-1,))
f1, f2 = 3000, 3000
f1, f2 = 100, 200
rng = np.random.default_rng(42)
arr = rng.random(f1*f2) + 1j*rng.random(f1*f2) - 0.5 -0.5j
arr = rng.random(f1*f2*4) + 1j*rng.random(f1*f2*4) - 0.5 -0.5j
arr = arr.reshape((f1*f2,4))
t0 = time.time()
ref = reference(arr)
print(time.time()-t0)
......