Commit 145541d0 authored by Martin Reinecke's avatar Martin Reinecke

unexpected speedup

parent 9777c495
......@@ -672,7 +672,7 @@ class GridderConfig
grid.data(), grid.data(), T(1), nthreads);
}
template<typename T>void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const
void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
{
u=fmod1(u_in*psx)*nu,
iu0 = int(u-supp*0.5 + 1 + nu) - nu;
......@@ -729,7 +729,7 @@ template<typename T, typename T2=complex<T>> class Helper
vector<T2> rbuf, wbuf;
bool do_w_gridding;
T w0, xdw;
double w0, xdw;
size_t nexp;
size_t nvecs;
......@@ -776,9 +776,9 @@ template<typename T, typename T2=complex<T>> class Helper
T kernel[64] ALIGNED(64);
Helper(const GridderConfig &gconf_, const T2 *grid_r_, T2 *grid_w_,
T w0_=-1, T dw_=-1)
double w0_=-1, double dw_=-1)
: gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
supp(gconf.Supp()), beta(gconf.Beta()), grid_r(grid_r_),
supp(gconf.Supp()), beta(T(gconf.Beta())), grid_r(grid_r_),
grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)),
bu0(-1000000), bv0(-1000000),
rbuf(su*sv*(grid_r!=nullptr),T(0)),
......@@ -795,18 +795,18 @@ template<typename T, typename T2=complex<T>> class Helper
T Wfac() const { return kernel[2*supp]; }
void prep(const UVW &in)
{
T u, v;
gconf.getpix<T>(in.u, in.v, u, v, iu0, iv0);
T xsupp=T(2)/supp;
auto x0 = xsupp*(iu0-u);
auto y0 = xsupp*(iv0-v);
double u, v;
gconf.getpix(in.u, in.v, u, v, iu0, iv0);
double xsupp=2./supp;
double x0 = xsupp*(iu0-u);
double y0 = xsupp*(iv0-v);
for (int i=0; i<supp; ++i)
{
kernel[i ] = x0+i*xsupp;
kernel[i+supp] = y0+i*xsupp;
kernel[i ] = T(x0+i*xsupp);
kernel[i+supp] = T(y0+i*xsupp);
}
if (do_w_gridding)
kernel[2*supp] = min(T(1), xdw*xsupp*abs(w0-T(in.w)));
kernel[2*supp] = min(T(1), T(xdw*xsupp*abs(w0-in.w)));
for (size_t i=nexp; i<nvecs; ++i)
kernel[i]=0;
for (size_t i=0; i<nvecs; ++i)
......@@ -1196,12 +1196,12 @@ template<typename T> void apply_wcorr(const GridderConfig &gconf,
{
double fy = y0+j*psy;
fy*=fy;
double fct = 0;
T fct = 0;
double tmp = 1.-fx-fy;
if (tmp>=0)
{
auto nm1 = (-fx-fy)/(sqrt(1.-fx-fy)+1.); // accurate form of sqrt(1-x-y)
fct = (nm1<=-1) ? 0. : kernel.corfac(nm1*dw);
fct = T((nm1<=-1) ? 0. : kernel.corfac(nm1*dw));
}
size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
dirty(i,j)*=fct;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment