Commit 145541d0 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

unexpected speedup

parent 9777c495
...@@ -672,7 +672,7 @@ class GridderConfig ...@@ -672,7 +672,7 @@ class GridderConfig
grid.data(), grid.data(), T(1), nthreads); grid.data(), grid.data(), T(1), nthreads);
} }
template<typename T>void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
{ {
u=fmod1(u_in*psx)*nu, u=fmod1(u_in*psx)*nu,
iu0 = int(u-supp*0.5 + 1 + nu) - nu; iu0 = int(u-supp*0.5 + 1 + nu) - nu;
...@@ -729,7 +729,7 @@ template<typename T, typename T2=complex<T>> class Helper ...@@ -729,7 +729,7 @@ template<typename T, typename T2=complex<T>> class Helper
vector<T2> rbuf, wbuf; vector<T2> rbuf, wbuf;
bool do_w_gridding; bool do_w_gridding;
T w0, xdw; double w0, xdw;
size_t nexp; size_t nexp;
size_t nvecs; size_t nvecs;
...@@ -776,9 +776,9 @@ template<typename T, typename T2=complex<T>> class Helper ...@@ -776,9 +776,9 @@ template<typename T, typename T2=complex<T>> class Helper
T kernel[64] ALIGNED(64); T kernel[64] ALIGNED(64);
Helper(const GridderConfig &gconf_, const T2 *grid_r_, T2 *grid_w_, Helper(const GridderConfig &gconf_, const T2 *grid_r_, T2 *grid_w_,
T w0_=-1, T dw_=-1) double w0_=-1, double dw_=-1)
: gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()), : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
supp(gconf.Supp()), beta(gconf.Beta()), grid_r(grid_r_), supp(gconf.Supp()), beta(T(gconf.Beta())), grid_r(grid_r_),
grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)), grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)),
bu0(-1000000), bv0(-1000000), bu0(-1000000), bv0(-1000000),
rbuf(su*sv*(grid_r!=nullptr),T(0)), rbuf(su*sv*(grid_r!=nullptr),T(0)),
...@@ -795,18 +795,18 @@ template<typename T, typename T2=complex<T>> class Helper ...@@ -795,18 +795,18 @@ template<typename T, typename T2=complex<T>> class Helper
T Wfac() const { return kernel[2*supp]; } T Wfac() const { return kernel[2*supp]; }
void prep(const UVW &in) void prep(const UVW &in)
{ {
T u, v; double u, v;
gconf.getpix<T>(in.u, in.v, u, v, iu0, iv0); gconf.getpix(in.u, in.v, u, v, iu0, iv0);
T xsupp=T(2)/supp; double xsupp=2./supp;
auto x0 = xsupp*(iu0-u); double x0 = xsupp*(iu0-u);
auto y0 = xsupp*(iv0-v); double y0 = xsupp*(iv0-v);
for (int i=0; i<supp; ++i) for (int i=0; i<supp; ++i)
{ {
kernel[i ] = x0+i*xsupp; kernel[i ] = T(x0+i*xsupp);
kernel[i+supp] = y0+i*xsupp; kernel[i+supp] = T(y0+i*xsupp);
} }
if (do_w_gridding) if (do_w_gridding)
kernel[2*supp] = min(T(1), xdw*xsupp*abs(w0-T(in.w))); kernel[2*supp] = min(T(1), T(xdw*xsupp*abs(w0-in.w)));
for (size_t i=nexp; i<nvecs; ++i) for (size_t i=nexp; i<nvecs; ++i)
kernel[i]=0; kernel[i]=0;
for (size_t i=0; i<nvecs; ++i) for (size_t i=0; i<nvecs; ++i)
...@@ -1196,12 +1196,12 @@ template<typename T> void apply_wcorr(const GridderConfig &gconf, ...@@ -1196,12 +1196,12 @@ template<typename T> void apply_wcorr(const GridderConfig &gconf,
{ {
double fy = y0+j*psy; double fy = y0+j*psy;
fy*=fy; fy*=fy;
double fct = 0; T fct = 0;
double tmp = 1.-fx-fy; double tmp = 1.-fx-fy;
if (tmp>=0) if (tmp>=0)
{ {
auto nm1 = (-fx-fy)/(sqrt(1.-fx-fy)+1.); // accurate form of sqrt(1-x-y) auto nm1 = (-fx-fy)/(sqrt(1.-fx-fy)+1.); // accurate form of sqrt(1-x-y)
fct = (nm1<=-1) ? 0. : kernel.corfac(nm1*dw); fct = T((nm1<=-1) ? 0. : kernel.corfac(nm1*dw));
} }
size_t i2 = nx_dirty-i, j2 = ny_dirty-j; size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
dirty(i,j)*=fct; dirty(i,j)*=fct;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment