Commit 6e1acd1e authored by Martin Reinecke's avatar Martin Reinecke
Browse files

streamlining

parent 34dddd25
......@@ -5,6 +5,7 @@
#ifdef __GNUC__
#define RESTRICT __restrict__
#define NOINLINE __attribute__ ((noinline))
#else
#define RESTRICT
#endif
......@@ -245,14 +246,21 @@ a_i_c peanoindex(const a_d_c &uv_, int nu, int nv)
return res;
}
int get_w(double epsilon)
{
int w=int(-log10(epsilon)+2.9999);
if (epsilon<1e-11) ++w;
return w;
}
class Helper
{
protected:
int nu, nv;
public:
int w;
protected:
double beta;
protected:
int nsafe, su;
public:
int sv;
......@@ -261,7 +269,7 @@ class Helper
int iu0, iv0; // start index of the current visibility
int bu0, bv0; // start index of the current buffer
void update(double u_in, double v_in)
void NOINLINE update(double u_in, double v_in)
{
auto u = fmodulo(u_in, 1.)*nu;
iu0 = int(u-w*0.5 + 1 + nu) - nu;
......@@ -275,8 +283,8 @@ class Helper
kernel[i ] = xw*(iu0+i-u);
kernel[i+w] = xw*(iv0+i-v);
}
for (int i=0; i<2*w; ++i)
kernel[i] = exp(beta*(sqrt(1-kernel[i]*kernel[i])-1.));
for (auto &k : kernel)
k = exp(beta*sqrt(1.-k*k));
}
bool need_to_move() const
......@@ -291,8 +299,8 @@ class Helper
protected:
Helper(int nu_, int nv_, double epsilon)
: nu(nu_), nv(nv_),
w(int(-log10(epsilon)+1.9999)), beta(2.3*w), nsafe((w+1)/2),
su(min(3*w, nu)), sv(min(3*w, nv)),
w(get_w(epsilon)), beta(2.3*w), nsafe((w+1)/2),
su(min(max(2*w,80), nu)), sv(min(max(2*w,80), nv)),
kernel(2*w),
bu0(-1000000), bv0(-1000000)
{
......@@ -384,7 +392,31 @@ class ReadHelper: public Helper
}
};
a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
a_d_c complex2hartley (const a_c_c &grid_)
{
myassert(grid_.ndim()==2, "grid array must be 2D");
int nu = grid_.shape(0), nv = grid_.shape(1);
auto grid = grid_.data();
int odim[] = {nu,nv};
a_d_c res(odim);
auto grid2 = res.mutable_data();
for (int u=0; u<nu; ++u)
{
int xu = (u==0) ? 0 : nu-u;
for (int v=0; v<nv; ++v)
{
int xv = (v==0) ? 0 : nv-v;
int i1 = u*nv+v;
int i2 = xu*nv+xv;
grid2[i1] = 0.5*(grid[i1].real()+grid[i1].imag()+
grid[i2].real()-grid[i2].imag());
}
}
return res;
}
a_d_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
int nu, int nv, double epsilon)
{
myassert(uv_.ndim()==2, "uv array must be 2D");
......@@ -403,6 +435,7 @@ a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
#pragma omp parallel
{
WriteHelper hlp(nu, nv, epsilon, grid);
double emb = exp(-2*hlp.beta);
// Loop over sampling points
#pragma omp for schedule(dynamic,10000)
......@@ -411,7 +444,7 @@ a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
hlp.prep_write(uv[2*ipart], uv[2*ipart+1]);
auto * RESTRICT ptr = hlp.p0;
int w = hlp.w;
auto v = vis[ipart];
auto v = vis[ipart]*emb;
const double * RESTRICT ku = hlp.kernel.data();
const double * RESTRICT kv = hlp.kernel.data()+hlp.w;
for (int cu=0; cu<w; ++cu)
......@@ -423,17 +456,17 @@ a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
}
}
} // end of parallel region
return res;
return complex2hartley(res);
}
a_d_c to_grid_post (const a_c_c &grid_)
a_c_c hartley2complex (const a_d_c &grid_)
{
myassert(grid_.ndim()==2, "grid array must be 2D");
int nu = grid_.shape(0), nv = grid_.shape(1);
auto grid = grid_.data();
int odim[] = {nu,nv};
a_d_c res(odim);
a_c_c res(odim);
auto grid2 = res.mutable_data();
for (int u=0; u<nu; ++u)
{
......@@ -443,25 +476,26 @@ a_d_c to_grid_post (const a_c_c &grid_)
int xv = (v==0) ? 0 : nv-v;
int i1 = u*nv+v;
int i2 = xu*nv+xv;
grid2[i1] = 0.5*(grid[i1].real()+grid[i1].imag()+
grid[i2].real()-grid[i2].imag());
double v1 = 0.5*grid[i1];
double v2 = 0.5*grid[i2];
grid2[i1] = complex<double>(v1+v2, v1-v2);
}
}
return res;
}
a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_,
int nu, int nv, double epsilon)
a_c_c from_grid (const a_d_c &uv_, const a_d_c &grid0_, double epsilon)
{
myassert(uv_.ndim()==2, "uv array must be 2D");
myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
myassert(grid_.ndim()==2, "grid array must be 2D");
myassert(grid0_.ndim()==2, "grid array must be 2D");
auto grid_ = hartley2complex(grid0_);
int nvis = uv_.shape(0);
auto uv = uv_.data();
auto grid = grid_.data();
myassert(nu==grid_.shape(0), "oops");
myassert(nv==grid_.shape(1), "oops");
int nu=grid_.shape(0),
nv=grid_.shape(1);
int odim[] = {nvis};
a_c_c res(odim);
......@@ -471,6 +505,7 @@ a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_,
#pragma omp parallel
{
ReadHelper hlp(nu, nv, epsilon, grid);
double emb = exp(-2*hlp.beta);
#pragma omp for schedule(dynamic,10000)
for (int ipart=0; ipart<nvis; ++ipart)
......@@ -489,40 +524,15 @@ a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_,
r += tmp*ku[cu];
ptr += hlp.sv;
}
vis[ipart] = r;
vis[ipart] = r*emb;
}
}
return res;
}
a_c_c from_grid_pre (const a_d_c &grid_)
{
myassert(grid_.ndim()==2, "grid array must be 2D");
int nu = grid_.shape(0), nv = grid_.shape(1);
auto grid = grid_.data();
int odim[] = {nu,nv};
a_c_c res(odim);
auto grid2 = res.mutable_data();
for (int u=0; u<nu; ++u)
{
int xu = (u==0) ? 0 : nu-u;
for (int v=0; v<nv; ++v)
{
int xv = (v==0) ? 0 : nv-v;
int i1 = u*nv+v;
int i2 = xu*nv+xv;
double v1 = 0.5*grid[i1];
double v2 = 0.5*grid[i2];
grid2[i1] = complex<double>(v1+v2, v1-v2);
}
}
return res;
}
a_d_c correction_factors (size_t n, size_t nval, double epsilon)
{
auto w = int(-log10(epsilon)+1.9999);
auto w = get_w(epsilon);
auto beta = 2.3*w;
auto p = int(1.5*w+2);
double alpha = pi*w/n;
......@@ -548,12 +558,9 @@ a_d_c correction_factors (size_t n, size_t nval, double epsilon)
PYBIND11_MODULE(nifty_gridder, m)
{
using namespace pybind11::literals;
m.def("peanoindex",&peanoindex);
m.def("get_w",&get_w);
m.def("to_grid",&to_grid);
m.def("to_grid_post",&to_grid_post);
m.def("from_grid",&from_grid);
m.def("from_grid_pre",&from_grid_pre);
m.def("correction_factors",&correction_factors);
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment