Scheduled maintenance on Monday 2019-06-24 between 10:00-11:00 CEST

Commit a2c3da44 authored by Martin Reinecke's avatar Martin Reinecke

prepare Helper class for simultaneous reading and writing (e.g. for holographic matrix)

parent 8e7b0e6a
......@@ -542,16 +542,13 @@ template<typename T> class Helper
const GridderConfig<T> &gconf;
int nu, nv, nsafe, w;
T beta;
complex<T> *grid;
bool write;
int su;
public:
int sv;
private:
const complex<T> *grid_r;
complex<T> *grid_w;
int su, sv;
int iu0, iv0; // start index of the current visibility
int bu0, bv0; // start index of the current buffer
vector<complex<T>> data;
vector<complex<T>> rbuf, wbuf;
void dump() const
{
......@@ -566,7 +563,7 @@ template<typename T> class Helper
int idxv = idxv0;
for (int iv=0; iv<sv; ++iv)
{
grid[idxu*nv + idxv] += data[iu*sv + iv];
grid_w[idxu*nv + idxv] += wbuf[iu*sv + iv];
if (++idxv>=nv) idxv=0;
}
if (++idxu>=nu) idxu=0;
......@@ -583,7 +580,7 @@ template<typename T> class Helper
int idxv = idxv0;
for (int iv=0; iv<sv; ++iv)
{
data[iu*sv + iv] = grid[idxu*nv + idxv];
rbuf[iu*sv + iv] = grid_r[idxu*nv + idxv];
if (++idxv>=nv) idxv=0;
}
if (++idxu>=nu) idxu=0;
......@@ -591,17 +588,23 @@ template<typename T> class Helper
}
public:
complex<T> *p0;
const complex<T> *p0r;
complex<T> *p0w;
vector<T> kernel;
Helper(const GridderConfig<T> &gconf_, const complex<T> *grid_, bool write_)
: gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()), w(gconf.W()),
beta(gconf.Beta()),
grid(const_cast<complex<T> *>(grid_)), write(write_),
Helper(const GridderConfig<T> &gconf_, const complex<T> *grid_r_,
complex<T> *grid_w_)
: gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
w(gconf.W()), beta(gconf.Beta()), grid_r(grid_r_), grid_w(grid_w_),
su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)),
bu0(-1000000), bv0(-1000000), data(su*sv,T(0)), kernel(2*w)
bu0(-1000000), bv0(-1000000),
rbuf(su*sv*(grid_r!=nullptr),T(0)),
wbuf(su*sv*(grid_w!=nullptr),T(0)),
kernel(2*w)
{}
~Helper() { if (write) dump(); }
~Helper() { if (grid_w) dump(); }
int lineJump() const { return sv; }
void prep(T u_in, T v_in)
{
......@@ -622,12 +625,13 @@ template<typename T> class Helper
if ((iu0<bu0) || (iv0<bv0) || (iu0+w>bu0+su) || (iv0+w>bv0+sv))
{
if (write) { dump(); fill(data.begin(), data.end(), T(0)); }
if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); }
bu0=((((iu0+nsafe)>>logsquare)<<logsquare))-nsafe;
bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe;
if (!write) load();
if (grid_r) load();
}
p0 = data.data() + sv*(iu0-bu0) + iv0-bv0;
p0r = rbuf.data() + sv*(iu0-bu0) + iv0-bv0;
p0w = wbuf.data() + sv*(iu0-bu0) + iv0-bv0;
}
};
......@@ -652,8 +656,9 @@ template<typename T> pyarr_c<complex<T>> vis2grid_c(
#pragma omp parallel
{
Helper<T> hlp(gconf, grid, true);
Helper<T> hlp(gconf, nullptr, grid);
T emb = exp(-2*beta);
int jump = hlp.lineJump();
const T * RESTRICT ku = hlp.kernel.data();
const T * RESTRICT kv = hlp.kernel.data()+w;
......@@ -663,14 +668,14 @@ template<typename T> pyarr_c<complex<T>> vis2grid_c(
{
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
hlp.prep(coord.u, coord.v);
auto * RESTRICT ptr = hlp.p0;
auto * RESTRICT ptr = hlp.p0w;
auto v(vis[ipart]*emb);
for (size_t cu=0; cu<w; ++cu)
{
complex<T> tmp(v*ku[cu]);
for (size_t cv=0; cv<w; ++cv)
ptr[cv] += tmp*kv[cv];
ptr+=hlp.sv;
ptr+=jump;
}
}
} // end of parallel region
......@@ -706,8 +711,9 @@ template<typename T> pyarr_c<complex<T>> ms2grid_c(
#pragma omp parallel
{
Helper<T> hlp(gconf, grid, true);
Helper<T> hlp(gconf, nullptr, grid);
T emb = exp(-2*beta);
int jump = hlp.lineJump();
const T * RESTRICT ku = hlp.kernel.data();
const T * RESTRICT kv = hlp.kernel.data()+w;
......@@ -717,14 +723,14 @@ template<typename T> pyarr_c<complex<T>> ms2grid_c(
{
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
hlp.prep(coord.u, coord.v);
auto * RESTRICT ptr = hlp.p0;
auto * RESTRICT ptr = hlp.p0w;
auto v(ms[idx[ipart]]*emb);
for (size_t cu=0; cu<w; ++cu)
{
complex<T> tmp(v*ku[cu]);
for (size_t cv=0; cv<w; ++cv)
ptr[cv] += tmp*kv[cv];
ptr+=hlp.sv;
ptr+=jump;
}
}
} // end of parallel region
......@@ -737,9 +743,9 @@ template<typename T> pyarr_c<T> ms2grid(const Baselines<T> &baselines,
const pyarr_c<complex<T>> &ms_)
{ return complex2hartley(ms2grid_c(baselines, gconf, idx_, ms_)); }
template<typename T> pyarr_c<complex<T>> grid2vis_c(const Baselines<T> &baselines,
const GridderConfig<T> &gconf, const pyarr_c<uint32_t> &idx_,
const pyarr_c<complex<T>> &grid_)
template<typename T> pyarr_c<complex<T>> grid2vis_c(
const Baselines<T> &baselines, const GridderConfig<T> &gconf,
const pyarr_c<uint32_t> &idx_, const pyarr_c<complex<T>> &grid_)
{
size_t nu=gconf.Nu(), nv=gconf.Nv();
checkArray(idx_, "idx", {0});
......@@ -758,8 +764,9 @@ template<typename T> pyarr_c<complex<T>> grid2vis_c(const Baselines<T> &baseline
// Loop over sampling points
#pragma omp parallel
{
Helper<T> hlp(gconf, grid, false);
Helper<T> hlp(gconf, grid, nullptr);
T emb = exp(-2*beta);
int jump = hlp.lineJump();
const T * RESTRICT ku = hlp.kernel.data();
const T * RESTRICT kv = hlp.kernel.data()+w;
......@@ -769,14 +776,14 @@ template<typename T> pyarr_c<complex<T>> grid2vis_c(const Baselines<T> &baseline
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
hlp.prep(coord.u, coord.v);
complex<T> r = 0;
auto * RESTRICT ptr = hlp.p0;
const auto * RESTRICT ptr = hlp.p0r;
for (size_t cu=0; cu<w; ++cu)
{
complex<T> tmp(0);
for (size_t cv=0; cv<w; ++cv)
tmp += ptr[cv] * kv[cv];
r += tmp*ku[cu];
ptr += hlp.sv;
ptr += jump;
}
vis[ipart] = r*emb;
}
......@@ -812,8 +819,9 @@ template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines
// Loop over sampling points
#pragma omp parallel
{
Helper<T> hlp(gconf, grid, false);
Helper<T> hlp(gconf, grid, nullptr);
T emb = exp(-2*beta);
int jump = hlp.lineJump();
const T * RESTRICT ku = hlp.kernel.data();
const T * RESTRICT kv = hlp.kernel.data()+w;
......@@ -823,14 +831,14 @@ template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
hlp.prep(coord.u, coord.v);
complex<T> r = 0;
auto * RESTRICT ptr = hlp.p0;
const auto * RESTRICT ptr = hlp.p0r;
for (size_t cu=0; cu<w; ++cu)
{
complex<T> tmp(0);
for (size_t cv=0; cv<w; ++cv)
tmp += ptr[cv] * kv[cv];
r += tmp*ku[cu];
ptr += hlp.sv;
ptr += jump;
}
ms[idx[ipart]] += r*emb;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment