Commit 2812ffad authored by Martin Reinecke's avatar Martin Reinecke
Browse files

tweak code to reduce non-parallel part

parent 9f85bcd9
......@@ -1198,10 +1198,16 @@ template<typename T> void apply_wcorr(const GridderConfig &gconf,
}
}
struct idxhelper
{
uint32_t idx;
int minplane;
};
template<typename Serv> void wstack_common(
const GridderConfig &gconf, const Serv &srv, double &wmin,
double &dw, size_t &nplanes, vector<size_t> &nvis_plane,
vector<int> &minplane, size_t verbosity)
vector<vector<idxhelper>> &minplane, size_t verbosity)
{
size_t nvis = srv.Nvis();
size_t nthreads = gconf.Nthreads();
......@@ -1232,22 +1238,24 @@ template<typename Serv> void wstack_common(
if (verbosity>0) cout << "Kernel support: " << supp << endl;
if (verbosity>0) cout << "nplanes: " << nplanes << endl;
nvis_plane.resize(nplanes,0);
minplane.resize(nvis);
#pragma omp parallel num_threads(nthreads)
{
vector<size_t> nvploc(nplanes,0);
#pragma omp for
// FIXME: this could still use some tweaking
int dbunch=supp;
int nbunch=(nplanes+dbunch-1)/dbunch;
minplane.resize(nbunch);
for (auto &pl: minplane)
pl.resize(0);
for (size_t ipart=0; ipart<nvis; ++ipart)
{
int plane0 = int((abs(srv.getCoord(ipart).w)-wmin)/dw+0.5*supp)-int(supp-1);
minplane[ipart]=plane0;
for (int ibunch=0; ibunch<nbunch; ++ibunch)
if ((plane0+dbunch>ibunch*dbunch) && (plane0<(ibunch+1)*dbunch))
minplane[ibunch].push_back({ipart,plane0});
for (int i=max<int>(0,plane0); i<min<int>(nplanes,plane0+supp); ++i)
++nvploc[i];
++nvis_plane[i];
}
#pragma omp critical
for (size_t i=0; i<nplanes; ++i)
nvis_plane[i]+=nvploc[i];
}
for(auto &pl:minplane)
pl.shrink_to_fit();
}
template<typename T, typename Serv> void x2dirty(
......@@ -1258,21 +1266,20 @@ template<typename T, typename Serv> void x2dirty(
{
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
size_t nvis = srv.Nvis();
auto supp = gconf.Supp();
size_t nthreads = gconf.Nthreads();
double wmin;
double dw;
size_t nplanes;
vector<size_t> nvis_plane;
vector<int> minplane;
vector<vector<idxhelper>> minplane;
if (verbosity>0) cout << "Gridding using improved w-stacking" << endl;
wstack_common(gconf, srv, wmin, dw, nplanes, nvis_plane, minplane, verbosity);
dirty.fill(0);
vector<complex<T>> vis_loc_;
vector<uint32_t> idx_loc_;
vector<uint32_t> newidx;
vector<uint32_t> mapper;
tmpStorage<complex<T>,2> grid_({gconf.Nu(),gconf.Nv()});
auto grid=grid_.getMav();
tmpStorage<complex<T>,2> tdirty_(dirty.shape());
......@@ -1286,35 +1293,36 @@ template<typename T, typename Serv> void x2dirty(
double wcur = wmin+iw*dw;
size_t cnt=0;
newidx.resize(nvis_plane[iw]);
mapper.resize(nvis_plane[iw]);
const auto &bla(minplane[iw/supp]);
for (size_t i=0; i<bla.size(); ++i)
if ((int(iw)>=bla[i].minplane) && (int(iw)<bla[i].minplane+int(supp)))
mapper[cnt++] = bla[i].idx;
idx_loc_.resize(nvis_plane[iw]);
if (srv.isMsServ())
{
for (size_t ipart=0; ipart<nvis; ++ipart)
if ((int(iw)>=minplane[ipart]) && (iw<minplane[ipart]+supp))
newidx[cnt++] = srv.getIdx(ipart);
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis_plane[iw]; ++i)
idx_loc_[i] = srv.getIdx(mapper[i]);
grid.fill(0);
auto newidx2 = const_mav<uint32_t, 1>(newidx.data(), {newidx.size()});
Serv newsrv(srv, newidx2);
auto idx_loc = const_mav<uint32_t, 1>(idx_loc_.data(), {idx_loc_.size()});
Serv newsrv(srv, idx_loc);
x2grid_c(gconf, newsrv, grid, wcur, dw);
}
else
{
for (size_t ipart=0; ipart<nvis; ++ipart)
if ((int(iw)>=minplane[ipart]) && (iw<minplane[ipart]+supp))
newidx[cnt++] = ipart;
vis_loc_.resize(nvis_plane[iw]);
auto vis_loc = mav<complex<T>,1>(vis_loc_.data(), {nvis_plane[iw]});
idx_loc_.resize(nvis_plane[iw]);
auto idx_loc = mav<uint32_t,1>(idx_loc_.data(), {nvis_plane[iw]});
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis_plane[iw]; ++i)
{
auto ipart = newidx[i];
vis_loc[i] = srv.getVis(ipart);
idx_loc[i] = srv.getIdx(ipart);
auto ipart = mapper[i];
vis_loc_[i] = srv.getVis(ipart);
idx_loc_[i] = srv.getIdx(ipart);
}
grid.fill(0);
vis2grid_c(srv.getBaselines(), gconf, cmav(idx_loc), cmav(vis_loc), grid,
auto vis_loc = const_mav<complex<T>,1>(vis_loc_.data(), {nvis_plane[iw]});
auto idx_loc = const_mav<uint32_t,1>(idx_loc_.data(), {nvis_plane[iw]});
vis2grid_c(srv.getBaselines(), gconf, idx_loc, vis_loc, grid,
nullmav<T,1>(), wcur, dw);
}
gconf.grid2dirty_c(cmav(grid), tdirty);
......@@ -1359,14 +1367,13 @@ template<typename T, typename Serv> void dirty2x(
{
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
size_t nvis = srv.Nvis();
auto supp = gconf.Supp();
size_t nthreads = gconf.Nthreads();
double wmin;
double dw;
size_t nplanes;
vector<size_t> nvis_plane;
vector<int> minplane;
vector<vector<idxhelper>> minplane;
if (verbosity>0) cout << "Degridding using improved w-stacking" << endl;
wstack_common(gconf, srv, wmin, dw, nplanes, nvis_plane, minplane, verbosity);
......@@ -1380,7 +1387,7 @@ template<typename T, typename Serv> void dirty2x(
vector<complex<T>> vis_loc_;
vector<uint32_t> idx_loc_;
vector<uint32_t> newidx;
vector<uint32_t> mapper;
tmpStorage<complex<T>,2> grid_({gconf.Nu(),gconf.Nv()});
auto grid=grid_.getMav();
tmpStorage<complex<T>,2> tdirty2_({nx_dirty,ny_dirty});
......@@ -1400,35 +1407,36 @@ template<typename T, typename Serv> void dirty2x(
gconf.apply_wscreen(cmav(tdirty2), tdirty2, wcur, false);
gconf.dirty2grid_c(cmav(tdirty2), grid);
newidx.resize(nvis_plane[iw]);
size_t cnt=0;
mapper.resize(nvis_plane[iw]);
const auto &bla(minplane[iw/supp]);
for (size_t i=0; i<bla.size(); ++i)
if ((int(iw)>=bla[i].minplane) && (int(iw)<bla[i].minplane+int(supp)))
mapper[cnt++] = bla[i].idx;
idx_loc_.resize(nvis_plane[iw]);
if (srv.isMsServ())
{
for (size_t ipart=0; ipart<nvis; ++ipart)
if ((int(iw)>=minplane[ipart]) && (iw<minplane[ipart]+supp))
newidx[cnt++] = srv.getIdx(ipart);
auto newidx2 = const_mav<uint32_t, 1>(newidx.data(), {newidx.size()});
Serv newsrv(srv, newidx2);
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis_plane[iw]; ++i)
idx_loc_[i] = srv.getIdx(mapper[i]);
auto idx_loc = const_mav<uint32_t, 1>(idx_loc_.data(), {idx_loc_.size()});
Serv newsrv(srv, idx_loc);
grid2x_c(gconf, cmav(grid), newsrv, wcur, dw);
}
else
{
for (size_t ipart=0; ipart<nvis; ++ipart)
if ((int(iw)>=minplane[ipart]) && (iw<minplane[ipart]+supp))
newidx[cnt++] = ipart;
vis_loc_.resize(nvis_plane[iw]);
auto vis_loc = mav<complex<T>,1>(vis_loc_.data(), {nvis_plane[iw]});
vis_loc.fill(0);
idx_loc_.resize(nvis_plane[iw]);
auto idx_loc = mav<uint32_t,1>(idx_loc_.data(), {nvis_plane[iw]});
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis_plane[iw]; ++i)
idx_loc[i] = srv.getIdx(newidx[i]);
idx_loc[i] = srv.getIdx(mapper[i]);
grid2vis_c(srv.getBaselines(), gconf, cmav(idx_loc), cmav(grid), vis_loc,
nullmav<T,1>(), wcur, dw);
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis_plane[iw]; ++i)
srv.addVis(newidx[i], vis_loc[i]);
srv.addVis(mapper[i], vis_loc[i]);
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment