Commit bf42bc10 authored by Martin Reinecke's avatar Martin Reinecke

tweak index computation

parent ddcdd590
......@@ -1260,12 +1260,6 @@ template<typename T> void apply_wcorr(const GridderConfig &gconf,
}
}
struct idxhelper
{
uint32_t idx;
int minplane;
};
template<typename Serv> void wminmax(const GridderConfig &gconf,
const Serv &srv, double &wmin, double &wmax)
{
......@@ -1284,10 +1278,37 @@ template<typename Serv> void wminmax(const GridderConfig &gconf,
}
}
template<typename T> void update_idx(vector<T> &v, vector<T> &vold,
const vector<T> &add, const vector<T> &del)
{
vold.resize(0);
vold.reserve((v.size()+add.size())-del.size());
auto iin=v.begin(), ein=v.end();
auto iadd=add.begin(), eadd=add.end();
auto irem=del.begin(), erem=del.end();
while(iin!=ein)
{
if (irem!=erem)
{
while ((iin!=ein) && (*iin==*irem))
{ ++irem; ++iin; } // skip the entries to be removed
if (iin==ein) break;
}
if (iadd!=eadd)
while ((iadd!=eadd) && (*iadd<*iin))
vold.push_back(*(iadd++));
vold.push_back(*(iin++));
}
while(iadd!=eadd)
vold.push_back(*(iadd++));
v.swap(vold);
}
template<typename Serv> void wstack_common(
const GridderConfig &gconf, const Serv &srv, double &wmin,
double &dw, size_t &nplanes, vector<size_t> &nvis_plane,
vector<vector<idxhelper>> &minplane, size_t verbosity)
vector<vector<uint32_t>> &minplane, size_t verbosity)
{
size_t nvis = srv.Nvis();
size_t nthreads = gconf.Nthreads();
......@@ -1301,7 +1322,7 @@ template<typename Serv> void wstack_common(
double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
dw = 0.25/abs(nmin);
nplanes = size_t((wmax-wmin)/dw+2);
dw = (wmax-wmin)/(nplanes-1);
dw = (1.+1e-13)*(wmax-wmin)/(nplanes-1);
auto supp = gconf.Supp();
wmin -= (0.5*supp-1)*dw;
......@@ -1311,71 +1332,49 @@ template<typename Serv> void wstack_common(
if (verbosity>0) cout << "nplanes: " << nplanes << endl;
nvis_plane.resize(nplanes,0);
// FIXME: this could still use some tweaking
int dbunch=supp;
int nbunch=(nplanes+dbunch-1)/dbunch;
minplane.resize(nbunch);
#if 0
for (auto &pl: minplane)
pl.resize(0);
for (size_t ipart=0; ipart<nvis; ++ipart)
{
int plane0 = int((abs(srv.getCoord(ipart).w)-wmin)/dw+0.5*supp)-int(supp-1);
for (int ibunch=0; ibunch<nbunch; ++ibunch)
if ((plane0+dbunch>ibunch*dbunch) && (plane0<(ibunch+1)*dbunch))
minplane[ibunch].push_back({ipart,plane0});
for (int i=max<int>(0,plane0); i<min<int>(nplanes,plane0+supp); ++i)
++nvis_plane[i];
}
for(auto &pl:minplane)
pl.shrink_to_fit();
#else
vector<size_t> tcnt(omp_get_max_threads()*nbunch,0);
minplane.resize(nplanes);
vector<size_t> tcnt(omp_get_max_threads()*nplanes,0);
#pragma omp parallel num_threads(nthreads)
{
int nthreads = omp_get_num_threads();
vector<size_t> mytcnt(nbunch,0);
vector<size_t> mytcnt(nplanes,0);
vector<size_t> nvp(nplanes,0);
auto mythread = omp_get_thread_num();
#pragma omp for schedule(static)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
int plane0 = int((abs(srv.getCoord(ipart).w)-wmin)/dw+0.5*supp)-int(supp-1);
for (int ibunch=max(plane0,0)/dbunch; (ibunch<nbunch)&&(plane0>(ibunch-1)*dbunch); ++ibunch)
++mytcnt[ibunch];
for (int i=max<int>(0,plane0); i<min<int>(nplanes,plane0+supp); ++i)
int plane0 = max(0,int(1+(abs(srv.getCoord(ipart).w)-(0.5*supp*dw)-wmin)/dw));
++mytcnt[plane0];
for (int i=plane0; i<min<int>(nplanes,plane0+supp); ++i)
++nvp[i];
}
#pragma omp critical (wstack_common)
{
for (size_t i=0; i<nplanes; ++i)
nvis_plane[i] += nvp[i];
for (int i=0; i<nbunch; ++i)
tcnt[mythread*nbunch+i] = mytcnt[i];
for (size_t i=0; i<nplanes; ++i)
tcnt[mythread*nplanes+i] = mytcnt[i];
}
#pragma omp barrier
#pragma omp single
for (int j=0; j<nbunch; ++j)
for (size_t j=0; j<nplanes; ++j)
{
size_t l=0;
for (int i=0; i<nthreads; ++i)
l+=tcnt[i*nbunch+j];
for (size_t i=0; i<nthreads; ++i)
l+=tcnt[i*nplanes+j];
minplane[j].resize(l);
}
#pragma omp barrier
vector<size_t> myofs(nbunch, 0);
for (int j=0; j<nbunch; ++j)
vector<size_t> myofs(nplanes, 0);
for (size_t j=0; j<nplanes; ++j)
for (int i=0; i<mythread; ++i)
myofs[j]+=tcnt[i*nbunch+j];
myofs[j]+=tcnt[i*nplanes+j];
#pragma omp for schedule(static)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
int plane0 = int((abs(srv.getCoord(ipart).w)-wmin)/dw+0.5*supp)-int(supp-1);
for (int ibunch=max(plane0,0)/dbunch; (ibunch<nbunch)&&(plane0>(ibunch-1)*dbunch); ++ibunch)
minplane[ibunch][myofs[ibunch]++]={uint32_t(ipart),plane0};
int plane0 = max(0,int(1+(abs(srv.getCoord(ipart).w)-(0.5*supp*dw)-wmin)/dw));
minplane[plane0][myofs[plane0]++]=uint32_t(ipart);
}
}
#endif
}
template<typename T, typename Serv> void x2dirty(
......@@ -1392,11 +1391,11 @@ template<typename T, typename Serv> void x2dirty(
double dw;
size_t nplanes;
vector<size_t> nvis_plane;
vector<vector<idxhelper>> minplane;
vector<vector<uint32_t>> minplane;
if (verbosity>0) cout << "Gridding using improved w-stacking" << endl;
wstack_common(gconf, srv, wmin, dw, nplanes, nvis_plane, minplane, verbosity);
dirty.fill(0);
vector<uint32_t> subidx;
vector<uint32_t> subidx, subidx_old;
tmpStorage<complex<T>,2> grid_({gconf.Nu(),gconf.Nv()});
auto grid=grid_.getMav();
tmpStorage<complex<T>,2> tdirty_(dirty.shape());
......@@ -1409,12 +1408,7 @@ template<typename T, typename Serv> void x2dirty(
<< " visibilities" << endl;
double wcur = wmin+iw*dw;
size_t cnt=0;
subidx.resize(nvis_plane[iw]);
const auto &bla(minplane[iw/supp]);
for (size_t i=0; i<bla.size(); ++i)
if ((int(iw)>=bla[i].minplane) && (int(iw)<bla[i].minplane+int(supp)))
subidx[cnt++] = bla[i].idx;
update_idx(subidx, subidx_old, minplane[iw], iw>=supp ? minplane[iw-supp] : vector<uint32_t>());
auto subidx2 = const_mav<uint32_t, 1>(subidx.data(), {subidx.size()});
auto subsrv = srv.getSubserv(subidx2);
grid.fill(0);
......@@ -1469,7 +1463,7 @@ template<typename T, typename Serv> void dirty2x(
double dw;
size_t nplanes;
vector<size_t> nvis_plane;
vector<vector<idxhelper>> minplane;
vector<vector<uint32_t>> minplane;
if (verbosity>0) cout << "Degridding using improved w-stacking" << endl;
wstack_common(gconf, srv, wmin, dw, nplanes, nvis_plane, minplane, verbosity);
......@@ -1481,7 +1475,7 @@ template<typename T, typename Serv> void dirty2x(
// correct for w gridding
apply_wcorr(gconf, tdirty, ES_Kernel(supp, nthreads), dw);
vector<uint32_t> subidx;
vector<uint32_t> subidx, subidx_old;
tmpStorage<complex<T>,2> grid_({gconf.Nu(),gconf.Nv()});
auto grid=grid_.getMav();
tmpStorage<complex<T>,2> tdirty2_({nx_dirty,ny_dirty});
......@@ -1501,12 +1495,7 @@ template<typename T, typename Serv> void dirty2x(
gconf.apply_wscreen(cmav(tdirty2), tdirty2, wcur, false);
gconf.dirty2grid_c(cmav(tdirty2), grid);
subidx.resize(nvis_plane[iw]);
size_t cnt=0;
const auto &bla(minplane[iw/supp]);
for (size_t i=0; i<bla.size(); ++i)
if ((int(iw)>=bla[i].minplane) && (int(iw)<bla[i].minplane+int(supp)))
subidx[cnt++] = bla[i].idx;
update_idx(subidx, subidx_old, minplane[iw], iw>=supp ? minplane[iw-supp] : vector<uint32_t>());
auto subidx2 = const_mav<uint32_t, 1>(subidx.data(), {subidx.size()});
auto subsrv = srv.getSubserv(subidx2);
grid2x_c(gconf, cmav(grid), subsrv, wcur, dw);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment