Commit ddcdd590 authored by Martin Reinecke
Browse files

tweak scaling some more

parent 294536e6
......@@ -1315,6 +1315,7 @@ template<typename Serv> void wstack_common(
int dbunch=supp;
int nbunch=(nplanes+dbunch-1)/dbunch;
minplane.resize(nbunch);
#if 0
for (auto &pl: minplane)
pl.resize(0);
for (size_t ipart=0; ipart<nvis; ++ipart)
......@@ -1328,6 +1329,53 @@ template<typename Serv> void wstack_common(
}
for(auto &pl:minplane)
pl.shrink_to_fit();
#else
vector<size_t> tcnt(omp_get_max_threads()*nbunch,0);
#pragma omp parallel num_threads(nthreads)
{
int nthreads = omp_get_num_threads();
vector<size_t> mytcnt(nbunch,0);
vector<size_t> nvp(nplanes,0);
auto mythread = omp_get_thread_num();
#pragma omp for schedule(static)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
int plane0 = int((abs(srv.getCoord(ipart).w)-wmin)/dw+0.5*supp)-int(supp-1);
for (int ibunch=max(plane0,0)/dbunch; (ibunch<nbunch)&&(plane0>(ibunch-1)*dbunch); ++ibunch)
++mytcnt[ibunch];
for (int i=max<int>(0,plane0); i<min<int>(nplanes,plane0+supp); ++i)
++nvp[i];
}
#pragma omp critical (wstack_common)
{
for (size_t i=0; i<nplanes; ++i)
nvis_plane[i] += nvp[i];
for (int i=0; i<nbunch; ++i)
tcnt[mythread*nbunch+i] = mytcnt[i];
}
#pragma omp barrier
#pragma omp single
for (int j=0; j<nbunch; ++j)
{
size_t l=0;
for (int i=0; i<nthreads; ++i)
l+=tcnt[i*nbunch+j];
minplane[j].resize(l);
}
#pragma omp barrier
vector<size_t> myofs(nbunch, 0);
for (int j=0; j<nbunch; ++j)
for (int i=0; i<mythread; ++i)
myofs[j]+=tcnt[i*nbunch+j];
#pragma omp for schedule(static)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
int plane0 = int((abs(srv.getCoord(ipart).w)-wmin)/dw+0.5*supp)-int(supp-1);
for (int ibunch=max(plane0,0)/dbunch; (ibunch<nbunch)&&(plane0>(ibunch-1)*dbunch); ++ibunch)
minplane[ibunch][myofs[ibunch]++]={uint32_t(ipart),plane0};
}
}
#endif
}
template<typename T, typename Serv> void x2dirty(
......@@ -1347,7 +1395,6 @@ template<typename T, typename Serv> void x2dirty(
vector<vector<idxhelper>> minplane;
if (verbosity>0) cout << "Gridding using improved w-stacking" << endl;
wstack_common(gconf, srv, wmin, dw, nplanes, nvis_plane, minplane, verbosity);
dirty.fill(0);
vector<uint32_t> subidx;
tmpStorage<complex<T>,2> grid_({gconf.Nu(),gconf.Nv()});
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment