Commit da64e8d9 authored by Martin Reinecke's avatar Martin Reinecke

cleanup

parent c2b45dd7
......@@ -1600,10 +1600,8 @@ void fillIdx(const Baselines &baselines,
#pragma omp parallel num_threads(gconf.Nthreads())
{
idx_t nthr = my_num_threads();
idx_t id = my_thread_num();
size_t lo, hi;
calc_share(nthr, id, nrow, lo, hi);
calc_share(my_num_threads(), my_thread_num(), nrow, lo, hi);
vector<idx_t> lacc(nbu*nbv+1, 0);
for (idx_t irow=lo, idx=lo*(chend-chbegin); irow<hi; ++irow)
......@@ -1623,6 +1621,7 @@ void fillIdx(const Baselines &baselines,
tmp[idx++] = nbv*iu0 + iv0;
}
}
#pragma omp barrier
#pragma omp critical(xyz)
for (size_t i=0; i<acc.size(); ++i)
......@@ -1642,20 +1641,6 @@ void fillIdx(const Baselines &baselines,
res[acc[tmp[idx++]]++] = baselines.getIdx(irow, ichan);
}
}
class SimpleTimer
{
private:
using clock = std::chrono::steady_clock;
clock::time_point starttime;
public:
SimpleTimer()
: starttime(clock::now()) {}
double operator()() const
{
return std::chrono::duration<double>(clock::now() - starttime).count();
}
};
template<typename T> vector<idx_t> getWgtIndices(const Baselines &baselines,
const GridderConfig &gconf, const const_mav<T,2> &wgt)
......@@ -1671,14 +1656,10 @@ template<typename T> vector<idx_t> getWgtIndices(const Baselines &baselines,
vector<idx_t> acc(nbu*nbv+1, 0);
vector<idx_t> tmp(nrow*nchan,~idx_t(0));
#pragma omp parallel
#pragma omp parallel num_threads(gconf.Nthreads())
{
idx_t nthr = my_num_threads();
idx_t id = my_thread_num();
idx_t nbase = nrow/nthr;
idx_t additional = nrow%nthr;
idx_t lo = id*nbase + ((id<additional) ? id : additional);
idx_t hi = lo+nbase+(id<additional);
size_t lo, hi;
calc_share(my_num_threads(), my_thread_num(), nrow, lo, hi);
vector<idx_t> lacc(nbu*nbv+1, 0);
for (idx_t irow=lo, idx=lo*nchan; irow<hi; ++irow)
......@@ -1695,11 +1676,13 @@ template<typename T> vector<idx_t> getWgtIndices(const Baselines &baselines,
++lacc[nbv*iu0 + iv0 + 1];
tmp[idx++] = nbv*iu0 + iv0;
}
#pragma omp barrier
#pragma omp critical(xyz)
for (size_t i=0; i<acc.size(); ++i)
acc[i]+=lacc[i];
}
for (size_t i=1; i<acc.size(); ++i)
acc[i] += acc[i-1];
......@@ -1739,156 +1722,6 @@ template<typename T> void dirty2ms(const const_mav<double,2> &uvw,
dirty2x(gconf, dirty, makeMsServ(baselines,idx2,ms,wgt), do_wstacking, verbosity);
}
static const uint16_t utab[] = {
#define Z(a) 0x##a##0, 0x##a##1, 0x##a##4, 0x##a##5
#define Y(a) Z(a##0), Z(a##1), Z(a##4), Z(a##5)
#define X(a) Y(a##0), Y(a##1), Y(a##4), Y(a##5)
X(0),X(1),X(4),X(5)
#undef X
#undef Y
#undef Z
};
uint32_t coord2morton2D_32 (uint32_t x, uint32_t y)
{
typedef uint32_t I;
return (I)(utab[x&0xff]) | ((I)(utab[(x>>8)&0xff])<<16)
| ((I)(utab[y&0xff])<<1) | ((I)(utab[(y>>8)&0xff])<<17);
}
static const uint8_t m2p2D_1[4][4] = {
{ 4, 1, 11, 2},{0,15, 5, 6},{10,9,3,12},{14,7,13,8}};
static uint8_t m2p2D_3[4][64];
static const uint8_t p2m2D_1[4][4] = {
{ 4, 1, 3, 10},{0,6,7,13},{15,9,8,2},{11,14,12,5}};
static uint8_t p2m2D_3[4][64];
static int peano2d_done=0;
static void init_peano2d (void)
{
peano2d_done=1;
for (int d=0; d<4; ++d)
for (uint32_t p=0; p<64; ++p)
{
unsigned rot = d;
uint32_t v = p<<26;
uint32_t res = 0;
for (int i=0; i<3; ++i)
{
unsigned tab=m2p2D_1[rot][v>>30];
v<<=2;
res = (res<<2) | (tab&0x3);
rot = tab>>2;
}
m2p2D_3[d][p]=res|(rot<<6);
}
for (int d=0; d<4; ++d)
for (uint32_t p=0; p<64; ++p)
{
unsigned rot = d;
uint32_t v = p<<26;
uint32_t res = 0;
for (int i=0; i<3; ++i)
{
unsigned tab=p2m2D_1[rot][v>>30];
v<<=2;
res = (res<<2) | (tab&0x3);
rot = tab>>2;
}
p2m2D_3[d][p]=res|(rot<<6);
}
}
uint32_t morton2peano2D_32(uint32_t v, int bits)
{
if (!peano2d_done) init_peano2d();
unsigned rot = 0;
uint32_t res = 0;
v<<=32-(bits<<1);
int i=0;
for (; i<bits-2; i+=3)
{
unsigned tab=m2p2D_3[rot][v>>26];
v<<=6;
res = (res<<6) | (tab&0x3fu);
rot = tab>>6;
}
for (; i<bits; ++i)
{
unsigned tab=m2p2D_1[rot][v>>30];
v<<=2;
res = (res<<2) | (tab&0x3);
rot = tab>>2;
}
return res;
}
template<typename It, typename Comp> class IdxComp__
{
private:
It begin;
Comp comp;
public:
IdxComp__ (It begin_, Comp comp_): begin(begin_), comp(comp_) {}
bool operator() (std::size_t a, std::size_t b) const
{ return comp(*(begin+a),*(begin+b)); }
};
/*! Performs an indirect sort on the supplied iterator range and returns in
\a idx a \a vector containing the indices of the smallest, second smallest,
third smallest, etc. element, according to \a comp. */
template<typename It, typename T2, typename Comp>
inline void buildIndex (It begin, It end, std::vector<T2> &idx, Comp comp)
{
using namespace std;
T2 num=end-begin;
idx.resize(num);
for (T2 i=0; i<num; ++i) idx[i] = i;
sort (idx.begin(),idx.end(),IdxComp__<It,Comp>(begin,comp));
}
/*! Performs an indirect sort on the supplied iterator range and returns in
\a idx a \a vector containing the indices of the smallest, second smallest,
third smallest, etc. element. */
template<typename It, typename T2> inline void buildIndex (It begin, It end,
std::vector<T2> &idx)
{
using namespace std;
typedef typename iterator_traits<It>::value_type T;
buildIndex(begin,end,idx,less<T>());
}
void getRowOrdering(const const_mav<double,2> &uvw, const mav<idx_t,1> &idx)
{
myassert(uvw.shape(1)==3, "second dimension of uvw array must be 3");
size_t nrow = uvw.shape(0);
myassert(nrow>0, "zero-sized array");
myassert(nrow==idx.shape(0), "dimension mismatch");
double umin=uvw(0,0), umax=uvw(0,0), vmin=uvw(0,1), vmax=uvw(0,1);
for (size_t i=1; i<nrow; ++i)
{
umin=min(umin, uvw(i,0));
umax=max(umax, uvw(i,0));
vmin=min(vmin, uvw(i,1));
vmax=max(vmax, uvw(i,1));
}
constexpr size_t order=12;
constexpr size_t ncells=size_t(1)<<order;
double xdu = ncells/(umax-umin), xdv = ncells/(vmax-vmin);
vector<idx_t> peano(nrow);
for (size_t i=0; i<nrow; ++i)
{
size_t iu = min(ncells-1,size_t((uvw(i,0)-umin)*xdu));
size_t iv = min(ncells-1,size_t((uvw(i,1)-vmin)*xdv));
peano[i] = morton2peano2D_32(coord2morton2D_32(iu,iv), order);
}
vector<idx_t> tidx;
buildIndex(peano.begin(), peano.end(), tidx);
for (size_t i=0; i<nrow; ++i)
idx(i) = tidx[i];
}
} // namespace detail
// public names
......@@ -1904,7 +1737,6 @@ using detail::vis2dirty;
using detail::dirty2vis;
using detail::ms2dirty;
using detail::dirty2ms;
using detail::getRowOrdering;
using detail::streamDump__;
using detail::CodeLocation;
} // namespace gridder
......
......@@ -1027,19 +1027,6 @@ py::array Pydirty2ms(const py::array &uvw,
myfail("type matching failed: 'dirty' has neither type 'f4' nor 'f8'");
}
py::array PygetRowOrdering(const py::array &uvw_)
{
auto uvw = getPyarr<double>(uvw_, "uvw");
auto uvw2 = make_const_mav<2>(uvw);
auto res = makeArray<idx_t>({uvw2.shape(0)});
auto res2 = make_mav<1>(res);
{
py::gil_scoped_release release;
getRowOrdering(uvw2, res2);
}
return res;
}
} // unnamed namespace
PYBIND11_MODULE(nifty_gridder, m)
......@@ -1130,6 +1117,4 @@ PYBIND11_MODULE(nifty_gridder, m)
m.def("dirty2ms", &Pydirty2ms, dirty2ms_DS, "uvw"_a, "freq"_a, "dirty"_a,
"wgt"_a=None, "pixsize_x"_a, "pixsize_y"_a, "epsilon"_a,
"do_wstacking"_a=false, "nthreads"_a=1, "verbosity"_a=0);
m.def("getRowOrdering", &PygetRowOrdering, "uvw"_a);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment