Commit aa401fbb authored by Martin Reinecke's avatar Martin Reinecke
Browse files

use simpler array accesses where performance allows it

parent 6fe25b62
......@@ -128,6 +128,7 @@ void legendre_prep(int n, vector<double> &x, vector<double> &w)
template<typename T>
using pyarr = py::array_t<T>;
// The "_c" suffix here stands for "C memory order, contiguous"
template<typename T>
using pyarr_c = py::array_t<T, py::array::c_style | py::array::forcecast>;
......@@ -165,7 +166,7 @@ void checkArray(const py::array &arr, const char *aname,
}
}
template<typename T> pyarr_c<T> provideArray(py::object &in,
template<typename T> pyarr<T> provideArray(py::object &in,
const vector<size_t> &shape)
{
if (in.is(py::none()))
......@@ -177,7 +178,7 @@ template<typename T> pyarr_c<T> provideArray(py::object &in,
tmp[i] = T(0);
return tmp_;
}
auto tmp_ = in.cast<pyarr_c<T>>();
auto tmp_ = in.cast<pyarr<T>>();
checkArray(tmp_, "temporary", shape);
return tmp_;
}
......@@ -307,7 +308,7 @@ template<typename T> class Baselines
size_t nrows, nchan;
public:
Baselines(const pyarr_c<T> &coord_, const pyarr_c<T> &freq_)
Baselines(const pyarr<T> &coord_, const pyarr<T> &freq_)
{
constexpr double speedOfLight = 299792458.;
checkArray(coord_, "coord", {0, 3});
......@@ -315,16 +316,16 @@ template<typename T> class Baselines
nrows = coord_.shape(0);
nchan = freq_.shape(0);
myassert(nrows*nchan<(size_t(1)<<32), "too many entries in MS");
auto freq = freq_.data();
auto cood = coord_.data();
auto freq = freq_.template unchecked<1>();
auto cood = coord_.template unchecked<2>();
{
py::gil_scoped_release release;
f_over_c.resize(nchan);
for (size_t i=0; i<nchan; ++i)
f_over_c[i] = freq[i]/speedOfLight;
f_over_c[i] = freq(i)/speedOfLight;
coord.resize(nrows);
for (size_t i=0; i<coord.size(); ++i)
coord[i] = UVW<T>(cood[3*i], cood[3*i+1], cood[3*i+2]);
coord[i] = UVW<T>(cood(i,0), cood(i,1), cood(i,2));
}
}
......@@ -339,14 +340,14 @@ template<typename T> class Baselines
size_t Nrows() const { return nrows; }
size_t Nchannels() const { return nchan; }
template<typename T2> pyarr_c<T2> ms2vis(const pyarr_c<T2> &ms_,
template<typename T2> pyarr_c<T2> ms2vis(const pyarr<T2> &ms_,
const pyarr_c<uint32_t> &idx_) const
{
checkArray(idx_, "idx", {0});
checkArray(ms_, "ms", {nrows, nchan});
size_t nvis = size_t(idx_.shape(0));
auto idx = idx_.data();
auto ms = ms_.data();
auto idx = idx_.template unchecked<1>();
auto ms = ms_.template unchecked<2>();
auto res=makeArray<T2>({nvis});
auto vis = res.mutable_data();
......@@ -354,27 +355,37 @@ template<typename T> class Baselines
py::gil_scoped_release release;
#pragma omp parallel for
for (size_t i=0; i<nvis; ++i)
vis[i] = ms[idx[i]];
{
auto t = idx(i);
auto row = t/nchan;
auto chan = t-row*nchan;
vis[i] = ms(row, chan);
}
}
return res;
}
template<typename T2> pyarr_c<T2> vis2ms(const pyarr_c<T2> &vis_,
const pyarr_c<uint32_t> &idx_, py::object &ms_in) const
template<typename T2> pyarr_c<T2> vis2ms(const pyarr<T2> &vis_,
const pyarr<uint32_t> &idx_, py::object &ms_in) const
{
checkArray(vis_, "vis", {0});
size_t nvis = size_t(vis_.shape(0));
checkArray(idx_, "idx", {nvis});
auto idx = idx_.data();
auto vis = vis_.data();
auto idx = idx_.template unchecked<1>();
auto vis = vis_.template unchecked<1>();
auto res = provideArray<T2>(ms_in, {nrows, nchan});
auto ms = res.mutable_data();
auto ms = res.template mutable_unchecked<2>();
{
py::gil_scoped_release release;
#pragma omp parallel for
for (size_t i=0; i<nvis; ++i)
ms[idx[i]] = vis[i];
{
auto t = idx(i);
auto row = t/nchan;
auto chan = t-row*nchan;
ms(row, chan) = vis(i);
}
}
return res;
}
......@@ -637,13 +648,13 @@ template<typename T> class Helper
template<typename T> pyarr_c<complex<T>> vis2grid_c(
const Baselines<T> &baselines, const GridderConfig<T> &gconf,
const pyarr_c<uint32_t> &idx_, const pyarr_c<complex<T>> &vis_)
const pyarr<uint32_t> &idx_, const pyarr<complex<T>> &vis_)
{
checkArray(vis_, "vis", {0});
size_t nvis = size_t(vis_.shape(0));
checkArray(idx_, "idx", {nvis});
auto vis=vis_.data();
auto idx = idx_.data();
auto vis=vis_.template unchecked<1>();
auto idx = idx_.template unchecked<1>();
size_t nu=gconf.Nu(), nv=gconf.Nv();
auto res = makeArray<complex<T>>({nu, nv});
......@@ -666,10 +677,10 @@ template<typename T> pyarr_c<complex<T>> vis2grid_c(
#pragma omp for schedule(guided,100)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
UVW<T> coord = baselines.effectiveCoord(idx(ipart));
hlp.prep(coord.u, coord.v);
auto * RESTRICT ptr = hlp.p0w;
auto v(vis[ipart]*emb);
auto v(vis(ipart)*emb);
for (size_t cu=0; cu<w; ++cu)
{
complex<T> tmp(v*ku[cu]);
......@@ -684,21 +695,21 @@ template<typename T> pyarr_c<complex<T>> vis2grid_c(
}
template<typename T> pyarr_c<T> vis2grid(const Baselines<T> &baselines,
const GridderConfig<T> &gconf, const pyarr_c<uint32_t> &idx_,
const pyarr_c<complex<T>> &vis_)
const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_,
const pyarr<complex<T>> &vis_)
{ return complex2hartley(vis2grid_c(baselines, gconf, idx_, vis_)); }
template<typename T> pyarr_c<complex<T>> ms2grid_c(
const Baselines<T> &baselines, const GridderConfig<T> &gconf,
const pyarr_c<uint32_t> &idx_, const pyarr_c<complex<T>> &ms_)
const pyarr<uint32_t> &idx_, const pyarr<complex<T>> &ms_)
{
auto nrows = baselines.Nrows();
auto nchan = baselines.Nchannels();
checkArray(ms_, "ms", {nrows, nchan});
checkArray(idx_, "idx", {0});
size_t nvis = size_t(idx_.shape(0));
auto ms = ms_.data();
auto idx = idx_.data();
auto ms = ms_.template unchecked<2>();
auto idx = idx_.template unchecked<1>();
size_t nu=gconf.Nu(), nv=gconf.Nv();
auto res = makeArray<complex<T>>({nu, nv});
......@@ -721,10 +732,13 @@ template<typename T> pyarr_c<complex<T>> ms2grid_c(
#pragma omp for schedule(guided,100)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
auto tidx = idx(ipart);
auto row = tidx/nchan;
auto chan = tidx-row*nchan;
UVW<T> coord = baselines.effectiveCoord(tidx);
hlp.prep(coord.u, coord.v);
auto * RESTRICT ptr = hlp.p0w;
auto v(ms[idx[ipart]]*emb);
auto v(ms(row,chan)*emb);
for (size_t cu=0; cu<w; ++cu)
{
complex<T> tmp(v*ku[cu]);
......@@ -739,20 +753,20 @@ template<typename T> pyarr_c<complex<T>> ms2grid_c(
}
template<typename T> pyarr_c<T> ms2grid(const Baselines<T> &baselines,
const GridderConfig<T> &gconf, const pyarr_c<uint32_t> &idx_,
const pyarr_c<complex<T>> &ms_)
const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_,
const pyarr<complex<T>> &ms_)
{ return complex2hartley(ms2grid_c(baselines, gconf, idx_, ms_)); }
template<typename T> pyarr_c<complex<T>> grid2vis_c(
const Baselines<T> &baselines, const GridderConfig<T> &gconf,
const pyarr_c<uint32_t> &idx_, const pyarr_c<complex<T>> &grid_)
const pyarr<uint32_t> &idx_, const pyarr_c<complex<T>> &grid_)
{
size_t nu=gconf.Nu(), nv=gconf.Nv();
checkArray(idx_, "idx", {0});
auto grid = grid_.data();
checkArray(grid_, "grid", {nu, nv});
size_t nvis = size_t(idx_.shape(0));
auto idx = idx_.data();
auto idx = idx_.template unchecked<1>();
auto res = makeArray<complex<T>>({nvis});
auto vis = res.mutable_data();
......@@ -773,7 +787,7 @@ template<typename T> pyarr_c<complex<T>> grid2vis_c(
#pragma omp for schedule(guided,100)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
UVW<T> coord = baselines.effectiveCoord(idx(ipart));
hlp.prep(coord.u, coord.v);
complex<T> r = 0;
const auto * RESTRICT ptr = hlp.p0r;
......@@ -793,24 +807,25 @@ template<typename T> pyarr_c<complex<T>> grid2vis_c(
}
template<typename T> pyarr_c<complex<T>> grid2vis(const Baselines<T> &baselines,
const GridderConfig<T> &gconf, const pyarr_c<uint32_t> &idx_,
const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_,
const pyarr_c<T> &grid_)
{ return grid2vis_c(baselines, gconf, idx_, hartley2complex(grid_)); }
template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines,
const GridderConfig<T> &gconf, const pyarr_c<uint32_t> &idx_,
const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_,
const pyarr_c<complex<T>> &grid_, py::object &ms_in)
{
auto nrows = baselines.Nrows();
auto nchan = baselines.Nchannels();
size_t nu=gconf.Nu(), nv=gconf.Nv();
checkArray(idx_, "idx", {0});
auto grid = grid_.data();
checkArray(grid_, "grid", {nu, nv});
size_t nvis = size_t(idx_.shape(0));
auto idx = idx_.data();
auto idx = idx_.template unchecked<1>();
auto res = provideArray<complex<T>>(ms_in,
{baselines.Nrows(), baselines.Nchannels()});
auto ms = res.mutable_data();
auto res = provideArray<complex<T>>(ms_in, {nrows, nchan});
auto ms = res.template mutable_unchecked<2>();
{
py::gil_scoped_release release;
T beta = gconf.Beta();
......@@ -828,7 +843,10 @@ template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines
#pragma omp for schedule(guided,100)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
auto tidx = idx(ipart);
auto row = tidx/nchan;
auto chan = tidx-row*nchan;
UVW<T> coord = baselines.effectiveCoord(tidx);
hlp.prep(coord.u, coord.v);
complex<T> r = 0;
const auto * RESTRICT ptr = hlp.p0r;
......@@ -840,7 +858,7 @@ template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines
r += tmp*ku[cu];
ptr += jump;
}
ms[idx[ipart]] += r*emb;
ms(row,chan) += r*emb;
}
}
}
......@@ -848,20 +866,20 @@ template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines
}
template<typename T> pyarr_c<complex<T>> grid2ms(const Baselines<T> &baselines,
const GridderConfig<T> &gconf, const pyarr_c<uint32_t> &idx_,
const GridderConfig<T> &gconf, const pyarr<uint32_t> &idx_,
const pyarr_c<T> &grid_, py::object &ms_in)
{ return grid2ms_c(baselines, gconf, idx_, hartley2complex(grid_), ms_in); }
template<typename T> pyarr_c<complex<T>> apply_holo(
const Baselines<T> &baselines, const GridderConfig<T> &gconf,
const pyarr_c<uint32_t> &idx_, const pyarr_c<complex<T>> &grid_)
const pyarr<uint32_t> &idx_, const pyarr_c<complex<T>> &grid_)
{
size_t nu=gconf.Nu(), nv=gconf.Nv();
checkArray(idx_, "idx", {0});
auto grid = grid_.data();
checkArray(grid_, "grid", {nu, nv});
size_t nvis = size_t(idx_.shape(0));
auto idx = idx_.data();
auto idx = idx_.template unchecked<1>();
auto res = makeArray<complex<T>>({nu, nv});
auto ogrid = res.mutable_data();
......@@ -882,7 +900,7 @@ template<typename T> pyarr_c<complex<T>> apply_holo(
#pragma omp for schedule(guided,100)
for (size_t ipart=0; ipart<nvis; ++ipart)
{
UVW<T> coord = baselines.effectiveCoord(idx[ipart]);
UVW<T> coord = baselines.effectiveCoord(idx(ipart));
hlp.prep(coord.u, coord.v);
complex<T> r = 0;
const auto * RESTRICT ptr = hlp.p0r;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment