Commit e4e1c032 authored by Martin Reinecke's avatar Martin Reinecke

Merge branch 'master' into improve_scaling

parents 686dcfb3 7cac7cb7
......@@ -304,7 +304,7 @@ struct RowChan
template<typename T> void complex2hartley
(const const_mav<complex<T>, 2> &grid, const mav<T,2> &grid2, size_t nthreads)
{
myassert(grid.shape()==grid2.shape(), "shape mismatch");
checkShape(grid.shape(), grid2.shape());
size_t nu=grid.shape(0), nv=grid.shape(1);
#pragma omp parallel for num_threads(nthreads)
......@@ -323,7 +323,7 @@ template<typename T> void complex2hartley
template<typename T> void hartley2complex
(const const_mav<T,2> &grid, const mav<complex<T>,2> &grid2, size_t nthreads)
{
myassert(grid.shape()==grid2.shape(), "shape mismatch");
checkShape(grid.shape(), grid2.shape());
size_t nu=grid.shape(0), nv=grid.shape(1);
#pragma omp parallel for num_threads(nthreads)
......@@ -343,7 +343,7 @@ template<typename T> void hartley2complex
template<typename T> void hartley2_2D(const const_mav<T,2> &in,
const mav<T,2> &out, size_t nthreads)
{
myassert(in.shape()==out.shape(), "shape mismatch");
checkShape(in.shape(), out.shape());
size_t nu=in.shape(0), nv=in.shape(1);
ptrdiff_t sz=ptrdiff_t(sizeof(T));
pocketfft::stride_t stri{sz*in.stride(0), sz*in.stride(1)};
......@@ -370,42 +370,23 @@ template<typename T> void hartley2_2D(const const_mav<T,2> &in,
class ES_Kernel
{
protected:
double beta;
public:
ES_Kernel(size_t supp) : beta(2.3*supp) {}
double operator()(double v) const { return exp(beta*(sqrt(1.-v*v)-1.)); }
static size_t get_supp(double epsilon)
{
static const vector<double> maxmaperr { 1e8, 0.32, 0.021, 6.2e-4,
1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17,
6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 };
double epssq = epsilon*epsilon;
for (size_t i=1; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
myfail("requested epsilon too small - minimum is 2e-13");
}
};
class ES_Kernel_with_correction: public ES_Kernel
{
protected:
private:
static constexpr double pi = 3.141592653589793238462643383279502884197;
double beta;
int p;
vector<double> x, wgt, psi;
size_t supp;
public:
ES_Kernel_with_correction(size_t supp_, size_t nthreads)
: ES_Kernel(supp_), p(int(1.5*supp_+2)), supp(supp_)
ES_Kernel(size_t supp_, size_t nthreads)
: beta(2.3*supp_), p(int(1.5*supp_+2)), supp(supp_)
{
legendre_prep(2*p,x,wgt,nthreads);
psi=x;
for (auto &v:psi)
v=operator()(v);
}
double operator()(double v) const { return exp(beta*(sqrt(1.-v*v)-1.)); }
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
double corfac(double v) const
......@@ -415,13 +396,25 @@ class ES_Kernel_with_correction: public ES_Kernel
tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]);
return 1./(supp*tmp);
}
static size_t get_supp(double epsilon)
{
static const vector<double> maxmaperr { 1e8, 0.32, 0.021, 6.2e-4,
1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17,
6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 };
double epssq = epsilon*epsilon;
for (size_t i=1; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
myfail("requested epsilon too small - minimum is 2e-13");
}
};
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
vector<double> correction_factors(size_t n, size_t nval, size_t supp,
size_t nthreads)
{
ES_Kernel_with_correction kernel(supp, nthreads);
ES_Kernel kernel(supp, nthreads);
vector<double> res(nval);
double xn = 1./n;
#pragma omp parallel for schedule(static) num_threads(nthreads)
......@@ -455,7 +448,8 @@ class Baselines
size_t shift, mask;
public:
template<typename T> Baselines(const const_mav<T,2> &coord_, const const_mav<T,1> &freq)
template<typename T> Baselines(const const_mav<T,2> &coord_,
const const_mav<T,1> &freq)
{
constexpr double speedOfLight = 299792458.;
myassert(coord_.shape(1)==3, "dimension mismatch");
......@@ -492,8 +486,7 @@ class Baselines
mav<T,2> &res) const
{
size_t nvis = idx.shape(0);
myassert(res.shape(0)==nvis, "shape mismatch");
myassert(res.shape(1)==3, "shape mismatch");
checkShape(res.shape(), {idx.shape(0), 3});
for (size_t i=0; i<nvis; i++)
{
auto uvw = effectiveCoord(idx(i));
......@@ -506,10 +499,9 @@ class Baselines
template<typename T> void ms2vis(const mav<const T,2> &ms,
const mav<const uint32_t,1> &idx, mav<T,1> &vis, size_t nthreads) const
{
myassert(ms.shape(0)==nrows, "shape mismatch");
myassert(ms.shape(1)==nchan, "shape mismatch");
checkShape(ms.shape(), {nrows, nchan});
size_t nvis = idx.shape(0);
myassert(vis.shape(0)==nvis, "shape mismatch");
checkShape(vis.shape(), {nvis});
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis; ++i)
{
......@@ -522,9 +514,8 @@ class Baselines
const mav<const uint32_t,1> &idx, mav<T,2> &ms, size_t nthreads) const
{
size_t nvis = vis.shape(0);
myassert(idx.shape(0)==nvis, "shape mismatch");
myassert(ms.shape(0)==nrows, "shape mismatch");
myassert(ms.shape(1)==nchan, "shape mismatch");
checkShape(idx.shape(), {nvis});
checkShape(ms.shape(), {nrows, nchan});
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis; ++i)
{
......@@ -543,6 +534,8 @@ class GridderConfig
double beta;
vector<double> cfu, cfv;
size_t nthreads;
double ushift, vshift;
int maxiu0, maxiv0;
complex<double> wscreen(double x, double y, double w, bool adjoint) const
{
......@@ -564,7 +557,9 @@ class GridderConfig
supp(ES_Kernel::get_supp(epsilon)), nsafe((supp+1)/2),
nu(max(2*nsafe,2*nx_dirty)), nv(max(2*nsafe,2*ny_dirty)),
beta(2.3*supp),
cfu(nx_dirty), cfv(ny_dirty), nthreads(nthreads_)
cfu(nx_dirty), cfv(ny_dirty), nthreads(nthreads_),
ushift(supp*(-0.5)+1+nu), vshift(supp*(-0.5)+1+nv),
maxiu0((nu+nsafe)-supp), maxiv0((nv+nsafe)-supp)
{
myassert((nx_dirty&1)==0, "nx_dirty must be even");
myassert((ny_dirty&1)==0, "ny_dirty must be even");
......@@ -682,12 +677,10 @@ class GridderConfig
void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
{
u=fmod1(u_in*psx)*nu,
iu0 = int(u-supp*0.5 + 1 + nu) - nu;
iu0 = min<int>(iu0, (nu+nsafe)-supp);
u=fmod1(u_in*psx)*nu;
iu0 = min(int(u+ushift)-int(nu), maxiu0);
v=fmod1(v_in*psy)*nv;
iv0 = int(v-supp*0.5 + 1 + nv) - nv;
iv0 = min<int>(iv0, (nv+nsafe)-supp);
iv0 = min(int(v+vshift)-int(nv), maxiv0);
}
template<typename T> void apply_wscreen(const const_mav<complex<T>,2> &dirty,
......@@ -1220,7 +1213,7 @@ template<typename T> void get_correlations
template<typename T> void apply_wcorr(const GridderConfig &gconf,
const mav<T,2> &dirty, const ES_Kernel_with_correction &kernel, double dw)
const mav<T,2> &dirty, const ES_Kernel &kernel, double dw)
{
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
......@@ -1321,7 +1314,7 @@ template<typename Serv> void wstack_common(
template<typename T, typename Serv> void x2dirty(
const GridderConfig &gconf, const Serv &srv, const mav<T,2> &dirty,
bool do_wstacking, size_t verbosity=0)
bool do_wstacking, size_t verbosity)
{
if (do_wstacking)
{
......@@ -1369,7 +1362,7 @@ template<typename T, typename Serv> void x2dirty(
dirty(i,j) += tdirty(i,j).real();
}
// correct for w gridding
apply_wcorr(gconf, dirty, ES_Kernel_with_correction(supp, nthreads), dw);
apply_wcorr(gconf, dirty, ES_Kernel(supp, nthreads), dw);
}
else
{
......@@ -1391,14 +1384,15 @@ template<typename T, typename Serv> void x2dirty(
template<typename T> void vis2dirty(const Baselines &baselines,
const GridderConfig &gconf, const const_mav<uint32_t,1> &idx,
const const_mav<complex<T>,1> &vis, const const_mav<T,1> &wgt,
mav<T,2> &dirty, bool do_wstacking)
mav<T,2> &dirty, bool do_wstacking, size_t verbosity)
{
x2dirty(gconf, makeVisServ(baselines, idx, vis, wgt), dirty, do_wstacking);
x2dirty(gconf, makeVisServ(baselines, idx, vis, wgt), dirty, do_wstacking,
verbosity);
}
template<typename T, typename Serv> void dirty2x(
const GridderConfig &gconf, const const_mav<T,2> &dirty,
const Serv &srv, bool do_wstacking, size_t verbosity=0)
const Serv &srv, bool do_wstacking, size_t verbosity)
{
if (do_wstacking)
{
......@@ -1420,7 +1414,7 @@ template<typename T, typename Serv> void dirty2x(
for (size_t j=0; j<ny_dirty; ++j)
tdirty(i,j) = dirty(i,j);
// correct for w gridding
apply_wcorr(gconf, tdirty, ES_Kernel_with_correction(supp, nthreads), dw);
apply_wcorr(gconf, tdirty, ES_Kernel(supp, nthreads), dw);
vector<uint32_t> subidx;
tmpStorage<complex<T>,2> grid_({gconf.Nu(),gconf.Nv()});
......@@ -1472,9 +1466,10 @@ template<typename T, typename Serv> void dirty2x(
template<typename T> void dirty2vis(const Baselines &baselines,
const GridderConfig &gconf, const const_mav<uint32_t,1> &idx,
const const_mav<T,2> &dirty, const const_mav<T,1> &wgt,
mav<complex<T>,1> &vis, bool do_wstacking)
mav<complex<T>,1> &vis, bool do_wstacking, size_t verbosity)
{
dirty2x(gconf, dirty, makeVisServ(baselines, idx, vis, wgt), do_wstacking);
dirty2x(gconf, dirty, makeVisServ(baselines, idx, vis, wgt), do_wstacking,
verbosity);
}
......
......@@ -587,9 +587,9 @@ template<typename T> pyarr<complex<T>> Pygrid2vis_c(
auto wgt2 = make_const_mav<1>(wgt);
auto res = makeArray<complex<T>>({nvis});
auto vis = make_mav<1>(res);
vis.fill(0);
{
py::gil_scoped_release release;
vis.fill(0);
grid2vis_c<T>(baselines, gconf, idx2, grid2, vis, wgt2);
}
return res;
......@@ -750,7 +750,8 @@ pyarr<uint32_t> PygetIndices(const PyBaselines &baselines,
template<typename T> pyarr<T> vis2dirty2(const PyBaselines &baselines,
const PyGridderConfig &gconf, const py::array &idx_,
const py::array &vis_, const py::object &wgt_, bool do_wstacking)
const py::array &vis_, const py::object &wgt_, bool do_wstacking,
size_t verbosity)
{
auto idx = getPyarr<uint32_t>(idx_, "idx");
auto idx2 = make_const_mav<1>(idx);
......@@ -762,7 +763,8 @@ template<typename T> pyarr<T> vis2dirty2(const PyBaselines &baselines,
auto wgt2 = make_const_mav<1>(wgt);
{
py::gil_scoped_release release;
vis2dirty<T>(baselines, gconf, idx2, vis2, wgt2, dirty2, do_wstacking);
vis2dirty<T>(baselines, gconf, idx2, vis2, wgt2, dirty2, do_wstacking,
verbosity);
}
return dirty;
}
......@@ -788,6 +790,10 @@ wgt: np.array((nvis,), dtype=float with same precision as `vis`, optional
do_wstacking: bool
if True, the full improved w-stacking algorithm is carried out, otherwise
the w values are assumed to be zero.
verbosity: int
0: no output
1: some output
2: detailed output
Returns
=======
......@@ -797,18 +803,22 @@ np.array((nxdirty, nydirty), dtype=float of same precision as `vis`.)
py::array Pyvis2dirty(const PyBaselines &baselines,
const PyGridderConfig &gconf, const py::array &idx,
const py::array &vis, const py::object &wgt, bool do_wstacking)
const py::array &vis, const py::object &wgt, bool do_wstacking,
size_t verbosity)
{
if (isPytype<complex<float>>(vis))
return vis2dirty2<float>(baselines, gconf, idx, vis, wgt, do_wstacking);
return vis2dirty2<float>(baselines, gconf, idx, vis, wgt, do_wstacking,
verbosity);
if (isPytype<complex<double>>(vis))
return vis2dirty2<double>(baselines, gconf, idx, vis, wgt, do_wstacking);
return vis2dirty2<double>(baselines, gconf, idx, vis, wgt, do_wstacking,
verbosity);
myfail("type matching failed: 'vis' has neither type 'c8' nor 'c16'");
}
template<typename T> pyarr<complex<T>> dirty2vis2(const PyBaselines &baselines,
const PyGridderConfig &gconf, const pyarr<uint32_t> &idx_,
const pyarr<T> &dirty_, const py::object &wgt_, bool do_wstacking)
const pyarr<T> &dirty_, const py::object &wgt_, bool do_wstacking,
size_t verbosity)
{
auto idx = getPyarr<uint32_t>(idx_, "idx");
auto idx2 = make_const_mav<1>(idx);
......@@ -818,11 +828,11 @@ template<typename T> pyarr<complex<T>> dirty2vis2(const PyBaselines &baselines,
auto wgt2 = make_const_mav<1>(wgt);
auto vis = makeArray<complex<T>>({idx2.shape(0)});
auto vis2 = make_mav<1>(vis);
vis2.fill(0);
{
py::gil_scoped_release release;
vis2.fill(0);
dirty2vis<T>(baselines, gconf, idx2, dirty2, wgt2, vis2, do_wstacking);
dirty2vis<T>(baselines, gconf, idx2, dirty2, wgt2, vis2, do_wstacking,
verbosity);
}
return vis;
}
......@@ -848,6 +858,10 @@ wgt: np.array((nvis,), same dtype as `dirty`, optional
do_wstacking: bool
if True, the full improved w-stacking algorithm is carried out, otherwise
the w values are assumed to be zero.
verbosity: int
0: no output
1: some output
2: detailed output
Returns
=======
......@@ -856,12 +870,14 @@ np.array((nvis,), dtype=complex of same precision as `dirty`.)
)""";
py::array Pydirty2vis(const PyBaselines &baselines,
const PyGridderConfig &gconf, const py::array &idx, const py::array &dirty,
const py::object &wgt, bool do_wstacking)
const py::object &wgt, bool do_wstacking, size_t verbosity)
{
if (isPytype<float>(dirty))
return dirty2vis2<float>(baselines, gconf, idx, dirty, wgt, do_wstacking);
return dirty2vis2<float>(baselines, gconf, idx, dirty, wgt, do_wstacking,
verbosity);
if (isPytype<double>(dirty))
return dirty2vis2<double>(baselines, gconf, idx, dirty, wgt, do_wstacking);
return dirty2vis2<double>(baselines, gconf, idx, dirty, wgt, do_wstacking,
verbosity);
myfail("type matching failed: 'dirty' has neither type 'f4' nor 'f8'");
}
......@@ -1091,9 +1107,9 @@ PYBIND11_MODULE(nifty_gridder, m)
m.def("get_correlations", &Pyget_correlations<double>, "baselines"_a, "gconf"_a,
"idx"_a, "du"_a, "dv"_a, "wgt"_a=None);
m.def("vis2dirty",&Pyvis2dirty, vis2dirty_DS, "baselines"_a, "gconf"_a,
"idx"_a, "vis"_a, "wgt"_a=None, "do_wstacking"_a=false);
"idx"_a, "vis"_a, "wgt"_a=None, "do_wstacking"_a=false, "verbosity"_a=0);
m.def("dirty2vis",&Pydirty2vis, "baselines"_a, dirty2vis_DS, "gconf"_a,
"idx"_a, "dirty"_a, "wgt"_a=None, "do_wstacking"_a=false);
"idx"_a, "dirty"_a, "wgt"_a=None, "do_wstacking"_a=false, "verbosity"_a=0);
m.def("ms2dirty", &Pyms2dirty, ms2dirty_DS, "uvw"_a, "freq"_a, "ms"_a,
"wgt"_a=None, "npix_x"_a, "npix_y"_a, "pixsize_x"_a, "pixsize_y"_a,
......
......@@ -102,7 +102,7 @@ def test_adjointness_ms2dirty(nxdirty, nydirty, nrow, nchan, epsilon, singleprec
if wgt is not None:
wgt = wgt.astype("f4")
dirty2 = ng.ms2dirty(uvw, freq, ms, wgt, nxdirty, nydirty, pixsizex,
pixsizey, epsilon, wstacking, nthreads, 2).astype("f8")
pixsizey, epsilon, wstacking, nthreads, 0).astype("f8")
ms2 = ng.dirty2ms(uvw, freq, dirty, wgt, pixsizex, pixsizey, epsilon,
wstacking, nthreads, 0).astype("c16")
tol = 5e-5 if singleprec else 5e-13
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment