Commit 7ef19ec7 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

accelerate asserts

parent c571989f
......@@ -106,7 +106,8 @@ template<typename T> class membuf
: d(d_), rw(false) {}
membuf(size_t sz)
: ptr(make_unique<vector<T>>(sz)), d(ptr->data()), rw(true) {}
membuf(const membuf &other) = default;
membuf(const membuf &other)
: ptr(other.ptr), d(other.d), rw(false) {}
membuf(membuf &other) = default;
membuf(membuf &&other) = default;
// Not for public use!
......@@ -152,6 +153,7 @@ template<typename T> class fmav: public fmav_info, public membuf<T>
fmav(const T* d_, const fmav_info &info)
: fmav_info(info), membuf<T>(d_) {}
fmav(const fmav &other) = default;
fmav(fmav &other) = default;
fmav(fmav &&other) = default;
// Not for public use!
fmav(membuf<T> &buf, const shape_t &shp_, const stride_t &str_)
......@@ -231,7 +233,6 @@ template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membu
using typename mav_info<ndim>::stride_t;
protected:
// using membuf<T>::Tsp;
using membuf<T>::d;
using membuf<T>::ptr;
using mav_info<ndim>::shp;
......@@ -257,6 +258,7 @@ template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membu
mav(const array<size_t,ndim> &shp_)
: mav_info<ndim>(shp_), membuf<T>(size()) {}
mav(const mav &other) = default;
mav(mav &other) = default;
mav(mav &&other) = default;
operator fmav<T>() const
{
......@@ -297,6 +299,78 @@ template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membu
}
};
template<typename T, size_t ndim> class MavIter
{
protected:
fmav<T> mav;
array<size_t, ndim> shp;
array<ptrdiff_t, ndim> str;
shape_t pos;
ptrdiff_t idx_;
bool done_;
public:
MavIter(const fmav<T> &mav_)
: mav(mav_), pos(mav.ndim()-ndim,0), idx_(0), done_(false)
{
for (size_t i=0; i<ndim; ++i)
{
shp[i] = mav.shape(mav.ndim()-ndim+i);
str[i] = mav.stride(mav.ndim()-ndim+i);
}
}
MavIter(fmav<T> &mav_)
: mav(mav_), pos(mav.ndim()-ndim,0), idx_(0), done_(false)
{
for (size_t i=0; i<ndim; ++i)
{
shp[i] = mav.shape(mav.ndim()-ndim+i);
str[i] = mav.stride(mav.ndim()-ndim+i);
}
}
bool done() const
{ return done_; }
void inc()
{
for (ptrdiff_t i=mav.ndim()-ndim-1; i>=0; --i)
{
idx_+=mav.stride(i);
if (++pos[i]<mav.shape(i)) return;
pos[i]=0;
idx_-=mav.shape(i)*mav.stride(i);
}
done_=true;
}
size_t shape(size_t i) const { return shp[i]; }
ptrdiff_t idx(size_t i) const
{
static_assert(ndim==1, "ndim must be 1");
return idx_+i*str[0];
}
ptrdiff_t idx(size_t i, size_t j) const
{
static_assert(ndim==2, "ndim must be 2");
return idx_+i*str[0]+j*str[1];
}
ptrdiff_t idx(size_t i, size_t j, size_t k) const
{
static_assert(ndim==3, "ndim must be 3");
return idx_+i*str[0]+j*str[1]+k*str[2];
}
const T &operator()(size_t i) const
{ return mav[idx(i)]; }
const T &operator()(size_t i, size_t j) const
{ return mav[idx(i,j)]; }
const T &operator()(size_t i, size_t j, size_t k) const
{ return mav[idx(i,j,k)]; }
T &v(size_t i)
{ return mav.vraw(idx(i)); }
T &v(size_t i, size_t j)
{ return mav.vraw(idx(i,j)); }
T &v(size_t i, size_t j, size_t k)
{ return mav.vraw(idx(i,j,k)); }
};
}
using detail_mav::shape_t;
......@@ -304,6 +378,7 @@ using detail_mav::stride_t;
using detail_mav::fmav_info;
using detail_mav::fmav;
using detail_mav::mav;
using detail_mav::MavIter;
}
......
......@@ -67,7 +67,6 @@ template<typename T> void complex2hartley
(const mav<complex<T>, 2> &grid, mav<T,2> &grid2, size_t nthreads)
{
checkShape(grid.shape(), grid2.shape());
auto g2w = grid2.vdata();
size_t nu=grid.shape(0), nv=grid.shape(1);
execStatic(nu, nthreads, 0, [&](Scheduler &sched)
......@@ -78,8 +77,8 @@ template<typename T> void complex2hartley
for (size_t v=0; v<nv; ++v)
{
size_t xv = (v==0) ? 0 : nv-v;
g2w[grid2.idx(u,v)] = T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
grid(xu,xv).real()-grid(xu,xv).imag());
grid2.v(u,v) = T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
grid(xu,xv).real()-grid(xu,xv).imag());
}
}
});
......@@ -89,7 +88,6 @@ template<typename T> void hartley2complex
(const mav<T,2> &grid, mav<complex<T>,2> &grid2, size_t nthreads)
{
checkShape(grid.shape(), grid2.shape());
auto g2w = grid2.vdata();
size_t nu=grid.shape(0), nv=grid.shape(1);
execStatic(nu, nthreads, 0, [&](Scheduler &sched)
......@@ -102,7 +100,7 @@ template<typename T> void hartley2complex
size_t xv = (v==0) ? 0 : nv-v;
T v1 = T(0.5)*grid( u, v);
T v2 = T(0.5)*grid(xu,xv);
g2w[grid2.idx(u,v)] = std::complex<T>(v1+v2, v1-v2);
grid2.v(u,v) = std::complex<T>(v1+v2, v1-v2);
}
}
});
......@@ -112,7 +110,6 @@ template<typename T> void hartley2_2D(const mav<T,2> &in,
mav<T,2> &out, size_t nthreads)
{
checkShape(in.shape(), out.shape());
auto vout = out.vdata();
size_t nu=in.shape(0), nv=in.shape(1);
fmav<T> fin(in), fout(out);
r2r_separable_hartley(fin, fout, {0,1}, T(1), nthreads);
......@@ -125,10 +122,10 @@ template<typename T> void hartley2_2D(const mav<T,2> &in,
T b = out(nu-i,j);
T c = out(i,nv-j);
T d = out(nu-i,nv-j);
vout[out.idx(i,j)] = T(0.5)*(a+b+c-d);
vout[out.idx(nu-i,j)] = T(0.5)*(a+b+d-c);
vout[out.idx(i,nv-j)] = T(0.5)*(a+c+d-b);
vout[out.idx(nu-i,nv-j)] = T(0.5)*(b+c+d-a);
out.v(i,j) = T(0.5)*(a+b+c-d);
out.v(nu-i,j) = T(0.5)*(a+b+d-c);
out.v(i,nv-j) = T(0.5)*(a+c+d-b);
out.v(nu-i,nv-j) = T(0.5)*(b+c+d-a);
}
});
}
......@@ -374,7 +371,6 @@ class GridderConfig
mav<T,2> &dirty) const
{
checkShape(dirty.shape(), {nx_dirty,ny_dirty});
auto vdirty =dirty.vdata();
auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
......@@ -391,7 +387,7 @@ class GridderConfig
if (j2>=nv) j2-=nv;
// FIXME: for some reason g++ warns about double-to-float conversion
// here, even though there is an explicit cast...
vdirty[dirty.idx(i,j)] = tmav(i2,j2)*T(cfu[icfu]*cfv[icfv]);
dirty.v(i,j) = tmav(i2,j2)*T(cfu[icfu]*cfv[icfv]);
}
}
});
......@@ -400,7 +396,6 @@ class GridderConfig
mav<complex<T>,2> &tmav, mav<T,2> &dirty, T w) const
{
checkShape(dirty.shape(), {nx_dirty,ny_dirty});
auto vdirty = dirty.vdata();
double x0 = -0.5*nx_dirty*psx,
y0 = -0.5*ny_dirty*psy;
execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
......@@ -417,7 +412,7 @@ class GridderConfig
if (ix>=nu) ix-=nu;
size_t jx = nv-ny_dirty/2+j;
if (jx>=nv) jx-=nv;
vdirty[dirty.idx(i,j)] += (tmav(ix,jx)*ws).real(); // lower left
dirty.v(i,j) += (tmav(ix,jx)*ws).real(); // lower left
size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
size_t ix2 = nu-nx_dirty/2+i2;
if (ix2>=nu) ix2-=nu;
......@@ -425,12 +420,12 @@ class GridderConfig
if (jx2>=nv) jx2-=nv;
if ((i>0)&&(i<i2))
{
vdirty[dirty.idx(i2,j)] += (tmav(ix2,jx)*ws).real(); // lower right
dirty.v(i2,j) += (tmav(ix2,jx)*ws).real(); // lower right
if ((j>0)&&(j<j2))
vdirty[dirty.idx(i2,j2)] += (tmav(ix2,jx2)*ws).real(); // upper right
dirty.v(i2,j2) += (tmav(ix2,jx2)*ws).real(); // upper right
}
if ((j>0)&&(j<j2))
vdirty[dirty.idx(i,j2)] += (tmav(ix,jx2)*ws).real(); // upper left
dirty.v(i,j2) += (tmav(ix,jx2)*ws).real(); // upper left
}
}
});
......@@ -462,7 +457,6 @@ class GridderConfig
auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
grid.fill(0);
auto vgrid = grid.vdata();
execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
......@@ -475,7 +469,7 @@ class GridderConfig
if (i2>=nu) i2-=nu;
size_t j2 = nv-ny_dirty/2+j;
if (j2>=nv) j2-=nv;
vgrid[grid.idx(i2,j2)] = dirty(i,j)*T(cfu[icfu]*cfv[icfv]);
grid.v(i2,j2) = dirty(i,j)*T(cfu[icfu]*cfv[icfv]);
}
}
});
......@@ -486,7 +480,6 @@ class GridderConfig
checkShape(dirty.shape(), {nx_dirty, ny_dirty});
checkShape(grid.shape(), {nu, nv});
grid.fill(0);
auto vgrid = grid.vdata();
double x0 = -0.5*nx_dirty*psx,
y0 = -0.5*ny_dirty*psy;
......@@ -504,7 +497,7 @@ class GridderConfig
if (ix>=nu) ix-=nu;
size_t jx = nv-ny_dirty/2+j;
if (jx>=nv) jx-=nv;
vgrid[grid.idx(ix,jx)] = dirty(i,j)*ws; // lower left
grid.v(ix,jx) = dirty(i,j)*ws; // lower left
size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
size_t ix2 = nu-nx_dirty/2+i2;
if (ix2>=nu) ix2-=nu;
......@@ -512,12 +505,12 @@ class GridderConfig
if (jx2>=nv) jx2-=nv;
if ((i>0)&&(i<i2))
{
vgrid[grid.idx(ix2,jx)] = dirty(i2,j)*ws; // lower right
grid.v(ix2,jx) = dirty(i2,j)*ws; // lower right
if ((j>0)&&(j<j2))
vgrid[grid.idx(ix2,jx2)] = dirty(i2,j2)*ws; // upper right
grid.v(ix2,jx2) = dirty(i2,j2)*ws; // upper right
}
if ((j>0)&&(j<j2))
vgrid[grid.idx(ix,jx2)] = dirty(i,j2)*ws; // upper left
grid.v(ix,jx2) = dirty(i,j2)*ws; // upper left
}
}
});
......@@ -872,7 +865,6 @@ template<typename T> void apply_global_corrections(const GridderConfig &gconf,
{
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
auto vdirty = dirty.vdata();
size_t nthreads = gconf.Nthreads();
auto psx=gconf.Pixsize_x();
auto psy=gconf.Pixsize_y();
......@@ -913,15 +905,15 @@ template<typename T> void apply_global_corrections(const GridderConfig &gconf,
}
fct *= T(cfu[nx_dirty/2-i]*cfv[ny_dirty/2-j]);
size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
vdirty[dirty.idx(i,j)]*=fct;
dirty.v(i,j)*=fct;
if ((i>0)&&(i<i2))
{
vdirty[dirty.idx(i2,j)]*=fct;
dirty.v(i2,j)*=fct;
if ((j>0)&&(j<j2))
vdirty[dirty.idx(i2,j2)]*=fct;
dirty.v(i2,j2)*=fct;
}
if ((j>0)&&(j<j2))
vdirty[dirty.idx(i,j2)]*=fct;
dirty.v(i,j2)*=fct;
}
}
});
......@@ -1108,10 +1100,9 @@ template<typename T, typename Serv> void dirty2x(
WgridHelper<Serv> hlp(gconf, srv, verbosity);
double dw = hlp.DW();
mav<T,2> tdirty({nx_dirty,ny_dirty});
auto vtdirty = tdirty.vdata();
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
vtdirty[tdirty.idx(i,j)] = dirty(i,j);
tdirty.v(i,j) = dirty(i,j);
// correct for w gridding etc.
apply_global_corrections(gconf, tdirty, ES_Kernel(gconf.Supp(), gconf.Ofactor(), nthreads), dw, true);
mav<complex<T>,2> grid({gconf.Nu(),gconf.Nv()});
......
......@@ -37,6 +37,7 @@
#include "mr_util/constants.h"
#include "mr_util/string_utils.h"
#include "mr_util/geom_utils.h"
#include "mr_util/pybind_utils.h"
using namespace std;
using namespace mr;
......@@ -45,119 +46,31 @@ using namespace healpix;
namespace py = pybind11;
namespace {
class Itnew
{
protected:
using stv = vector<size_t>;
using pdv = vector<ptrdiff_t>;
stv shape, pos;
pdv stride;
bool done_;
const char *ptr;
public:
Itnew (const py::array &in) : shape(max<int>(2,in.ndim())),
pos(max<int>(2,in.ndim()),0), stride(max<int>(2,in.ndim())), done_(false),
ptr(reinterpret_cast<const char *>(in.data()))
{
for (size_t i=0; i<size_t(in.ndim()); ++i)
{
shape[i]=in.shape(in.ndim()-1-i);
stride[i]=(shape[i]==1) ? 0 : in.strides(in.ndim()-1-i);
}
for (size_t i=in.ndim(); i<shape.size(); ++i)
{
shape[i]=1;
stride[i]=0;
}
}
size_t dim(size_t idim) const
{ return shape[idim]; }
bool done() const
{ return done_; }
void inc (size_t dim)
{
for (size_t i=dim; i<shape.size(); ++i)
{
ptr+=stride[i];
if (++pos[i]<shape[i]) return;
pos[i]=0;
ptr-=shape[i]*stride[i];
}
done_=true;
}
};
template<typename T> class Itnew_w: public Itnew
{
private:
const T *cptr (const char *p) const { return reinterpret_cast<const T *>(p); }
T *vptr (const char *p) { return const_cast<T *>(cptr(p)); }
public:
using Itnew::Itnew;
T &operator() (size_t i)
{ return *vptr(ptr+i*stride[0]); }
T &operator() (size_t i, size_t j)
{ return *vptr(ptr+j*stride[0]+i*stride[1]); }
};
template<typename T> class Itnew_r: public Itnew
{
private:
const T *cptr (const char *p) const { return reinterpret_cast<const T *>(p); }
public:
using Itnew::Itnew;
const T &operator() (size_t i) const
{ return *cptr(ptr+i*stride[0]); }
const T &operator() (size_t i, size_t j) const
{ return *cptr(ptr+j*stride[0]+i*stride[1]); }
};
using a_d = py::array_t<double>;
using a_i = py::array_t<int64_t>;
using a_d_c = py::array_t<double, py::array::c_style | py::array::forcecast>;
void assert_equal_shape(const py::array &a, const py::array &b)
{
MR_assert(a.ndim()==b.ndim(),"array dimensions mismatch");
for (size_t i=0; i<size_t(a.ndim()); ++i)
MR_assert(a.shape(i)==b.shape(i), "array hape mismatch");
}
vector<size_t> add_dim(const py::array &a, size_t dim)
vector<size_t> add_dim(const vector<size_t> &a, size_t dim)
{
vector<size_t> res(a.ndim()+1);
for (size_t i=0; i<size_t(a.ndim()); ++i) res[i]=a.shape(i);
vector<size_t> res(a.size()+1);
for (size_t i=0; i<a.size(); ++i) res[i]=a[i];
res.back()=dim;
return res;
}
vector<size_t> subst_dim(const py::array &a, size_t d1, size_t d2)
vector<size_t> subst_dim(const vector<size_t> &a, size_t d1, size_t d2)
{
MR_assert(a.ndim()>0,"too few array dimensions");
MR_assert(size_t(a.shape(a.ndim()-1))==d1,
"incorrect last array dimension");
vector<size_t> res(a.ndim());
for (size_t i=0; i<size_t(a.ndim()-1); ++i) res[i]=a.shape(i);
MR_assert(a.size()>0,"too few array dimensions");
MR_assert(a.back()==d1, "incorrect last dimension");
vector<size_t> res(a);
res.back()=d2;
return res;
}
vector<size_t> rem_dim(const py::array &a, size_t dim)
vector<size_t> rem_dim(const vector<size_t> &a, size_t dim)
{
MR_assert(a.ndim()>0,"too few array dimensions");
MR_assert(size_t(a.shape(a.ndim()-1))==dim,
"incorrect last array dimension");
vector<size_t> res(a.ndim()-1);
for (size_t i=0; i<size_t(a.ndim())-1; ++i) res[i]=a.shape(i);
return res;
}
vector<size_t> copy_dim(const py::array &a)
{
vector<size_t> res(a.ndim());
for (size_t i=0; i<size_t(a.ndim()); ++i) res[i]=a.shape(i);
return res;
MR_assert(a.size()>0,"too few array dimensions");
MR_assert(a.back()==dim, "incorrect last dimension");
return vector<size_t> {a.begin(), a.end()-1};
}
class Pyhpbase
......@@ -181,132 +94,150 @@ class Pyhpbase
a_d pix2ang (const a_i &pix) const
{
a_d ang(add_dim(pix,2));
Itnew_w<double> iout (ang);
Itnew_r<int64_t> iin (pix);
auto pix2 = to_fmav<int64_t>(pix);
a_d ang(add_dim(pix2.shape(), 2));
auto ang2 = to_fmav<double>(ang,true);
MavIter<int64_t,1> iin(pix2);
MavIter<double,2> iout(ang2);
while (!iin.done())
{
for (size_t i=0; i<iin.dim(0); ++i)
for (size_t i=0; i<iin.shape(0); ++i)
{
pointing ptg=base.pix2ang(iin(i));
iout(i,0)=ptg.theta; iout(i,1)=ptg.phi;
iout.v(i,0) = ptg.theta; iout.v(i,1) = ptg.phi;
}
iin.inc(1);iout.inc(2);
iin.inc();iout.inc();
}
return ang;
}
a_i ang2pix (const a_d &ang) const
{
a_i pix(rem_dim(ang,2));
Itnew_r<double> iin (ang);
Itnew_w<int64_t> iout (pix);
auto ang2 = to_fmav<double>(ang);
a_i pix(rem_dim(ang2.shape(), 2));
auto pix2 = to_fmav<int64_t>(pix,true);
MavIter<double,2> iin(ang2);
MavIter<int64_t,1> iout(pix2);
while (!iin.done())
{
for (size_t i=0; i<iout.dim(0); ++i)
iout(i)=base.ang2pix(pointing(iin(i,0),iin(i,1)));
iin.inc(2);iout.inc(1);
for (size_t i=0; i<iout.shape(0); ++i)
iout.v(i)=base.ang2pix(pointing(iin(i,0),iin(i,1)));
iin.inc();iout.inc();
}
return pix;
}
a_d pix2vec (const a_i &pix) const
{
a_d vec(add_dim(pix,3));
Itnew_w<double> iout (vec);
Itnew_r<int64_t> iin (pix);
auto pix2 = to_fmav<int64_t>(pix);
a_d vec(add_dim(pix2.shape(), 3));
auto vec2 = to_fmav<double>(vec,true);
MavIter<int64_t,1> iin(pix2);
MavIter<double,2> iout(vec2);
while (!iin.done())
{
for (size_t i=0; i<iin.dim(0); ++i)
for (size_t i=0; i<iin.shape(0); ++i)
{
vec3 v=base.pix2vec(iin(i));
iout(i,0)=v.x; iout(i,1)=v.y; iout(i,2)=v.z;
iout.v(i,0)=v.x; iout.v(i,1)=v.y; iout.v(i,2)=v.z;
}
iin.inc(1);iout.inc(2);
iin.inc();iout.inc();
}
return vec;
}
a_i vec2pix (const a_d &vec) const
{
a_i pix(rem_dim(vec,3));
Itnew_r<double> iin (vec);
Itnew_w<int64_t> iout (pix);
auto vec2 = to_fmav<double>(vec);
a_i pix(rem_dim(vec2.shape(), 3));
auto pix2 = to_fmav<int64_t>(pix,true);
MavIter<double,2> iin(vec2);
MavIter<int64_t,1> iout(pix2);
while (!iin.done())
{
for (size_t i=0; i<iout.dim(0); ++i)
iout(i)=base.vec2pix(vec3(iin(i,0),iin(i,1),iin(i,2)));
iin.inc(2);iout.inc(1);
for (size_t i=0; i<iout.shape(0); ++i)
iout.v(i)=base.vec2pix(vec3(iin(i,0),iin(i,1),iin(i,2)));
iin.inc();iout.inc();
}
return pix;
}
a_i pix2xyf (const a_i &pix) const
{
a_i xyf(add_dim(pix,3));
Itnew_w<int64_t> iout (xyf);
Itnew_r<int64_t> iin (pix);
auto pix2 = to_fmav<int64_t>(pix);
a_i xyf(add_dim(pix2.shape(),3));
auto xyf2 = to_fmav<int64_t>(xyf,true);
MavIter<int64_t,1> iin(pix2);
MavIter<int64_t,2> iout(xyf2);
while (!iin.done())
{
for (size_t i=0; i<iin.dim(0); ++i)
for (size_t i=0; i<iin.shape(0); ++i)
{
int x,y,f;
base.pix2xyf(iin(i),x,y,f);
iout(i,0)=x; iout(i,1)=y; iout(i,2)=f;
iout.v(i,0)=x; iout.v(i,1)=y; iout.v(i,2)=f;
}
iin.inc(1);iout.inc(2);
iin.inc();iout.inc();
}
return xyf;
}
a_i xyf2pix (const a_i &xyf) const
{
a_i pix(rem_dim(xyf,3));
Itnew_r<int64_t> iin (xyf);
Itnew_w<int64_t> iout (pix);
auto xyf2 = to_fmav<int64_t>(xyf);
a_i pix(rem_dim(xyf2.shape(),3));
auto pix2 = to_fmav<int64_t>(pix,true);
MavIter<int64_t,2> iin(xyf2);
MavIter<int64_t,1> iout(pix2);
while (!iin.done())
{
for (size_t i=0; i<iout.dim(0); ++i)
iout(i)=base.xyf2pix(iin(i,0),iin(i,1),iin(i,2));
iin.inc(2);iout.inc(1);
for (size_t i=0; i<iout.shape(0); ++i)
iout.v(i)=base.xyf2pix(iin(i,0),iin(i,1),iin(i,2));
iin.inc();iout.inc();
}