Commit 33251a05 authored by Martin Reinecke's avatar Martin Reinecke

stage 2

parent d73de808
......@@ -498,9 +498,9 @@ template<typename I> template<typename I2>
double dr=base[o].max_pixrad(); // safety distance
for (size_t i=0; i<nv; ++i)
{
crlimit(o,i,0) = (rad[i]+dr>pi) ? -1. : cos(rad[i]+dr);
crlimit(o,i,1) = (o==0) ? cos(rad[i]) : crlimit(0,i,1);
crlimit(o,i,2) = (rad[i]-dr<0.) ? 1. : cos(rad[i]-dr);
crlimit.v(o,i,0) = (rad[i]+dr>pi) ? -1. : cos(rad[i]+dr);
crlimit.v(o,i,1) = (o==0) ? cos(rad[i]) : crlimit(0,i,1);
crlimit.v(o,i,2) = (rad[i]-dr<0.) ? 1. : cos(rad[i]-dr);
}
}
......@@ -563,9 +563,9 @@ template<typename I> void T_Healpix_Base<I>::query_multidisc_general
double dr=base[o].max_pixrad(); // safety distance
for (size_t i=0; i<nv; ++i)
{
crlimit(o,i,0) = (rad[i]+dr>pi) ? -1. : cos(rad[i]+dr);
crlimit(o,i,1) = (o==0) ? cos(rad[i]) : crlimit(0,i,1);
crlimit(o,i,2) = (rad[i]-dr<0.) ? 1. : cos(rad[i]-dr);
crlimit.v(o,i,0) = (rad[i]+dr>pi) ? -1. : cos(rad[i]+dr);
crlimit.v(o,i,1) = (o==0) ? cos(rad[i]) : crlimit(0,i,1);
crlimit.v(o,i,2) = (rad[i]-dr<0.) ? 1. : cos(rad[i]-dr);
}
}
......
......@@ -650,25 +650,28 @@ template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it,
template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it,
const Cmplx<native_simd<T>> *MRUTIL_RESTRICT src, fmav<Cmplx<T>> &dst)
{
auto ptr=dst.vdata();
for (size_t i=0; i<it.length_out(); ++i)
for (size_t j=0; j<vlen; ++j)
dst[it.oofs(j,i)].Set(src[i].r[j],src[i].i[j]);
ptr[it.oofs(j,i)].Set(src[i].r[j],src[i].i[j]);
}
template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it,
const native_simd<T> *MRUTIL_RESTRICT src, fmav<T> &dst)
{
auto ptr=dst.vdata();
for (size_t i=0; i<it.length_out(); ++i)
for (size_t j=0; j<vlen; ++j)
dst[it.oofs(j,i)] = src[i][j];
ptr[it.oofs(j,i)] = src[i][j];
}
template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it,
const T *MRUTIL_RESTRICT src, fmav<T> &dst)
{
auto ptr=dst.vdata();
if (src == &dst[it.oofs(0)]) return; // in-place
for (size_t i=0; i<it.length_out(); ++i)
dst[it.oofs(i)] = src[i];
ptr[it.oofs(i)] = src[i];
}
template <typename T> struct add_vec { using type = native_simd<T>; };
......@@ -709,7 +712,7 @@ MRUTIL_NOINLINE void general_nd(const fmav<T> &in, fmav<T> &out,
{
it.advance(1);
auto buf = allow_inplace && it.stride_out() == 1 ?
&out[it.oofs(0)] : reinterpret_cast<T *>(storage.data());
&out.vraw(it.oofs(0)) : reinterpret_cast<T *>(storage.data());
exec(it, tin, out, buf, *plan, fct);
}
}); // end of parallel region
......@@ -734,32 +737,34 @@ struct ExecC2C
template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it,
const native_simd<T> *MRUTIL_RESTRICT src, fmav<T> &dst)
{
auto ptr = dst.vdata();
for (size_t j=0; j<vlen; ++j)
dst[it.oofs(j,0)] = src[0][j];
ptr[it.oofs(j,0)] = src[0][j];
size_t i=1, i1=1, i2=it.length_out()-1;
for (i=1; i<it.length_out()-1; i+=2, ++i1, --i2)
for (size_t j=0; j<vlen; ++j)
{
dst[it.oofs(j,i1)] = src[i][j]+src[i+1][j];
dst[it.oofs(j,i2)] = src[i][j]-src[i+1][j];
ptr[it.oofs(j,i1)] = src[i][j]+src[i+1][j];
ptr[it.oofs(j,i2)] = src[i][j]-src[i+1][j];
}
if (i<it.length_out())
for (size_t j=0; j<vlen; ++j)
dst[it.oofs(j,i1)] = src[i][j];
ptr[it.oofs(j,i1)] = src[i][j];
}
template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it,
const T *MRUTIL_RESTRICT src, fmav<T> &dst)
{
dst[it.oofs(0)] = src[0];
auto ptr = dst.vdata();
ptr[it.oofs(0)] = src[0];
size_t i=1, i1=1, i2=it.length_out()-1;
for (i=1; i<it.length_out()-1; i+=2, ++i1, --i2)
{
dst[it.oofs(i1)] = src[i]+src[i+1];
dst[it.oofs(i2)] = src[i]-src[i+1];
ptr[it.oofs(i1)] = src[i]+src[i+1];
ptr[it.oofs(i2)] = src[i]-src[i+1];
}
if (i<it.length_out())
dst[it.oofs(i1)] = src[i];
ptr[it.oofs(i1)] = src[i];
}
struct ExecHartley
......@@ -810,20 +815,21 @@ template<typename T> MRUTIL_NOINLINE void general_r2c(
auto tdatav = reinterpret_cast<native_simd<T> *>(storage.data());
copy_input(it, in, tdatav);
plan->exec(tdatav, fct, true);
auto vout = out.vdata();
for (size_t j=0; j<vlen; ++j)
out[it.oofs(j,0)].Set(tdatav[0][j]);
vout[it.oofs(j,0)].Set(tdatav[0][j]);
size_t i=1, ii=1;
if (forward)
for (; i<len-1; i+=2, ++ii)
for (size_t j=0; j<vlen; ++j)
out[it.oofs(j,ii)].Set(tdatav[i][j], tdatav[i+1][j]);
vout[it.oofs(j,ii)].Set(tdatav[i][j], tdatav[i+1][j]);
else
for (; i<len-1; i+=2, ++ii)
for (size_t j=0; j<vlen; ++j)
out[it.oofs(j,ii)].Set(tdatav[i][j], -tdatav[i+1][j]);
vout[it.oofs(j,ii)].Set(tdatav[i][j], -tdatav[i+1][j]);
if (i<len)
for (size_t j=0; j<vlen; ++j)
out[it.oofs(j,ii)].Set(tdatav[i][j]);
vout[it.oofs(j,ii)].Set(tdatav[i][j]);
}
#endif
while (it.remaining()>0)
......@@ -832,16 +838,17 @@ template<typename T> MRUTIL_NOINLINE void general_r2c(
auto tdata = reinterpret_cast<T *>(storage.data());
copy_input(it, in, tdata);
plan->exec(tdata, fct, true);
out[it.oofs(0)].Set(tdata[0]);
auto vout = out.vdata();
vout[it.oofs(0)].Set(tdata[0]);
size_t i=1, ii=1;
if (forward)
for (; i<len-1; i+=2, ++ii)
out[it.oofs(ii)].Set(tdata[i], tdata[i+1]);
vout[it.oofs(ii)].Set(tdata[i], tdata[i+1]);
else
for (; i<len-1; i+=2, ++ii)
out[it.oofs(ii)].Set(tdata[i], -tdata[i+1]);
vout[it.oofs(ii)].Set(tdata[i], -tdata[i+1]);
if (i<len)
out[it.oofs(ii)].Set(tdata[i]);
vout[it.oofs(ii)].Set(tdata[i]);
}
}); // end of parallel region
}
......@@ -944,7 +951,7 @@ template<typename T> void c2c(const fmav<std::complex<T>> &in,
util::sanity_check_onetype(in, out, in.data()==out.data(), axes);
if (in.size()==0) return;
fmav<Cmplx<T>> in2(reinterpret_cast<const Cmplx<T> *>(in.data()), in);
fmav<Cmplx<T>> out2(reinterpret_cast<Cmplx<T> *>(out.data()), out, out.writable());
fmav<Cmplx<T>> out2(reinterpret_cast<Cmplx<T> *>(out.vdata()), out, out.writable());
general_nd<pocketfft_c<T>>(in2, out2, axes, fct, nthreads, ExecC2C{forward});
}
......@@ -983,7 +990,7 @@ template<typename T> void r2c(const fmav<T> &in,
{
util::sanity_check_cr(out, in, axis);
if (in.size()==0) return;
fmav<Cmplx<T>> out2(reinterpret_cast<Cmplx<T> *>(out.data()), out, out.writable());
fmav<Cmplx<T>> out2(reinterpret_cast<Cmplx<T> *>(out.vdata()), out, out.writable());
general_r2c(in, out2, axis, forward, fct, nthreads);
}
......@@ -1055,11 +1062,12 @@ template<typename T> void r2r_genuine_hartley(const fmav<T> &in,
r2c(in, atmp, axes, true, fct, nthreads);
simple_iter iin(atmp);
rev_iter iout(out, axes);
auto vout = out.vdata();
while(iin.remaining()>0)
{
auto v = atmp[iin.ofs()];
out[iout.ofs()] = v.real()+v.imag();
out[iout.rev_ofs()] = v.real()-v.imag();
vout[iout.ofs()] = v.real()+v.imag();
vout[iout.rev_ofs()] = v.real()-v.imag();
iin.advance(); iout.advance();
}
}
......
......@@ -107,33 +107,27 @@ template<typename T> class membuf
membuf(size_t sz)
: ptr(make_unique<vector<T>>(sz)), d(ptr->data()), rw(true) {}
membuf(const membuf &other) = default;
membuf(membuf &other) = default;
membuf(membuf &&other) = default;
// Not for public use!
membuf(T *d_, const Tsp &p, bool rw_)
: ptr(p), d(d_), rw(rw_) {}
template<typename I> const T &val(I i) const
{ return d[i]; }
template<typename I> T &val(I i)
template<typename I> T &vraw(I i)
{
MR_assert(rw, "array is nor writable");
MR_assert(rw, "array is not writable");
return const_cast<T *>(d)[i];
}
template<typename I> const T &operator[](I i) const
{ return d[i]; }
template<typename I> T &operator[](I i)
{
MR_assert(rw, "array is nor writable");
return const_cast<T *>(d)[i];
}
const T *data() const
{ return d; }
T *data()
T *vdata()
{
MR_assert(rw, "array is nor writable");
MR_assert(rw, "array is not writable");
return const_cast<T *>(d);
}
bool writable() const { return rw; }
bool writable() { return rw; }
};
// "mav" stands for "multidimensional array view"
......@@ -157,11 +151,11 @@ template<typename T> class fmav: public fmav_info, public membuf<T>
: fmav_info(info), membuf<T>(d_, rw_) {}
fmav(const T* d_, const fmav_info &info)
: fmav_info(info), membuf<T>(d_) {}
fmav(const fmav &other) = delete;
fmav(const fmav &other) = default;
fmav(fmav &&other) = default;
// Not for public use!
fmav(T *d_, const Tsp &p, const shape_t &shp_, const stride_t &str_, bool rw_)
: fmav_info(shp_, str_), membuf<T>(d_, p, rw_) {}
fmav(membuf<T> &buf, const shape_t &shp_, const stride_t &str_)
: fmav_info(shp_, str_), membuf<T>(buf) {}
};
template<size_t ndim> class mav_info
......@@ -194,7 +188,6 @@ template<size_t ndim> class mav_info
for (size_t i=2; i<=ndim; ++i)
str[ndim-i] = str[ndim-i+1]*ptrdiff_t(shp[ndim-i+1]);
}
// size_t ndim() const { return shp.size(); }
size_t size() const { return sz; }
const shape_t &shape() const { return shp; }
size_t shape(size_t i) const { return shp[i]; }
......@@ -214,6 +207,21 @@ template<size_t ndim> class mav_info
}
bool conformable(const mav_info &other) const
{ return shp==other.shp; }
ptrdiff_t idx(size_t i) const
{
static_assert(ndim==1, "ndim must be 1");
return str[0]*i;
}
ptrdiff_t idx(size_t i, size_t j) const
{
static_assert(ndim==2, "ndim must be 2");
return str[0]*i + str[1]*j;
}
ptrdiff_t idx(size_t i, size_t j, size_t k) const
{
static_assert(ndim==3, "ndim must be 3");
return str[0]*i + str[1]*j + str[2]*k;
}
};
template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membuf<T>
......@@ -223,16 +231,21 @@ template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membu
using typename mav_info<ndim>::stride_t;
protected:
using membuf<T>::Tsp;
using mav_info<ndim>::size;
// using membuf<T>::Tsp;
using membuf<T>::d;
using membuf<T>::ptr;
using mav_info<ndim>::shp;
using mav_info<ndim>::str;
using membuf<T>::rw;
using membuf<T>::val;
using membuf<T>::vraw;
public:
using membuf<T>::operator[];
using membuf<T>::vdata;
using membuf<T>::data;
using mav_info<ndim>::size;
using mav_info<ndim>::idx;
mav(const T *d_, const shape_t &shp_, const stride_t &str_)
: mav_info<ndim>(shp_, str_), membuf<T>(d_) {}
mav(T *d_, const shape_t &shp_, const stride_t &str_, bool rw_=false)
......@@ -243,58 +256,44 @@ template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membu
: mav_info<ndim>(shp_), membuf<T>(d_, rw_) {}
mav(const array<size_t,ndim> &shp_)
: mav_info<ndim>(shp_), membuf<T>(size()) {}
mav(const mav &other) = delete;
mav(const mav &other) = default;
mav(mav &&other) = default;
operator fmav<T>() const
{
return fmav<T>(const_cast<T *>(d), ptr, {shp.begin(), shp.end()}, {str.begin(), str.end()}, rw);
return fmav<T>(data(), {shp.begin(), shp.end()}, {str.begin(), str.end()});
}
const T &operator()(size_t i) const
operator fmav<T>()
{
static_assert(ndim==1, "ndim must be 1");
return val(str[0]*i);
return fmav<T>(*this, {shp.begin(), shp.end()}, {str.begin(), str.end()});
}
const T &operator()(size_t i) const
{ return operator[](idx(i)); }
const T &operator()(size_t i, size_t j) const
{
static_assert(ndim==2, "ndim must be 2");
return val(str[0]*i + str[1]*j);
}
{ return operator[](idx(i,j)); }
const T &operator()(size_t i, size_t j, size_t k) const
{
static_assert(ndim==3, "ndim must be 3");
return val(str[0]*i + str[1]*j + str[2]*k);
}
T &operator()(size_t i)
{
static_assert(ndim==1, "ndim must be 1");
return val(str[0]*i);
}
T &operator()(size_t i, size_t j)
{
static_assert(ndim==2, "ndim must be 2");
return val(str[0]*i + str[1]*j);
}
T &operator()(size_t i, size_t j, size_t k)
{
static_assert(ndim==3, "ndim must be 3");
return val(str[0]*i + str[1]*j + str[2]*k);
}
{ return operator[](idx(i,j,k)); }
T &v(size_t i)
{ return vraw(idx(i)); }
T &v(size_t i, size_t j)
{ return vraw(idx(i,j)); }
T &v(size_t i, size_t j, size_t k)
{ return vraw(idx(i,j,k)); }
void fill(const T &val)
{
MR_assert(rw, "array is nor writable");
T *d2 = vdata();
// FIXME: special cases for contiguous arrays and/or zeroing?
if (ndim==1)
for (size_t i=0; i<shp[0]; ++i)
d[str[0]*i]=val;
d2[str[0]*i]=val;
else if (ndim==2)
for (size_t i=0; i<shp[0]; ++i)
for (size_t j=0; j<shp[1]; ++j)
d[str[0]*i + str[1]*j] = val;
d2[str[0]*i + str[1]*j] = val;
else if (ndim==3)
for (size_t i=0; i<shp[0]; ++i)
for (size_t j=0; j<shp[1]; ++j)
for (size_t k=0; k<shp[2]; ++k)
d[str[0]*i + str[1]*j + str[2]*k] = val;
d2[str[0]*i + str[1]*j + str[2]*k] = val;
}
};
......@@ -302,6 +301,7 @@ template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membu
using detail_mav::shape_t;
using detail_mav::stride_t;
using detail_mav::fmav_info;
using detail_mav::fmav;
using detail_mav::mav;
......
This diff is collapsed.
......@@ -40,13 +40,13 @@ template<typename T> py::array ms2dirty_general2(const py::array &uvw_,
size_t nv, double epsilon, bool do_wstacking, size_t nthreads,
size_t verbosity)
{
auto uvw = to_cmav<double,2>(uvw_);
auto freq = to_cmav<double,1>(freq_);
auto ms = to_cmav<complex<T>,2>(ms_);
auto uvw = to_mav<double,2>(uvw_, false);
auto freq = to_mav<double,1>(freq_, false);
auto ms = to_mav<complex<T>,2>(ms_, false);
auto wgt = get_optional_const_Pyarr<T>(wgt_, {ms.shape(0),ms.shape(1)});
auto wgt2 = to_cmav<T,2>(wgt);
auto wgt2 = to_mav<T,2>(wgt, false);
auto dirty = make_Pyarr<T>({npix_x,npix_y});
auto dirty2 = to_mav<T,2>(dirty);
auto dirty2 = to_mav<T,2>(dirty, true);
{
py::gil_scoped_release release;
ms2dirty_general(uvw,freq,ms,wgt2,pixsize_x,pixsize_y,nu,nv,epsilon,
......@@ -120,13 +120,13 @@ template<typename T> py::array dirty2ms_general2(const py::array &uvw_,
double pixsize_x, double pixsize_y, size_t nu, size_t nv, double epsilon,
bool do_wstacking, size_t nthreads, size_t verbosity)
{
auto uvw = to_cmav<double,2>(uvw_);
auto freq = to_cmav<double,1>(freq_);
auto dirty = to_cmav<T,2>(dirty_);
auto uvw = to_mav<double,2>(uvw_, false);
auto freq = to_mav<double,1>(freq_, false);
auto dirty = to_mav<T,2>(dirty_, false);
auto wgt = get_optional_const_Pyarr<T>(wgt_, {uvw.shape(0),freq.shape(0)});
auto wgt2 = to_cmav<T,2>(wgt);
auto wgt2 = to_mav<T,2>(wgt, false);
auto ms = make_Pyarr<complex<T>>({uvw.shape(0),freq.shape(0)});
auto ms2 = to_mav<complex<T>,2>(ms);
auto ms2 = to_mav<complex<T>,2>(ms, true);
{
py::gil_scoped_release release;
dirty2ms_general(uvw,freq,dirty,wgt2,pixsize_x,pixsize_y,nu,nv,epsilon,
......
......@@ -126,7 +126,7 @@ template<typename T> py::array c2c_sym_internal(const py::array &in,
while(iter.remaining()>0)
{
auto v = aout[iter.ofs()];
aout[iter.rev_ofs()] = conj(v);
aout.vraw(iter.rev_ofs()) = conj(v);
iter.advance();
}
}
......
......@@ -6,9 +6,11 @@
import pysharp
import numpy as np
from numpy.testing import assert_allclose
from time import time
def _l2error(a, b):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))
# set maximum multipole moment
lmax = 2047
# maximum m. For SHTOOLS this is alway equal to lmax, if I understand correctly.
......@@ -47,7 +49,7 @@ print("testing Gauss-Legendre grid")
nlat = lmax+1
# describe the Gauss-Legendre geometry to the job
job.set_Gauss_geometry(nlat, nlon)
job.set_gauss_geometry(nlat, nlon)
# go from a_lm to map
t0=time()
......@@ -65,16 +67,16 @@ alm2 = job.map2alm(map)
print("time for map analysis: {}s".format(time()-t0))
# make sure input was recovered accurately
assert_allclose(alm, alm2)
print("L2 error: ", _l2error(alm,alm2))
print("testing Driscoll-Healy grid")
# Number of iso-latitude rings required for Gauss-Legendre grid
nlat = 2*lmax
# Number of iso-latitude rings required for Driscoll-Healy grid
nlat = 2*lmax+2
# describe the Gauss-Legendre geometry to the job
job.set_DH_geometry(nlat, nlon)
job.set_dh_geometry(nlat, nlon)
# go from a_lm to map
t0=time()
......@@ -92,4 +94,4 @@ alm2 = job.map2alm(map)
print("time for map analysis: {}s".format(time()-t0))
# make sure input was recovered accurately
assert_allclose(alm, alm2)
print("L2 error: ", _l2error(alm,alm2))
......@@ -206,21 +206,21 @@ void upsample_to_cc(const mav<double,2> &in, bool has_np, bool has_sp,
size_t je = min(js+delta, nphi);
mav<double,2> tmp({nrings_out,je-js});
fmav<double> ftmp(tmp);
mav<double,2> tmp2(tmp.data(),{nrings_in, je-js}, true);
mav<double,2> tmp2(tmp.vdata(),{nrings_in, je-js}, true);
fmav<double> ftmp2(tmp2);
// enhance to "double sphere"
if (has_np)
for (size_t j=js; j<je; ++j)
tmp2(0,j-js) = in(0,j);
tmp2.v(0,j-js) = in(0,j);
if (has_sp)
for (size_t j=js; j<je; ++j)
tmp2(ntheta_in-1,j-js) = in(ntheta_in-1,j);
tmp2.v(ntheta_in-1,j-js) = in(ntheta_in-1,j);
for (size_t i=has_np, i2=nrings_in-1; i+has_sp<ntheta_in; ++i,--i2)
for (size_t j=js,j2=js+nphi/2; j<je; ++j,++j2)
{
if (j2>=nphi) j2-=nphi;
tmp2(i,j-js) = in(i,j);
tmp2(i2,j-js) = in(i,j2);
tmp2.v(i,j-js) = in(i,j);
tmp2.v(i2,j-js) = in(i,j2);
}
// FFT in theta direction
r2r_fftpack(ftmp2,ftmp2,{0},true,true,1./nrings_in,0);
......@@ -234,21 +234,21 @@ void upsample_to_cc(const mav<double,2> &in, bool has_np, bool has_sp,
{
complex<double> ctmp(tmp2(2*i-1,j-js),tmp2(2*i,j-js));
ctmp *= rot;
tmp2(2*i-1,j-js) = ctmp.real();
tmp2(2*i ,j-js) = ctmp.imag();
tmp2.v(2*i-1,j-js) = ctmp.real();
tmp2.v(2*i ,j-js) = ctmp.imag();
}
}
}
// zero-padding
for (size_t i=nrings_in; i<nrings_out; ++i)
for (size_t j=js; j<je; ++j)
tmp(i,j-js) = 0;
tmp.v(i,j-js) = 0;
// FFT back
r2r_fftpack(ftmp,ftmp,{0},false,false,1.,0);
// copy to output map
for (size_t i=0; i<ntheta_out; ++i)
for (size_t j=js; j<je; ++j)
out(i,j) = tmp(i,j-js);
out.v(i,j) = tmp(i,j-js);
}
}
......
import pysharp
import numpy as np
from numpy.testing import assert_allclose
from time import time
def _l2error(a, b):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment