Commit 54db3189 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

temporary

parent f53dd11d
......@@ -39,14 +39,17 @@ def convolve(alm1, alm2, lmax):
return job.map2alm(map)[0]*np.sqrt(4*np.pi)
lmax=60
kmax=13
lmax=2048
kmax=8
ncomp=1
separate=True
separate=False
nptg = 10000000
ncomp2 = ncomp if separate else 1
epsilon = 1e-4
ofactor = 2
nthreads = 0
ofactor = 1.5
nthreads = 0 # use as many threads as available
ncomp2 = ncomp if separate else 1
# get random sky a_lm
# the a_lm arrays follow the same conventions as those in healpy
......@@ -68,38 +71,39 @@ nph = 2*lmax+1
# compute a convolved map at a fixed psi and compare it to a map convolved
# "by hand"
ptg = np.zeros((nth,nph,3))
ptg[:,:,0] = (np.pi*(0.5+np.arange(nth))/nth).reshape((-1,1))
ptg[:,:,1] = (2*np.pi*(0.5+np.arange(nph))/nph).reshape((1,-1))
ptg[:,:,2] = np.pi*0.2
t0=time.time()
# do the actual interpolation
bar=foo.interpol(ptg.reshape((-1,3))).reshape((nth,nph,ncomp2))
print("interpolation time: ", time.time()-t0)
plt.subplot(2,2,1)
plt.imshow(bar[:,:,0])
bar2 = np.zeros((nth,nph))
blmfull = np.zeros(slm.shape)+0j
blmfull[0:blm.shape[0],:] = blm
for ith in range(nth):
rbeamth=pyinterpol_ng.rotate_alm(blmfull[:,0], lmax, ptg[ith,0,2],ptg[ith,0,0],0)
for iph in range(nph):
rbeam=pyinterpol_ng.rotate_alm(rbeamth, lmax, 0, 0, ptg[ith,iph,1])
bar2[ith,iph] = convolve(slm[:,0], rbeam, lmax).real
plt.subplot(2,2,2)
plt.imshow(bar2)
plt.subplot(2,2,3)
plt.imshow(bar2-bar[:,:,0])
plt.show()
ptg=np.random.uniform(0.,1.,3*1000000).reshape(1000000,3)
# ptg = np.zeros((nth,nph,3))
# ptg[:,:,0] = (np.pi*(0.5+np.arange(nth))/nth).reshape((-1,1))
# ptg[:,:,1] = (2*np.pi*(0.5+np.arange(nph))/nph).reshape((1,-1))
# ptg[:,:,2] = np.pi*0.2
# t0=time.time()
# # do the actual interpolation
# bar=foo.interpol(ptg.reshape((-1,3))).reshape((nth,nph,ncomp2))
# print("interpolation time: ", time.time()-t0)
# plt.subplot(2,2,1)
# plt.imshow(bar[:,:,0])
# bar2 = np.zeros((nth,nph))
# blmfull = np.zeros(slm.shape)+0j
# blmfull[0:blm.shape[0],:] = blm
# for ith in range(nth):
# rbeamth=pyinterpol_ng.rotate_alm(blmfull[:,0], lmax, ptg[ith,0,2],ptg[ith,0,0],0)
# for iph in range(nph):
# rbeam=pyinterpol_ng.rotate_alm(rbeamth, lmax, 0, 0, ptg[ith,iph,1])
# bar2[ith,iph] = convolve(slm[:,0], rbeam, lmax).real
# plt.subplot(2,2,2)
# plt.imshow(bar2)
# plt.subplot(2,2,3)
# plt.imshow(bar2-bar[:,:,0])
# plt.show()
ptg=np.random.uniform(0.,1.,3*nptg).reshape(nptg,3)
ptg[:,0]*=np.pi
ptg[:,1]*=2*np.pi
ptg[:,2]*=2*np.pi
#foo = pyinterpol_ng.PyInterpolator(slm,blm,separate,lmax, kmax, epsilon=1e-6, nthreads=2)
t0=time.time()
bar=foo.interpol(ptg)
del foo
print("interpolation time: ", time.time()-t0)
fake = np.random.uniform(0.,1., (ptg.shape[0],ncomp2))
foo2 = pyinterpol_ng.PyInterpolator(lmax, kmax, ncomp2, epsilon=epsilon, ofactor=ofactor, nthreads=nthreads)
......
......@@ -22,6 +22,97 @@
namespace mr {
#if 0
namespace detail_fft {
using std::vector;
template<typename T, typename T0> aligned_array<T> alloc_tmp_conv
(const fmav_info &info, size_t axis, size_t len)
{
auto othersize = info.size()/info.shape(axis);
constexpr auto vlen = native_simd<T0>::size();
auto tmpsize = len*((othersize>=vlen) ? vlen : 1);
return aligned_array<T>(tmpsize);
}
template<typename Tplan, typename T, typename T0, typename Exec>
MRUTIL_NOINLINE void general_convolve(const fmav<T> &in, fmav<T> &out,
const size_t axis, const vector<T0> &kernel, size_t nthreads,
const Exec &exec, const bool allow_inplace=true)
{
std::shared_ptr<Tplan> plan1, plan2;
size_t l_in=in.shape(axis), l_out=out.shape(axis);
size_t l_min=std::min(l_in, l_out), l_max=std::max(l_in, l_out);
MR_assert(kernel.size()==l_min/2+1, "bad kernel size");
plan1 = get_plan<Tplan>(l_in);
plan2 = get_plan<Tplan>(l_out);
execParallel(
util::thread_count(nthreads, in, axis, native_simd<T0>::size()),
[&](Scheduler &sched) {
constexpr auto vlen = native_simd<T0>::size();
auto storage = alloc_tmp_conv<T,T0>(in, axis, l_max); //FIXME!
multi_iter<vlen> it(in, out, axis, sched.num_threads(), sched.thread_num());
#ifndef MRUTIL_NO_SIMD
if (vlen>1)
while (it.remaining()>=vlen)
{
it.advance(vlen);
auto tdatav = reinterpret_cast<add_vec_t<T> *>(storage.data());
exec(it, in, out, tdatav, *plan1, *plan2, kernel);
}
#endif
while (it.remaining()>0)
{
it.advance(1);
auto buf = allow_inplace && it.stride_out() == 1 ?
&out.vraw(it.oofs(0)) : reinterpret_cast<T *>(storage.data());
exec(it, in, out, buf, *plan1, *plan2, kernel);
}
}); // end of parallel region
}
struct ExecConvR1
{
template <typename T0, typename T, size_t vlen> void operator() (
const multi_iter<vlen> &it, const fmav<T0> &in, fmav<T0> &out,
T * buf, const pocketfft_r<T0> &plan1, const pocketfft_r<T0> &plan2,
const vector<T0> &kernel) const
{
size_t l_in = plan1.length(),
l_out = plan2.length(),
l_min = std::min(l_in, l_out);
copy_input(it, in, buf);
plan1.exec(buf, T0(1), true);
buf[0] *= kernel[0];
for (size_t i=1; i<l_min; ++i)
{ buf[2*i-1]*=kernel[i]; buf[2*i] *=kernel[i]; }
for (size_t i=l_in; i<l_out; ++i) buf[i] = T(0);
plan2.exec(buf, T0(1), false);
copy_output(it, buf, out);
}
};
template<typename T> void convolve_1d(const fmav<T> &in,
fmav<T> &out, size_t axis, const vector<T> &kernel, size_t nthreads=1)
{
// util::sanity_check_onetype(in, out, in.data()==out.data(), axes);
MR_assert(axis<in.ndim(), "bad axis number");
MR_assert(in.ndim()==out.ndim(), "dimensionality mismatch");
if (in.data()==out.data())
MR_assert(in.strides()==out.strides(), "strides mismatch");
for (size_t i=0; i<in.ndim(); ++i)
if (i!=axis)
MR_assert(in.shape(i)==out.shape(i), "shape mismatch");
if (in.size()==0) return;
general_convolve<pocketfft_r<T>>(in, out, axis, kernel, nthreads,
ExecConvR1());
}
}
#endif
namespace detail_interpol_ng {
using namespace std;
......@@ -49,6 +140,7 @@ template<typename T> class Interpolator
for (size_t j=0; j<nphi0; ++j)
tmp0.v(i,j) = arr(i,j);
// extend to second half
// FIXME: merge with loop above to avoid edundant memory reads.
for (size_t i=1, i2=nphi0-1; i+1<ntheta0; ++i,--i2)
for (size_t j=0,j2=nphi0/2; j<nphi0; ++j,++j2)
{
......@@ -56,7 +148,9 @@ template<typename T> class Interpolator
tmp0.v(i2,j) = sfct*tmp0(i,j2);
}
// FFT to frequency domain on minimal grid
// one bad FFT axis
r2r_fftpack(ftmp0,ftmp0,{0,1},true,true,T(1./(nphi0*nphi0)),nthreads);
// correct amplitude at Nyquist frequency
for (size_t i=0; i<nphi0; ++i)
{
......@@ -70,7 +164,9 @@ template<typename T> class Interpolator
auto tmp1=tmp.template subarray<2>({0,0},{nphi, nphi0});
fmav<T> ftmp1(tmp1);
// zero-padded FFT in theta direction
// one bad FFT axis
r2r_fftpack(ftmp1,ftmp1,{0},false,false,T(1),nthreads);
auto tmp2=tmp.template subarray<2>({0,0},{ntheta, nphi});
fmav<T> ftmp2(tmp2);
fmav<T> farr(arr);
......
......@@ -242,16 +242,28 @@ PYBIND11_MODULE(pyinterpol_ng, m)
m.doc() = pyinterpol_ng_DS;
py::class_<PyInterpolator<fptype>> (m, "PyInterpolator", pyinterpolator_DS)
using inter_d = PyInterpolator<double>;
py::class_<inter_d> (m, "PyInterpolator", pyinterpolator_DS)
.def(py::init<const py::array &, const py::array &, bool, int64_t, int64_t, fptype, fptype, int>(),
initnormal_DS, "sky"_a, "beam"_a, "separate"_a, "lmax"_a, "kmax"_a, "epsilon"_a, "ofactor"_a=fptype(1.5),
"nthreads"_a=0)
.def(py::init<int64_t, int64_t, int64_t, fptype, fptype, int>(), initadjoint_DS,
"lmax"_a, "kmax"_a, "ncomp"_a, "epsilon"_a, "ofactor"_a=fptype(1.5), "nthreads"_a=0)
.def ("interpol", &PyInterpolator<fptype>::pyinterpol, interpol_DS, "ptg"_a)
.def ("deinterpol", &PyInterpolator<fptype>::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
.def ("getSlm", &PyInterpolator<fptype>::pygetSlm, getSlm_DS, "beam"_a)
.def ("support", &PyInterpolator<fptype>::support);
.def ("interpol", &inter_d::pyinterpol, interpol_DS, "ptg"_a)
.def ("deinterpol", &inter_d::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
.def ("getSlm", &inter_d::pygetSlm, getSlm_DS, "beam"_a)
.def ("support", &inter_d::support);
// using inter_f = PyInterpolator<float>;
// py::class_<inter_f> (m, "PyInterpolator_f", pyinterpolator_DS)
// .def(py::init<const py::array &, const py::array &, bool, int64_t, int64_t, fptype, fptype, int>(),
// initnormal_DS, "sky"_a, "beam"_a, "separate"_a, "lmax"_a, "kmax"_a, "epsilon"_a, "ofactor"_a=fptype(1.5),
// "nthreads"_a=0)
// .def(py::init<int64_t, int64_t, int64_t, fptype, fptype, int>(), initadjoint_DS,
// "lmax"_a, "kmax"_a, "ncomp"_a, "epsilon"_a, "ofactor"_a=fptype(1.5), "nthreads"_a=0)
// .def ("interpol", &inter_f::pyinterpol, interpol_DS, "ptg"_a)
// .def ("deinterpol", &inter_f::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
// .def ("getSlm", &inter_f::pygetSlm, getSlm_DS, "beam"_a)
// .def ("support", &inter_f::support);
#if 1
m.def("rotate_alm", &pyrotate_alm<fptype>, "alm"_a, "lmax"_a, "psi"_a, "theta"_a,
"phi"_a);
......
......@@ -2,8 +2,15 @@ import numpy as np
import pypocketfft
#def _l2error(a, b, axes):
# return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))/np.log2(np.max([2,np.prod(np.take(a.shape,axes))]))
def _l2error(a, b, axes):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))/np.log2(np.max([2,np.prod(np.take(a.shape,axes))]))
x1 = np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))/np.log2(np.max([2,np.prod(np.take(a.shape,axes))]))
a = a*np.array([1.])
b = b*np.array([1.])
x2 = np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))/np.log2(np.max([2,np.prod(np.take(a.shape,axes))]))
print(x1, x2, x1-x2)
return x2
def fftn(a, axes=None, inorm=0, out=None, nthreads=1):
......
......@@ -604,21 +604,21 @@ template<typename T, typename T0> aligned_array<T> alloc_tmp
auto tmpsize = axsize*((othersize>=vlen) ? vlen : 1);
return aligned_array<T>(tmpsize);
}
template<typename T, typename T0> aligned_array<T> alloc_tmp
(const fmav_info &info, const shape_t &axes)
{
size_t fullsize=info.size();
size_t tmpsize=0;
for (size_t i=0; i<axes.size(); ++i)
{
auto axsize = info.shape(axes[i]);
auto othersize = fullsize/axsize;
constexpr auto vlen = native_simd<T0>::size();
auto sz = axsize*((othersize>=vlen) ? vlen : 1);
if (sz>tmpsize) tmpsize=sz;
}
return aligned_array<T>(tmpsize);
}
// template<typename T, typename T0> aligned_array<T> alloc_tmp
// (const fmav_info &info, const shape_t &axes)
// {
// size_t fullsize=info.size();
// size_t tmpsize=0;
// for (size_t i=0; i<axes.size(); ++i)
// {
// auto axsize = info.shape(axes[i]);
// auto othersize = fullsize/axsize;
// constexpr auto vlen = native_simd<T0>::size();
// auto sz = axsize*((othersize>=vlen) ? vlen : 1);
// if (sz>tmpsize) tmpsize=sz;
// }
// return aligned_array<T>(tmpsize);
// }
template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it,
const fmav<Cmplx<T>> &src, Cmplx<native_simd<T>> *MRUTIL_RESTRICT dst)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment