Commit 76a024d9 authored by Martin Reinecke
Browse files

Merge branch 'vectorization' into 'master'

Vectorization

See merge request mtr/cxxbase!8
parents 4b36a29f 1da6d79c
......@@ -162,7 +162,7 @@ class wigner_d_risbo_openmp
wigner_d_risbo_openmp(size_t lmax, double ang)
: p(sin(ang/2)), q(cos(ang/2)), sqt(2*lmax+1),
d({lmax+1,2*lmax+1}), dd({lmax+1,2*lmax+1}), n(-1)
{ for (size_t m=0; m<sqt.size(); ++m) sqt[m] = sqrt(double(m)); }
{ for (size_t m=0; m<sqt.size(); ++m) sqt[m] = std::sqrt(double(m)); }
const mav<double,2> &recurse()
{
......
import pyinterpol_ng
import numpy as np
import pysharp
import time
import matplotlib.pyplot as plt
np.random.seed(48)
......@@ -39,11 +37,11 @@ def convolve(alm1, alm2, lmax):
return job.map2alm(map)[0]*np.sqrt(4*np.pi)
lmax=60
lmax=1024
kmax=13
ncomp=1
separate=False
nptg = 1000000
separate=True
nptg = 50000000
epsilon = 1e-4
ofactor = 1.5
nthreads = 0 # use as many threads as available
......@@ -61,57 +59,70 @@ blm = random_alm(lmax, kmax, ncomp)
t0=time.time()
# build interpolator object for slm and blm
foo = pyinterpol_ng.PyInterpolator(slm,blm,separate,lmax, kmax, epsilon=epsilon, ofactor=ofactor, nthreads=nthreads)
print("setup time: ",time.time()-t0)
print("support:",foo.support())
t1 = time.time()-t0
print("Convolving sky and beam with lmax=mmax={}, kmax={}".format(lmax,kmax))
print("Interpolation taking place with a maximum error of {}\n"
"and an oversampling factor of {}".format(epsilon, ofactor))
supp = foo.support()
print("(resulting in a kernel support size of {}x{})".format(supp,supp))
if ncomp == 1:
print("One component")
else:
print("{} components, which are {}coadded".format(ncomp, "not " if separate else ""))
print("\nDouble precision convolution/interpolation:")
print("preparation of interpolation grid: {}s".format(t1))
t0=time.time()
nth = lmax+1
nph = 2*lmax+1
# compute a convolved map at a fixed psi and compare it to a map convolved
# "by hand"
ptg = np.zeros((nth,nph,3))
ptg[:,:,0] = (np.pi*(0.5+np.arange(nth))/nth).reshape((-1,1))
ptg[:,:,1] = (2*np.pi*(0.5+np.arange(nph))/nph).reshape((1,-1))
ptg[:,:,2] = np.pi*0.2
t0=time.time()
# do the actual interpolation
bar=foo.interpol(ptg.reshape((-1,3))).reshape((nth,nph,ncomp2))
print("interpolation time: ", time.time()-t0)
plt.subplot(2,2,1)
plt.imshow(bar[:,:,0])
bar2 = np.zeros((nth,nph))
blmfull = np.zeros(slm.shape)+0j
blmfull[0:blm.shape[0],:] = blm
for ith in range(nth):
rbeamth=pyinterpol_ng.rotate_alm(blmfull[:,0], lmax, ptg[ith,0,2],ptg[ith,0,0],0)
for iph in range(nph):
rbeam=pyinterpol_ng.rotate_alm(rbeamth, lmax, 0, 0, ptg[ith,iph,1])
bar2[ith,iph] = convolve(slm[:,0], rbeam, lmax).real
plt.subplot(2,2,2)
plt.imshow(bar2)
plt.subplot(2,2,3)
plt.imshow(bar2-bar[:,:,0])
plt.show()
ptg=np.random.uniform(0.,1.,3*nptg).reshape(nptg,3)
ptg[:,0]*=np.pi
ptg[:,1]*=2*np.pi
ptg[:,2]*=2*np.pi
#foo = pyinterpol_ng.PyInterpolator(slm,blm,separate,lmax, kmax, epsilon=1e-6, nthreads=2)
t0=time.time()
bar=foo.interpol(ptg)
del foo
print("interpolation time: ", time.time()-t0)
print("Interpolating {} random angle triplets: {}s".format(nptg, time.time() -t0))
t0=time.time()
fake = np.random.uniform(0.,1., (ptg.shape[0],ncomp2))
foo2 = pyinterpol_ng.PyInterpolator(lmax, kmax, ncomp2, epsilon=epsilon, ofactor=ofactor, nthreads=nthreads)
t0=time.time()
foo2.deinterpol(ptg.reshape((-1,3)), fake)
print("deinterpolation time: ", time.time()-t0)
print("Adjoint interpolation: {}s".format(time.time() -t0))
t0=time.time()
bla=foo2.getSlm(blm)
print("getSlm time: ", time.time()-t0)
del foo2
print("Computing s_lm: {}s".format(time.time() -t0))
v1 = np.sum([myalmdot(slm[:,i], bla[:,i] , lmax, lmax, 0) for i in range(ncomp)])
v2 = np.sum([np.vdot(fake[:,i],bar[:,i]) for i in range(ncomp2)])
print(v1/v2-1.)
print("Adjointness error: {}".format(v1/v2-1.))
# build interpolator object for slm and blm
t0=time.time()
foo_f = pyinterpol_ng.PyInterpolator_f(slm.astype(np.complex64),blm.astype(np.complex64),separate,lmax, kmax, epsilon=epsilon, ofactor=ofactor, nthreads=nthreads)
print("\nSingle precision convolution/interpolation:")
print("preparation of interpolation grid: {}s".format(time.time()-t0))
ptgf = ptg.astype(np.float32)
del ptg
fake_f = fake.astype(np.float32)
del fake
t0=time.time()
bar_f=foo_f.interpol(ptgf)
del foo_f
print("Interpolating {} random angle triplets: {}s".format(nptg, time.time() -t0))
foo2_f = pyinterpol_ng.PyInterpolator_f(lmax, kmax, ncomp2, epsilon=epsilon, ofactor=ofactor, nthreads=nthreads)
t0=time.time()
foo2_f.deinterpol(ptgf.reshape((-1,3)), fake_f)
print("Adjoint interpolation: {}s".format(time.time() -t0))
t0=time.time()
bla_f=foo2_f.getSlm(blm.astype(np.complex64))
del foo2_f
print("Computing s_lm: {}s".format(time.time() -t0))
v1 = np.sum([myalmdot(slm[:,i], bla_f[:,i] , lmax, lmax, 0) for i in range(ncomp)])
v2 = np.sum([np.vdot(fake_f[:,i],bar_f[:,i]) for i in range(ncomp2)])
print("Adjointness error: {}".format(v1/v2-1.))
This diff is collapsed.
......@@ -14,8 +14,6 @@ namespace py = pybind11;
namespace {
using fptype = double;
template<typename T> class PyInterpolator: public Interpolator<T>
{
protected:
......@@ -244,28 +242,28 @@ PYBIND11_MODULE(pyinterpol_ng, m)
using inter_d = PyInterpolator<double>;
py::class_<inter_d> (m, "PyInterpolator", pyinterpolator_DS)
.def(py::init<const py::array &, const py::array &, bool, int64_t, int64_t, fptype, fptype, int>(),
initnormal_DS, "sky"_a, "beam"_a, "separate"_a, "lmax"_a, "kmax"_a, "epsilon"_a, "ofactor"_a=fptype(1.5),
.def(py::init<const py::array &, const py::array &, bool, int64_t, int64_t, double, double, int>(),
initnormal_DS, "sky"_a, "beam"_a, "separate"_a, "lmax"_a, "kmax"_a, "epsilon"_a, "ofactor"_a=1.5,
"nthreads"_a=0)
.def(py::init<int64_t, int64_t, int64_t, fptype, fptype, int>(), initadjoint_DS,
"lmax"_a, "kmax"_a, "ncomp"_a, "epsilon"_a, "ofactor"_a=fptype(1.5), "nthreads"_a=0)
.def(py::init<int64_t, int64_t, int64_t, double, double, int>(), initadjoint_DS,
"lmax"_a, "kmax"_a, "ncomp"_a, "epsilon"_a, "ofactor"_a=1.5, "nthreads"_a=0)
.def ("interpol", &inter_d::pyinterpol, interpol_DS, "ptg"_a)
.def ("deinterpol", &inter_d::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
.def ("getSlm", &inter_d::pygetSlm, getSlm_DS, "beam"_a)
.def ("support", &inter_d::support);
// using inter_f = PyInterpolator<float>;
// py::class_<inter_f> (m, "PyInterpolator_f", pyinterpolator_DS)
// .def(py::init<const py::array &, const py::array &, bool, int64_t, int64_t, fptype, fptype, int>(),
// initnormal_DS, "sky"_a, "beam"_a, "separate"_a, "lmax"_a, "kmax"_a, "epsilon"_a, "ofactor"_a=fptype(1.5),
// "nthreads"_a=0)
// .def(py::init<int64_t, int64_t, int64_t, fptype, fptype, int>(), initadjoint_DS,
// "lmax"_a, "kmax"_a, "ncomp"_a, "epsilon"_a, "ofactor"_a=fptype(1.5), "nthreads"_a=0)
// .def ("interpol", &inter_f::pyinterpol, interpol_DS, "ptg"_a)
// .def ("deinterpol", &inter_f::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
// .def ("getSlm", &inter_f::pygetSlm, getSlm_DS, "beam"_a)
// .def ("support", &inter_f::support);
using inter_f = PyInterpolator<float>;
py::class_<inter_f> (m, "PyInterpolator_f", pyinterpolator_DS)
.def(py::init<const py::array &, const py::array &, bool, int64_t, int64_t, float, float, int>(),
initnormal_DS, "sky"_a, "beam"_a, "separate"_a, "lmax"_a, "kmax"_a, "epsilon"_a, "ofactor"_a=1.5f,
"nthreads"_a=0)
.def(py::init<int64_t, int64_t, int64_t, float, float, int>(), initadjoint_DS,
"lmax"_a, "kmax"_a, "ncomp"_a, "epsilon"_a, "ofactor"_a=1.5f, "nthreads"_a=0)
.def ("interpol", &inter_f::pyinterpol, interpol_DS, "ptg"_a)
.def ("deinterpol", &inter_f::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
.def ("getSlm", &inter_f::pygetSlm, getSlm_DS, "beam"_a)
.def ("support", &inter_f::support);
#if 1
m.def("rotate_alm", &pyrotate_alm<fptype>, "alm"_a, "lmax"_a, "psi"_a, "theta"_a,
m.def("rotate_alm", &pyrotate_alm<double>, "alm"_a, "lmax"_a, "psi"_a, "theta"_a,
"phi"_a);
#endif
m.def("epsilon_guess", &epsilon_guess, "support"_a, "ofactor"_a);
......
......@@ -42,13 +42,15 @@ class ES_Kernel
{
private:
double beta;
float fbeta;
int p;
vector<double> x, wgt, psi;
size_t supp;
public:
ES_Kernel(size_t supp_, double ofactor, size_t nthreads)
: beta(get_beta(supp_,ofactor)*supp_), p(int(1.5*supp_+2)), supp(supp_)
: beta(get_beta(supp_,ofactor)*supp_), fbeta(float(beta)),
p(int(1.5*supp_+2)), supp(supp_)
{
GL_Integrator integ(2*p,nthreads);
x = integ.coordsSymmetric();
......@@ -60,7 +62,10 @@ class ES_Kernel
/* Convenience constructor: delegates to the three-argument constructor
   with the oversampling factor fixed to 2. */
ES_Kernel(size_t supp_, size_t nthreads)
: ES_Kernel(supp_, 2., nthreads){}
double operator()(double v) const { return (v*v>1.) ? 0. : exp(beta*(std::sqrt(1.-v*v)-1.)); }
/* Evaluate the kernel at v in double precision.
   Returns 0 if v*v > 1, otherwise exp(beta*(sqrt(1-v*v)-1)).
   Both math calls are std::-qualified so that overload selection does not
   depend on a using-directive being in scope. */
double operator()(double v) const
{ return (v*v>1.) ? 0. : std::exp(beta*(std::sqrt(1.-v*v)-1.)); }
/* Evaluate the kernel at v in single precision.
   Returns 0 if v*v > 1, otherwise exp(fbeta*(sqrt(1-v*v)-1)), where fbeta
   is the float copy of beta made in the constructor.
   std::exp is qualified on purpose: an unqualified exp() can resolve to the
   C ::exp(double), which would promote the whole expression to double and
   then truncate the result back to float. */
float operator()(float v) const
{ return (v*v>1.f) ? 0.f : std::exp(fbeta*(std::sqrt(1.f-v*v)-1.f)); }
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
double corfac(double v) const
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment