Commit 37b7ef49 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

improvements

parent 116e9e26
Pipeline #70036 passed with stages
in 12 minutes and 36 seconds
prune mr_util
include alm.h
include mr_util/math/fft1d.h
include mr_util/math/fft.h
include mr_util/infra/threading.h
include mr_util/infra/mav.h
include mr_util/math/math_utils.h
include mr_util/infra/aligned_array.h
include mr_util/math/gl_integrator.h
include mr_util/infra/simd.h
include mr_util/math/cmplx.h
include mr_util/infra/string_utils.h
include mr_util/infra/timers.h
include mr_util/math/constants.h
include mr_util/math/unity_roots.h
include mr_util/math/es_kernel.h
include mr_util/infra/error_handling.h
include mr_util/infra/useful_macros.h
include mr_util/bindings/pybind_utils.h
include mr_util/sharp/sharp.h
include mr_util/sharp/sharp_internal.h
include mr_util/sharp/sharp_geomhelpers.h
include mr_util/sharp/sharp_almhelpers.h
include mr_util/infra/threading.cc
include mr_util/infra/string_utils.cc
include mr_util/sharp/sharp.cc
include mr_util/sharp/sharp_core.cc
include mr_util/sharp/sharp_core_inc.cc
include mr_util/sharp/sharp_geomhelpers.cc
include mr_util/sharp/sharp_almhelpers.cc
include mr_util/sharp/sharp_ylmgen.cc
......@@ -8,11 +8,21 @@
#ifndef MRUTIL_ALM_H
#define MRUTIL_ALM_H
#if 0
#include <complex>
#include <cmath>
#include "mr_util/infra/threading.h"
#endif
#include "mr_util/infra/mav.h"
#include "mr_util/infra/error_handling.h"
namespace mr {
namespace detail_alm {
using namespace std;
/*! Base class for calculating the storage layout of spherical harmonic
coefficients. */
class Alm_Base
......@@ -133,6 +143,128 @@ template<typename T> class Alm: public Alm_Base
}
};
#if 0
/*! Class for calculation of the Wigner matrix at arbitrary angles, using Risbo
recursion in a way that can be OpenMP-parallelised. This approach uses more
memory and is slightly slower than wigner_d_risbo_scalar. */
class wigner_d_risbo_openmp
{
private:
double p,q;
vector<double> sqt;
mav<double,2> d, dd;
ptrdiff_t n;
public:
wigner_d_risbo_openmp(size_t lmax, double ang)
: p(sin(ang/2)), q(cos(ang/2)), sqt(2*lmax+1),
d({lmax+1,2*lmax+1}), dd({lmax+1,2*lmax+1}), n(-1)
{ for (size_t m=0; m<sqt.size(); ++m) sqt[m] = sqrt(double(m)); }
const mav<double,2> &recurse()
{
++n;
if (n==0)
d.v(0,0) = 1;
else if (n==1)
{
d.v(0,0) = q*q; d.v(0,1) = -p*q*sqt[2]; d.v(0,2) = p*p;
d.v(1,0) = -d(0,1); d.v(1,1) = q*q-p*p; d.v(1,2) = d(0,1);
}
else
{
// padding
int sign = (n&1)? -1 : 1;
for (int i=0; i<=2*n-2; ++i)
{
d.v(n,i) = sign*d(n-2,2*n-2-i);
sign=-sign;
}
for (int j=2*n-1; j<=2*n; ++j)
{
auto &xd((j&1) ? d : dd);
auto &xdd((j&1) ? dd: d);
double xj = 1./j;
xdd.v(0,0) = q*xd(0,0);
for (int i=1;i<j; ++i)
xdd.v(0,i) = xj*sqt[j]*(q*sqt[j-i]*xd(0,i) - p*sqt[i]*xd(0,i-1));
xdd.v(0,j) = -p*xd(0,j-1);
// parallelize
for (int k=1; k<=n; ++k)
{
double t1 = xj*sqt[j-k]*q, t2 = xj*sqt[j-k]*p;
double t3 = xj*sqt[k ]*p, t4 = xj*sqt[k ]*q;
xdd.v(k,0) = xj*sqt[j]*(q*sqt[j-k]*xd(k,0) + p*sqt[k]*xd(k-1,0));
for (int i=1; i<j; ++i)
xdd.v(k,i) = t1*sqt[j-i]*xd(k,i) - t2*sqt[i]*xd(k,i-1)
+ t3*sqt[j-i]*xd(k-1,i) + t4*sqt[i]*xd(k-1,i-1);
xdd.v(k,j) = -t2*sqt[j]*xd(k,j-1) + t4*sqt[j]*xd(k-1,j-1);
}
}
}
return d;
}
};
template<typename T> void rotate_alm (Alm<complex<T>> &alm,
double psi, double theta, double phi)
{
MR_assert (alm.Lmax()==alm.Mmax(),
"rotate_alm: lmax must be equal to mmax");
auto lmax=alm.Lmax();
vector<complex<double> > exppsi(lmax+1), expphi(lmax+1);
for (size_t m=0; m<=lmax; ++m)
{
exppsi[m] = polar(1.,-psi*m);
expphi[m] = polar(1.,-phi*m);
}
wigner_d_risbo_openmp rec(lmax,theta);
vector<complex<double> > almtmp(lmax+1);
size_t nthreads=1;
for (size_t l=0; l<=lmax; ++l)
{
const auto &d(rec.recurse());
for (size_t m=0; m<=l; ++m)
almtmp[m] = complex<double>(alm(l,0))*d(l,l+m);
execStatic(l+1, nthreads, 0, [&](Scheduler &sched)
{
auto rng=sched.getNext();
auto lo=rng.lo;
auto hi=rng.hi;
bool flip = true;
for (size_t mm=1; mm<=l; ++mm)
{
auto t1 = complex<double>(alm(l,mm))*exppsi[mm];
bool flip2 = ((mm+lo)&1);
for (auto m=lo; m<hi; ++m)
{
double d1 = flip2 ? -d(l-mm,l-m) : d(l-mm,l-m);
double d2 = flip ? -d(l-mm,l+m) : d(l-mm,l+m);
double f1 = d1+d2, f2 = d1-d2;
almtmp[m]+=complex<double>(t1.real()*f1,t1.imag()*f2);
flip2 = !flip2;
}
flip = !flip;
}
});
for (size_t m=0; m<=l; ++m)
alm(l,m) = complex<T>(almtmp[m]*expphi[m]);
}
}
#endif
}
using detail_alm::Alm_Base;
using detail_alm::Alm;
#if 0
using detail_alm::rotate_alm;
#endif
}
#endif
# Elementary demo for pysharp interface using a Gauss-Legendre grid
# I'm not sure I have a perfect equivalent for the DH grid(s) at the moment,
# since they apparently do not include the South Pole. The Clenshaw-Curtis
# and Fejer quadrature rules are very similar (see the documentation in
# sharp_geomhelpers.h). An exact analogon to DH can be added easily, I expect.
import interpol_ng
import numpy as np
import pysharp
import time
np.random.seed(42)
lmax=2047
mmax=lmax
kmax=2
np.random.seed(42)
def nalm(lmax, mmax):
return ((mmax+1)*(mmax+2))//2 + (mmax+1)*(lmax-mmax)
def idx(l,m):
return (m*((2*lmax+1)-m))//2 + l
def deltabeam(lmax,kmax):
beam=np.zeros(nalm(lmax, kmax))+0j
for l in range(lmax+1):
......@@ -29,8 +16,15 @@ def deltabeam(lmax,kmax):
return beam
# get random a_lm
slmT = np.random.uniform(-1., 1., nalm(lmax, mmax)) + 1j*np.random.uniform(-1., 1., nalm(lmax,mmax))
lmax=2047
mmax=lmax
kmax=2 # doesn't make any sense for the beam we are using, but just for demonstration purposes ...
# get random sky a_lm
# the a_lm arrays follow the same conventions as those in healpy
slmT = np.random.uniform(-1., 1., nalm(lmax, mmax)) \
+ 1j*np.random.uniform(-1., 1., nalm(lmax, mmax))
# make a_lm with m==0 real-valued
slmT[0:lmax+1].imag = 0.
......@@ -38,31 +32,30 @@ slmT[0:lmax+1].imag = 0.
blmT = deltabeam(lmax,kmax)
t0=time.time()
foo = interpol_ng.PyInterpolator(slmT,blmT,lmax, kmax, 1e-5)
print("setup: ",time.time()-t0)
foo = interpol_ng.PyInterpolator(slmT,blmT,lmax, kmax, epsilon=1e-6, nthreads=2)
print("setup time: ",time.time()-t0)
# evaluate total convolution on a sufficiently resolved Gauss-Legendre grid
nth = lmax+1
nph = 2*mmax+1
ptg = np.zeros((nth,nph,3))
th, wgt = np.polynomial.legendre.leggauss(nth)
th = np.arccos(th)
for it in range(nth):
for ip in range(nph):
ptg[it,ip,0] = th[it]
ptg[it,ip,1] = 2*np.pi*ip/nph
ptg[it,ip,2] = 0
th, _ = np.polynomial.legendre.leggauss(nth)
th = np.arccos(-th)
ptg[:,:,0] = th.reshape((-1,1))
ptg[:,:,1] = (2*np.pi*np.arange(nph)/nph).reshape((1,-1))
ptg[:,:,2] = 0
t0=time.time()
bar=foo.interpol(ptg.reshape((-1,3))).reshape((nth,nph))
print("interpol: ", time.time()-t0)
print("interpolation time: ", time.time()-t0)
# get a_lm back from interpolated array
job = pysharp.sharpjob_d()
job.set_triangular_alm_info(lmax, mmax)
job.set_gauss_geometry(nth, nph)
alm2 = job.map2alm(bar.reshape((-1,)))
#compare with original a_lm
import matplotlib.pyplot as plt
plt.plot(np.abs(alm2/slmT))
plt.plot(np.abs(alm2-slmT))
plt.suptitle("Deviations between original and reconstructed a_lm")
plt.show()
......@@ -31,6 +31,7 @@ template<typename T> class Interpolator
{
protected:
size_t lmax, kmax, nphi0, ntheta0, nphi, ntheta;
int nthreads;
double ofactor;
size_t supp;
ES_Kernel kernel;
......@@ -52,34 +53,35 @@ template<typename T> class Interpolator
tmp0.v(i2,j) = arr(i,j2);
}
// FFT to frequency domain on minimal grid
r2r_fftpack(ftmp0,ftmp0,{0,1},true,true,1./(nphi0*nphi0),0);
auto fct = kernel.correction_factors(nphi, nphi0/2+1, 0);
r2r_fftpack(ftmp0,ftmp0,{0,1},true,true,1./(nphi0*nphi0),nthreads);
auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
for (size_t i=0; i<nphi0; ++i)
for (size_t j=0; j<nphi0; ++j)
tmp0.v(i,j) *= fct[(i+1)/2] * fct[(j+1)/2];
auto tmp1=tmp.template subarray<2>({0,0},{nphi, nphi0});
fmav<T> ftmp1(tmp1);
// zero-padded FFT in theta direction
r2r_fftpack(ftmp1,ftmp1,{0},false,false,1.,0);
r2r_fftpack(ftmp1,ftmp1,{0},false,false,1.,nthreads);
auto tmp2=tmp.template subarray<2>({0,0},{ntheta, nphi});
fmav<T> ftmp2(tmp2);
fmav<T> farr(arr);
// zero-padded FFT in phi direction
r2r_fftpack(ftmp2,farr,{1},false,false,1.,0);
r2r_fftpack(ftmp2,farr,{1},false,false,1.,nthreads);
}
public:
Interpolator(const Alm<complex<T>> &slmT, const Alm<complex<T>> &blmT,
double epsilon)
double epsilon, int nthreads_)
: lmax(slmT.Lmax()),
kmax(blmT.Mmax()),
nphi0(2*good_size_real(lmax+1)),
ntheta0(nphi0/2+1),
nphi(2*good_size_real(size_t((2*lmax+1)*ofmin/2.))),
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(double(nphi)/(2*lmax+1)),
supp(ES_Kernel::get_supp(epsilon, ofactor)),
kernel(supp, ofactor, 1),
kernel(supp, ofactor, nthreads),
cube({ntheta+2*supp, nphi+2*supp, 2*kmax+1})
{
MR_assert((supp<=ntheta) && (supp<=nphi), "support too large!");
......@@ -124,9 +126,9 @@ template<typename T> class Interpolator
auto m1 = cube.template subarray<2>({supp,supp,kidx1},{ntheta,nphi,0});
auto m2 = cube.template subarray<2>({supp,supp,kidx2},{ntheta,nphi,0});
if (k==0)
sharp_alm2map(a1.Alms().data(), m1.vdata(), *ginfo, *ainfo, 0, nullptr, nullptr);
sharp_alm2map(a1.Alms().data(), m1.vdata(), *ginfo, *ainfo, nthreads, nullptr, nullptr);
else
sharp_alm2map_spin(k, a1.Alms().data(), a2.Alms().data(), m1.vdata(), m2.vdata(), *ginfo, *ainfo, 0, nullptr, nullptr);
sharp_alm2map_spin(k, a1.Alms().data(), a2.Alms().data(), m1.vdata(), m2.vdata(), *ginfo, *ainfo, nthreads, nullptr, nullptr);
correct(m1);
if (k!=0) correct(m2);
......@@ -155,12 +157,12 @@ template<typename T> class Interpolator
{
MR_assert(ptg.shape(0)==res.shape(0), "dimension mismatch");
MR_assert(ptg.shape(1)==3, "second dimension must have length 3");
vector<T> wt(supp), wp(supp);
vector<T> psiarr(2*kmax+1);
double delta = 2./supp;
double xdtheta = (ntheta-1)/pi,
xdphi = nphi/(2*pi);
vector<size_t> idx(ptg.shape(0));
#if 1
{
constexpr size_t cellsize=16;
size_t nct = ntheta/cellsize+1,
ncp = nphi/cellsize+1;
......@@ -171,48 +173,60 @@ template<typename T> class Interpolator
iphi=size_t(ptg(i,1)/(2*pi)*ncp);
mapper[itheta*ncp+iphi].push_back(i);
}
size_t cnt=0;
for (const auto &vec: mapper)
for (auto i:vec)
for (auto i:vec)
idx[cnt++] = i;
}
#else
for (size_t i=0; i<ptg.shape(0); ++i)
for (size_t i=0; i<idx.size(); ++i)
idx[i]=i;
#endif
execStatic(idx.size(), nthreads, 0, [&](Scheduler &sched)
{
double f0=0.5*supp+ptg(i,0)*xdtheta;
size_t i0 = size_t(f0+1.);
for (size_t t=0; t<supp; ++t)
wt[t] = kernel((t+i0-f0)*delta - 1);
double f1=0.5*supp+ptg(i,1)*xdphi;
size_t i1 = size_t(f1+1.);
for (size_t t=0; t<supp; ++t)
wp[t] = kernel((t+i1-f1)*delta - 1);
double val=0;
psiarr[0]=1.;
double cpsi=cos(ptg(i,2)), spsi=sin(ptg(i,2));
double cnpsi=cpsi, snpsi=spsi;
for (size_t l=1; l<=kmax; ++l)
vector<T> wt(supp), wp(supp);
vector<T> psiarr(2*kmax+1);
while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
{
psiarr[2*l-1]=cnpsi;
psiarr[2*l]=-snpsi;
const double tmp = snpsi*cpsi + cnpsi*spsi;
cnpsi=cnpsi*cpsi - snpsi*spsi;
snpsi=tmp;
size_t i=idx[ind];
double f0=0.5*supp+ptg(i,0)*xdtheta;
size_t i0 = size_t(f0+1.);
for (size_t t=0; t<supp; ++t)
wt[t] = kernel((t+i0-f0)*delta - 1);
double f1=0.5*supp+ptg(i,1)*xdphi;
size_t i1 = size_t(f1+1.);
for (size_t t=0; t<supp; ++t)
wp[t] = kernel((t+i1-f1)*delta - 1);
double val=0;
psiarr[0]=1.;
double cpsi=cos(ptg(i,2)), spsi=sin(ptg(i,2));
double cnpsi=cpsi, snpsi=spsi;
for (size_t l=1; l<=kmax; ++l)
{
psiarr[2*l-1]=cnpsi;
psiarr[2*l]=-snpsi;
const double tmp = snpsi*cpsi + cnpsi*spsi;
cnpsi=cnpsi*cpsi - snpsi*spsi;
snpsi=tmp;
}
for (size_t j=0; j<supp; ++j)
for (size_t k=0; k<supp; ++k)
for (size_t l=0; l<2*kmax+1; ++l)
val += cube(i0+j,i1+k,l)*wt[j]*wp[k]*psiarr[l];
res.v(i) = val;
}
for (size_t j=0; j<supp; ++j)
for (size_t k=0; k<supp; ++k)
for (size_t l=0; l<2*kmax+1; ++l)
val += cube(i0+j,i1+k,l)*wt[j]*wp[k]*psiarr[l];
res.v(i) = val;
}
});
}
};
template<typename T> class PyInterpolator: public Interpolator<T>
{
public:
PyInterpolator(const py::array &slmT, const py::array &blmT, int64_t lmax, int64_t kmax, double epsilon)
PyInterpolator(const py::array &slmT, const py::array &blmT, int64_t lmax, int64_t kmax, double epsilon, int nthreads=0)
: Interpolator<T>(Alm<complex<T>>(to_mav<complex<T>,1>(slmT), lmax, lmax),
Alm<complex<T>>(to_mav<complex<T>,1>(blmT), lmax, kmax),
epsilon) {}
epsilon, nthreads) {}
using Interpolator<T>::interpolx;
py::array interpol(const py::array &ptg)
{
......@@ -224,6 +238,16 @@ template<typename T> class PyInterpolator: public Interpolator<T>
}
};
#if 0
template<typename T> py::array pyrotate_alm(const py::array &alm_, int64_t lmax,
double psi, double theta, double phi)
{
Alm<complex<T>> alm(to_mav<complex<T>,1>(alm_), lmax, lmax);
rotate_alm(alm, psi, theta, phi);
return alm_;
}
#endif
} // unnamed namespace
PYBIND11_MODULE(interpol_ng, m)
......@@ -231,7 +255,11 @@ PYBIND11_MODULE(interpol_ng, m)
using namespace pybind11::literals;
py::class_<PyInterpolator<double>> (m, "PyInterpolator")
.def(py::init<const py::array &, const py::array &, int64_t, int64_t, double>(),
"sky"_a, "beam"_a, "lmax"_a, "kmax"_a, "epsilon"_a)
.def(py::init<const py::array &, const py::array &, int64_t, int64_t, double, int>(),
"sky"_a, "beam"_a, "lmax"_a, "kmax"_a, "epsilon"_a, "nthreads"_a)
.def ("interpol", &PyInterpolator<double>::interpol, "ptg"_a);
#if 0
m.def("rotate_alm", &pyrotate_alm<double>, "alm"_a, "lmax"_a, "psi"_a, "theta"_a,
"phi"_a);
#endif
}
......@@ -41,7 +41,30 @@ def get_extension_modules():
'mr_util/sharp/sharp_geomhelpers.cc',
'mr_util/sharp/sharp_almhelpers.cc',
'mr_util/sharp/sharp_ylmgen.cc'],
depends=['setup.py'],
depends=[
'mr_util/math/fft1d.h',
'mr_util/math/fft.h',
'mr_util/infra/threading.h',
'mr_util/infra/mav.h',
'mr_util/math/math_utils.h',
'mr_util/infra/aligned_array.h',
'mr_util/math/gl_integrator.h',
'mr_util/infra/simd.h',
'mr_util/math/cmplx.h',
'mr_util/infra/string_utils.h',
'mr_util/infra/timers.h',
'mr_util/math/constants.h',
'mr_util/math/unity_roots.h',
'mr_util/math/es_kernel.h',
'mr_util/infra/error_handling.h',
'mr_util/infra/useful_macros.h',
'mr_util/bindings/pybind_utils.h',
'mr_util/sharp/sharp.h',
'mr_util/sharp/sharp_internal.h',
'mr_util/sharp/sharp_geomhelpers.h',
'mr_util/sharp/sharp_almhelpers.h',
'setup.py',
'alm.h'],
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment