/* * Copyright (C) 2020 Max-Planck-Society * Author: Martin Reinecke */ #ifndef MRUTIL_INTERPOL_NG_H #define MRUTIL_INTERPOL_NG_H #include #include #include #include "mr_util/math/constants.h" #include "mr_util/math/gl_integrator.h" #include "mr_util/math/es_kernel.h" #include "mr_util/infra/mav.h" #include "mr_util/sharp/sharp.h" #include "mr_util/sharp/sharp_almhelpers.h" #include "mr_util/sharp/sharp_geomhelpers.h" #include "alm.h" #include "mr_util/math/fft.h" #include "mr_util/bindings/pybind_utils.h" namespace mr { namespace detail_interpol_ng { using namespace std; constexpr double ofmin=1.5; template class Interpolator { protected: bool adjoint; size_t lmax, kmax, nphi0, ntheta0, nphi, ntheta; int nthreads; double ofactor; size_t supp; ES_Kernel kernel; size_t ncomp; mav cube; // the data cube (theta, phi, 2*mbeam+1, TGC) void correct(mav &arr, int spin) { double sfct = (spin&1) ? -1 : 1; mav tmp({nphi,nphi}); tmp.apply([](T &v){v=0.;}); auto tmp0=tmp.template subarray<2>({0,0},{nphi0, nphi0}); fmav ftmp0(tmp0); for (size_t i=0; i=nphi0) j2-=nphi0; tmp0.v(i2,j) = sfct*tmp0(i,j2); } // FFT to frequency domain on minimal grid r2r_fftpack(ftmp0,ftmp0,{0,1},true,true,1./(nphi0*nphi0),nthreads); // correct amplitude at Nyquist frequency for (size_t i=0; i({0,0},{nphi, nphi0}); fmav ftmp1(tmp1); // zero-padded FFT in theta direction r2r_fftpack(ftmp1,ftmp1,{0},false,false,1.,nthreads); auto tmp2=tmp.template subarray<2>({0,0},{ntheta, nphi}); fmav ftmp2(tmp2); fmav farr(arr); // zero-padded FFT in phi direction r2r_fftpack(ftmp2,farr,{1},false,false,1.,nthreads); } void decorrect(mav &arr, int spin) { double sfct = (spin&1) ? -1 : 1; mav tmp({nphi,nphi}); fmav ftmp(tmp); for (size_t i=0; i=nphi) j2-=nphi; tmp.v(i2,j) = sfct*tmp(i,j2); } r2r_fftpack(ftmp,ftmp,{1},true,true,1.,nthreads); auto tmp1=tmp.template subarray<2>({0,0},{nphi, nphi0}); fmav ftmp1(tmp1); r2r_fftpack(ftmp1,ftmp1,{0},true,true,1.,nthreads); auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads); auto tmp0=tmp.template subarray<2>({0,0},{nphi0, nphi0}); fmav ftmp0(tmp0); for (size_t i=0; i getIdx(const mav &ptg) const { vector idx(ptg.shape(0)); constexpr size_t cellsize=16; size_t nct = ntheta/cellsize+1, ncp = nphi/cellsize+1; vector> mapper(nct*ncp); for (size_t i=0; i>> &slm, const vector>> &blm, bool separate, double epsilon, int nthreads_) : adjoint(false), lmax(slm.at(0).Lmax()), kmax(blm.at(0).Mmax()), nphi0(2*good_size_real(lmax+1)), ntheta0(nphi0/2+1), nphi(std::max(20,2*good_size_real(size_t((2*lmax+1)*ofmin/2.)))), ntheta(nphi/2+1), nthreads(nthreads_), ofactor(double(nphi)/(2*lmax+1)), supp(ES_Kernel::get_supp(epsilon, ofactor)), kernel(supp, ofactor, nthreads), ncomp(separate ? slm.size() : 1), cube({ntheta+2*supp, nphi+2*supp, 2*kmax+1, ncomp}) { MR_assert(slm.size()==blm.size(), "inconsistent slm and blm vectors"); for (size_t i=0; i> a1(lmax, lmax), a2(lmax,lmax); auto ginfo = sharp_make_cc_geom_info(ntheta0,nphi0,0.,cube.stride(1),cube.stride(0)); auto ainfo = sharp_make_triangular_alm_info(lmax,lmax,1); vectorlnorm(lmax+1); for (size_t i=0; i<=lmax; ++i) lnorm[i]=std::sqrt(4*pi/(2*i+1.)); for (size_t icomp=0; icomp({supp,supp,0,icomp},{ntheta,nphi,0,0}); sharp_alm2map(a1.Alms().data(), m1.vdata(), *ginfo, *ainfo, 0, nthreads); correct(m1,0); for (size_t k=1; k<=kmax; ++k) { for (size_t m=0; m<=lmax; ++m) for (size_t l=m; l<=lmax; ++l) { if (l({supp,supp,2*k-1,icomp},{ntheta,nphi,0,0}); auto m2 = cube.template subarray<2>({supp,supp,2*k ,icomp},{ntheta,nphi,0,0}); sharp_alm2map_spin(k, a1.Alms().data(), a2.Alms().data(), m1.vdata(), m2.vdata(), *ginfo, *ainfo, 0, nthreads); correct(m1,k); correct(m2,k); } } // fill border regions for (size_t i=0; i=nphi) j2-=nphi; for (size_t l=0; l(20,2*good_size_real(size_t((2*lmax+1)*ofmin/2.)))), ntheta(nphi/2+1), nthreads(nthreads_), ofactor(double(nphi)/(2*lmax+1)), supp(ES_Kernel::get_supp(epsilon, ofactor)), kernel(supp, ofactor, nthreads), ncomp(ncomp_), cube({ntheta+2*supp, nphi+2*supp, 2*kmax+1, ncomp_}) { MR_assert((supp<=ntheta) && (supp<=nphi), "support too large!"); cube.apply([](T &v){v=0.;}); } void interpol (const mav &ptg, mav &res) const { MR_assert(!adjoint, "cannot be called in adjoint mode"); MR_assert(ptg.shape(0)==res.shape(0), "dimension mismatch"); MR_assert(ptg.shape(1)==3, "second dimension must have length 3"); MR_assert(res.shape(1)==ncomp, "# of components mismatch"); double delta = 2./supp; double xdtheta = (ntheta-1)/pi, xdphi = nphi/(2*pi); auto idx = getIdx(ptg); execStatic(idx.size(), nthreads, 0, [&](Scheduler &sched) { vector wt(supp), wp(supp); vector psiarr(2*kmax+1); vector val(ncomp); while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind &ptg, const mav &data) { MR_assert(adjoint, "can only be called in adjoint mode"); MR_assert(ptg.shape(0)==data.shape(0), "dimension mismatch"); MR_assert(ptg.shape(1)==3, "second dimension must have length 3"); MR_assert(data.shape(1)==ncomp, "# of components mismatch"); double delta = 2./supp; double xdtheta = (ntheta-1)/pi, xdphi = nphi/(2*pi); auto idx = getIdx(ptg); constexpr size_t cellsize=16; size_t nct = ntheta/cellsize+5, ncp = nphi/cellsize+5; mav locks({nct,ncp}); execStatic(idx.size(), nthreads, 0, [&](Scheduler &sched) { size_t b_theta=99999999999999, b_phi=9999999999999999; vector wt(supp), wp(supp); vector psiarr(2*kmax+1); vector val(ncomp); while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind>> &blm, vector>> &slm) { MR_assert(adjoint, "can only be called in adjoint mode"); MR_assert((blm.size()==ncomp) || (ncomp==1), "incorrect number of beam a_lm sets"); MR_assert((slm.size()==ncomp) || (ncomp==1), "incorrect number of sky a_lm sets"); Alm> a1(lmax, lmax), a2(lmax,lmax); auto ginfo = sharp_make_cc_geom_info(ntheta0,nphi0,0.,cube.stride(1),cube.stride(0)); auto ainfo = sharp_make_triangular_alm_info(lmax,lmax,1); // move stuff from border regions onto the main grid for (size_t i=0; i=nphi) j2-=nphi; for (size_t l=0; l=nphi) j2-=nphi; double tval = (cube(supp,j+supp,k,l) + fct*cube(supp,j2+supp,k,l)); cube.v(supp,j+supp,k,l) = tval; cube.v(supp,j2+supp,k,l) = fct*tval; tval = (cube(supp+ntheta-1,j+supp,k,l) + fct*cube(supp+ntheta-1,j2+supp,k,l)); cube.v(supp+ntheta-1,j+supp,k,l) = tval; cube.v(supp+ntheta-1,j2+supp,k,l) = fct*tval; } vectorlnorm(lmax+1); for (size_t i=0; i<=lmax; ++i) lnorm[i]=std::sqrt(4*pi/(2*i+1.)); for (size_t j=0; j1; { auto m1 = cube.template subarray<2>({supp,supp,0,icomp},{ntheta,nphi,0,0}); decorrect(m1,0); sharp_alm2map_adjoint(a1.Alms().vdata(), m1.data(), *ginfo, *ainfo, 0, nthreads); for (size_t m=0; m<=lmax; ++m) for (size_t l=m; l<=lmax; ++l) if (separate) slm[icomp](l,m) += conj(a1(l,m))*blm[icomp](l,0).real()*T(lnorm[l]); else for (size_t j=0; j({supp,supp,2*k-1,icomp},{ntheta,nphi,0,0}); auto m2 = cube.template subarray<2>({supp,supp,2*k ,icomp},{ntheta,nphi,0,0}); decorrect(m1,k); decorrect(m2,k); sharp_alm2map_spin_adjoint(k, a1.Alms().vdata(), a2.Alms().vdata(), m1.data(), m2.data(), *ginfo, *ainfo, 0, nthreads); for (size_t m=0; m<=lmax; ++m) for (size_t l=m; l<=lmax; ++l) if (l>=k) { if (separate) { auto tmp = -2.*conj(blm[icomp](l,k))*T(lnorm[l]); slm[icomp](l,m) += conj(a1(l,m))*tmp.real(); slm[icomp](l,m) -= conj(a2(l,m))*tmp.imag(); } else for (size_t j=0; j