Commit d7d34c48 authored by Martin Reinecke

Merge branch 'kernels' into 'ducc0'

More flexible gridding kernel support

See merge request !22
parents 758f5d3e e1417dac
Pipeline #78940 passed with stages in 16 minutes and 52 seconds
@@ -20,12 +20,12 @@ include src/ducc0/bindings/pybind_utils.h
include src/ducc0/math/cmplx.h
include src/ducc0/math/constants.h
include src/ducc0/math/es_kernel.h
include src/ducc0/math/fft1d.h
include src/ducc0/math/fft.h
include src/ducc0/math/geom_utils.cc
include src/ducc0/math/geom_utils.h
include src/ducc0/math/gl_integrator.h
include src/ducc0/math/gridding_kernel.h
include src/ducc0/math/math_utils.h
include src/ducc0/math/pointing.cc
include src/ducc0/math/pointing.h
@@ -98,11 +98,11 @@ def test_adjointness_ms2dirty(nxdirty, nydirty, ofactor, nrow, nchan, epsilon,
@pmp('nxdirty', [16, 64])
@pmp('nydirty', [64])
@pmp('ofactor', [1.2, 1.4, 1.7, 2])
@pmp("nrow", (2, 27))
@pmp("nrow", (1, 2, 27))
@pmp("nchan", (1, 5))
@pmp("epsilon", (1e-2, 1e-3, 1e-4, 1e-7))
@pmp("singleprec", (False,))
@pmp("wstacking", (True,))
@pmp("wstacking", (False, True))
@pmp("use_wgt", (True,))
@pmp("nthreads", (1, 2))
@pmp("fov", (1., 20.))
@@ -264,7 +264,6 @@ void add_totalconvolve(py::module &msup)
.def ("deinterpol", &inter_f::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
.def ("getSlm", &inter_f::pygetSlm, getSlm_DS, "beam"_a)
.def ("support", &inter_f::support);
m.def("epsilon_guess", &epsilon_guess, "support"_a, "ofactor"_a);
}
}
@@ -30,7 +30,7 @@
#include <cmath>
#include "ducc0/math/constants.h"
#include "ducc0/math/gl_integrator.h"
#include "ducc0/math/es_kernel.h"
#include "ducc0/math/gridding_kernel.h"
#include "ducc0/infra/mav.h"
#include "ducc0/infra/simd.h"
#include "ducc0/sharp/sharp.h"
@@ -142,8 +142,8 @@ template<typename T> class Interpolator
size_t lmax, kmax, nphi0, ntheta0, nphi, ntheta;
int nthreads;
T ofactor;
shared_ptr<GriddingKernel<T>> kernel;
size_t supp;
ES_Kernel kernel;
size_t ncomp;
#ifdef SIMD_INTERPOL
mav<native_simd<T>,4> scube;
@@ -166,7 +166,7 @@ template<typename T> class Interpolator
}
for (size_t j=0; j<nphi0; ++j)
tmp.v(ntheta0-1,j) = arr(ntheta0-1,j);
auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
auto fct = kernel->corfunc(nphi0/2+1, 1./nphi, nthreads);
vector<T> k2(fct.size());
for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
fmav<T> ftmp(tmp);
@@ -180,7 +180,7 @@ template<typename T> class Interpolator
{
T sfct = (spin&1) ? -1 : 1;
mav<T,2> tmp({nphi,nphi0});
auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
auto fct = kernel->corfunc(nphi0/2+1, 1./nphi, nthreads);
vector<T> k2(fct.size());
for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
fmav<T> farr(arr);
@@ -207,7 +207,6 @@ template<typename T> class Interpolator
vector<size_t> getIdx(const mav<T,2> &ptg) const
{
vector<size_t> idx(ptg.shape(0));
constexpr size_t cellsize=16;
size_t nct = ntheta/cellsize+1,
ncp = nphi/cellsize+1;
@@ -219,6 +218,7 @@ template<typename T> class Interpolator
// MR_assert((itheta<nct)&&(iphi<ncp), "oops");
mapper[itheta*ncp+iphi].push_back(i);
}
vector<size_t> idx(ptg.shape(0));
size_t cnt=0;
for (auto &vec: mapper)
{
@@ -243,8 +243,8 @@ template<typename T> class Interpolator
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(T(nphi)/(2*lmax+1)),
supp(ES_Kernel::get_supp(epsilon, ofactor)),
kernel(supp, ofactor, nthreads),
kernel(selectKernel<T>(ofactor, 0.5*epsilon)),
supp(kernel->support()),
ncomp(separate ? slm.size() : 1),
#ifdef SIMD_INTERPOL
scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
@@ -359,8 +359,8 @@ template<typename T> class Interpolator
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(T(nphi)/(2*lmax+1)),
supp(ES_Kernel::get_supp(epsilon, ofactor)),
kernel(supp, ofactor, nthreads),
kernel(selectKernel<T>(ofactor, 0.5*epsilon)),
supp(kernel->support()),
ncomp(ncomp_),
#ifdef SIMD_INTERPOL
scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
@@ -441,11 +441,11 @@ template<typename T> class Interpolator
union {
native_simd<T> simd[64/vl];
T scalar[64];
} kdata;
T *wt(kdata.scalar), *wp(kdata.scalar+supp);
size_t nvec = (2*supp+vl-1)/vl;
for (auto &v: kdata.simd) v=0;
MR_assert(2*supp<=64, "support too large");
} tbuf, pbuf;
T *wt(tbuf.scalar), *wp(pbuf.scalar);
for (auto &v: tbuf.simd) v=0;
for (auto &v: pbuf.simd) v=0;
MR_assert(supp<=64, "support too large");
vector<T> psiarr(2*kmax+1);
#ifdef SIMD_INTERPOL
vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
@@ -456,14 +456,10 @@ template<typename T> class Interpolator
size_t i=idx[ind];
T f0=T(0.5*supp+ptg(i,0)*xdtheta);
size_t i0 = size_t(f0+T(1));
for (size_t t=0; t<supp; ++t)
wt[t] = (t+i0-f0)*delta - 1;
kernel->eval((i0-f0)*delta-1, tbuf.simd);
T f1=T(0.5)*supp+ptg(i,1)*xdphi;
size_t i1 = size_t(f1+1.);
for (size_t t=0; t<supp; ++t)
wp[t] = (t+i1-f1)*delta - 1;
for (size_t t=0; t<nvec; ++t)
kdata.simd[t] = kernel(kdata.simd[t]);
kernel->eval((i1-f1)*delta-1, pbuf.simd);
psiarr[0]=1.;
double psi=ptg(i,2);
double cpsi=cos(psi), spsi=sin(psi);
@@ -645,11 +641,11 @@ template<typename T> class Interpolator
union {
native_simd<T> simd[64/vl];
T scalar[64];
} kdata;
T *wt(kdata.scalar), *wp(kdata.scalar+supp);
size_t nvec = (2*supp+vl-1)/vl;
for (auto &v: kdata.simd) v=0;
MR_assert(2*supp<=64, "support too large");
} tbuf, pbuf;
T *wt(tbuf.scalar), *wp(pbuf.scalar);
for (auto &v: tbuf.simd) v=0;
for (auto &v: pbuf.simd) v=0;
MR_assert(supp<=64, "support too large");
vector<T> psiarr(2*kmax+1);
#ifdef SIMD_INTERPOL
vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
@@ -658,16 +654,12 @@ template<typename T> class Interpolator
while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
{
size_t i=idx[ind];
T f0=T(0.5)*supp+ptg(i,0)*xdtheta;
size_t i0 = size_t(f0+1.);
for (size_t t=0; t<supp; ++t)
wt[t] = (t+i0-f0)*delta - 1;
T f0=T(0.5*supp+ptg(i,0)*xdtheta);
size_t i0 = size_t(f0+T(1));
kernel->eval((i0-f0)*delta-1, tbuf.simd);
T f1=T(0.5)*supp+ptg(i,1)*xdphi;
size_t i1 = size_t(f1+1.);
for (size_t t=0; t<supp; ++t)
wp[t] = (t+i1-f1)*delta - 1;
for (size_t t=0; t<nvec; ++t)
kdata.simd[t] = kernel(kdata.simd[t]);
kernel->eval((i1-f1)*delta-1, pbuf.simd);
psiarr[0]=1.;
double psi=ptg(i,2);
double cpsi=cos(psi), spsi=sin(psi);
@@ -936,13 +928,9 @@ template<typename T> class Interpolator
}
};
double epsilon_guess(size_t support, double ofactor)
{ return std::sqrt(12.)*std::exp(-(support*ofactor)); }
}
using detail_totalconvolve::Interpolator;
using detail_totalconvolve::epsilon_guess;
}
@@ -394,6 +394,19 @@ template<typename T> class fmav: public fmav_info, public membuf<T>
}
};
// template<typename Func, typename T0, typename Ts...> void fmav_pointwise_op(Func func, T0 & arg0, Ts&... args)
// {
// MR_assert(multiequal(arg0.shape()==args.shape()...), "fmav shape mismatch");
// if (multiequal(true, arg0.stride()==args.stride()...)) // equal strides, we can make simplifications
// {
// if (arg0.compact()) // even better, we can go through everything in a single loop
// {
// for (size_t i=0; i<arg0.size(); ++i)
// func(arg0.ptr[i], args.ptr[i]...);
// }
// else
// }
template<typename T, size_t ndim> class mav: public mav_info<ndim>, public membuf<T>
{
// static_assert((ndim>0) && (ndim<4), "only supports 1D, 2D, and 3D arrays");
/*
* This file is part of the MR utility library.
*
* This code is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this code; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Copyright (C) 2019-2020 Max-Planck-Society
Author: Martin Reinecke */
#ifndef DUCC0_ES_KERNEL_H
#define DUCC0_ES_KERNEL_H
#include <cmath>
#include <vector>
#include <array>
#include "ducc0/infra/simd.h"
#include "ducc0/infra/error_handling.h"
#include "ducc0/infra/threading.h"
#include "ducc0/math/constants.h"
#include "ducc0/math/gl_integrator.h"
namespace ducc0 {
namespace detail_es_kernel {
using namespace std;
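// esk evaluates the "exponential of semicircle" kernel exp(beta*(sqrt(1-v^2)-1))
// for |v| <= 1 and returns 0 outside that interval; the multiplication by the
// 0/1 mask tmp2 keeps the evaluation branch-free.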
template<typename T> T esk (T v, T beta)
{
auto tmp = (1-v)*(1+v);
auto tmp2 = tmp>=0;
return tmp2*exp(T(beta)*(sqrt(tmp*tmp2)-1));
}
template<typename T> class exponator
{
public:
T operator()(T val) { return exp(val); }
};
template<typename T> native_simd<T> esk (native_simd<T> v, T beta)
{
auto tmp = (T(1)-v)*(T(1)+v);
auto mask = tmp<T(0);
where(mask,tmp)*=T(0);
auto res = (beta*(sqrt(tmp)-T(1))).apply(exponator<T>());
where (mask,res)*=T(0);
return res;
}
/* This class implements the "exponential of semicircle" gridding kernel
described in https://arxiv.org/abs/1808.06736 */
class ES_Kernel
{
private:
double beta;
int p;
vector<double> x, wgt, psi;
size_t supp;
public:
ES_Kernel(size_t supp_, double ofactor, size_t nthreads)
: beta(get_beta(supp_,ofactor)*supp_),
p(int(1.5*supp_+2)), supp(supp_)
{
GL_Integrator integ(2*p,nthreads);
x = integ.coordsSymmetric();
wgt = integ.weightsSymmetric();
psi=x;
for (auto &v:psi)
v=operator()(v);
}
ES_Kernel(size_t supp_, size_t nthreads)
: ES_Kernel(supp_, 2., nthreads){}
template<typename T> T operator()(T v) const
{ return esk(v, T(beta)); }
template<typename T> native_simd<T> operator()(native_simd<T> v) const
{ return esk(v, T(beta)); }
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
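// In terms of the code below, corfac(v) is approximately
// 1 / ( (supp/2) * Integral_{-1}^{1} psi(x) * cos(pi*supp*v*x) dx ),
// with the integral evaluated by the Gauss-Legendre rule prepared in the
// constructor (psi[] holds the kernel values at the quadrature nodes).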
double corfac(double v) const
{
double tmp=0;
for (int i=0; i<p; ++i)
tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]);
return 2./(supp*tmp);
}
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
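// i.e. res[i] = corfac(i/n) for i = 0 .. nval-1, computed in parallel with nthreads threads.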
vector<double> correction_factors(size_t n, size_t nval, size_t nthreads)
{
vector<double> res(nval);
double xn = 1./n;
execStatic(nval, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
res[i] = corfac(i*xn);
});
return res;
}
static double get_beta(size_t supp, double ofactor=2)
{
MR_assert((supp>=2) && (supp<=15), "unsupported support size");
if (ofactor>=2)
{
static const array<double,16> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
MR_assert(supp<opt_beta.size(), "bad support size");
return opt_beta[supp];
}
if (ofactor>=1.175)
{
// empirical, but pretty accurate approximation
static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
auto x0 = 1./(2*ofactor);
auto bcstrength=1.+(x0-0.25)*2.5;
return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
}
MR_fail("oversampling factor is too small");
}
static size_t get_supp(double epsilon, double ofactor=2)
{
double epssq = epsilon*epsilon;
if (ofactor>=2)
{
static const array<double,16> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
for (size_t i=2; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
MR_fail("requested epsilon too small - minimum is 1e-13");
}
if (ofactor>=1.175)
{
for (size_t w=2; w<16; ++w)
{
auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
if (epssq>estimate) return w;
}
MR_fail("requested epsilon too small");
}
MR_fail("oversampling factor is too small");
}
};
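// Illustrative usage sketch; the helper below is hypothetical and not part of the
// library interface. It picks the smallest support that reaches the requested
// accuracy at oversampling factor 2, builds the kernel, and returns its
// grid-correction factor at normalized frequency v.
inline double es_kernel_corfac_demo(double epsilon, double v, size_t nthreads=1)
  {
  size_t supp = ES_Kernel::get_supp(epsilon, 2.); // support needed for this accuracy
  ES_Kernel krn(supp, 2., nthreads);              // beta is chosen internally via get_beta()
  return krn.corfac(v);                           // cf. Barnett et al. 2018, eqs. (3.8)-(3.10)
  }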
}
using detail_es_kernel::esk;
using detail_es_kernel::ES_Kernel;
}
#endif
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(precision=16)
def trap(vec, dx):
# Perform trapezoidal integration
return dx * (np.sum(vec) - 0.5 * (vec[0] + vec[-1]))
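# e.g. trap(np.ones(5), 0.1) == 0.4 (composite trapezoidal rule on a uniform grid)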
def make_evaluation_grids(W, M, N):
"""Generate vectors nu and x on which the gridder and gridding correction functions need to be evaluated.
W is the number of integer gridpoints in total
M determines the sampling of the nu grid, dnu = 1/(2*M)
N determines the sampling of the x grid, dx = 1/(2*N)
"""
nu = (np.arange(W * M, dtype=float) + 0.5) / (2 * M)
x = np.arange(N+1, dtype=float)/(2 * N)
return nu, x
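# nu has length W*M and samples the kernel argument on (0, W/2) in grid units;
# x has length N+1 and samples the image coordinate on [0, 1/2].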
def gridder_to_C(gridder, W):
"""Reformat gridder evaluated on the nu grid returned by make_evaluation_grids into the sampled C function
which has an index for the closest gridpoint and an index for the fractional distance from that gridpoint
"""
M = len(gridder) // W
C = np.zeros((W, M), dtype=float)
for r in range(0, W):
l = r - (W/2) + 1
indx = (np.arange(M) - 2 * M * l).astype(int)
# Use symmetry to deal with negative indices
indx[indx<0] = -indx[indx<0] - 1
C[r, :] = gridder[indx]
return C
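# C has shape (W, M): C[r, m] is presumably the kernel weight that a source at
# fractional offset (m + 0.5)/(2*M) assigns to its r-th of W neighbouring grid points
# (offset l = r - W/2 + 1); the index mirroring exploits the kernel's even symmetry.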
def gridder_to_grid_correction(gridder, nu, x, W):
"""Calculate the optimal grid correction function from the gridding function. The vectors x and nu should
have been constructed using make_evaluation_grids"""
M = len(nu) // W
N = len(x) - 1
dnu = nu[1] - nu[0]
C = gridder_to_C(gridder, W)
c = np.zeros(x.shape, dtype=float)
d = np.zeros(x.shape, dtype=float)
for n, x_val in enumerate(x):
for rp in range(0, W):
lp = rp - (W/2) + 1
for r in range(0, W):
l = r - (W/2) + 1
d[n] += np.sum(C[rp, :] * C[r, :] * np.cos(2 * np.pi * (lp - l) * x_val)) * dnu
c[n] += np.sum(C[rp, :] * np.cos(2 * np.pi * (lp - nu[:M]) * x_val)) * dnu
return c/d
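# The ratio c/d appears to be the least-squares-optimal diagonal correction:
# d[n] accumulates the pairwise products of the gridded kernels and c[n] their
# overlap with the exact exponential at x[n], so c/d minimizes the map error
# evaluated by calc_map_error below.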
def calc_map_error(gridder, grid_correction, nu, x, W):
M = len(nu) // W
N = len(x) - 1
dnu = nu[1] - nu[0]
C = gridder_to_C(gridder, W)
loss = np.zeros((len(x), 2, M), dtype=float)
for n, x_val in enumerate(x):
one_app = 0
for r in range(0, W):
l = r - (W/2) + 1
one_app += grid_correction[n] * C[r, :] * np.exp(2j * np.pi * (l - nu[:M]) * x_val)
loss[n, 0, :] = 1.0 - np.real(one_app)
loss[n, 1, :] = np.imag(one_app)
map_error = np.zeros(len(x), dtype=float)
for i in range(len(x)):
map_error[i] = 2 * np.sum((loss[i, :, :].flatten())**2) * dnu
return map_error
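# map_error[n] is (with the factor 2 presumably accounting for negative frequencies)
# the squared deviation from unity of the corrected, gridded representation of a
# plane wave, integrated over the frequency samples nu.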
minsupp=2
maxsupp=9
minx=25
maxx=43
stepx=3
nsteps = 100
maperr = np.zeros(nsteps)
interr = np.zeros(nsteps)
res={}
xval=np.arange(minx, maxx, stepx)
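# Parameter scan: for every support w and every cutoff x0 = xv/100 (corresponding to
# an oversampling factor 1/(2*x0), here ranging from 2.0 down to roughly 1.16), try all
# beta0 values and keep the one with the smallest integrated map error;
# res[(w, xv)] = (max map error, integrated error, best beta0).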
for w in range(minsupp, maxsupp):
betamin, betamax = 1.2, 2.5
dbeta = 0.01
betas = np.arange(betamin, betamax, dbeta)
nsteps = betas.size
for i in range(nsteps):
beta0=betas[i]
M = 64
N = 100
nu, x = make_evaluation_grids(w, M, N)
beta=beta0*w
gridfunc = lambda nu: np.exp(beta*(np.sqrt(1.-nu*nu)-1))
gridder = gridfunc(nu/(w*0.5))
grid_correction = gridder_to_grid_correction(gridder, nu, x, w)
map_err = calc_map_error(gridder, grid_correction, nu, x, w)
for xv in xval:
x0 = xv*0.01
which = np.digitize(x0, x)
maperr= np.max(np.abs(map_err[:which]))
interr=trap(map_err[:which], 1.0/(2*N))
if (w, xv) not in res:
res[(w, xv)] = (maperr, interr, beta0)
print(w, xv, res[(w, xv)])
else:
if interr < res[(w, xv)][1]:
res[(w, xv)] = (maperr, interr, beta0)
print(w, xv, res[(w, xv)])
def model_interr0(x0,W):
c1 = -5.7
return np.exp(c1*W*(1-x0))
def model_interr1(x0,W):
osf = 1./(x0*2)
c1 = -2.1
return np.exp(c1*W*osf)
def model_maxerr0(x0,W):
osf = 1./(x0*2)
c1 = -2.0
return 12*np.exp(c1*W*osf)
def model_beta0(x0,W):
betacorr=[0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0,]
bcstrength=1.+(x0-0.25)*2.5
return 2.32+bcstrength*betacorr[W]+(0.25-x0)*3.1
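# These model functions mirror the empirical fits used in ES_Kernel above:
# model_beta0 reproduces the betacorr table and the
# 2.32 + bcstrength*betacorr[W] + (0.25 - x0)*3.1 formula of get_beta(), and
# model_maxerr0 the 12*exp(-2*W*ofactor) error estimate of get_supp().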
supps=np.arange(minsupp, maxsupp)
for i in range(minx, maxx, stepx):
#betas = np.array([res[(w,i)][2] for w in range(minsupp, maxsupp)])
#plt.plot(supps, betas,label="{}".format(i))
#betas2 = np.array([model_beta0(0.01*i, w) for w in range(minsupp, maxsupp)])
#plt.plot(supps, betas2,label="m{}".format(i))
errs = np.array([res[(w,i)][0] for w in range(minsupp, maxsupp)])
# errs = np.array([res[(w,i)][1] for w in range(minsupp, maxsupp)])
errs2 = np.array([model_maxerr0(0.01*i, w) for w in range(minsupp, maxsupp)])
plt.semilogy(supps, errs,label="{}".format(i))
plt.semilogy(supps, errs2,label="m{}".format(i))
plt.legend()
plt.show()
#log(eps**2) propto W
/*
* This file is part of the MR utility library.
*
* This code is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this code; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Copyright (C) 2020 Max-Planck-Society
Author: Martin Reinecke */
#ifndef DUCC0_HORNER_KERNEL_H
#define DUCC0_HORNER_KERNEL_H
#include <cmath>
#include <functional>
#include <array>
#include <vector>
#include "ducc0/infra/simd.h"
#include "ducc0/infra/useful_macros.h"
namespace ducc0 {
namespace detail_horner_kernel {
using namespace std;
constexpr double pi=3.141592653589793238462643383279502884197;
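// getCoeffs splits [-1, 1] into W equal sub-intervals and, on each of them, fits a
// degree-D polynomial to func by Chebyshev interpolation; the returned W*(D+1) values
// are presumably the monomial coefficients intended for Horner-scheme evaluation.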
vector<double> getCoeffs(size_t W, size_t D, const function<double(double)> &func)
{
vector<double> coeff(W*(D+1));
vector<double> chebroot(D+1);
for (size_t i=0; i<=D; ++i)
chebroot[i] = cos((2*i+1.)*pi/(2*D+2));
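// chebroot[i] are the roots of the Chebyshev polynomial T_{D+1}, used as
// interpolation nodes (after mapping) on every sub-interval.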
vector<double> y(D+1), lcf(D+1), C((D+1)*(D+1)), lcf2(D+1);
for (size_t i=0; i<W; ++i)
{
double l = -1+2.*i/double(W);
double r = -1+2.*(i+1)/double(W);
// function values at Chebyshev nodes
for (size_t j=0; j<=D; ++j)
y[j] = func(chebroot[j]*(r-l)*0.5 + (r+l)*0.5);
// Chebyshev coefficients
for (size_t j=0; j<=D; ++j)
{
lcf[j] = 0;
for (size_t k=0; k<=D; ++k)
lcf[j] += 2./(D+1)*y[k]*cos(j*(2*k+1)*pi/(2*D+2));
}
lcf[0] *= 0.5;
// Polynomial coefficients
fill(C.begin(), C.end(), 0.);
C[0] = 1.;
C[1*(D+1) + 1] = 1.;
for (size_t j=2; j<=D; ++j)
{
C[j*(D+1) + 0] = -C[(j-2)*(D+1) + 0];
for (size_t k=1; k<=j; ++k)
C[j*(D+1) + k] = 2*C[(j-1)*(D+1) + k-1] - C[(j-2)*(D+1) + k];
}
for (size_t j=0; j<=D; ++j) lcf2[j] = 0;
for (size_t j=0; j<=D; ++j)
for (size_t k=0; k<=