Commit a7c5d872 authored by Martin Reinecke's avatar Martin Reinecke

more cleanups

parent 4f4c03bb
Pipeline #78287 passed with stages
in 13 minutes and 11 seconds
......@@ -20,12 +20,13 @@ include src/ducc0/bindings/pybind_utils.h
include src/ducc0/math/cmplx.h
include src/ducc0/math/constants.h
include src/ducc0/math/es_kernel.h
include src/ducc0/math/fft1d.h
include src/ducc0/math/fft.h
include src/ducc0/math/geom_utils.cc
include src/ducc0/math/geom_utils.h
include src/ducc0/math/gl_integrator.h
include src/ducc0/math/gridding_kernel.h
include src/ducc0/math/least_misfit.h
include src/ducc0/math/math_utils.h
include src/ducc0/math/pointing.cc
include src/ducc0/math/pointing.h
......
......@@ -142,8 +142,8 @@ template<typename T> class Interpolator
size_t lmax, kmax, nphi0, ntheta0, nphi, ntheta;
int nthreads;
T ofactor;
shared_ptr<GriddingKernel<T>> kernel;
size_t supp;
ES_Kernel<T> kernel;
size_t ncomp;
#ifdef SIMD_INTERPOL
mav<native_simd<T>,4> scube;
......@@ -166,7 +166,7 @@ template<typename T> class Interpolator
}
for (size_t j=0; j<nphi0; ++j)
tmp.v(ntheta0-1,j) = arr(ntheta0-1,j);
auto fct = kernel.corfunc(nphi0/2+1, 1./nphi, nthreads);
auto fct = kernel->corfunc(nphi0/2+1, 1./nphi, nthreads);
vector<T> k2(fct.size());
for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
fmav<T> ftmp(tmp);
......@@ -180,7 +180,7 @@ template<typename T> class Interpolator
{
T sfct = (spin&1) ? -1 : 1;
mav<T,2> tmp({nphi,nphi0});
auto fct = kernel.corfunc(nphi0/2+1, 1./nphi, nthreads);
auto fct = kernel->corfunc(nphi0/2+1, 1./nphi, nthreads);
vector<T> k2(fct.size());
for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
fmav<T> farr(arr);
......@@ -243,8 +243,8 @@ template<typename T> class Interpolator
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(T(nphi)/(2*lmax+1)),
supp(esk_support(epsilon, ofactor)),
kernel(supp, esk_beta(supp, ofactor)*supp),
kernel(selectESKernel<T>(ofactor, epsilon)),
supp(kernel->support()),
ncomp(separate ? slm.size() : 1),
#ifdef SIMD_INTERPOL
scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
......@@ -359,8 +359,8 @@ template<typename T> class Interpolator
ntheta(nphi/2+1),
nthreads(nthreads_),
ofactor(T(nphi)/(2*lmax+1)),
supp(esk_support(epsilon, ofactor)),
kernel(supp, esk_beta(supp, ofactor)*supp),
kernel(selectESKernel<T>(ofactor, epsilon)),
supp(kernel->support()),
ncomp(ncomp_),
#ifdef SIMD_INTERPOL
scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
......@@ -456,10 +456,10 @@ template<typename T> class Interpolator
size_t i=idx[ind];
T f0=T(0.5*supp+ptg(i,0)*xdtheta);
size_t i0 = size_t(f0+T(1));
kernel.eval((i0-f0)*delta-1, tbuf.simd);
kernel->eval((i0-f0)*delta-1, tbuf.simd);
T f1=T(0.5)*supp+ptg(i,1)*xdphi;
size_t i1 = size_t(f1+1.);
kernel.eval((i1-f1)*delta-1, pbuf.simd);
kernel->eval((i1-f1)*delta-1, pbuf.simd);
psiarr[0]=1.;
double psi=ptg(i,2);
double cpsi=cos(psi), spsi=sin(psi);
......@@ -656,10 +656,10 @@ template<typename T> class Interpolator
size_t i=idx[ind];
T f0=T(0.5*supp+ptg(i,0)*xdtheta);
size_t i0 = size_t(f0+T(1));
kernel.eval((i0-f0)*delta-1, tbuf.simd);
kernel->eval((i0-f0)*delta-1, tbuf.simd);
T f1=T(0.5)*supp+ptg(i,1)*xdphi;
size_t i1 = size_t(f1+1.);
kernel.eval((i1-f1)*delta-1, pbuf.simd);
kernel->eval((i1-f1)*delta-1, pbuf.simd);
psiarr[0]=1.;
double psi=ptg(i,2);
double cpsi=cos(psi), spsi=sin(psi);
......
/*
* This file is part of the MR utility library.
*
* This code is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this code; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Copyright (C) 2019-2020 Max-Planck-Society
Author: Martin Reinecke */
#ifndef DUCC0_ES_KERNEL_H
#define DUCC0_ES_KERNEL_H
#include <cmath>
#include <vector>
#include <array>
#include "ducc0/infra/simd.h"
#include "ducc0/infra/error_handling.h"
#include "ducc0/infra/threading.h"
#include "ducc0/math/constants.h"
#include "ducc0/math/gl_integrator.h"
namespace ducc0 {
namespace detail_es_kernel {
using namespace std;
template<typename T> T esk (T v, T beta)
{
auto tmp = (1-v)*(1+v);
auto tmp2 = tmp>=0;
return tmp2*exp(T(beta)*(sqrt(tmp*tmp2)-1));
}
template<typename T> class exponator
{
public:
T operator()(T val) { return exp(val); }
};
template<typename T> native_simd<T> esk (native_simd<T> v, T beta)
{
auto tmp = (T(1)-v)*(T(1)+v);
auto mask = tmp<T(0);
where(mask,tmp)*=T(0);
auto res = (beta*(sqrt(tmp)-T(1))).apply(exponator<T>());
where (mask,res)*=T(0);
return res;
}
double esk_beta(size_t supp, double ofactor)
{
MR_assert((supp>=2) && (supp<=15), "unsupported support size");
if (ofactor>=2)
{
static const array<double,16> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
MR_assert(supp<opt_beta.size(), "bad support size");
return opt_beta[supp];
}
if (ofactor>=1.175)
{
// empirical, but pretty accurate approximation
static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
auto x0 = 1./(2*ofactor);
auto bcstrength=1.+(x0-0.25)*2.5;
return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
}
MR_fail("oversampling factor is too small");
}
size_t esk_support(double epsilon, double ofactor)
{
double epssq = epsilon*epsilon;
if (ofactor>=2)
{
static const array<double,16> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
for (size_t i=2; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
MR_fail("requested epsilon too small - minimum is 1e-13");
}
if (ofactor>=1.175)
{
for (size_t w=2; w<16; ++w)
{
auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
if (epssq>estimate) return w;
}
MR_fail("requested epsilon too small");
}
MR_fail("oversampling factor is too small");
}
}
using detail_es_kernel::esk;
using detail_es_kernel::esk_beta;
using detail_es_kernel::esk_support;
}
#endif
......@@ -26,7 +26,6 @@
#include "ducc0/infra/simd.h"
#include "ducc0/math/gl_integrator.h"
#include "ducc0/math/least_misfit.h"
#include "ducc0/math/es_kernel.h"
#include "ducc0/math/constants.h"
namespace ducc0 {
......@@ -341,6 +340,75 @@ template<typename T> class HornerKernel: public GriddingKernel<T>
{ return corr.corfunc(n, dx, nthreads); }
};
template<typename T> T esk (T v, T beta)
{
auto tmp = (1-v)*(1+v);
auto tmp2 = tmp>=0;
return tmp2*exp(T(beta)*(sqrt(tmp*tmp2)-1));
}
template<typename T> class exponator
{
public:
T operator()(T val) { return exp(val); }
};
template<typename T> native_simd<T> esk (native_simd<T> v, T beta)
{
auto tmp = (T(1)-v)*(T(1)+v);
auto mask = tmp<T(0);
where(mask,tmp)*=T(0);
auto res = (beta*(sqrt(tmp)-T(1))).apply(exponator<T>());
where (mask,res)*=T(0);
return res;
}
double esk_beta(size_t supp, double ofactor)
{
MR_assert((supp>=2) && (supp<=15), "unsupported support size");
if (ofactor>=2)
{
static const array<double,16> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
MR_assert(supp<opt_beta.size(), "bad support size");
return opt_beta[supp];
}
if (ofactor>=1.175)
{
// empirical, but pretty accurate approximation
static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
auto x0 = 1./(2*ofactor);
auto bcstrength=1.+(x0-0.25)*2.5;
return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
}
MR_fail("oversampling factor is too small");
}
size_t esk_support(double epsilon, double ofactor)
{
double epssq = epsilon*epsilon;
if (ofactor>=2)
{
static const array<double,16> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
for (size_t i=2; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
MR_fail("requested epsilon too small - minimum is 1e-13");
}
if (ofactor>=1.175)
{
for (size_t w=2; w<16; ++w)
{
auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
if (epssq>estimate) return w;
}
MR_fail("requested epsilon too small");
}
MR_fail("oversampling factor is too small");
}
template<typename T> class ES_Kernel: public GriddingKernel<T>
{
private:
......@@ -397,8 +465,6 @@ template<typename T> auto selectESKernel(double ofactor, double epsilon)
}
using detail_gridding_kernel::GriddingKernel;
using detail_gridding_kernel::HornerKernel;
using detail_gridding_kernel::ES_Kernel;
using detail_gridding_kernel::selectESKernel;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment