Commit 04d17c37 authored by Martin Reinecke

cleanup and version bump

parent 18b311de
Pipeline #79484 canceled with stages in 5 minutes and 7 seconds
@@ -7,7 +7,7 @@ from setuptools import setup, Extension
 import pybind11

 pkgname = 'ducc0'
-version = '0.1.0'
+version = '0.1.9'

 def _get_files_by_suffix(directory, suffix):
// compile with "g++ -O3 -ffast-math -std=c++17 -march=native -Isrc horner_test.cc"

#include <iostream>
#include <cmath>
#include <algorithm>
#include "ducc0/math/horner_kernel.h"
#include "ducc0/infra/timers.h"

using namespace ducc0;
using namespace std;

// you can use any kernel defined on [-1; 1] here
double es_kernel(size_t w, double x)
  {
  // return exp(-9*x*x);
  auto beta = 2.3*w;
  return exp(beta*(sqrt((1-x)*(1+x))-1));
  }

int main()
  {
  constexpr size_t W=8;  // kernel support in pixels
  constexpr size_t D=12; // degree of approximating polynomials
  using FLT=double;

  size_t Neval = 1000000000; // we will do a billion function evaluations altogether
  size_t Ncall = Neval/W;
  // small offsets between individual calls to prevent the compiler from optimizing too much
  FLT delta = 2./W/double(Ncall);

  {
  HornerKernel<W,D,FLT> hk([](double x){return es_kernel(W,x);});

  // accuracy check: compare the vectorized evaluation against eval_single()
  double sum=0;
  for (size_t i=0; i<Ncall; ++i)
    {
    FLT p0 = -FLT(1)+i*delta; // position of first sample
    auto res = hk.eval(p0);
    for (size_t j=0; j<W; ++j)
      sum = max(sum, abs(res[j]-hk.eval_single(p0+j*2./W)));
    }
  cout << "Maximum deviation measured: " << sum << endl;

  // timing run
  SimpleTimer timer;
  sum = 0;
  for (size_t i=0; i<Ncall; ++i)
    {
    FLT p0 = -FLT(1)+i*delta; // position of first sample
    auto res = hk.eval(p0);
    for (size_t j=0; j<W; ++j)
      sum += res[j]; // needed to avoid over-optimization
    }
  cout << "HornerKernel: " << Neval/timer() << " function approximations per second" << endl;
  cout << sum << endl;
  }

  {
  HornerKernelFlexible<FLT> hk2(W,D,[](double x){return es_kernel(W,x);});
  // buffer that can be accessed both as SIMD vectors and as scalars
  union {
    FLT scalar[64];
    native_simd<FLT> simd[64/native_simd<FLT>::size()];
    } buf;

  // accuracy check
  double sum=0;
  for (size_t i=0; i<Ncall; ++i)
    {
    FLT p0 = -FLT(1)+i*delta; // position of first sample
    hk2.eval(p0, buf.simd);
    for (size_t j=0; j<W; ++j)
      sum = max(sum, abs(buf.scalar[j]-hk2.eval_single(p0+j*2./W)));
    }
  cout << "HornerKernelFlexible: Maximum deviation measured: " << sum << endl;

  // timing run
  SimpleTimer timer;
  sum = 0;
  for (size_t i=0; i<Ncall; ++i)
    {
    FLT p0 = -FLT(1)+i*delta; // position of first sample
    hk2.eval(p0, buf.simd);
    for (size_t j=0; j<W; ++j)
      sum += buf.scalar[j]; // needed to avoid over-optimization
    }
  cout << "HornerKernelFlexible: " << Neval/timer() << " function approximations per second" << endl;
  cout << sum << endl;
  }
  }
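As background for the benchmark above: HornerKernel approximates the chosen kernel by one fixed-degree polynomial per support pixel and evaluates those polynomials with Horner's rule. The following is a minimal NumPy sketch of that idea under the same W=8, D=12 settings; it is an illustration only, not the ducc0 C++ implementation, and the helper names (fit_interval, eval_horner) are invented for this sketch.

# Conceptual sketch only: piecewise-polynomial kernel approximation with
# Horner evaluation, mirroring what horner_test.cc measures.  Not ducc0 code.
import numpy as np

W, D = 8, 12                                     # support in pixels, polynomial degree

def es_kernel(w, x):
    beta = 2.3*w
    return np.exp(beta*(np.sqrt((1-x)*(1+x))-1))

def fit_interval(j):
    # fit a degree-D polynomial in a local coordinate t in [-1, 1]
    t = np.linspace(-1, 1, 4*(D+1))
    x = -1 + (t + 2*j + 1)/W                     # map interval j to global position
    return np.polyfit(t, es_kernel(W, x), D)     # highest-degree coefficient first

coeffs = [fit_interval(j) for j in range(W)]

def eval_horner(x):
    j = min(int((x+1)*W/2), W-1)                 # which of the W sub-intervals?
    t = (x+1)*W - 2*j - 1                        # local coordinate in [-1, 1]
    res = 0.0
    for c in coeffs[j]:
        res = res*t + c                          # Horner step
    return res

# maximum deviation from the exact kernel on a test grid; should be tiny
print(max(abs(eval_horner(x) - es_kernel(W, x)) for x in np.linspace(-1, 1, 1000)))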
 import numpy as np
 from scipy.optimize import leastsq

 def gridder_to_C(gridder, W):
     M = len(gridder) // W
@@ -29,10 +28,7 @@ def C_to_grid_correction(C, nu_C, x, optimal=True):
             ell = r - (W / 2) + 1
             xmn = np.mean(C[rp, :, :]*C[r, :, :], axis=0)
             tmp = xmn * cosarr[abs(rp-r)]
-            # tmp = C[rp, :, :] * C[r, :, :] * np.cos(2 * np.pi * (ellp - ell) * x)
-            # print(np.max(np.abs(tmp-np.mean(tmp2,axis=0))))
             d += tmp
-            # d += np.mean(tmp,axis=0)
         tmp2 = C[rp, :, :] * np.cos(2 * np.pi * (ellp-nu_C) * x)
         c += np.mean(tmp2,axis=0)
     return c / d if optimal else 1 / c
@@ -63,8 +59,6 @@ def calc_map_error(gridder, grid_correction, nu, x, W):
     return calc_map_error_from_C(C, grid_correction, nu[:M], x, W)

 import matplotlib.pyplot as plt

 def eskapprox(parm, nu, x, W):
     nunorm=2*nu/W
     beta=parm[0]
@@ -78,17 +72,15 @@ def getmaxerr(approx, coeff, nu, x, W, M, N, x0):
     krn = approx(coeff, nu, x, W)
     err = kernel2error(krn, nu, x, W)
     err = err[0:int(2*x0*N+0.9999)+1]
-    # print(coeff, np.max(err))
     return np.max(np.abs(err))

-def scan_esk(rbeta, re0, nu, x, W, M, N, x0):
+def scan_esk(rbeta, re0, nu, x, W, M, N, x0, nsamp):
     curmin=1e30
-    for e0 in np.linspace(re0[0], re0[1], 10):
-        for beta in np.linspace(rbeta[0], rbeta[1], 10):
+    for e0 in np.linspace(re0[0], re0[1], nsamp):
+        for beta in np.linspace(rbeta[0], rbeta[1], nsamp):
             test = getmaxerr(eskapprox, [beta,e0], nu, x, W, M, N, x0)
             if test<curmin:
                 curmin, coeffmin = test, [beta,e0]
-    # print(coeffmin, np.sqrt(curmin))
     return coeffmin

 def kernel2error(krn, nu, x, W):
@@ -101,55 +93,25 @@ x=np.arange(N+1)/(2*N)
 # for quick experiments, just enter the desired oversampling factor and support
 # as single elements in the tuples below
-ofactors = np.linspace(1.2,2.0,9)
-Ws = reversed((15,))
+ofactors = np.linspace(1.15,2.00,18)
+Ws = np.arange(4,17)
 results = []
-#plt.ion()
+np.set_printoptions(floatmode='unique')
 for W in Ws:
     for ofactor in ofactors:
         x0 = 0.5/ofactor
-        # plt.title("W={}, ofactor={}".format(W, ofactor))
         nu=(np.arange(W*M)+0.5)/(2*M)
         ulim = int(2*x0*N+0.9999)+1
-        # res1 = leastsq(lambda c:geterr(eskapprox, c,nu,x,W,M, N, x0, 1), [2.3,0.5], full_output=True)[0]
-        # krn1 = eskapprox(res1, nu, x, W)
-        # err1 = kernel2error(krn1, nu, x, W)
-        # results.append(err1)
-        # maxerr1 = np.sqrt(np.max(err1[0:ulim]))
-        # print(W, ofactor, maxerr1, res1)
         rbeta=[1., 2.5]
-        re0=[0.48, 0.58]
+        re0=[0.48, 0.65]
         dbeta = rbeta[1]-rbeta[0]
         de0 = re0[1]-re0[0]
-        for i in range(8):
-            res1 = scan_esk(rbeta, re0, nu, x, W, M, N, x0)
-            dbeta*=0.25
-            de0*=0.25
+        for i in range(30):
+            res1 = scan_esk(rbeta, re0, nu, x, W, M, N, x0, 10)
+            dbeta*=0.5
+            de0*=0.5
             rbeta = [res1[0]-0.5*dbeta, res1[0]+0.5*dbeta]
             re0 = [res1[1]-0.5*de0, res1[1]+0.5*de0]
-        # res1 = leastsq(lambda c:geterr(eskapprox, c,nu,x,W,M, N, x0, 1), res1, full_output=True)[0]
         krn1 = eskapprox(res1, nu, x, W)
-        # kpoly,cf = polyize(krn1,W,W+3)
-        # print(_l2error(kpoly,krn1))
-        # plt.plot(np.log10(np.abs(kpoly-krn1)))
         err1 = kernel2error(krn1, nu, x, W)
-        # results.append(err1)
         maxerr1 = np.sqrt(np.max(err1[0:ulim]))
-        print("{{{}, {}, {}, {}, {}}},".format(W, ofactor, maxerr1, res1[0], res1[1]))
-        # for r in results:
-        # plt.semilogy(x, np.sqrt(r))
-        # plt.show()
-        # print("2XXXX:", maxerr1, res1)
-        # plt.semilogy(x, np.sqrt(err1), label="ESK (2 params)")
-        # res1 = leastsq(lambda c:geterr(eskapprox, c,nu,x,W,M, N, x0), [res1[0], res1[1], 2.], full_output=True)[0]
-        # krn1 = eskapprox(res1, nu, x, W)
-        # err1 = kernel2error(krn1, nu, x, W)
-        # maxerr1 = np.sqrt(np.max(err1[0:ulim]))
-        # # print("3XXXX:", maxerr1, res1)
-        # plt.semilogy(x, np.sqrt(err1), label="ESK (2 params)")
-        # plt.axvline(x=x0)
-        # plt.axhline(y=maxerr1)
-        # plt.legend()
-        # plt.show()
+        print("{{{0:2d}, {1:4.2f}, {2:13.8g}, {3:12.10f}, {4:12.10f}}},".format(W, ofactor, maxerr1, res1[0], res1[1]))