Commit 8d465109 authored by Martin Reinecke
Browse files

reduce library size

parent fe100790
Pipeline #80683 passed with stages
in 17 minutes and 52 seconds
...@@ -828,23 +828,34 @@ template<bool wgrid, typename T, typename Serv> void x2grid_c ...@@ -828,23 +828,34 @@ template<bool wgrid, typename T, typename Serv> void x2grid_c
gconf.timers.push("gridding proper"); gconf.timers.push("gridding proper");
checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()}); checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
switch(gconf.Supp()) if constexpr (is_same<T, float>::value)
{ switch(gconf.Supp())
case 4: x2grid_c_helper< 4, wgrid>(gconf, srv, grid, w0, dw); break; {
case 5: x2grid_c_helper< 5, wgrid>(gconf, srv, grid, w0, dw); break; case 4: x2grid_c_helper< 4, wgrid>(gconf, srv, grid, w0, dw); break;
case 6: x2grid_c_helper< 6, wgrid>(gconf, srv, grid, w0, dw); break; case 5: x2grid_c_helper< 5, wgrid>(gconf, srv, grid, w0, dw); break;
case 7: x2grid_c_helper< 7, wgrid>(gconf, srv, grid, w0, dw); break; case 6: x2grid_c_helper< 6, wgrid>(gconf, srv, grid, w0, dw); break;
case 8: x2grid_c_helper< 8, wgrid>(gconf, srv, grid, w0, dw); break; case 7: x2grid_c_helper< 7, wgrid>(gconf, srv, grid, w0, dw); break;
case 9: x2grid_c_helper< 9, wgrid>(gconf, srv, grid, w0, dw); break; case 8: x2grid_c_helper< 8, wgrid>(gconf, srv, grid, w0, dw); break;
case 10: x2grid_c_helper<10, wgrid>(gconf, srv, grid, w0, dw); break; default: MR_fail("must not happen");
case 11: x2grid_c_helper<11, wgrid>(gconf, srv, grid, w0, dw); break; }
case 12: x2grid_c_helper<12, wgrid>(gconf, srv, grid, w0, dw); break; else
case 13: x2grid_c_helper<13, wgrid>(gconf, srv, grid, w0, dw); break; switch(gconf.Supp())
case 14: x2grid_c_helper<14, wgrid>(gconf, srv, grid, w0, dw); break; {
case 15: x2grid_c_helper<15, wgrid>(gconf, srv, grid, w0, dw); break; case 4: x2grid_c_helper< 4, wgrid>(gconf, srv, grid, w0, dw); break;
case 16: x2grid_c_helper<16, wgrid>(gconf, srv, grid, w0, dw); break; case 5: x2grid_c_helper< 5, wgrid>(gconf, srv, grid, w0, dw); break;
default: MR_fail("must not happen"); case 6: x2grid_c_helper< 6, wgrid>(gconf, srv, grid, w0, dw); break;
} case 7: x2grid_c_helper< 7, wgrid>(gconf, srv, grid, w0, dw); break;
case 8: x2grid_c_helper< 8, wgrid>(gconf, srv, grid, w0, dw); break;
case 9: x2grid_c_helper< 9, wgrid>(gconf, srv, grid, w0, dw); break;
case 10: x2grid_c_helper<10, wgrid>(gconf, srv, grid, w0, dw); break;
case 11: x2grid_c_helper<11, wgrid>(gconf, srv, grid, w0, dw); break;
case 12: x2grid_c_helper<12, wgrid>(gconf, srv, grid, w0, dw); break;
case 13: x2grid_c_helper<13, wgrid>(gconf, srv, grid, w0, dw); break;
case 14: x2grid_c_helper<14, wgrid>(gconf, srv, grid, w0, dw); break;
case 15: x2grid_c_helper<15, wgrid>(gconf, srv, grid, w0, dw); break;
case 16: x2grid_c_helper<16, wgrid>(gconf, srv, grid, w0, dw); break;
default: MR_fail("must not happen");
}
gconf.timers.pop(); gconf.timers.pop();
} }
...@@ -900,23 +911,34 @@ template<bool wgrid, typename T, typename Serv> void grid2x_c ...@@ -900,23 +911,34 @@ template<bool wgrid, typename T, typename Serv> void grid2x_c
gconf.timers.push("degridding proper"); gconf.timers.push("degridding proper");
checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()}); checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
switch(gconf.Supp()) if constexpr (is_same<T, float>::value)
{ switch(gconf.Supp())
case 4: grid2x_c_helper< 4, wgrid>(gconf, grid, srv, w0, dw); break; {
case 5: grid2x_c_helper< 5, wgrid>(gconf, grid, srv, w0, dw); break; case 4: grid2x_c_helper< 4, wgrid>(gconf, grid, srv, w0, dw); break;
case 6: grid2x_c_helper< 6, wgrid>(gconf, grid, srv, w0, dw); break; case 5: grid2x_c_helper< 5, wgrid>(gconf, grid, srv, w0, dw); break;
case 7: grid2x_c_helper< 7, wgrid>(gconf, grid, srv, w0, dw); break; case 6: grid2x_c_helper< 6, wgrid>(gconf, grid, srv, w0, dw); break;
case 8: grid2x_c_helper< 8, wgrid>(gconf, grid, srv, w0, dw); break; case 7: grid2x_c_helper< 7, wgrid>(gconf, grid, srv, w0, dw); break;
case 9: grid2x_c_helper< 9, wgrid>(gconf, grid, srv, w0, dw); break; case 8: grid2x_c_helper< 8, wgrid>(gconf, grid, srv, w0, dw); break;
case 10: grid2x_c_helper<10, wgrid>(gconf, grid, srv, w0, dw); break; default: MR_fail("must not happen");
case 11: grid2x_c_helper<11, wgrid>(gconf, grid, srv, w0, dw); break; }
case 12: grid2x_c_helper<12, wgrid>(gconf, grid, srv, w0, dw); break; else
case 13: grid2x_c_helper<13, wgrid>(gconf, grid, srv, w0, dw); break; switch(gconf.Supp())
case 14: grid2x_c_helper<14, wgrid>(gconf, grid, srv, w0, dw); break; {
case 15: grid2x_c_helper<15, wgrid>(gconf, grid, srv, w0, dw); break; case 4: grid2x_c_helper< 4, wgrid>(gconf, grid, srv, w0, dw); break;
case 16: grid2x_c_helper<16, wgrid>(gconf, grid, srv, w0, dw); break; case 5: grid2x_c_helper< 5, wgrid>(gconf, grid, srv, w0, dw); break;
default: MR_fail("must not happen"); case 6: grid2x_c_helper< 6, wgrid>(gconf, grid, srv, w0, dw); break;
} case 7: grid2x_c_helper< 7, wgrid>(gconf, grid, srv, w0, dw); break;
case 8: grid2x_c_helper< 8, wgrid>(gconf, grid, srv, w0, dw); break;
case 9: grid2x_c_helper< 9, wgrid>(gconf, grid, srv, w0, dw); break;
case 10: grid2x_c_helper<10, wgrid>(gconf, grid, srv, w0, dw); break;
case 11: grid2x_c_helper<11, wgrid>(gconf, grid, srv, w0, dw); break;
case 12: grid2x_c_helper<12, wgrid>(gconf, grid, srv, w0, dw); break;
case 13: grid2x_c_helper<13, wgrid>(gconf, grid, srv, w0, dw); break;
case 14: grid2x_c_helper<14, wgrid>(gconf, grid, srv, w0, dw); break;
case 15: grid2x_c_helper<15, wgrid>(gconf, grid, srv, w0, dw); break;
case 16: grid2x_c_helper<16, wgrid>(gconf, grid, srv, w0, dw); break;
default: MR_fail("must not happen");
}
gconf.timers.pop(); gconf.timers.pop();
} }
...@@ -1260,7 +1282,7 @@ template<typename T> auto getNuNv(double epsilon, ...@@ -1260,7 +1282,7 @@ template<typename T> auto getNuNv(double epsilon,
double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.; double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
if (x0*x0+y0*y0>1.) if (x0*x0+y0*y0>1.)
nmin = -sqrt(abs(1.-x0*x0-y0*y0))-1.; nmin = -sqrt(abs(1.-x0*x0-y0*y0))-1.;
auto idx = getAvailableKernels(epsilon, sizeof(T)<8); auto idx = getAvailableKernels<T>(epsilon);
double mincost = 1e300; double mincost = 1e300;
constexpr double nref_fft=2048; constexpr double nref_fft=2048;
constexpr double costref_fft=0.0693; constexpr double costref_fft=0.0693;
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <cmath> #include <cmath>
#include <type_traits>
#include "ducc0/infra/simd.h" #include "ducc0/infra/simd.h"
#include "ducc0/math/gl_integrator.h" #include "ducc0/math/gl_integrator.h"
#include "ducc0/math/constants.h" #include "ducc0/math/constants.h"
...@@ -664,7 +665,7 @@ template<typename T> auto selectKernel(size_t idx) ...@@ -664,7 +665,7 @@ template<typename T> auto selectKernel(size_t idx)
factor and error. */ factor and error. */
template<typename T> auto selectKernel(double ofactor, double epsilon) template<typename T> auto selectKernel(double ofactor, double epsilon)
{ {
size_t Wmin=1000; size_t Wmin = is_same<T, float>::value ? 8 : 1000;
size_t idx = KernelDB.size(); size_t idx = KernelDB.size();
for (size_t i=0; i<KernelDB.size(); ++i) for (size_t i=0; i<KernelDB.size(); ++i)
if ((KernelDB[i].ofactor<=ofactor) && (KernelDB[i].epsilon<=epsilon) && (KernelDB[i].W<=Wmin)) if ((KernelDB[i].ofactor<=ofactor) && (KernelDB[i].epsilon<=epsilon) && (KernelDB[i].W<=Wmin))
...@@ -680,11 +681,11 @@ template<typename T> auto selectKernel(double ofactor, double epsilon, size_t id ...@@ -680,11 +681,11 @@ template<typename T> auto selectKernel(double ofactor, double epsilon, size_t id
selectKernel<T>(idx) : selectKernel<T>(ofactor, epsilon); selectKernel<T>(idx) : selectKernel<T>(ofactor, epsilon);
} }
auto getAvailableKernels(double epsilon, bool single_precision) template<typename T> auto getAvailableKernels(double epsilon)
{ {
vector<double> ofc(20, 100.); vector<double> ofc(20, 100.);
vector<size_t> idx(20, KernelDB.size()); vector<size_t> idx(20, KernelDB.size());
size_t Wlim = single_precision ? 8 : 16; size_t Wlim = is_same<T, float>::value ? 8 : 16;
for (size_t i=0; i<KernelDB.size(); ++i) for (size_t i=0; i<KernelDB.size(); ++i)
{ {
size_t W = KernelDB[i].W; size_t W = KernelDB[i].W;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment