Commit c37e6c89 by Martin Reinecke

### smaller

parent 656634b4
Pipeline #77751 passed with stages
in 13 minutes and 3 seconds
 ... ... @@ -34,46 +34,21 @@ namespace detail_horner_kernel { using namespace std; constexpr double pi=3.141592653589793238462643383279502884197; /*! Class providing fast piecewise polynomial approximation of a function which is defined on the interval [-1;1] W is the number of equal-length intervals into which [-1;1] is subdivided. D is the degree of the approximating polynomials. T is the type at which the approximation is calculated; should be float or double. */ template class HornerKernel template vector getCoeffs(size_t W, size_t D, Func func) { private: using Tsimd = native_simd; static constexpr auto vlen = Tsimd::size(); static constexpr auto nvec = (W+vlen-1)/vlen; array,D+1> coeff; union { array v; array s; } res; public: template HornerKernel(Func func) { for (size_t i=0; i<=D; ++i) for (size_t j=0; j chebroot; vector coeff(W*(D+1)); vector chebroot(D+1); for (size_t i=0; i<=D; ++i) chebroot[i] = cos((2*i+1.)*pi/(2*D+2)); vector y(D+1), lcf(D+1), C((D+1)*(D+1)), lcf2(D+1); for (size_t i=0; i y; for (size_t j=0; j<=D; ++j) y[j] = func(chebroot[j]*(r-l)*0.5 + (r+l)*0.5); // Chebyshev coefficients array lcf; for (size_t j=0; j<=D; ++j) { lcf[j] = 0; ... ... @@ -82,25 +57,56 @@ template class HornerKernel } lcf[0] *= 0.5; // Polynomial coefficients array,D+1> C; for (size_t j=0; j<=D; ++j) for (size_t k=0; k<=D; ++k) C[j][k] = 0; C[0][0] = 1.; C[1][1] = 1.; fill(C.begin(), C.end(), 0.); C[0] = 1.; C[1*(D+1) + 1] = 1.; for (size_t j=2; j<=D; ++j) { C[j][0] = -C[j-2][0]; C[j*(D+1) + 0] = -C[(j-2)*(D+1) + 0]; for (size_t k=1; k<=j; ++k) C[j][k] = 2*C[j-1][k-1] - C[j-2][k]; C[j*(D+1) + k] = 2*C[(j-1)*(D+1) + k-1] - C[(j-2)*(D+1) + k]; } array lcf2; for (size_t j=0; j<=D; ++j) lcf2[j] = 0; for (size_t j=0; j<=D; ++j) for (size_t k=0; k<=D; ++k) lcf2[k] += C[j][k]*lcf[j]; lcf2[k] += C[j*(D+1) + k]*lcf[j]; for (size_t j=0; j<=D; ++j) coeff[j][i/vlen][i%vlen] = T(lcf2[D-j]); coeff[j*W + i] = lcf2[D-j]; } return coeff; } /*! Class providing fast piecewise polynomial approximation of a function which is defined on the interval [-1;1] W is the number of equal-length intervals into which [-1;1] is subdivided. D is the degree of the approximating polynomials. T is the type at which the approximation is calculated; should be float or double. */ template class HornerKernel { private: using Tsimd = native_simd; static constexpr auto vlen = Tsimd::size(); static constexpr auto nvec = (W+vlen-1)/vlen; array,D+1> coeff; union { array v; array s; } res; public: template HornerKernel(Func func) { auto coeff_raw = getCoeffs(W,D,func); for (size_t j=0; j<=D; ++j) { for (size_t i=0; i class HornerKernelFlexible : W(W_), D(D_), nvec((W+vlen-1)/vlen), res(nvec), coeff(nvec*(D+1), 0), evalfunc(evfhelper1<1>()) { vector chebroot(D+1); for (size_t i=0; i<=D; ++i) chebroot[i] = cos((2*i+1.)*pi/(2*D+2)); vector y(D+1), lcf(D+1), C((D+1)*(D+1)), lcf2(D+1); for (size_t i=0; i
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!