Commit bb7722ce by Peter Bell

### Add custom sinpi, cospi and sincospi for all float types. Use for dcst twiddles.

parent a9a6e6ab
 ... @@ -258,50 +258,177 @@ template void ROTX90(cmplx &a) ... @@ -258,50 +258,177 @@ template void ROTX90(cmplx &a) // twiddle factor section // twiddle factor section // // /** Approximate sin(pi*a) within the range [-0.25, 0.25] */ inline float sinpi0(float a) { // adapted from https://stackoverflow.com/questions/42792939/ float s = a * a; float r = -5.957031250000000000e-01f; r = fma (r, s, 2.550399541854858398e+00f); r = fma (r, s, -5.167724132537841797e+00f); r = (a * s) * r; return fma (a, 3.141592741012573242e+00f, r); } /** Approximate sin(pi*a) within the range [-0.25, 0.25] */ inline double sinpi0(double a) { // adapted from https://stackoverflow.com/questions/42792939/ double s = a * a; double r = 4.6151442520157035e-4; r = fma (r, s, -7.3700183130883555e-3); r = fma (r, s, 8.2145868949323936e-2); r = fma (r, s, -5.9926452893214921e-1); r = fma (r, s, 2.5501640398732688e+0); r = fma (r, s, -5.1677127800499516e+0); s = s * a; r = r * s; return fma (a, 3.1415926535897931e+0, r); } /** Approximate cos(pi*x)-1 for x in [-0.25,0.25] */ inline float cosm1pi0(float a) { // adapted from https://stackoverflow.com/questions/42792939/ float s = a * a; float r = 2.313842773437500000e-01f; r = fmaf (r, s, -1.335021972656250000e+00f); r = fmaf (r, s, 4.058703899383544922e+00f); r = fmaf (r, s, -4.934802055358886719e+00f); return r*s; } /** Approximate cos(pi*x)-1 for x in [-0.25,0.25] */ inline double cosm1pi0(double a) { // adapted from https://stackoverflow.com/questions/42792939/ double s = a * a; double r = -1.0369917389758117e-4; r = fma (r, s, 1.9294935641298806e-3); r = fma (r, s, -2.5806887942825395e-2); r = fma (r, s, 2.3533063028328211e-1); r = fma (r, s, -1.3352627688538006e+0); r = fma (r, s, 4.0587121264167623e+0); r = fma (r, s, -4.9348022005446790e+0); return r*s; } template void sincosm1pi0(T a_, T *POCKETFFT_RESTRICT res) { if (sizeof(T)>sizeof(double)) // don't have the code for long double { constexpr T pi = T(3.141592653589793238462643383279502884197L); auto s = sin(pi*a_); res[1] = s; res[0] = (s*s)/(-sqrt((1-s)*(1+s))-1); return; } res[0] = T(cosm1pi0(double(a_))); res[1] = T(sinpi0(double(a_))); } template T sinpi(T a) { // reduce argument to primary approximation interval (-0.25, 0.25) auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding auto i = (int64_t)r; auto t = fma (T(-0.5), r, a); switch (i%4) { case 0: return sinpi0(t); case 1: case -3: return cosm1pi0(t) + T(1.); case 2: case -2: return T(0.) - sinpi0(t); case 3: case -1: return T(-1.) - cosm1pi0(t); } } template T cospi(T a) { // reduce argument to primary approximation interval (-0.25, 0.25) auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding auto i = (int64_t)r; auto t = fma (T(-0.5), r, a); switch (i%4) { case 0: return cosm1pi0(t) + T(1.); case 1: case -3: return T(0.) - sinpi0(t); case 2: case -2: return T(-1.) - cosm1pi0(t); case 3: case -1: return sinpi0(t); } } inline long double cospi(long double a) { constexpr auto pi = 3.141592653589793238462643383279502884197L; return sizeof(long double) > sizeof(double) ? cos(a * pi) : static_cast(cospi(static_cast(a))); } inline long double sinpi(long double a) { constexpr auto pi = 3.141592653589793238462643383279502884197L; return sizeof(long double) > sizeof(double) ? sin(a * pi) : static_cast(sinpi(static_cast(a))); } template void sincospi(T a, T *POCKETFFT_RESTRICT res) { // reduce argument to primary approximation interval (-0.25, 0.25) auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding auto i = (int64_t)r; auto t = fma (T(-0.5), r, a); auto c=cosm1pi0(t)+T(1.); auto s=sinpi0(t); // map results according to quadrant if (i & 2) { s = T(0.)-s; c = T(0.)-c; } if (i & 1) { swap(s, c); c = T(0.)-c; } res[0]=c; res[1]=s; } inline void sincospi(long double a, long double *POCKETFFT_RESTRICT res) { if (sizeof(long double) > sizeof(double)) { constexpr auto pi = 3.141592653589793238462643383279502884197L; res[0] = cos(pi * a); res[1] = sin(pi * a); } else { double sincos[2]; sincospi(static_cast(a), sincos); res[0] = static_cast(sincos[0]); res[1] = static_cast(sincos[1]); } } template class sincos_2pibyn template class sincos_2pibyn { { private: private: using Thigh = typename conditional<(sizeof(T)>sizeof(double)), T, double>::type; using Thigh = typename conditional<(sizeof(T)>sizeof(double)), T, double>::type; arr data; arr data; void my_sincosm1pi (Thigh a_, Thigh *POCKETFFT_RESTRICT res) { if (sizeof(Thigh)>sizeof(double)) // don't have the code for long double { constexpr Thigh pi = Thigh(3.141592653589793238462643383279502884197L); auto s = sin(pi*a_); res[1] = s; res[0] = (s*s)/(-sqrt((1-s)*(1+s))-1); return; } // adapted from https://stackoverflow.com/questions/42792939/ // CAUTION: this function only works for arguments in the range // [-0.25; 0.25]! double a = double(a_); double s = a * a; /* Approximate cos(pi*x)-1 for x in [-0.25,0.25] */ double r = -1.0369917389758117e-4; r = fma (r, s, 1.9294935641298806e-3); r = fma (r, s, -2.5806887942825395e-2); r = fma (r, s, 2.3533063028328211e-1); r = fma (r, s, -1.3352627688538006e+0); r = fma (r, s, 4.0587121264167623e+0); r = fma (r, s, -4.9348022005446790e+0); double c = r*s; /* Approximate sin(pi*x) for x in [-0.25,0.25] */ r = 4.6151442520157035e-4; r = fma (r, s, -7.3700183130883555e-3); r = fma (r, s, 8.2145868949323936e-2); r = fma (r, s, -5.9926452893214921e-1); r = fma (r, s, 2.5501640398732688e+0); r = fma (r, s, -5.1677127800499516e+0); s = s * a; r = r * s; s = fma (a, 3.1415926535897931e+0, r); res[0] = c; res[1] = s; } POCKETFFT_NOINLINE void calc_first_octant(size_t den, POCKETFFT_NOINLINE void calc_first_octant(size_t den, T * POCKETFFT_RESTRICT res) T * POCKETFFT_RESTRICT res) { { ... @@ -313,7 +440,7 @@ template class sincos_2pibyn ... @@ -313,7 +440,7 @@ template class sincos_2pibyn arr tmp(2*l1); arr tmp(2*l1); for (size_t i=1; i class sincos_2pibyn ... @@ -321,7 +448,7 @@ template class sincos_2pibyn while(start class T_dcst23 ... @@ -2369,9 +2496,9 @@ template class T_dcst23 POCKETFFT_NOINLINE T_dcst23(size_t length) POCKETFFT_NOINLINE T_dcst23(size_t length) : fftplan(length), twiddle(length) : fftplan(length), twiddle(length) { { constexpr T0 pi = T0(3.141592653589793238462643383279502884197L); const auto oo2n = T0(0.5)/T0(length); for (size_t i=0; i POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho, template POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho, ... @@ -2446,12 +2573,13 @@ template class T_dcst4 ... @@ -2446,12 +2573,13 @@ template class T_dcst4 rfft((N&1)? new pocketfft_r(N) : nullptr), rfft((N&1)? new pocketfft_r(N) : nullptr), C2((N&1) ? 0 : N/2) C2((N&1) ? 0 : N/2) { { constexpr T0 pi = T0(3.141592653589793238462643383279502884197L); const auto oon = -T0(1.)/T0(N); if ((N&1)==0) if ((N&1)==0) for (size_t i=0; i
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!