Commit bb7722ce authored by Peter Bell's avatar Peter Bell

Add custom sinpi, cospi and sincospi for all float types. Use for dcst twiddles.

parent a9a6e6ab
......@@ -258,50 +258,177 @@ template<bool fwd, typename T> void ROTX90(cmplx<T> &a)
// twiddle factor section
//
/** Approximate sin(pi*a) within the range [-0.25, 0.25] */
inline float sinpi0(float a)
{
// adapted from https://stackoverflow.com/questions/42792939/
float s = a * a;
float r = -5.957031250000000000e-01f;
r = fma (r, s, 2.550399541854858398e+00f);
r = fma (r, s, -5.167724132537841797e+00f);
r = (a * s) * r;
return fma (a, 3.141592741012573242e+00f, r);
}
/** Approximate sin(pi*a) within the range [-0.25, 0.25] */
inline double sinpi0(double a)
{
// adapted from https://stackoverflow.com/questions/42792939/
double s = a * a;
double r = 4.6151442520157035e-4;
r = fma (r, s, -7.3700183130883555e-3);
r = fma (r, s, 8.2145868949323936e-2);
r = fma (r, s, -5.9926452893214921e-1);
r = fma (r, s, 2.5501640398732688e+0);
r = fma (r, s, -5.1677127800499516e+0);
s = s * a;
r = r * s;
return fma (a, 3.1415926535897931e+0, r);
}
/** Approximate cos(pi*x)-1 for x in [-0.25,0.25] */
inline float cosm1pi0(float a)
{
// adapted from https://stackoverflow.com/questions/42792939/
float s = a * a;
float r = 2.313842773437500000e-01f;
r = fmaf (r, s, -1.335021972656250000e+00f);
r = fmaf (r, s, 4.058703899383544922e+00f);
r = fmaf (r, s, -4.934802055358886719e+00f);
return r*s;
}
/** Approximate cos(pi*x)-1 for x in [-0.25,0.25] */
inline double cosm1pi0(double a)
{
// adapted from https://stackoverflow.com/questions/42792939/
double s = a * a;
double r = -1.0369917389758117e-4;
r = fma (r, s, 1.9294935641298806e-3);
r = fma (r, s, -2.5806887942825395e-2);
r = fma (r, s, 2.3533063028328211e-1);
r = fma (r, s, -1.3352627688538006e+0);
r = fma (r, s, 4.0587121264167623e+0);
r = fma (r, s, -4.9348022005446790e+0);
return r*s;
}
template <typename T> void sincosm1pi0(T a_, T *POCKETFFT_RESTRICT res)
{
if (sizeof(T)>sizeof(double)) // don't have the code for long double
{
constexpr T pi = T(3.141592653589793238462643383279502884197L);
auto s = sin(pi*a_);
res[1] = s;
res[0] = (s*s)/(-sqrt((1-s)*(1+s))-1);
return;
}
res[0] = T(cosm1pi0(double(a_)));
res[1] = T(sinpi0(double(a_)));
}
template <typename T> T sinpi(T a)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding
auto i = (int64_t)r;
auto t = fma (T(-0.5), r, a);
switch (i%4)
{
case 0:
return sinpi0(t);
case 1: case -3:
return cosm1pi0(t) + T(1.);
case 2: case -2:
return T(0.) - sinpi0(t);
case 3: case -1:
return T(-1.) - cosm1pi0(t);
}
}
template <typename T> T cospi(T a)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding
auto i = (int64_t)r;
auto t = fma (T(-0.5), r, a);
switch (i%4)
{
case 0:
return cosm1pi0(t) + T(1.);
case 1: case -3:
return T(0.) - sinpi0(t);
case 2: case -2:
return T(-1.) - cosm1pi0(t);
case 3: case -1:
return sinpi0(t);
}
}
inline long double cospi(long double a)
{
constexpr auto pi = 3.141592653589793238462643383279502884197L;
return sizeof(long double) > sizeof(double) ? cos(a * pi) :
static_cast<long double>(cospi(static_cast<double>(a)));
}
inline long double sinpi(long double a)
{
constexpr auto pi = 3.141592653589793238462643383279502884197L;
return sizeof(long double) > sizeof(double) ? sin(a * pi) :
static_cast<long double>(sinpi(static_cast<double>(a)));
}
template <typename T> void sincospi(T a, T *POCKETFFT_RESTRICT res)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding
auto i = (int64_t)r;
auto t = fma (T(-0.5), r, a);
auto c=cosm1pi0(t)+T(1.);
auto s=sinpi0(t);
// map results according to quadrant
if (i & 2)
{
s = T(0.)-s;
c = T(0.)-c;
}
if (i & 1)
{
swap(s, c);
c = T(0.)-c;
}
res[0]=c;
res[1]=s;
}
inline void sincospi(long double a, long double *POCKETFFT_RESTRICT res)
{
if (sizeof(long double) > sizeof(double))
{
constexpr auto pi = 3.141592653589793238462643383279502884197L;
res[0] = cos(pi * a);
res[1] = sin(pi * a);
}
else
{
double sincos[2];
sincospi(static_cast<double>(a), sincos);
res[0] = static_cast<long double>(sincos[0]);
res[1] = static_cast<long double>(sincos[1]);
}
}
template<typename T> class sincos_2pibyn
{
private:
using Thigh = typename conditional<(sizeof(T)>sizeof(double)), T, double>::type;
arr<T> data;
void my_sincosm1pi (Thigh a_, Thigh *POCKETFFT_RESTRICT res)
{
if (sizeof(Thigh)>sizeof(double)) // don't have the code for long double
{
constexpr Thigh pi = Thigh(3.141592653589793238462643383279502884197L);
auto s = sin(pi*a_);
res[1] = s;
res[0] = (s*s)/(-sqrt((1-s)*(1+s))-1);
return;
}
// adapted from https://stackoverflow.com/questions/42792939/
// CAUTION: this function only works for arguments in the range
// [-0.25; 0.25]!
double a = double(a_);
double s = a * a;
/* Approximate cos(pi*x)-1 for x in [-0.25,0.25] */
double r = -1.0369917389758117e-4;
r = fma (r, s, 1.9294935641298806e-3);
r = fma (r, s, -2.5806887942825395e-2);
r = fma (r, s, 2.3533063028328211e-1);
r = fma (r, s, -1.3352627688538006e+0);
r = fma (r, s, 4.0587121264167623e+0);
r = fma (r, s, -4.9348022005446790e+0);
double c = r*s;
/* Approximate sin(pi*x) for x in [-0.25,0.25] */
r = 4.6151442520157035e-4;
r = fma (r, s, -7.3700183130883555e-3);
r = fma (r, s, 8.2145868949323936e-2);
r = fma (r, s, -5.9926452893214921e-1);
r = fma (r, s, 2.5501640398732688e+0);
r = fma (r, s, -5.1677127800499516e+0);
s = s * a;
r = r * s;
s = fma (a, 3.1415926535897931e+0, r);
res[0] = c;
res[1] = s;
}
POCKETFFT_NOINLINE void calc_first_octant(size_t den,
T * POCKETFFT_RESTRICT res)
{
......@@ -313,7 +440,7 @@ template<typename T> class sincos_2pibyn
arr<Thigh> tmp(2*l1);
for (size_t i=1; i<l1; ++i)
{
my_sincosm1pi(Thigh(2*i)/Thigh(den),&tmp[2*i]);
sincosm1pi0(Thigh(2*i)/Thigh(den),&tmp[2*i]);
res[2*i ] = T(tmp[2*i]+1);
res[2*i+1] = T(tmp[2*i+1]);
}
......@@ -321,7 +448,7 @@ template<typename T> class sincos_2pibyn
while(start<n)
{
Thigh cs[2];
my_sincosm1pi((Thigh(2*start))/Thigh(den),cs);
sincosm1pi0((Thigh(2*start))/Thigh(den),cs);
res[2*start] = T(cs[0]+1);
res[2*start+1] = T(cs[1]);
size_t end = l1;
......@@ -2369,9 +2496,9 @@ template<typename T0> class T_dcst23
POCKETFFT_NOINLINE T_dcst23(size_t length)
: fftplan(length), twiddle(length)
{
constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
const auto oo2n = T0(0.5)/T0(length);
for (size_t i=0; i<length; ++i)
twiddle[i] = T0(cos(0.5*pi*T0(i+1)/T0(length)));
twiddle[i] = cospi(oo2n*T0(i+1));
}
template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho,
......@@ -2446,12 +2573,13 @@ template<typename T0> class T_dcst4
rfft((N&1)? new pocketfft_r<T0>(N) : nullptr),
C2((N&1) ? 0 : N/2)
{
constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
const auto oon = -T0(1.)/T0(N);
if ((N&1)==0)
for (size_t i=0; i<N/2; ++i)
{
T0 ang = -pi/T0(N)*(T0(i)+T0(0.125));
C2[i].Set(cos(ang), sin(ang));
T0 sincos[2];
sincospi(oon*(T0(i)+T0(0.125)), sincos);
C2[i].Set(sincos[0], sincos[1]);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment