Commit 954f7419 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

update pocketfft

parent 323c2636
......@@ -266,305 +266,78 @@ template<bool fwd, typename T> void ROTX90(cmplx<T> &a)
//
// twiddle factor section
//
/** Approximate sin(pi*a) within the range [-0.25, 0.25] */
inline float sinpi0(float a)
{
// adapted from https://stackoverflow.com/questions/42792939/
float s = a * a;
float r = -5.957031250000000000e-01f;
r = fma (r, s, 2.550399541854858398e+00f);
r = fma (r, s, -5.167724132537841797e+00f);
r = (a * s) * r;
return fma (a, 3.141592741012573242e+00f, r);
}
/** Approximate sin(pi*a) within the range [-0.25, 0.25] */
inline double sinpi0(double a)
{
// adapted from https://stackoverflow.com/questions/42792939/
double s = a * a;
double r = 4.6151442520157035e-4;
r = fma (r, s, -7.3700183130883555e-3);
r = fma (r, s, 8.2145868949323936e-2);
r = fma (r, s, -5.9926452893214921e-1);
r = fma (r, s, 2.5501640398732688e+0);
r = fma (r, s, -5.1677127800499516e+0);
s = s * a;
r = r * s;
return fma (a, 3.1415926535897931e+0, r);
}
/** Approximate cos(pi*x)-1 for x in [-0.25,0.25] */
inline float cosm1pi0(float a)
{
// adapted from https://stackoverflow.com/questions/42792939/
float s = a * a;
float r = 2.313842773437500000e-01f;
r = fmaf (r, s, -1.335021972656250000e+00f);
r = fmaf (r, s, 4.058703899383544922e+00f);
r = fmaf (r, s, -4.934802055358886719e+00f);
return r*s;
}
/** Approximate cos(pi*x)-1 for x in [-0.25,0.25] */
inline double cosm1pi0(double a)
{
// adapted from https://stackoverflow.com/questions/42792939/
double s = a * a;
double r = -1.0369917389758117e-4;
r = fma (r, s, 1.9294935641298806e-3);
r = fma (r, s, -2.5806887942825395e-2);
r = fma (r, s, 2.3533063028328211e-1);
r = fma (r, s, -1.3352627688538006e+0);
r = fma (r, s, 4.0587121264167623e+0);
r = fma (r, s, -4.9348022005446790e+0);
return r*s;
}
template <typename T> void sincosm1pi0(T a_, T *POCKETFFT_RESTRICT res)
{
if (sizeof(T)>sizeof(double)) // don't have the code for long double
{
constexpr T pi = T(3.141592653589793238462643383279502884197L);
auto s = sin(pi*a_);
res[1] = s;
res[0] = (s*s)/(-sqrt((1-s)*(1+s))-1);
return;
}
res[0] = T(cosm1pi0(double(a_)));
res[1] = T(sinpi0(double(a_)));
}
template <typename T> T sinpi(T a)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding
auto i = (int64_t)r;
auto t = fma (T(-0.5), r, a);
switch (i%4)
{
case 0:
return sinpi0(t);
case 1: case -3:
return cosm1pi0(t) + T(1.);
case 2: case -2:
return T(0.) - sinpi0(t);
case 3: case -1:
return T(-1.) - cosm1pi0(t);
}
throw runtime_error("cannot happen");
}
template <typename T> T cospi(T a)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding
auto i = (int64_t)r;
auto t = fma (T(-0.5), r, a);
switch (i%4)
{
case 0:
return cosm1pi0(t) + T(1.);
case 1: case -3:
return T(0.) - sinpi0(t);
case 2: case -2:
return T(-1.) - cosm1pi0(t);
case 3: case -1:
return sinpi0(t);
}
throw runtime_error("cannot happen");
}
inline long double cospi(long double a)
{
constexpr auto pi = 3.141592653589793238462643383279502884197L;
return sizeof(long double) > sizeof(double) ? cos(a * pi) :
static_cast<long double>(cospi(static_cast<double>(a)));
}
inline long double sinpi(long double a)
{
constexpr auto pi = 3.141592653589793238462643383279502884197L;
return sizeof(long double) > sizeof(double) ? sin(a * pi) :
static_cast<long double>(sinpi(static_cast<double>(a)));
}
template <typename T> void sincospi(T a, T *POCKETFFT_RESTRICT res)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding
auto i = (int64_t)r;
auto t = fma (T(-0.5), r, a);
auto c=cosm1pi0(t)+T(1.);
auto s=sinpi0(t);
// map results according to quadrant
if (i & 2)
{
s = T(0.)-s;
c = T(0.)-c;
}
if (i & 1)
{
swap(s, c);
c = T(0.)-c;
}
res[0]=c;
res[1]=s;
}
inline void sincospi(long double a, long double *POCKETFFT_RESTRICT res)
{
if (sizeof(long double) > sizeof(double))
{
constexpr auto pi = 3.141592653589793238462643383279502884197L;
res[0] = cos(pi * a);
res[1] = sin(pi * a);
}
else
{
double sincos[2];
sincospi(static_cast<double>(a), sincos);
res[0] = static_cast<long double>(sincos[0]);
res[1] = static_cast<long double>(sincos[1]);
}
}
template<typename T> class sincos_2pibyn
{
private:
using Thigh = typename conditional<(sizeof(T)>sizeof(double)), T, double>::type;
arr<T> data;
POCKETFFT_NOINLINE void calc_first_octant(size_t den,
T * POCKETFFT_RESTRICT res)
{
size_t n = (den+4)>>3;
if (n==0) return;
res[0]=1.; res[1]=0.;
if (n==1) return;
size_t l1 = size_t(sqrt(n));
arr<Thigh> tmp(2*l1);
for (size_t i=1; i<l1; ++i)
{
sincosm1pi0(Thigh(2*i)/Thigh(den),&tmp[2*i]);
res[2*i ] = T(tmp[2*i]+1);
res[2*i+1] = T(tmp[2*i+1]);
}
size_t start=l1;
while(start<n)
{
Thigh cs[2];
sincosm1pi0((Thigh(2*start))/Thigh(den),cs);
res[2*start] = T(cs[0]+1);
res[2*start+1] = T(cs[1]);
size_t end = l1;
if (start+end>n) end = n-start;
for (size_t i=1; i<end; ++i)
{
Thigh csx[2]={tmp[2*i], tmp[2*i+1]};
res[2*(start+i)] = T(((cs[0]*csx[0] - cs[1]*csx[1] + cs[0]) + csx[0]) + 1);
res[2*(start+i)+1] = T((cs[0]*csx[1] + cs[1]*csx[0]) + cs[1] + csx[1]);
}
start += l1;
}
}
size_t N, mask, shift;
arr<cmplx<Thigh>> v1, v2;
void calc_first_quadrant(size_t n, T * POCKETFFT_RESTRICT res)
static cmplx<Thigh> calc(size_t x, size_t n, Thigh ang)
{
T * POCKETFFT_RESTRICT p = res+n;
calc_first_octant(n<<1, p);
size_t ndone=(n+2)>>2;
size_t i=0, idx1=0, idx2=2*ndone-2;
for (; i+1<ndone; i+=2, idx1+=2, idx2-=2)
x<<=3;
if (x<4*n) // first half
{
res[idx1] = p[2*i ]; res[idx1+1] = p[2*i+1];
res[idx2] = p[2*i+3]; res[idx2+1] = p[2*i+2];
if (x<2*n) // first quadrant
{
if (x<n) return cmplx<Thigh>(cos(Thigh(x)*ang), sin(Thigh(x)*ang));
return cmplx<Thigh>(sin(Thigh(2*n-x)*ang), cos(Thigh(2*n-x)*ang));
}
else // second quadrant
{
x-=2*n;
if (x<n) return cmplx<Thigh>(-sin(Thigh(x)*ang), cos(Thigh(x)*ang));
return cmplx<Thigh>(-cos(Thigh(2*n-x)*ang), sin(Thigh(2*n-x)*ang));
}
}
if (i!=ndone)
{ res[idx1] = p[2*i]; res[idx1+1] = p[2*i+1]; }
}
void calc_first_half(size_t n, T * POCKETFFT_RESTRICT res)
{
int ndone=int(n+1)>>1;
T * p = res+n-1;
calc_first_octant(n<<2, p);
int i4=0, in=int(n), i=0;
for (; i4<=in-i4; ++i, i4+=4) // octant 0
{ res[2*i] = p[2*i4]; res[2*i+1] = p[2*i4+1]; }
for (; i4-in <= 0; ++i, i4+=4) // octant 1
{ auto xm = in-i4; res[2*i] = p[2*xm+1]; res[2*i+1] = p[2*xm]; }
for (; i4<=3*in-i4; ++i, i4+=4) // octant 2
{ auto xm = i4-in; res[2*i] = -p[2*xm+1]; res[2*i+1] = p[2*xm]; }
for (; i<ndone; ++i, i4+=4) // octant 3
{ auto xm = 2*in-i4; res[2*i] = -p[2*xm]; res[2*i+1] = p[2*xm+1]; }
}
void fill_first_quadrant(size_t n, T * POCKETFFT_RESTRICT res)
{
constexpr T hsqt2 = T(0.707106781186547524400844362104849L);
size_t quart = n>>2;
if ((n&7)==0)
res[quart] = res[quart+1] = hsqt2;
for (size_t i=2, j=2*quart-2; i<quart; i+=2, j-=2)
{ res[j] = res[i+1]; res[j+1] = res[i]; }
}
POCKETFFT_NOINLINE void fill_first_half(size_t n, T * POCKETFFT_RESTRICT res)
{
size_t half = n>>1;
if ((n&3)==0)
for (size_t i=0; i<half; i+=2)
{ res[i+half] = -res[i+1]; res[i+half+1] = res[i]; }
else
for (size_t i=2, j=2*half-2; i<half; i+=2, j-=2)
{ res[j] = -res[i]; res[j+1] = res[i+1]; }
}
void fill_second_half(size_t n, T * POCKETFFT_RESTRICT res)
{
if ((n&1)==0)
for (size_t i=0; i<n; ++i)
res[i+n] = -res[i];
else
for (size_t i=2, j=2*n-2; i<n; i+=2, j-=2)
{ res[j] = res[i]; res[j+1] = -res[i+1]; }
}
POCKETFFT_NOINLINE void sincos_2pibyn_half(size_t n, T * POCKETFFT_RESTRICT res)
{
if ((n&3)==0)
{
calc_first_octant(n, res);
fill_first_quadrant(n, res);
fill_first_half(n, res);
}
else if ((n&1)==0)
{
calc_first_quadrant(n, res);
fill_first_half(n, res);
x=8*n-x;
if (x<2*n) // third quadrant
{
if (x<n) return cmplx<Thigh>(cos(Thigh(x)*ang), -sin(Thigh(x)*ang));
return cmplx<Thigh>(sin(Thigh(2*n-x)*ang), -cos(Thigh(2*n-x)*ang));
}
else // fourth quadrant
{
x-=2*n;
if (x<n) return cmplx<Thigh>(-sin(Thigh(x)*ang), -cos(Thigh(x)*ang));
return cmplx<Thigh>(-cos(Thigh(2*n-x)*ang), -sin(Thigh(2*n-x)*ang));
}
}
else
calc_first_half(n, res);
}
public:
POCKETFFT_NOINLINE sincos_2pibyn(size_t n, bool half)
: data(2*n)
{
sincos_2pibyn_half(n, data.data());
if (!half) fill_second_half(n, data.data());
POCKETFFT_NOINLINE sincos_2pibyn(size_t n)
: N(n)
{
constexpr auto pi = 3.141592653589793238462643383279502884197L;
Thigh ang = Thigh(0.25L*pi/n);
size_t nval = (n+2)/2;
shift = 1;
while((size_t(1)<<shift)*(size_t(1)<<shift) < nval) ++shift;
mask = (size_t(1)<<shift)-1;
v1.resize(mask+1);
v1[0].Set(Thigh(1), Thigh(0));
for (size_t i=1; i<v1.size(); ++i)
v1[i]=calc(i,n,ang);
v2.resize((nval+mask)/(mask+1));
v2[0].Set(Thigh(1), Thigh(0));
for (size_t i=1; i<v2.size(); ++i)
v2[i]=calc(i*(mask+1),n,ang);
}
cmplx<T> operator[](size_t idx) const
{
if (2*idx<=N)
{
auto x1=v1[idx&mask], x2=v2[idx>>shift];
return cmplx<T>(T(x1.r*x2.r-x1.i*x2.i), T(x1.r*x2.i+x1.i*x2.r));
}
idx = N-idx;
auto x1=v1[idx&mask], x2=v2[idx>>shift];
return cmplx<T>(T(x1.r*x2.r-x1.i*x2.i), -T(x1.r*x2.i+x1.i*x2.r));
}
T operator[](size_t idx) const { return data[idx]; }
const T *rdata() const { return data; }
const cmplx<T> *cdata() const
{ return reinterpret_cast<const cmplx<T> *>(data.data()); }
};
struct util // hack to avoid duplicate symbols
......@@ -940,12 +713,10 @@ template<bool fwd, typename T> void pass2 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=2;
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+2*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
......@@ -989,14 +760,13 @@ template<bool fwd, typename T> void pass3 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=3;
constexpr T0 tw1r=-0.5,
tw1i= (fwd ? -1: 1) * T0(0.8660254037844386467637231707529362L);
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+3*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
......@@ -1029,12 +799,10 @@ template<bool fwd, typename T> void pass4 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=4;
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+4*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
......@@ -1105,7 +873,6 @@ template<bool fwd, typename T> void pass5 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=5;
constexpr T0 tw1r= T0(0.3090169943749474241022934171828191L),
tw1i= (fwd ? -1: 1) * T0(0.9510565162951535721164393333793821L),
tw2r= T0(-0.8090169943749474241022934171828191L),
......@@ -1113,8 +880,8 @@ template<bool fwd, typename T> void pass5 (size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+5*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
......@@ -1177,7 +944,6 @@ template<bool fwd, typename T> void pass7(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=7;
constexpr T0 tw1r= T0(0.6234898018587335305250048840042398L),
tw1i= (fwd ? -1 : 1) * T0(0.7818314824680298087084445266740578L),
tw2r= T0(-0.2225209339563144042889025644967948L),
......@@ -1187,8 +953,8 @@ template<bool fwd, typename T> void pass7(size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+7*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
......@@ -1245,12 +1011,10 @@ template<bool fwd, typename T> void pass8 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=8;
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+8*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
......@@ -1360,7 +1124,6 @@ template<bool fwd, typename T> void pass11 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=11;
constexpr T0 tw1r= T0(0.8412535328311811688618116489193677L),
tw1i= (fwd ? -1 : 1) * T0(0.5406408174555975821076359543186917L),
tw2r= T0(0.4154150130018864255292741492296232L),
......@@ -1374,8 +1137,8 @@ template<bool fwd, typename T> void pass11 (size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+11*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
......@@ -1618,8 +1381,7 @@ template<bool fwd, typename T> void pass_all(T c[], T0 fct) const
void comp_twiddle()
{
sincos_2pibyn<T0> twid(length, false);
auto twiddle = twid.cdata();
sincos_2pibyn<T0> twiddle(length);
size_t l1=1;
size_t memofs=0;
for (size_t k=0; k<fact.size(); ++k)
......@@ -1682,13 +1444,11 @@ template<typename T> void radf2 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=2;
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+2*c)]; };
for (size_t k=0; k<l1; k++)
PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1));
......@@ -1721,14 +1481,13 @@ template<typename T> void radf3(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=3;
constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+3*c)]; };
for (size_t k=0; k<l1; k++)
{
......@@ -1761,14 +1520,13 @@ template<typename T> void radf4(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=4;
constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+4*c)]; };
for (size_t k=0; k<l1; k++)
{
......@@ -1809,7 +1567,6 @@ template<typename T> void radf5(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=5;
constexpr T0 tr11= T0(0.3090169943749474241022934171828191L),
ti11= T0(0.9510565162951535721164393333793821L),
tr12= T0(-0.8090169943749474241022934171828191L),
......@@ -1818,8 +1575,8 @@ template<typename T> void radf5(size_t ido, size_t l1,
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+5*c)]; };
for (size_t k=0; k<l1; k++)
{
......@@ -2008,11 +1765,9 @@ template<typename T> void radb2(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=2;
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+2*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
......@@ -2040,12 +1795,11 @@ template<typename T> void radb3(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const
{
constexpr size_t cdim=3;
constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+3*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };