Commit 6f26ecd1 authored by Martin Reinecke's avatar Martin Reinecke

get rid of most problematic macros

parent 732b25fa
......@@ -502,10 +502,6 @@ struct util // hack to avoid duplicate symbols
#endif
};
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define WA(x,i) wa[(i)-1+(x)*(ido-1)]
//
// complex FFTPACK transforms
//
......@@ -531,6 +527,13 @@ template<bool bwd, typename T> void pass2 (size_t ido, size_t l1,
{
constexpr size_t cdim=2;
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
if (ido==1)
for (size_t k=0; k<l1; ++k)
{
......@@ -550,18 +553,18 @@ template<bool bwd, typename T> void pass2 (size_t ido, size_t l1,
}
}
#define PREP3(idx) \
#define POCKETFFT_PREP3(idx) \
T t0 = CC(idx,0,k), t1, t2; \
PMC (t1,t2,CC(idx,1,k),CC(idx,2,k)); \
CH(idx,k,0)=t0+t1;
#define PARTSTEP3a(u1,u2,twr,twi) \
#define POCKETFFT_PARTSTEP3a(u1,u2,twr,twi) \
{ \
T ca,cb; \
ca=t0+t1*twr; \
cb=t2*twi; ROT90(cb); \
PMC(CH(0,k,u1),CH(0,k,u2),ca,cb) ;\
}
#define PARTSTEP3b(u1,u2,twr,twi) \
#define POCKETFFT_PARTSTEP3b(u1,u2,twr,twi) \
{ \
T ca,cb,da,db; \
ca=t0+t1*twr; \
......@@ -577,36 +580,50 @@ template<bool bwd, typename T> void pass3 (size_t ido, size_t l1,
constexpr T0 tw1r=-0.5,
tw1i= (bwd ? 1: -1) * T0(0.8660254037844386467637231707529362L);
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
if (ido==1)
for (size_t k=0; k<l1; ++k)
{
PREP3(0)
PARTSTEP3a(1,2,tw1r,tw1i)
POCKETFFT_PREP3(0)
POCKETFFT_PARTSTEP3a(1,2,tw1r,tw1i)
}
else
for (size_t k=0; k<l1; ++k)
{
{
PREP3(0)
PARTSTEP3a(1,2,tw1r,tw1i)
POCKETFFT_PREP3(0)
POCKETFFT_PARTSTEP3a(1,2,tw1r,tw1i)
}
for (size_t i=1; i<ido; ++i)
{
PREP3(i)
PARTSTEP3b(1,2,tw1r,tw1i)
POCKETFFT_PREP3(i)
POCKETFFT_PARTSTEP3b(1,2,tw1r,tw1i)
}
}
}
#undef PARTSTEP3b
#undef PARTSTEP3a
#undef PREP3
#undef POCKETFFT_PARTSTEP3b
#undef POCKETFFT_PARTSTEP3a
#undef POCKETFFT_PREP3
template<bool bwd, typename T> void pass4 (size_t ido, size_t l1,
const T * RESTRICT cc, T * RESTRICT ch, const cmplx<T0> * RESTRICT wa)
{
constexpr size_t cdim=4;
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
if (ido==1)
for (size_t k=0; k<l1; ++k)
{
......@@ -644,14 +661,14 @@ template<bool bwd, typename T> void pass4 (size_t ido, size_t l1,
}
}
#define PREP5(idx) \
#define POCKETFFT_PREP5(idx) \
T t0 = CC(idx,0,k), t1, t2, t3, t4; \
PMC (t1,t4,CC(idx,1,k),CC(idx,4,k)); \
PMC (t2,t3,CC(idx,2,k),CC(idx,3,k)); \
CH(idx,k,0).r=t0.r+t1.r+t2.r; \
CH(idx,k,0).i=t0.i+t1.i+t2.i;
#define PARTSTEP5a(u1,u2,twar,twbr,twai,twbi) \
#define POCKETFFT_PARTSTEP5a(u1,u2,twar,twbr,twai,twbi) \
{ \
T ca,cb; \
ca.r=t0.r+twar*t1.r+twbr*t2.r; \
......@@ -661,7 +678,7 @@ template<bool bwd, typename T> void pass4 (size_t ido, size_t l1,
PMC(CH(0,k,u1),CH(0,k,u2),ca,cb); \
}
#define PARTSTEP5b(u1,u2,twar,twbr,twai,twbi) \
#define POCKETFFT_PARTSTEP5b(u1,u2,twar,twbr,twai,twbi) \
{ \
T ca,cb,da,db; \
ca.r=t0.r+twar*t1.r+twbr*t2.r; \
......@@ -681,35 +698,42 @@ template<bool bwd, typename T> void pass5 (size_t ido, size_t l1,
tw2r= T0(-0.8090169943749474241022934171828191L),
tw2i= (bwd ? 1: -1) * T0(0.5877852522924731291687059546390728L);
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
if (ido==1)
for (size_t k=0; k<l1; ++k)
{
PREP5(0)
PARTSTEP5a(1,4,tw1r,tw2r,+tw1i,+tw2i)
PARTSTEP5a(2,3,tw2r,tw1r,+tw2i,-tw1i)
POCKETFFT_PREP5(0)
POCKETFFT_PARTSTEP5a(1,4,tw1r,tw2r,+tw1i,+tw2i)
POCKETFFT_PARTSTEP5a(2,3,tw2r,tw1r,+tw2i,-tw1i)
}
else
for (size_t k=0; k<l1; ++k)
{
{
PREP5(0)
PARTSTEP5a(1,4,tw1r,tw2r,+tw1i,+tw2i)
PARTSTEP5a(2,3,tw2r,tw1r,+tw2i,-tw1i)
POCKETFFT_PREP5(0)
POCKETFFT_PARTSTEP5a(1,4,tw1r,tw2r,+tw1i,+tw2i)
POCKETFFT_PARTSTEP5a(2,3,tw2r,tw1r,+tw2i,-tw1i)
}
for (size_t i=1; i<ido; ++i)
{
PREP5(i)
PARTSTEP5b(1,4,tw1r,tw2r,+tw1i,+tw2i)
PARTSTEP5b(2,3,tw2r,tw1r,+tw2i,-tw1i)
POCKETFFT_PREP5(i)
POCKETFFT_PARTSTEP5b(1,4,tw1r,tw2r,+tw1i,+tw2i)
POCKETFFT_PARTSTEP5b(2,3,tw2r,tw1r,+tw2i,-tw1i)
}
}
}
#undef PARTSTEP5b
#undef PARTSTEP5a
#undef PREP5
#undef POCKETFFT_PARTSTEP5b
#undef POCKETFFT_PARTSTEP5a
#undef POCKETFFT_PREP5
#define PREP7(idx) \
#define POCKETFFT_PREP7(idx) \
T t1 = CC(idx,0,k), t2, t3, t4, t5, t6, t7; \
PMC (t2,t7,CC(idx,1,k),CC(idx,6,k)); \
PMC (t3,t6,CC(idx,2,k),CC(idx,5,k)); \
......@@ -717,7 +741,7 @@ template<bool bwd, typename T> void pass5 (size_t ido, size_t l1,
CH(idx,k,0).r=t1.r+t2.r+t3.r+t4.r; \
CH(idx,k,0).i=t1.i+t2.i+t3.i+t4.i;
#define PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,out1,out2) \
#define POCKETFFT_PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,out1,out2) \
{ \
T ca,cb; \
ca.r=t1.r+x1*t2.r+x2*t3.r+x3*t4.r; \
......@@ -726,12 +750,12 @@ template<bool bwd, typename T> void pass5 (size_t ido, size_t l1,
cb.r=-(y1*t7.i y2*t6.i y3*t5.i); \
PMC(out1,out2,ca,cb); \
}
#define PARTSTEP7a(u1,u2,x1,x2,x3,y1,y2,y3) \
PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,CH(0,k,u1),CH(0,k,u2))
#define PARTSTEP7(u1,u2,x1,x2,x3,y1,y2,y3) \
#define POCKETFFT_PARTSTEP7a(u1,u2,x1,x2,x3,y1,y2,y3) \
POCKETFFT_PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,CH(0,k,u1),CH(0,k,u2))
#define POCKETFFT_PARTSTEP7(u1,u2,x1,x2,x3,y1,y2,y3) \
{ \
T da,db; \
PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,da,db) \
POCKETFFT_PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,da,db) \
CH(i,k,u1) = da.template special_mul<bwd>(WA(u1-1,i)); \
CH(i,k,u2) = db.template special_mul<bwd>(WA(u2-1,i)); \
}
......@@ -747,37 +771,44 @@ template<bool bwd, typename T> void pass7(size_t ido, size_t l1,
tw3r= T0(-0.9009688679024191262361023195074451L),
tw3i= (bwd ? 1 : -1) * T0(0.433883739117558120475768332848359L);
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
if (ido==1)
for (size_t k=0; k<l1; ++k)
{
PREP7(0)
PARTSTEP7a(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
PARTSTEP7a(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
PARTSTEP7a(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
POCKETFFT_PREP7(0)
POCKETFFT_PARTSTEP7a(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
POCKETFFT_PARTSTEP7a(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
POCKETFFT_PARTSTEP7a(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
}
else
for (size_t k=0; k<l1; ++k)
{
{
PREP7(0)
PARTSTEP7a(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
PARTSTEP7a(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
PARTSTEP7a(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
POCKETFFT_PREP7(0)
POCKETFFT_PARTSTEP7a(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
POCKETFFT_PARTSTEP7a(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
POCKETFFT_PARTSTEP7a(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
}
for (size_t i=1; i<ido; ++i)
{
PREP7(i)
PARTSTEP7(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
PARTSTEP7(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
PARTSTEP7(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
POCKETFFT_PREP7(i)
POCKETFFT_PARTSTEP7(1,6,tw1r,tw2r,tw3r,+tw1i,+tw2i,+tw3i)
POCKETFFT_PARTSTEP7(2,5,tw2r,tw3r,tw1r,+tw2i,-tw3i,-tw1i)
POCKETFFT_PARTSTEP7(3,4,tw3r,tw1r,tw2r,+tw3i,-tw1i,+tw2i)
}
}
}
#undef PARTSTEP7
#undef PARTSTEP7a0
#undef PARTSTEP7a
#undef PREP7
#undef POCKETFFT_PARTSTEP7
#undef POCKETFFT_PARTSTEP7a0
#undef POCKETFFT_PARTSTEP7a
#undef POCKETFFT_PREP7
template <bool bwd, typename T> void ROTX45(T &a)
{
......@@ -803,6 +834,13 @@ template<bool bwd, typename T> void pass8 (size_t ido, size_t l1,
{
constexpr size_t cdim=8;
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
if (ido==1)
for (size_t k=0; k<l1; ++k)
{
......@@ -880,7 +918,7 @@ template<bool bwd, typename T> void pass8 (size_t ido, size_t l1,
}
#define PREP11(idx) \
#define POCKETFFT_PREP11(idx) \
T t1 = CC(idx,0,k), t2, t3, t4, t5, t6, t7, t8, t9, t10, t11; \
PMC (t2,t11,CC(idx,1,k),CC(idx,10,k)); \
PMC (t3,t10,CC(idx,2,k),CC(idx, 9,k)); \
......@@ -923,10 +961,17 @@ template<bool bwd, typename T> void pass11 (size_t ido, size_t l1,
tw5r= T0(-0.9594929736144973898903680570663277L),
tw5i= (bwd ? 1 : -1) * T0(0.2817325568414296977114179153466169L);
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; };
if (ido==1)
for (size_t k=0; k<l1; ++k)
{
PREP11(0)
POCKETFFT_PREP11(0)
PARTSTEP11a(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
PARTSTEP11a(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
PARTSTEP11a(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
......@@ -937,7 +982,7 @@ template<bool bwd, typename T> void pass11 (size_t ido, size_t l1,
for (size_t k=0; k<l1; ++k)
{
{
PREP11(0)
POCKETFFT_PREP11(0)
PARTSTEP11a(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
PARTSTEP11a(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
PARTSTEP11a(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
......@@ -946,7 +991,7 @@ template<bool bwd, typename T> void pass11 (size_t ido, size_t l1,
}
for (size_t i=1; i<ido; ++i)
{
PREP11(i)
POCKETFFT_PREP11(i)
PARTSTEP11(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
PARTSTEP11(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
PARTSTEP11(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
......@@ -959,11 +1004,7 @@ template<bool bwd, typename T> void pass11 (size_t ido, size_t l1,
#undef PARTSTEP11
#undef PARTSTEP11a0
#undef PARTSTEP11a
#undef PREP11
#define CX(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define CX2(a,b) cc[(a)+idl1*(b)]
#define CH2(a,b) ch[(a)+idl1*(b)]
#undef POCKETFFT_PREP11
template<bool bwd, typename T> void passg (size_t ido, size_t ip,
size_t l1, T * RESTRICT cc, T * RESTRICT ch, const cmplx<T0> * RESTRICT wa,
......@@ -973,6 +1014,17 @@ template<bool bwd, typename T> void passg (size_t ido, size_t ip,
size_t ipph = (ip+1)/2;
size_t idl1 = ido*l1;
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CX = [cc, ido, l1](size_t a, size_t b, size_t c) -> T&
{ return cc[a+ido*(b+l1*c)]; };
auto CX2 = [cc, idl1](size_t a, size_t b) -> T&
{ return cc[a+idl1*b]; };
auto CH2 = [ch, idl1](size_t a, size_t b) -> const T&
{ return ch[a+idl1*b]; };
arr<cmplx<T0>> wal(ip);
wal[0] = cmplx<T0>(1., 0.);
for (size_t i=1; i<ip; ++i)
......@@ -1062,10 +1114,6 @@ template<bool bwd, typename T> void passg (size_t ido, size_t ip,
}
}
#undef CH2
#undef CX2
#undef CX
template<bool bwd, typename T> void pass_all(T c[], T0 fct)
{
if (length==1) { c[0]*=fct; return; }
......@@ -1114,10 +1162,6 @@ template<bool bwd, typename T> void pass_all(T c[], T0 fct)
c[i] *= fct;
}
#undef WA
#undef CC
#undef CH
public:
template<typename T> void forward(T c[], T0 fct)
{ pass_all<false>(c, fct); }
......@@ -1220,7 +1264,6 @@ template<typename T0> class rfftp
void add_factor(size_t factor)
{ fact.push_back({factor, nullptr, nullptr}); }
#define WA(x,i) wa[(i)+(x)*(ido-1)]
template<typename T> inline void PM(T &a, T &b, T c, T d)
{ a=c+d; b=c-d; }
......@@ -1229,14 +1272,17 @@ template<typename T1, typename T2, typename T3> inline void MULPM
(T1 &a, T1 &b, T2 c, T2 d, T3 e, T3 f)
{ a=c*e+d*f; b=c*f-d*e; }
#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
template<typename T> void radf2 (size_t ido, size_t l1,
const T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa)
{
constexpr size_t cdim=2;
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; };
for (size_t k=0; k<l1; k++)
PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1));
if ((ido&1)==0)
......@@ -1258,7 +1304,7 @@ template<typename T> void radf2 (size_t ido, size_t l1,
}
// a2=a+b; b2=i*(b-a);
#define REARRANGE(rx, ix, ry, iy) \
#define POCKETFFT_REARRANGE(rx, ix, ry, iy) \
{\
auto t1=rx+ry, t2=ry-rx, t3=ix+iy, t4=ix-iy; \
rx=t1; ix=t3; ry=t4; iy=t2; \
......@@ -1270,6 +1316,12 @@ template<typename T> void radf3(size_t ido, size_t l1,
constexpr size_t cdim=3;
constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; };
for (size_t k=0; k<l1; k++)
{
T cr2=CC(0,k,1)+CC(0,k,2);
......@@ -1285,7 +1337,7 @@ template<typename T> void radf3(size_t ido, size_t l1,
T di2, di3, dr2, dr3;
MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1)); // d2=conj(WA0)*CC1
MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2)); // d3=conj(WA1)*CC2
REARRANGE(dr2, di2, dr3, di3);
POCKETFFT_REARRANGE(dr2, di2, dr3, di3);
CH(i-1,0,k) = CC(i-1,k,0)+dr2; // c add
CH(i ,0,k) = CC(i ,k,0)+di2;
T tr2 = CC(i-1,k,0)+taur*dr2; // c add
......@@ -1303,6 +1355,12 @@ template<typename T> void radf4(size_t ido, size_t l1,
constexpr size_t cdim=4;
constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; };
for (size_t k=0; k<l1; k++)
{
T tr1,tr2;
......@@ -1347,6 +1405,12 @@ template<typename T> void radf5(size_t ido, size_t l1,
tr12= T0(-0.8090169943749474241022934171828191L),
ti12= T0(0.5877852522924731291687059546390728L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; };
for (size_t k=0; k<l1; k++)
{
T cr2, cr3, ci4, ci5;
......@@ -1367,8 +1431,8 @@ template<typename T> void radf5(size_t ido, size_t l1,
MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2));
MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3));
MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4));
REARRANGE(dr2, di2, dr5, di5);
REARRANGE(dr3, di3, dr4, di4);
POCKETFFT_REARRANGE(dr2, di2, dr5, di5);
POCKETFFT_REARRANGE(dr3, di3, dr4, di4);
CH(i-1,0,k)=CC(i-1,k,0)+dr2+dr3;
CH(i ,0,k)=CC(i ,k,0)+di2+di3;
T tr2=CC(i-1,k,0)+tr11*dr2+tr12*dr3;
......@@ -1386,13 +1450,8 @@ template<typename T> void radf5(size_t ido, size_t l1,
}
}
#undef CC
#undef CH
#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define C2(a,b) cc[(a)+idl1*(b)]
#define CH2(a,b) ch[(a)+idl1*(b)]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#undef POCKETFFT_REARRANGE
template<typename T> void radfg(size_t ido, size_t ip, size_t l1,
T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa,
const T0 * RESTRICT csarr)
......@@ -1401,6 +1460,17 @@ template<typename T> void radfg(size_t ido, size_t ip, size_t l1,
size_t ipph=(ip+1)/2;
size_t idl1 = ido*l1;
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return ch[a+ido*(b+l1*c)]; };
auto C1 = [cc,ido,l1] (size_t a, size_t b, size_t c) -> T&
{ return cc[a+ido*(b+l1*c)]; };
auto C2 = [cc,idl1] (size_t a, size_t b) -> T&
{ return cc[a+idl1*b]; };
auto CH2 = [ch,idl1] (size_t a, size_t b) -> T&
{ return ch[a+idl1*b]; };
if (ido>1)
{
for (size_t j=1, jc=ip-1; j<ipph; ++j,--jc) // 114
......@@ -1529,20 +1599,18 @@ template<typename T> void radfg(size_t ido, size_t ip, size_t l1,
}
}
}
#undef C1
#undef C2
#undef CH2
#undef CH
#undef CC
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
template<typename T> void radb2(size_t ido, size_t l1, const T * RESTRICT cc,
T * RESTRICT ch, const T0 * RESTRICT wa)
{
constexpr size_t cdim=2;
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
for (size_t k=0; k<l1; k++)
PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k));
if ((ido&1)==0)
......@@ -1569,6 +1637,12 @@ template<typename T> void radb3(size_t ido, size_t l1,
constexpr size_t cdim=3;
constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
for (size_t k=0; k<l1; k++)
{
T tr2=2*CC(ido-1,1,k);
......@@ -1603,6 +1677,12 @@ template<typename T> void radb4(size_t ido, size_t l1,
constexpr size_t cdim=4;
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
for (size_t k=0; k<l1; k++)
{
T tr1, tr2;
......@@ -1652,6 +1732,12 @@ template<typename T> void radb5(size_t ido, size_t l1,
tr12= T0(-0.8090169943749474241022934171828191L),
ti12= T0(0.5877852522924731291687059546390728L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
for (size_t k=0; k<l1; k++)
{
T ti5=CC(0,2,k)+CC(0,2,k);
......@@ -1696,14 +1782,6 @@ template<typename T> void radb5(size_t ido, size_t l1,
}
}
#undef CH
#undef CC
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define C2(a,b) cc[(a)+idl1*(b)]
#define CH2(a,b) ch[(a)+idl1*(b)]
template<typename T> void radbg(size_t ido, size_t ip, size_t l1,
T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa,
const T0 * RESTRICT csarr)
......@@ -1712,6 +1790,17 @@ template<typename T> void radbg(size_t ido, size_t ip, size_t l1,
size_t ipph=(ip+1)/ 2;
size_t idl1 = ido*l1;
auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; };
auto C1 = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; };
auto C2 = [cc,idl1](size_t a, size_t b) -> T&
{ return cc[a+idl1*b]; };
auto CH2 = [ch,idl1](size_t a, size_t b) -> T&
{ return ch[a+idl1*b]; };