Commit 8a1d38e4 authored by Martin Reinecke

make code a bit more compact

parent fb991029
...@@ -595,7 +595,7 @@ template<typename T0> class cfftp ...@@ -595,7 +595,7 @@ template<typename T0> class cfftp
template<bool fwd, typename T> void pass2 (size_t ido, size_t l1, template<bool fwd, typename T> void pass2 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=2; constexpr size_t cdim=2;
...@@ -644,7 +644,7 @@ template<bool fwd, typename T> void pass2 (size_t ido, size_t l1, ...@@ -644,7 +644,7 @@ template<bool fwd, typename T> void pass2 (size_t ido, size_t l1,
} }
template<bool fwd, typename T> void pass3 (size_t ido, size_t l1, template<bool fwd, typename T> void pass3 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=3; constexpr size_t cdim=3;
constexpr T0 tw1r=-0.5, constexpr T0 tw1r=-0.5,
...@@ -684,7 +684,7 @@ template<bool fwd, typename T> void pass3 (size_t ido, size_t l1, ...@@ -684,7 +684,7 @@ template<bool fwd, typename T> void pass3 (size_t ido, size_t l1,
template<bool fwd, typename T> void pass4 (size_t ido, size_t l1, template<bool fwd, typename T> void pass4 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=4; constexpr size_t cdim=4;
...@@ -760,7 +760,7 @@ template<bool fwd, typename T> void pass4 (size_t ido, size_t l1, ...@@ -760,7 +760,7 @@ template<bool fwd, typename T> void pass4 (size_t ido, size_t l1,
} }
template<bool fwd, typename T> void pass5 (size_t ido, size_t l1, template<bool fwd, typename T> void pass5 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=5; constexpr size_t cdim=5;
constexpr T0 tw1r= T0(0.3090169943749474241022934171828191L), constexpr T0 tw1r= T0(0.3090169943749474241022934171828191L),
...@@ -832,7 +832,7 @@ template<bool fwd, typename T> void pass5 (size_t ido, size_t l1, ...@@ -832,7 +832,7 @@ template<bool fwd, typename T> void pass5 (size_t ido, size_t l1,
template<bool fwd, typename T> void pass7(size_t ido, size_t l1, template<bool fwd, typename T> void pass7(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=7; constexpr size_t cdim=7;
constexpr T0 tw1r= T0(0.6234898018587335305250048840042398L), constexpr T0 tw1r= T0(0.6234898018587335305250048840042398L),
...@@ -881,7 +881,7 @@ template<bool fwd, typename T> void pass7(size_t ido, size_t l1, ...@@ -881,7 +881,7 @@ template<bool fwd, typename T> void pass7(size_t ido, size_t l1,
#undef POCKETFFT_PARTSTEP7a #undef POCKETFFT_PARTSTEP7a
#undef POCKETFFT_PREP7 #undef POCKETFFT_PREP7
template <bool fwd, typename T> void ROTX45(T &a) template <bool fwd, typename T> void ROTX45(T &a) const
{ {
constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L); constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L);
if (fwd) if (fwd)
...@@ -889,7 +889,7 @@ template <bool fwd, typename T> void ROTX45(T &a) ...@@ -889,7 +889,7 @@ template <bool fwd, typename T> void ROTX45(T &a)
else else
{ auto tmp_=a.r; a.r=hsqt2*(a.r-a.i); a.i=hsqt2*(a.i+tmp_); } { auto tmp_=a.r; a.r=hsqt2*(a.r-a.i); a.i=hsqt2*(a.i+tmp_); }
} }
template <bool fwd, typename T> void ROTX135(T &a) template <bool fwd, typename T> void ROTX135(T &a) const
{ {
constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L); constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L);
if (fwd) if (fwd)
...@@ -900,7 +900,7 @@ template <bool fwd, typename T> void ROTX135(T &a) ...@@ -900,7 +900,7 @@ template <bool fwd, typename T> void ROTX135(T &a)
template<bool fwd, typename T> void pass8 (size_t ido, size_t l1, template<bool fwd, typename T> void pass8 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=8; constexpr size_t cdim=8;
...@@ -1015,7 +1015,7 @@ template<bool fwd, typename T> void pass8 (size_t ido, size_t l1, ...@@ -1015,7 +1015,7 @@ template<bool fwd, typename T> void pass8 (size_t ido, size_t l1,
template<bool fwd, typename T> void pass11 (size_t ido, size_t l1, template<bool fwd, typename T> void pass11 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa) const cmplx<T0> * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=11; constexpr size_t cdim=11;
constexpr T0 tw1r= T0(0.8412535328311811688618116489193677L), constexpr T0 tw1r= T0(0.8412535328311811688618116489193677L),
...@@ -1077,7 +1077,7 @@ template<bool fwd, typename T> void pass11 (size_t ido, size_t l1, ...@@ -1077,7 +1077,7 @@ template<bool fwd, typename T> void pass11 (size_t ido, size_t l1,
template<bool fwd, typename T> void passg (size_t ido, size_t ip, template<bool fwd, typename T> void passg (size_t ido, size_t ip,
size_t l1, T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, size_t l1, T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const cmplx<T0> * POCKETFFT_RESTRICT wa, const cmplx<T0> * POCKETFFT_RESTRICT wa,
const cmplx<T0> * POCKETFFT_RESTRICT csarr) const cmplx<T0> * POCKETFFT_RESTRICT csarr) const
{ {
const size_t cdim=ip; const size_t cdim=ip;
size_t ipph = (ip+1)/2; size_t ipph = (ip+1)/2;
...@@ -1183,7 +1183,7 @@ template<bool fwd, typename T> void passg (size_t ido, size_t ip, ...@@ -1183,7 +1183,7 @@ template<bool fwd, typename T> void passg (size_t ido, size_t ip,
} }
} }
template<bool fwd, typename T> void pass_all(T c[], T0 fct) template<bool fwd, typename T> void pass_all(T c[], T0 fct) const
{ {
if (length==1) { c[0]*=fct; return; } if (length==1) { c[0]*=fct; return; }
size_t l1=1; size_t l1=1;
...@@ -1232,11 +1232,8 @@ template<bool fwd, typename T> void pass_all(T c[], T0 fct) ...@@ -1232,11 +1232,8 @@ template<bool fwd, typename T> void pass_all(T c[], T0 fct)
} }
public: public:
template<typename T> void forward(T c[], T0 fct) template<typename T> void exec(T c[], T0 fct, bool fwd) const
{ pass_all<true>(c, fct); } { fwd ? pass_all<true>(c, fct) : pass_all<false>(c, fct); }
template<typename T> void backward(T c[], T0 fct)
{ pass_all<false>(c, fct); }
private: private:
POCKETFFT_NOINLINE void factorize() POCKETFFT_NOINLINE void factorize()
...@@ -1335,12 +1332,12 @@ template<typename T0> class rfftp ...@@ -1335,12 +1332,12 @@ template<typename T0> class rfftp
/* (a+ib) = conj(c+id) * (e+if) */ /* (a+ib) = conj(c+id) * (e+if) */
template<typename T1, typename T2, typename T3> inline void MULPM template<typename T1, typename T2, typename T3> inline void MULPM
(T1 &a, T1 &b, T2 c, T2 d, T3 e, T3 f) (T1 &a, T1 &b, T2 c, T2 d, T3 e, T3 f) const
{ a=c*e+d*f; b=c*f-d*e; } { a=c*e+d*f; b=c*f-d*e; }
template<typename T> void radf2 (size_t ido, size_t l1, template<typename T> void radf2 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const T0 * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=2; constexpr size_t cdim=2;
...@@ -1379,7 +1376,7 @@ template<typename T> void radf2 (size_t ido, size_t l1, ...@@ -1379,7 +1376,7 @@ template<typename T> void radf2 (size_t ido, size_t l1,
template<typename T> void radf3(size_t ido, size_t l1, template<typename T> void radf3(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const T0 * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=3; constexpr size_t cdim=3;
constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L); constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
...@@ -1419,7 +1416,7 @@ template<typename T> void radf3(size_t ido, size_t l1, ...@@ -1419,7 +1416,7 @@ template<typename T> void radf3(size_t ido, size_t l1,
template<typename T> void radf4(size_t ido, size_t l1, template<typename T> void radf4(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const T0 * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=4; constexpr size_t cdim=4;
constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L); constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L);
...@@ -1467,7 +1464,7 @@ template<typename T> void radf4(size_t ido, size_t l1, ...@@ -1467,7 +1464,7 @@ template<typename T> void radf4(size_t ido, size_t l1,
template<typename T> void radf5(size_t ido, size_t l1, template<typename T> void radf5(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const T0 * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=5; constexpr size_t cdim=5;
constexpr T0 tr11= T0(0.3090169943749474241022934171828191L), constexpr T0 tr11= T0(0.3090169943749474241022934171828191L),
...@@ -1524,7 +1521,7 @@ template<typename T> void radf5(size_t ido, size_t l1, ...@@ -1524,7 +1521,7 @@ template<typename T> void radf5(size_t ido, size_t l1,
template<typename T> void radfg(size_t ido, size_t ip, size_t l1, template<typename T> void radfg(size_t ido, size_t ip, size_t l1,
T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa, const T0 * POCKETFFT_RESTRICT csarr) const T0 * POCKETFFT_RESTRICT wa, const T0 * POCKETFFT_RESTRICT csarr) const
{ {
const size_t cdim=ip; const size_t cdim=ip;
size_t ipph=(ip+1)/2; size_t ipph=(ip+1)/2;
...@@ -1666,7 +1663,7 @@ template<typename T> void radfg(size_t ido, size_t ip, size_t l1, ...@@ -1666,7 +1663,7 @@ template<typename T> void radfg(size_t ido, size_t ip, size_t l1,
template<typename T> void radb2(size_t ido, size_t l1, template<typename T> void radb2(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const T0 * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=2; constexpr size_t cdim=2;
...@@ -1698,7 +1695,7 @@ template<typename T> void radb2(size_t ido, size_t l1, ...@@ -1698,7 +1695,7 @@ template<typename T> void radb2(size_t ido, size_t l1,
template<typename T> void radb3(size_t ido, size_t l1, template<typename T> void radb3(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const T0 * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=3; constexpr size_t cdim=3;
constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L); constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
...@@ -1739,7 +1736,7 @@ template<typename T> void radb3(size_t ido, size_t l1, ...@@ -1739,7 +1736,7 @@ template<typename T> void radb3(size_t ido, size_t l1,
template<typename T> void radb4(size_t ido, size_t l1, template<typename T> void radb4(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const T0 * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=4; constexpr size_t cdim=4;
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L); constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
...@@ -1792,7 +1789,7 @@ template<typename T> void radb4(size_t ido, size_t l1, ...@@ -1792,7 +1789,7 @@ template<typename T> void radb4(size_t ido, size_t l1,
template<typename T> void radb5(size_t ido, size_t l1, template<typename T> void radb5(size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa) const T0 * POCKETFFT_RESTRICT wa) const
{ {
constexpr size_t cdim=5; constexpr size_t cdim=5;
constexpr T0 tr11= T0(0.3090169943749474241022934171828191L), constexpr T0 tr11= T0(0.3090169943749474241022934171828191L),
...@@ -1852,7 +1849,7 @@ template<typename T> void radb5(size_t ido, size_t l1, ...@@ -1852,7 +1849,7 @@ template<typename T> void radb5(size_t ido, size_t l1,
template<typename T> void radbg(size_t ido, size_t ip, size_t l1, template<typename T> void radbg(size_t ido, size_t ip, size_t l1,
T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
const T0 * POCKETFFT_RESTRICT wa, const T0 * POCKETFFT_RESTRICT csarr) const T0 * POCKETFFT_RESTRICT wa, const T0 * POCKETFFT_RESTRICT csarr) const
{ {
const size_t cdim=ip; const size_t cdim=ip;
size_t ipph=(ip+1)/ 2; size_t ipph=(ip+1)/ 2;
...@@ -1985,7 +1982,7 @@ template<typename T> void radbg(size_t ido, size_t ip, size_t l1, ...@@ -1985,7 +1982,7 @@ template<typename T> void radbg(size_t ido, size_t ip, size_t l1,
} }
} }
template<typename T> void copy_and_norm(T *c, T *p1, size_t n, T0 fct) template<typename T> void copy_and_norm(T *c, T *p1, size_t n, T0 fct) const
{ {
if (p1!=c) if (p1!=c)
{ {
...@@ -2002,60 +1999,51 @@ template<typename T> void radbg(size_t ido, size_t ip, size_t l1, ...@@ -2002,60 +1999,51 @@ template<typename T> void radbg(size_t ido, size_t ip, size_t l1,
} }
public: public:
template<typename T> void forward(T c[], T0 fct) template<typename T> void exec(T c[], T0 fct, bool r2hc) const
{ {
if (length==1) { c[0]*=fct; return; } if (length==1) { c[0]*=fct; return; }
size_t n=length; size_t n=length, nf=fact.size();
size_t l1=n, nf=fact.size();
arr<T> ch(n); arr<T> ch(n);
T *p1=c, *p2=ch.data(); T *p1=c, *p2=ch.data();
for(size_t k1=0; k1<nf;++k1) if (r2hc)
{ for(size_t k1=0, l1=n; k1<nf;++k1)
size_t k=nf-k1-1; {
size_t ip=fact[k].fct; size_t k=nf-k1-1;
size_t ido=n / l1; size_t ip=fact[k].fct;
l1 /= ip; size_t ido=n / l1;
if(ip==4) l1 /= ip;
radf4(ido, l1, p1, p2, fact[k].tw); if(ip==4)
else if(ip==2) radf4(ido, l1, p1, p2, fact[k].tw);
radf2(ido, l1, p1, p2, fact[k].tw); else if(ip==2)
else if(ip==3) radf2(ido, l1, p1, p2, fact[k].tw);
radf3(ido, l1, p1, p2, fact[k].tw); else if(ip==3)
else if(ip==5) radf3(ido, l1, p1, p2, fact[k].tw);
radf5(ido, l1, p1, p2, fact[k].tw); else if(ip==5)
else radf5(ido, l1, p1, p2, fact[k].tw);
{ radfg(ido, ip, l1, p1, p2, fact[k].tw, fact[k].tws); swap (p1,p2); } else
swap (p1,p2); { radfg(ido, ip, l1, p1, p2, fact[k].tw, fact[k].tws); swap (p1,p2); }
} swap (p1,p2);
copy_and_norm(c,p1,n,fct); }
} else
for(size_t k=0, l1=1; k<nf; k++)
template<typename T> void backward(T c[], T0 fct) {
{ size_t ip = fact[k].fct,
if (length==1) { c[0]*=fct; return; } ido= n/(ip*l1);
size_t n=length; if(ip==4)
size_t l1=1, nf=fact.size(); radb4(ido, l1, p1, p2, fact[k].tw);
arr<T> ch(n); else if(ip==2)
T *p1=c, *p2=ch.data(); radb2(ido, l1, p1, p2, fact[k].tw);
else if(ip==3)
radb3(ido, l1, p1, p2, fact[k].tw);
else if(ip==5)
radb5(ido, l1, p1, p2, fact[k].tw);
else
radbg(ido, ip, l1, p1, p2, fact[k].tw, fact[k].tws);
swap (p1,p2);
l1*=ip;
}
for(size_t k=0; k<nf; k++)
{
size_t ip = fact[k].fct,
ido= n/(ip*l1);
if(ip==4)
radb4(ido, l1, p1, p2, fact[k].tw);
else if(ip==2)
radb2(ido, l1, p1, p2, fact[k].tw);
else if(ip==3)
radb3(ido, l1, p1, p2, fact[k].tw);
else if(ip==5)
radb5(ido, l1, p1, p2, fact[k].tw);
else
radbg(ido, ip, l1, p1, p2, fact[k].tw, fact[k].tws);
swap (p1,p2);
l1*=ip;
}
copy_and_norm(c,p1,n,fct); copy_and_norm(c,p1,n,fct);
} }
...@@ -2153,7 +2141,7 @@ template<typename T0> class fftblue ...@@ -2153,7 +2141,7 @@ template<typename T0> class fftblue
arr<cmplx<T0>> mem; arr<cmplx<T0>> mem;
cmplx<T0> *bk, *bkf; cmplx<T0> *bk, *bkf;
template<bool fwd, typename T> void fft(cmplx<T> c[], T0 fct) template<bool fwd, typename T> void fft(cmplx<T> c[], T0 fct) const
{ {
arr<cmplx<T>> akf(n2); arr<cmplx<T>> akf(n2);
...@@ -2164,14 +2152,14 @@ template<typename T0> class fftblue ...@@ -2164,14 +2152,14 @@ template<typename T0> class fftblue
for (size_t m=n; m<n2; ++m) for (size_t m=n; m<n2; ++m)
akf[m]=zero; akf[m]=zero;
plan.forward (akf.data(),1.); plan.exec (akf.data(),1.,true);
/* do the convolution */ /* do the convolution */
for (size_t m=0; m<n2; ++m) for (size_t m=0; m<n2; ++m)
akf[m] = akf[m].template special_mul<!fwd>(bkf[m]); akf[m] = akf[m].template special_mul<!fwd>(bkf[m]);
/* inverse FFT */ /* inverse FFT */
plan.backward (akf.data(),1.); plan.exec (akf.data(),1.,false);
/* multiply by b_k */ /* multiply by b_k */
for (size_t m=0; m<n; ++m) for (size_t m=0; m<n; ++m)
...@@ -2203,40 +2191,38 @@ template<typename T0> class fftblue ...@@ -2203,40 +2191,38 @@ template<typename T0> class fftblue
bkf[m] = bkf[n2-m] = bk[m]*xn2; bkf[m] = bkf[n2-m] = bk[m]*xn2;
for (size_t m=n;m<=(n2-n);++m) for (size_t m=n;m<=(n2-n);++m)
bkf[m].Set(0.,0.); bkf[m].Set(0.,0.);
plan.forward(bkf,1.); plan.exec(bkf,1.,true);
} }
template<typename T> void backward(cmplx<T> c[], T0 fct) template<typename T> void exec(cmplx<T> c[], T0 fct, bool fwd) const
{ fft<false>(c,fct); } { fwd ? fft<true>(c,fct) : fft<false>(c,fct); }
template<typename T> void forward(cmplx<T> c[], T0 fct) template<typename T> void exec_r(T c[], T0 fct, bool fwd)
{ fft<true>(c,fct); }
template<typename T> void backward_r(T c[], T0 fct)
{
arr<cmplx<T>> tmp(n);
tmp[0].Set(c[0],c[0]*0);
memcpy (reinterpret_cast<void *>(tmp.data()+1),
reinterpret_cast<void *>(c+1), (n-1)*sizeof(T));
if ((n&1)==0) tmp[n/2].i=T0(0)*c[0];
for (size_t m=1; 2*m<n; ++m)
tmp[n-m].Set(tmp[m].r, -tmp[m].i);
fft<false>(tmp.data(),fct);
for (size_t m=0; m<n; ++m)
c[m] = tmp[m].r;
}
template<typename T> void forward_r(T c[], T0 fct)
{ {
arr<cmplx<T>> tmp(n); arr<cmplx<T>> tmp(n);
auto zero = T0(0)*c[0]; if (fwd)
for (size_t m=0; m<n; ++m) {
tmp[m].Set(c[m], zero); auto zero = T0(0)*c[0];
fft<true>(tmp.data(),fct); for (size_t m=0; m<n; ++m)
c[0] = tmp[0].r; tmp[m].Set(c[m], zero);
memcpy (c+1, tmp.data()+1, (n-1)*sizeof(T)); fft<true>(tmp.data(),fct);
c[0] = tmp[0].r;
memcpy (c+1, tmp.data()+1, (n-1)*sizeof(T));
}
else
{
tmp[0].Set(c[0],c[0]*0);
memcpy (reinterpret_cast<void *>(tmp.data()+1),
reinterpret_cast<void *>(c+1), (n-1)*sizeof(T));
if ((n&1)==0) tmp[n/2].i=T0(0)*c[0];
for (size_t m=1; 2*m<n; ++m)
tmp[n-m].Set(tmp[m].r, -tmp[m].i);
fft<false>(tmp.data(),fct);
for (size_t m=0; m<n; ++m)
c[m] = tmp[m].r;
}
} }
}; };
// //
// flexible (FFTPACK/Bluestein) complex 1D transform // flexible (FFTPACK/Bluestein) complex 1D transform
...@@ -2269,11 +2255,8 @@ template<typename T0> class pocketfft_c ...@@ -2269,11 +2255,8 @@ template<typename T0> class pocketfft_c
packplan=unique_ptr<cfftp<T0>>(new cfftp<T0>(length)); packplan=unique_ptr<cfftp<T0>>(new cfftp<T0>(length));
} }
template<typename T> POCKETFFT_NOINLINE void backward(cmplx<T> c[], T0 fct) const template<typename T> POCKETFFT_NOINLINE void exec(cmplx<T> c[], T0 fct, bool fwd) const
{ packplan ? packplan->backward(c,fct) : blueplan->backward(c,fct); } { packplan ? packplan->exec(c,fct,fwd) : blueplan->exec(c,fct,fwd); }
template<typename T> POCKETFFT_NOINLINE void forward(cmplx<T> c[], T0 fct) const
{ packplan ? packplan->forward(c,fct) : blueplan->forward(c,fct); }
size_t length() const { return len; } size_t length() const { return len; }
}; };
...@@ -2309,17 +2292,8 @@ template<typename T0> class pocketfft_r ...@@ -2309,17 +2292,8 @@ template<typename T0> class pocketfft_r
packplan=unique_ptr<rfftp<T0>>(new rfftp<T0>(length)); packplan=unique_ptr<rfftp<T0>>(new rfftp<T0>(length));
} }
template<typename T> POCKETFFT_NOINLINE void backward(T c[], T0 fct) const template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool fwd) const
{ { packplan ? packplan->exec(c,fct,fwd) : blueplan->exec_r(c,fct,fwd); }
packplan ? packplan->backward(c,fct)
: blueplan->backward_r(c,fct);
}
template<typename T> POCKETFFT_NOINLINE void forward(T c[], T0 fct) const
{
packplan ? packplan->forward(c,fct)
: blueplan->forward_r(c,fct);
}
size_t length() const { return len; } size_t length() const { return len; }
}; };
...@@ -2349,7 +2323,7 @@ template<typename T0> class T_dct1 ...@@ -2349,7 +2323,7 @@ template<typename T0> class T_dct1
tmp[0] = c[0]; tmp[0] = c[0];
for (size_t i=1; i<n; ++i) for (size_t i=1; i<n; ++i)
tmp[i] = tmp[N-i] = c[i]; tmp[i] = tmp[N-i] = c[i];
fftplan.forward(tmp.data(), fct); fftplan.exec(tmp.data(), fct, true);
c[0] = tmp[0]; c[0] = tmp[0];
for (size_t i=1; i<n; ++i) for (size_t i=1; i<n; ++i)
c[i] = tmp[2*i-1]; c[i] = tmp[2*i-1];
...@@ -2377,7 +2351,7 @@ template<typename T0> class T_dst1 ...@@ -2377,7 +2351,7 @@ template<typename T0> class T_dst1
tmp[0] = tmp[n+1] = c[0]*0; tmp[0] = tmp[n+1] = c[0]*0;
for (size_t i=0; i<n; ++i) for (size_t i=0; i<n; ++i)
{ tmp[i+1]=c[i]; tmp[N-1-i]=-c[i]; } { tmp[i+1]=c[i]; tmp[N-1-i]=-c[i]; }
fftplan.forward(tmp.data(), fct); fftplan.exec(tmp.data(), fct, true);
for (size_t i=0; i<n; ++i) for (size_t i=0; i<n; ++i)
c[i] = -tmp[2*i+2]; c[i] = -tmp[2*i+2];
} }
...@@ -2415,7 +2389,7 @@ template<typename T0> class T_dcst23 ...@@ -2415,7 +2389,7 @@ template<typename T0> class T_dcst23
if ((N&1)==0) c[N-1]*=2; if ((N&1)==0) c[N-1]*=2;
for (size_t k=1; k<N-1; k+=2) for (size_t k=1; k<N-1; k+=2)
MPINPLACE(c[k+1], c[k]); MPINPLACE(c[k+1], c[k]);
fftplan.backward(c, fct); fftplan.exec(c, fct, false);
for (size_t k=1, kc=N-1; k<NS2; ++k, --kc) for (size_t k=1, kc=N-1; k<NS2; ++k, --kc)