Commit 88f64fcf authored by Martin Reinecke's avatar Martin Reinecke

more compactification

parent c9dc81da
......@@ -209,13 +209,19 @@ template<typename T> struct cmplx {
r = tmp;
return *this;
}
cmplx operator+ (const cmplx &other) const
{ return cmplx(r+other.r, i+other.i); }
cmplx operator- (const cmplx &other) const
{ return cmplx(r-other.r, i-other.i); }
template<typename T2>cmplx &operator+= (const cmplx<T2> &other)
{ r+=other.r; i+=other.i; return *this; }
template<typename T2>cmplx &operator-= (const cmplx<T2> &other)
{ r-=other.r; i-=other.i; return *this; }
template<typename T2> auto operator* (const T2 &other) const
-> cmplx<decltype(r*other)>
{ return {r*other, i*other}; }
template<typename T2> auto operator+ (const cmplx<T2> &other) const
-> cmplx<decltype(r+other.r)>
{ return {r+other.r, i+other.i}; }
template<typename T2> auto operator- (const cmplx<T2> &other) const
-> cmplx<decltype(r+other.r)>
{ return {r-other.r, i-other.i}; }
template<typename T2> auto operator* (const cmplx<T2> &other) const
-> cmplx<decltype(r+other.r)>
{ return {r*other.r-i*other.i, r*other.i + i*other.r}; }
......@@ -227,8 +233,9 @@ template<typename T> struct cmplx {
: Tres(r*other.r-i*other.i, r*other.i+i*other.r);
}
};
template<typename T> void PMC(cmplx<T> &a, cmplx<T> &b,
const cmplx<T> &c, const cmplx<T> &d)
template<typename T> inline void PMINPLACE(T &a, T &b)
{ T t = a; a+=b; b=t-b; }
template<typename T> void PMC(T &a, T &b, const T &c, const T &d)
{ a = c+d; b = c-d; }
template<typename T> cmplx<T> conj(const cmplx<T> &a)
{ return {a.r, -a.i}; }
......@@ -845,8 +852,6 @@ template <bool fwd, typename T> void ROTX135(T &a)
else
{ auto tmp_=a.r; a.r=hsqt2*(-a.r-a.i); a.i=hsqt2*(tmp_-a.i); }
}
template<typename T> inline void PMINPLACE(T &a, T &b)
{ T t = a; a.r+=b.r; a.i+=b.i; b.r=t.r-b.r; b.i=t.i-b.i; }
template<bool fwd, typename T> void pass8 (size_t ido, size_t l1,
const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch,
......@@ -2359,12 +2364,15 @@ template<typename T0> class T_dcst23
pocketfft_r<T0> fftplan;
vector<T0> twiddle;
template<typename T> POCKETFFT_NOINLINE void exec2c(T c[], T0 fct,
bool ortho) const
template<typename T> POCKETFFT_NOINLINE void exec2(T c[], T0 fct,
bool ortho, bool cosine) const
{
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
size_t N=length();
size_t NS2 = (N+1)/2;
if (!cosine)
for (size_t k=1; k<N; k+=2)
c[k] = -c[k];
for (size_t i=2; i<N; i+=2)
{
T xim1 = T0(0.5)*(c[i-1]+c[i]);
......@@ -2381,28 +2389,26 @@ template<typename T0> class T_dcst23
if ((N&1)==0)
c[NS2] = twiddle[NS2-1]*(c[NS2]+c[NS2]);
for (size_t k=1, kc=N-1; k<NS2; ++k, --kc)
{
T tmp = c[k]+c[kc];
c[kc] = c[k]-c[kc];
c[k] = tmp;
}
PMINPLACE(c[k], c[kc]);
c[0] *= 2;
if (!cosine)
for (size_t k=0, kc=N-1; k<kc; ++k, --kc)
swap(c[k], c[kc]);
if (ortho) c[0]/=sqrt2;
}
template<typename T> POCKETFFT_NOINLINE void exec3c(T c[], T0 fct,
bool ortho) const
template<typename T> POCKETFFT_NOINLINE void exec3(T c[], T0 fct,
bool ortho, bool cosine) const
{
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
size_t N=length();
if (ortho) c[0]*=sqrt2;
size_t NS2 = (N+1)/2;
if (!cosine)
for (size_t k=0, kc=N-1; k<NS2; ++k, --kc)
swap(c[k], c[kc]);
for (size_t k=1, kc=N-1; k<NS2; ++k, --kc)
{
T tmp = c[k]-c[kc];
c[k] = c[k]+c[kc];
c[kc] = tmp;
}
PMINPLACE(c[k], c[kc]);
if ((N&1)==0)
c[NS2] = c[NS2]+c[NS2];
for (size_t k=1, kc=N-1; k<NS2; ++k, --kc)
......@@ -2420,33 +2426,9 @@ template<typename T0> class T_dcst23
c[i] += c[i-1];
c[i-1] = xim1;
}
}
template<typename T> POCKETFFT_NOINLINE void exec2s(T c[], T0 fct,
bool ortho) const
{
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
size_t N=length();
for (size_t k=1; k<N; k+=2)
c[k] = -c[k];
exec2c(c, fct, false);
for (size_t k=0, kc=N-1; k<kc; ++k, --kc)
swap(c[k], c[kc]);
if (ortho) c[0]/=sqrt2;
}
template<typename T> POCKETFFT_NOINLINE void exec3s(T c[], T0 fct,
bool ortho) const
{
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
size_t N=length();
if (ortho) c[0]*=sqrt2;
size_t NS2 = N/2;
for (size_t k=0, kc=N-1; k<NS2; ++k, --kc)
swap(c[k], c[kc]);
exec3c(c, fct, false);
for (size_t k=1; k<N; k+=2)
c[k] = -c[k];
if (!cosine)
for (size_t k=1; k<N; k+=2)
c[k] = -c[k];
}
public:
......@@ -2460,12 +2442,7 @@ template<typename T0> class T_dcst23
template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho,
int type, bool cosine) const
{
if (type==2)
cosine ? exec2c(c, fct, ortho) : exec2s(c, fct, ortho);
else
cosine ? exec3c(c, fct, ortho) : exec3s(c, fct, ortho);
}
{ type==2 ? exec2(c, fct, ortho, cosine) : exec3(c, fct, ortho, cosine); }
size_t length() const { return fftplan.length(); }
};
......@@ -2480,9 +2457,30 @@ template<typename T0> class T_dcst4
unique_ptr<pocketfft_r<T0>> rfft;
arr<cmplx<T0>> C2;
template<typename T> POCKETFFT_NOINLINE void exec4c(T c[], T0 fct) const
public:
POCKETFFT_NOINLINE T_dcst4(size_t length)
: N(length),
fft((N&1) ? nullptr : new pocketfft_c<T0>(N/2)),
rfft((N&1)? new pocketfft_r<T0>(N) : nullptr),
C2((N&1) ? 0 : N/2)
{
constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
if ((N&1)==0)
for (size_t i=0; i<N/2; ++i)
{
T0 ang = -pi/T0(N)*(T0(i)+T0(0.125));
C2[i].Set(cos(ang), sin(ang));
}
}
template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct,
bool /*ortho*/, int /*type*/, bool cosine) const
{
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
size_t n2 = N/2;
if (!cosine)
for (size_t k=0, kc=N-1; k<n2; ++k, --kc)
swap(c[k], c[kc]);
if (N&1)
{
// The following code is derived from the FFTW3 function apply_re11()
......@@ -2491,7 +2489,6 @@ template<typename T0> class T_dcst4
auto SGN_SET = [](T x, size_t i) {return (i%2) ? -x : x;};
arr<T> y(N);
size_t n2 = N/2;
{
size_t i=0, m=n2;
for (; m<N; ++i, m+=4)
......@@ -2533,54 +2530,24 @@ template<typename T0> class T_dcst4
{
// even length algorithm from
// https://www.appletonaudio.com/blog/2013/derivation-of-fast-dct-4-algorithm-based-on-dft/
arr<cmplx<T>> y(N/2);
for(size_t i=0; i<N/2; ++i)
arr<cmplx<T>> y(n2);
for(size_t i=0; i<n2; ++i)
{
y[i].Set(c[2*i],c[N-1-2*i]);
y[i] *= C2[i];
}
fft->forward(y.data(), fct);
for(size_t i=0; i<N/2; ++i)
y[i] *= C2[i];
for(size_t i=0; i<N/2; ++i)
for(size_t i=0, ic=n2-1; i<n2; ++i, --ic)
{
c[2*i] = 2*y[i].r;
c[2*i+1] = -2*y[N/2-1-i].i;
c[2*i ] = 2*(y[i ].r*C2[i ].r-y[i ].i*C2[i ].i);
c[2*i+1] = -2*(y[ic].i*C2[ic].r+y[ic].r*C2[ic].i);
}
}
if (!cosine)
for (size_t k=1; k<N; k+=2)
c[k] = -c[k];
}
template<typename T> POCKETFFT_NOINLINE void exec4s(T c[], T0 fct) const
{
size_t N=length();
size_t NS2 = N/2;
for (size_t k=0, kc=N-1; k<NS2; ++k, --kc)
swap(c[k], c[kc]);
exec4c(c, fct);
for (size_t k=1; k<N; k+=2)
c[k] = -c[k];
}
public:
POCKETFFT_NOINLINE T_dcst4(size_t length)
: N(length),
fft((N&1) ? nullptr : new pocketfft_c<T0>(N/2)),
rfft((N&1)? new pocketfft_r<T0>(N) : nullptr),
C2((N&1) ? 0 : N/2)
{
constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
if ((N&1)==0)
for (size_t i=0; i<N/2; ++i)
{
T0 ang = -pi/T0(N)*(T0(i)+T0(0.125));
C2[i].Set(cos(ang), sin(ang));
}
}
template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct,
bool /*ortho*/, int /*type*/, bool cosine) const
{ cosine ? exec4c(c, fct) : exec4s(c, fct); }
size_t length() const { return N; }
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment