Commit 656b2373 authored by Martin Reinecke

Merge branch 'sincos' into 'master'

DCT/DST support

See merge request !13
parents 0276f55e 20f3b84b
pypocketfft pypocketfft
=========== ===========
This package provides Fast Fourier and Hartley transforms with a simple This package provides Fast Fourier, trigonometric and Hartley transforms with a
Python interface. simple Python interface.
The central algorithms are derived from Paul Swarztrauber's FFTPACK code The central algorithms are derived from Paul Swarztrauber's FFTPACK code
(http://www.netlib.org/fftpack). (http://www.netlib.org/fftpack).
...@@ -10,11 +10,11 @@ The central algorithms are derived from Paul Swarztrauber's FFTPACK code ...@@ -10,11 +10,11 @@ The central algorithms are derived from Paul Swarztrauber's FFTPACK code
Features Features
-------- --------
- supports fully complex and half-complex (i.e. complex-to-real and - supports fully complex and half-complex (i.e. complex-to-real and
real-to-complex) FFTs real-to-complex) FFTs, discrete sine/cosine transforms and Hartley transforms
- supports multidimensional arrays and selection of the axes to be transformed. - achieves very high accuracy for all transforms
- supports single and double precision - supports multidimensional arrays and selection of the axes to be transformed
- supports single, double, and long double precision
- makes use of CPU vector instructions when performing 2D and higher-dimensional - makes use of CPU vector instructions when performing 2D and higher-dimensional
transforms transforms
- does not have persistent transform plans, which makes the interface simpler
- supports prime-length transforms without degrading to O(N**2) performance - supports prime-length transforms without degrading to O(N**2) performance
- Has optional OpenMP support for multidimensional transforms - has optional OpenMP support for multidimensional transforms
...@@ -2,7 +2,13 @@ ...@@ -2,7 +2,13 @@
This file is part of pocketfft. This file is part of pocketfft.
Copyright (C) 2010-2019 Max-Planck-Society Copyright (C) 2010-2019 Max-Planck-Society
Author: Martin Reinecke Copyright (C) 2019 Peter Bell
For the odd-sized DCT-IV transforms:
Copyright (C) 2003, 2007-14 Matteo Frigo
Copyright (C) 2003, 2007-14 Massachusetts Institute of Technology
Authors: Martin Reinecke, Peter Bell
All rights reserved. All rights reserved.
...@@ -196,6 +202,13 @@ template<typename T> struct cmplx { ...@@ -196,6 +202,13 @@ template<typename T> struct cmplx {
{ r+=other.r; i+=other.i; return *this; } { r+=other.r; i+=other.i; return *this; }
template<typename T2>cmplx &operator*= (T2 other) template<typename T2>cmplx &operator*= (T2 other)
{ r*=other; i*=other; return *this; } { r*=other; i*=other; return *this; }
// In-place complex multiplication: *this = *this * other.
// Both result components are computed from the old values of r and i
// before either member is overwritten.
template<typename T2>cmplx &operator*= (const cmplx<T2> &other)
  {
  const T new_re = r*other.r - i*other.i;
  const T new_im = r*other.i + i*other.r;
  r = new_re;
  i = new_im;
  return *this;
  }
cmplx operator+ (const cmplx &other) const cmplx operator+ (const cmplx &other) const
{ return cmplx(r+other.r, i+other.i); } { return cmplx(r+other.r, i+other.i); }
cmplx operator- (const cmplx &other) const cmplx operator- (const cmplx &other) const
...@@ -474,8 +487,8 @@ struct util // hack to avoid duplicate symbols ...@@ -474,8 +487,8 @@ struct util // hack to avoid duplicate symbols
shape_t tmp(ndim,0); shape_t tmp(ndim,0);
for (auto ax : axes) for (auto ax : axes)
{ {
if (ax>=ndim) throw runtime_error("bad axis number"); if (ax>=ndim) throw invalid_argument("bad axis number");
if (++tmp[ax]>1) throw runtime_error("axis specified repeatedly"); if (++tmp[ax]>1) throw invalid_argument("axis specified repeatedly");
} }
} }
...@@ -484,7 +497,7 @@ struct util // hack to avoid duplicate symbols ...@@ -484,7 +497,7 @@ struct util // hack to avoid duplicate symbols
size_t axis) size_t axis)
{ {
sanity_check(shape, stride_in, stride_out, inplace); sanity_check(shape, stride_in, stride_out, inplace);
if (axis>=shape.size()) throw runtime_error("bad axis number"); if (axis>=shape.size()) throw invalid_argument("bad axis number");
} }
#ifdef POCKETFFT_OPENMP #ifdef POCKETFFT_OPENMP
...@@ -531,7 +544,7 @@ template<bool fwd, typename T> void pass2 (size_t ido, size_t l1, ...@@ -531,7 +544,7 @@ template<bool fwd, typename T> void pass2 (size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i) auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; }; { return wa[i-1+x*(ido-1)]; };
...@@ -585,7 +598,7 @@ template<bool fwd, typename T> void pass3 (size_t ido, size_t l1, ...@@ -585,7 +598,7 @@ template<bool fwd, typename T> void pass3 (size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i) auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; }; { return wa[i-1+x*(ido-1)]; };
...@@ -623,7 +636,7 @@ template<bool fwd, typename T> void pass4 (size_t ido, size_t l1, ...@@ -623,7 +636,7 @@ template<bool fwd, typename T> void pass4 (size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i) auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; }; { return wa[i-1+x*(ido-1)]; };
...@@ -705,7 +718,7 @@ template<bool fwd, typename T> void pass5 (size_t ido, size_t l1, ...@@ -705,7 +718,7 @@ template<bool fwd, typename T> void pass5 (size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i) auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; }; { return wa[i-1+x*(ido-1)]; };
...@@ -779,7 +792,7 @@ template<bool fwd, typename T> void pass7(size_t ido, size_t l1, ...@@ -779,7 +792,7 @@ template<bool fwd, typename T> void pass7(size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i) auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; }; { return wa[i-1+x*(ido-1)]; };
...@@ -843,7 +856,7 @@ template<bool fwd, typename T> void pass8 (size_t ido, size_t l1, ...@@ -843,7 +856,7 @@ template<bool fwd, typename T> void pass8 (size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i) auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; }; { return wa[i-1+x*(ido-1)]; };
...@@ -971,7 +984,7 @@ template<bool fwd, typename T> void pass11 (size_t ido, size_t l1, ...@@ -971,7 +984,7 @@ template<bool fwd, typename T> void pass11 (size_t ido, size_t l1,
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto WA = [wa, ido](size_t x, size_t i) auto WA = [wa, ido](size_t x, size_t i)
{ return wa[i-1+x*(ido-1)]; }; { return wa[i-1+x*(ido-1)]; };
...@@ -1245,7 +1258,7 @@ template<bool fwd, typename T> void pass_all(T c[], T0 fct) ...@@ -1245,7 +1258,7 @@ template<bool fwd, typename T> void pass_all(T c[], T0 fct)
POCKETFFT_NOINLINE cfftp(size_t length_) POCKETFFT_NOINLINE cfftp(size_t length_)
: length(length_) : length(length_)
{ {
if (length==0) throw runtime_error("zero length FFT requested"); if (length==0) throw runtime_error("zero-length FFT requested");
if (length==1) return; if (length==1) return;
factorize(); factorize();
mem.resize(twsize()); mem.resize(twsize());
...@@ -1290,7 +1303,7 @@ template<typename T> void radf2 (size_t ido, size_t l1, ...@@ -1290,7 +1303,7 @@ template<typename T> void radf2 (size_t ido, size_t l1,
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; }; auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; }; { return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; }; { return ch[a+ido*(b+cdim*c)]; };
for (size_t k=0; k<l1; k++) for (size_t k=0; k<l1; k++)
...@@ -1330,7 +1343,7 @@ template<typename T> void radf3(size_t ido, size_t l1, ...@@ -1330,7 +1343,7 @@ template<typename T> void radf3(size_t ido, size_t l1,
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; }; auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; }; { return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; }; { return ch[a+ido*(b+cdim*c)]; };
for (size_t k=0; k<l1; k++) for (size_t k=0; k<l1; k++)
...@@ -1370,7 +1383,7 @@ template<typename T> void radf4(size_t ido, size_t l1, ...@@ -1370,7 +1383,7 @@ template<typename T> void radf4(size_t ido, size_t l1,
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; }; auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; }; { return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; }; { return ch[a+ido*(b+cdim*c)]; };
for (size_t k=0; k<l1; k++) for (size_t k=0; k<l1; k++)
...@@ -1421,7 +1434,7 @@ template<typename T> void radf5(size_t ido, size_t l1, ...@@ -1421,7 +1434,7 @@ template<typename T> void radf5(size_t ido, size_t l1,
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; }; auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,l1](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+l1*c)]; }; { return cc[a+ido*(b+l1*c)]; };
auto CH = [ch,ido](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,cdim](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+cdim*c)]; }; { return ch[a+ido*(b+cdim*c)]; };
for (size_t k=0; k<l1; k++) for (size_t k=0; k<l1; k++)
...@@ -1620,7 +1633,7 @@ template<typename T> void radb2(size_t ido, size_t l1, ...@@ -1620,7 +1633,7 @@ template<typename T> void radb2(size_t ido, size_t l1,
constexpr size_t cdim=2; constexpr size_t cdim=2;
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; }; auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
...@@ -1653,7 +1666,7 @@ template<typename T> void radb3(size_t ido, size_t l1, ...@@ -1653,7 +1666,7 @@ template<typename T> void radb3(size_t ido, size_t l1,
constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L); constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; }; auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
...@@ -1694,7 +1707,7 @@ template<typename T> void radb4(size_t ido, size_t l1, ...@@ -1694,7 +1707,7 @@ template<typename T> void radb4(size_t ido, size_t l1,
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L); constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; }; auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
...@@ -1750,7 +1763,7 @@ template<typename T> void radb5(size_t ido, size_t l1, ...@@ -1750,7 +1763,7 @@ template<typename T> void radb5(size_t ido, size_t l1,
ti12= T0(0.5877852522924731291687059546390728L); ti12= T0(0.5877852522924731291687059546390728L);
auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; }; auto WA = [wa,ido](size_t x, size_t i) { return wa[i+x*(ido-1)]; };
auto CC = [cc,ido](size_t a, size_t b, size_t c) -> const T& auto CC = [cc,ido,cdim](size_t a, size_t b, size_t c) -> const T&
{ return cc[a+ido*(b+cdim*c)]; }; { return cc[a+ido*(b+cdim*c)]; };
auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T& auto CH = [ch,ido,l1](size_t a, size_t b, size_t c) -> T&
{ return ch[a+ido*(b+l1*c)]; }; { return ch[a+ido*(b+l1*c)]; };
...@@ -2085,7 +2098,7 @@ template<typename T> void radbg(size_t ido, size_t ip, size_t l1, ...@@ -2085,7 +2098,7 @@ template<typename T> void radbg(size_t ido, size_t ip, size_t l1,
POCKETFFT_NOINLINE rfftp(size_t length_) POCKETFFT_NOINLINE rfftp(size_t length_)
: length(length_) : length(length_)
{ {
if (length==0) throw runtime_error("zero-sized FFT"); if (length==0) throw runtime_error("zero-length FFT requested");
if (length==1) return; if (length==1) return;
factorize(); factorize();
mem.resize(twsize()); mem.resize(twsize());
...@@ -2221,10 +2234,10 @@ template<typename T0> class pocketfft_c ...@@ -2221,10 +2234,10 @@ template<typename T0> class pocketfft_c
packplan=unique_ptr<cfftp<T0>>(new cfftp<T0>(length)); packplan=unique_ptr<cfftp<T0>>(new cfftp<T0>(length));
} }
template<typename T> POCKETFFT_NOINLINE void backward(cmplx<T> c[], T0 fct) template<typename T> POCKETFFT_NOINLINE void backward(cmplx<T> c[], T0 fct) const
{ packplan ? packplan->backward(c,fct) : blueplan->backward(c,fct); } { packplan ? packplan->backward(c,fct) : blueplan->backward(c,fct); }
template<typename T> POCKETFFT_NOINLINE void forward(cmplx<T> c[], T0 fct) template<typename T> POCKETFFT_NOINLINE void forward(cmplx<T> c[], T0 fct) const
{ packplan ? packplan->forward(c,fct) : blueplan->forward(c,fct); } { packplan ? packplan->forward(c,fct) : blueplan->forward(c,fct); }
size_t length() const { return len; } size_t length() const { return len; }
...@@ -2261,13 +2274,13 @@ template<typename T0> class pocketfft_r ...@@ -2261,13 +2274,13 @@ template<typename T0> class pocketfft_r
packplan=unique_ptr<rfftp<T0>>(new rfftp<T0>(length)); packplan=unique_ptr<rfftp<T0>>(new rfftp<T0>(length));
} }
template<typename T> POCKETFFT_NOINLINE void backward(T c[], T0 fct) template<typename T> POCKETFFT_NOINLINE void backward(T c[], T0 fct) const
{ {
packplan ? packplan->backward(c,fct) packplan ? packplan->backward(c,fct)
: blueplan->backward_r(c,fct); : blueplan->backward_r(c,fct);
} }
template<typename T> POCKETFFT_NOINLINE void forward(T c[], T0 fct) template<typename T> POCKETFFT_NOINLINE void forward(T c[], T0 fct) const
{ {
packplan ? packplan->forward(c,fct) packplan ? packplan->forward(c,fct)
: blueplan->forward_r(c,fct); : blueplan->forward_r(c,fct);
...@@ -2276,6 +2289,365 @@ template<typename T0> class pocketfft_r ...@@ -2276,6 +2289,365 @@ template<typename T0> class pocketfft_r
size_t length() const { return len; } size_t length() const { return len; }
}; };
//
// sine/cosine transforms
//
// Plan for a type-I cosine transform (per the class name) of a fixed length.
//
// The transform is evaluated through a real FFT of length 2*(length-1)
// applied to an even-symmetric extension of the input.
template<typename T0> class T_dct1
  {
  private:
    pocketfft_r<T0> fftplan;

    // Validates the requested transform length and returns the length of the
    // real FFT used internally.
    // Without this check, length==0 would wrap around in size_t arithmetic
    // and request an absurdly large FFT, and length==1 would request a
    // zero-length FFT.
    // Throws runtime_error if length<2.
    static size_t fft_length(size_t length)
      {
      if (length<2)
        throw std::runtime_error("DCT-I requires a length of at least 2");
      return 2*(length-1);
      }

  public:
    // length: number of input/output values of the transform; must be >=2.
    POCKETFFT_NOINLINE T_dct1(size_t length)
      : fftplan(fft_length(length)) {}

    // Transforms the length() values in c in place.
    //   c:     data array, overwritten with the result
    //   fct:   scaling factor handed on to the FFT
    //   ortho: if true, the first and last elements are multiplied by sqrt(2)
    //          before the FFT and divided by sqrt(2) afterwards
    template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho) const
      {
      constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
      size_t N=fftplan.length(), n=N/2+1;
      if (ortho)
        { c[0]*=sqrt2; c[n-1]*=sqrt2; }
      // Build the even-symmetric extension: tmp[i] == tmp[N-i] for 0<i<n.
      // (For i==n-1 both indices refer to the same element.)
      arr<T> tmp(N);
      tmp[0] = c[0];
      for (size_t i=1; i<n; ++i)
        tmp[i] = tmp[N-i] = c[i];
      fftplan.forward(tmp.data(), fct);
      // Pick entry 0 and every odd-indexed entry of the FFT output;
      // presumably these are the real parts in the FFT's packed
      // real-to-halfcomplex layout -- confirm against rfftp.
      c[0] = tmp[0];
      for (size_t i=1; i<n; ++i)
        c[i] = tmp[2*i-1];
      if (ortho)
        { c[0]/=sqrt2; c[n-1]/=sqrt2; }
      }

    // Number of values processed by exec().
    size_t length() const { return fftplan.length()/2+1; }
  };
// Plan for a type-II cosine transform (per the class name) of a fixed length.
//
// Holds a real FFT plan of the same length plus a table of cosine factors,
// twiddle[i] = cos(0.5*pi*(i+1)/length).
template<typename T0> class T_dct2
  {
  private:
    pocketfft_r<T0> fftplan;
    vector<T0> twiddle;

  public:
    // length: number of input/output values of the transform.
    POCKETFFT_NOINLINE T_dct2(size_t length)
      : fftplan(length), twiddle(length)
      {
      constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
      size_t idx = 0;
      for (auto &tw : twiddle)
        {
        tw = T0(cos(0.5*pi*T0(idx+1)/T0(length)));
        ++idx;
        }
      }

    // Transforms the length() values in c in place.
    //   c:     data array, overwritten with the result
    //   fct:   scaling factor applied to the result
    //   ortho: if true, c[0] is additionally divided by sqrt(2) at the end
    template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho) const
      {
      constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
      const size_t len = length();
      switch (len)
        {
        case 1:
          c[0]*=2*fct;
          break;
        case 2:
          {
          // Closed form for two points; c[1] needs the old c[0], so the
          // new c[0] is stored last.
          T first = 2*fct*(c[0]+c[1]);
          c[1] = sqrt2*fct*(c[0]-c[1]);
          c[0] = first;
          break;
          }
        default:
          {
          const size_t half = (len+1)/2;
          const bool even_len = (len%2)==0;
          // Preprocessing: replace each pair (odd, odd+1) by its
          // half-sum and half-difference.
          for (size_t odd=1; odd+1<len; odd+=2)
            {
            const size_t even = odd+1;
            T halfsum = T0(0.5)*(c[odd]+c[even]);
            c[even] = T0(0.5)*(c[even]-c[odd]);
            c[odd] = halfsum;
            }
          fftplan.backward(c, fct);
          // Combine mirrored entries lo and hi=len-lo using the cosine table.
          for (size_t lo=1; lo<half; ++lo)
            {
            const size_t hi = len-lo;
            T rot = twiddle[lo-1]*c[hi]+twiddle[hi-1]*c[lo];
            c[hi] = twiddle[lo-1]*c[lo]-twiddle[hi-1]*c[hi];
            c[lo] = rot;
            }
          // For even lengths the middle entry has no mirror partner.
          if (even_len)
            c[half] = twiddle[half-1]*(c[half]+c[half]);
          // Final sum/difference butterfly over the mirrored entries.
          for (size_t lo=1; lo<half; ++lo)
            {
            const size_t hi = len-lo;
            T total = c[lo]+c[hi];
            c[hi] = c[lo]-c[hi];
            c[lo] = total;
            }
          c[0] *= 2;
          break;
          }
        }
      if (ortho) c[0]/=sqrt2;
      }

    // Number of values processed by exec().
    size_t length() const { return fftplan.length(); }
  };
// Plan for a type-III cosine transform (per the class name) of a fixed length.
//
// Holds a real FFT plan of the same length plus a table of cosine factors,
// twiddle[i] = cos(0.5*pi*(i+1)/length).
template<typename T0> class T_dct3
  {
  private:
    pocketfft_r<T0> fftplan;
    vector<T0> twiddle;

  public:
    // length: number of input/output values of the transform.
    POCKETFFT_NOINLINE T_dct3(size_t length)
      : fftplan(length), twiddle(length)
      {
      constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
      size_t idx = 0;
      for (auto &tw : twiddle)
        {
        tw = T0(cos(0.5*pi*T0(idx+1)/T0(length)));
        ++idx;
        }
      }

    // Transforms the length() values in c in place.
    //   c:     data array, overwritten with the result
    //   fct:   scaling factor applied to the result
    //   ortho: if true, c[0] is multiplied by sqrt(2) before the transform
    template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho) const
      {
      constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
      const size_t len = length();
      if (ortho) c[0]*=sqrt2;

      if (len==1)
        {
        c[0]*=fct;
        return;
        }
      if (len==2)
        {
        // Closed form for two points.
        T scaled1 = sqrt2*c[1];
        c[1] = fct*(c[0]-scaled1);
        c[0] = fct*(c[0]+scaled1);
        return;
        }

      const size_t half = (len+1)/2;
      const bool even_len = (len%2)==0;
      // Sum/difference butterfly over mirrored entries lo and hi=len-lo.
      for (size_t lo=1; lo<half; ++lo)
        {
        const size_t hi = len-lo;
        T diff = c[lo]-c[hi];
        c[lo] = c[lo]+c[hi];
        c[hi] = diff;
        }
      // For even lengths the middle entry has no mirror partner.
      if (even_len)
        c[half] = c[half]+c[half];
      // Combine the mirrored entries using the cosine table.
      for (size_t lo=1; lo<half; ++lo)
        {
        const size_t hi = len-lo;
        T rot = twiddle[lo-1]*c[lo]-twiddle[hi-1]*c[hi];
        c[lo] = twiddle[lo-1]*c[hi]+twiddle[hi-1]*c[lo];
        c[hi] = rot;
        }
      if (even_len)
        c[half] = twiddle[half-1]*c[half];
      fftplan.forward(c, fct);
      // Postprocessing: replace each pair (odd, odd+1) by its
      // difference and sum.
      for (size_t odd=1; odd+1<len; odd+=2)
        {
        const size_t even = odd+1;
        T diff = c[odd]-c[even];
        c[even] += c[odd];
        c[odd] = diff;
        }
      }

    // Number of values processed by exec().
    size_t length() const { return fftplan.length(); }
  };
template<typename T0> class T_dct4
{
// even length algorithm from
// https://www.appletonaudio.com/blog/2013/derivation-of-fast-dct-4-algorithm-based-on-dft/
private:
size_t N;
unique_ptr<pocketfft_c<T0>> fft;
unique_ptr<pocketfft_r<T0>> rfft;
arr<cmplx<T0>> C2;
public:
POCKETFFT_NOINLINE T_dct4(size_t length)
: N(length),
fft((N&1) ? nullptr : new pocketfft_c<T0>(N/2)),
rfft((N&1)? new pocketfft_r<T0>(N) : nullptr),
C2((N&1) ? 0 : N/2)
{
constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
if ((N&1)==0)
for (size_t i=0; i<N/2; ++i)
{
T0 ang = -pi/T0(N)*(T0(i)+T0(0.125));
C2[i].Set(cos(ang), sin(ang));
}
}
template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool /*ortho*/) const
{
constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
if (N&1)
{
// The following code is derived from the FFTW3 function apply_re11()
// and is released under the 3-clause BSD license with friendly
// permission of Matteo Frigo.
auto SGN_SET = [](T x, size_t i) {return (i%2) ? -x : x;};
arr<T> y(N);
size_t n2 = N/2;
size_t i;
{
size_t m;
for (i=0, m=n2; m<N; ++i, m+=4)
y[i] = c[m];
for (; m<2*N; ++i, m+=4)
y[i] = -c[2*N-m-1];
for (; m<3*N; ++i, m+=4)
y[i] = -c[m-2*N];
for (; m<4*N; ++i, m+=4)
y[i] = c[4*N-m-1];
m -= 4*N;
for (; i<N; ++i, m+=4)
y[i] = c[m];
}
rfft->forward(y.data(), fct);
for (i=0; i+i+1<n2; ++i)
{
size_t k = i+i+1;
T c1=y[2*k-1], s1=y[2*k], c2=y[2*k+1], s2=y[2*k+2];
c[i] = sqrt2 * (SGN_SET(c1, (i+1)/2) + SGN_SET(s1, i/2));
c[N-(i+1)] = sqrt2 * (SGN_SET(c1, (N-i)/2) - SGN_SET(s1, (N-(i+1))/2));
c[n2-(i+1)] = sqrt2 * (SGN_SET(c2, (n2-i)/2) - SGN_SET(s2, (n2-(i+1))/2));
c[n2+(i+1)] = sqrt2 * (SGN_SET(c2, (n2+i+2)/2) + SGN_SET(s2, (n2+(i+1))/2));
}
if (i+i+1 == n2)
{
T cx=y[2*n2-1], sx=y[2*n2];
c[i] = sqrt2 * (SGN_SET(cx, (i+1)/2) + SGN_SET(sx, i/2));
c[N-(i+1)] = sqrt2 * (SGN_SET(cx, (i+2