From 8a6495d14a87003033434b71f102e46202bdf171 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Sat, 20 Jul 2019 12:35:55 +0200 Subject: [PATCH] use high-quality algorithm for odd-sized type IV DCTs (derived from FFTW with friendly permission); update README --- README.md | 14 ++++----- pocketfft_hdronly.h | 71 +++++++++++++++++++++++++++++++-------------- test.py | 3 -- 3 files changed, 57 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 1fb76bd..9948ff1 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ pypocketfft =========== -This package provides Fast Fourier and Hartley transforms with a simple -Python interface. +This package provides Fast Fourier, trigonometric and Hartley transforms with a +simple Python interface. The central algorithms are derived from Paul Swarztrauber's FFTPACK code (http://www.netlib.org/fftpack). @@ -10,11 +10,11 @@ The central algorithms are derived from Paul Swarztrauber's FFTPACK code Features -------- - supports fully complex and half-complex (i.e. complex-to-real and - real-to-complex) FFTs -- supports multidimensional arrays and selection of the axes to be transformed. -- supports single and double precision + real-to-complex) FFTs, discrete sine/cosine transforms and Hartley transforms +- achieves very high accuracy for all transforms +- supports multidimensional arrays and selection of the axes to be transformed +- supports single, double, and long double precision - makes use of CPU vector instructions when performing 2D and higher-dimensional transforms -- does not have persistent transform plans, which makes the interface simpler - supports prime-length transforms without degrading to O(N**2) performance -- Has optional OpenMP support for multidimensional transforms +- has optional OpenMP support for multidimensional transforms diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index fcedf15..6979e43 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -2442,43 +2442,72 @@ template class T_dct4 // https://www.appletonaudio.com/blog/2013/derivation-of-fast-dct-4-algorithm-based-on-dft/ private: size_t N; - T_dct2 dct2; - arr C; - pocketfft_c fft; + unique_ptr> fft; + unique_ptr> rfft; arr> C2; public: POCKETFFT_NOINLINE T_dct4(size_t length) - : N(length), dct2((N&1)?N:1), C((N&1)?N:0), - fft((N&1)?1:N/2), C2((N&1) ? 0:N/2) + : N(length), + fft((N&1) ? nullptr : new pocketfft_c(N/2)), + rfft((N&1)? new pocketfft_r(N) : nullptr), + C2((N&1) ? 0 : N/2) { constexpr T0 pi = T0(3.141592653589793238462643383279502884197L); - if (N&1) - { - for (size_t i=0; i POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool /*ortho*/) const { - // FIXME: due to the recurrence below, the algorithm is not very accurate - // Trying to find a better one... + constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L); if (N&1) { - for (size_t i=0; i y(N); + size_t n2 = N/2; + size_t i; + { + size_t m; + for (i=0, m=n2; mforward(y.data(), fct); + for (i=0; i+i+1 class T_dct4 y[i].Set(c[2*i],c[N-1-2*i]); y[i] *= C2[i]; } - fft.forward(y.data(), fct); + fft->forward(y.data(), fct); for(size_t i=0; i 1: # there are no length-1 type 1 DCTs _assert_close(a, pypocketfft.dct(pypocketfft.dct(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), eps) -- GitLab