diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 7515f79413ff99516511fd384148fea3c30fb8b9..f507572f753f996ee46db3f672c7c14a5034f91c 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -196,6 +196,13 @@ template struct cmplx { { r+=other.r; i+=other.i; return *this; } templatecmplx &operator*= (T2 other) { r*=other; i*=other; return *this; } + templatecmplx &operator*= (const cmplx &other) + { + T tmp = r*other.r - i*other.i; + i = r*other.i + i*other.r; + r = tmp; + return *this; + } cmplx operator+ (const cmplx &other) const { return cmplx(r+other.r, i+other.i); } cmplx operator- (const cmplx &other) const @@ -2221,10 +2228,10 @@ template class pocketfft_c packplan=unique_ptr>(new cfftp(length)); } - template POCKETFFT_NOINLINE void backward(cmplx c[], T0 fct) + template POCKETFFT_NOINLINE void backward(cmplx c[], T0 fct) const { packplan ? packplan->backward(c,fct) : blueplan->backward(c,fct); } - template POCKETFFT_NOINLINE void forward(cmplx c[], T0 fct) + template POCKETFFT_NOINLINE void forward(cmplx c[], T0 fct) const { packplan ? packplan->forward(c,fct) : blueplan->forward(c,fct); } size_t length() const { return len; } @@ -2419,31 +2426,65 @@ template class T_dct3 template class T_dct4 { private: + size_t N; T_dct2 dct2; arr C; + pocketfft_c fft; + arr> C2; public: POCKETFFT_NOINLINE T_dct4(size_t length) - : dct2(length), C(length) + : N(length), dct2((N&1)?N:1), C((N&1)?N:0), + fft((N&1)?1:N/2), C2((N&1) ? 0:N/2) { constexpr T0 pi = T0(3.141592653589793238462643383279502884197L); - for (size_t i=0; i POCKETFFT_NOINLINE void exec(T c[], T0 fct) const { // FIXME: due to the recurrence below, the algorithm is not very accurate // Trying to find a better one... - size_t N = length(); - for (size_t i=0; i> y(N/2); + for(size_t i=0; i class T_dst1