diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 7bf274f82fd1dc3bd5b693633e0a0fa54848fa0b..c0abd3714be6a16b2b57281133c52bdbbbf3dafd 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -270,7 +270,7 @@ template class sincos_2pibyn { private: using Thigh = typename conditional<(sizeof(T)>sizeof(double)), T, double>::type; - size_t mask, shift; + size_t N, mask, shift; arr> v1, v2; static cmplx calc(size_t x, size_t n, Thigh ang) @@ -309,17 +309,19 @@ template class sincos_2pibyn public: POCKETFFT_NOINLINE sincos_2pibyn(size_t n) + : N(n) { constexpr auto pi = 3.141592653589793238462643383279502884197L; Thigh ang = Thigh(0.25L*pi/n); + size_t nval = (n+2)/2; shift = 1; - while((size_t(1)< class sincos_2pibyn cmplx operator[](size_t idx) const { + if (2*idx<=N) + { + auto x1=v1[idx&mask], x2=v2[idx>>shift]; + return cmplx(T(x1.r*x2.r-x1.i*x2.i), T(x1.r*x2.i+x1.i*x2.r)); + } + idx = N-idx; auto x1=v1[idx&mask], x2=v2[idx>>shift]; - return cmplx(T(x1.r*x2.r-x1.i*x2.i), T(x1.r*x2.i+x1.i*x2.r)); + return cmplx(T(x1.r*x2.r-x1.i*x2.i), -T(x1.r*x2.i+x1.i*x2.r)); } }; @@ -2484,10 +2492,9 @@ template class T_dcst23 POCKETFFT_NOINLINE T_dcst23(size_t length) : fftplan(length), twiddle(length) { - constexpr auto pi = T0(3.141592653589793238462643383279502884197L); - const auto oo2n = pi*T0(0.5)/T0(length); + sincos_2pibyn tw(4*length); for (size_t i=0; i POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho, @@ -2560,14 +2567,12 @@ template class T_dcst4 rfft((N&1)? new pocketfft_r(N) : nullptr), C2((N&1) ? 0 : N/2) { - const auto oon = -T0(1.)/T0(N); if ((N&1)==0) + { + sincos_2pibyn tw(8*N); for (size_t i=0; i POCKETFFT_NOINLINE void exec(T c[], T0 fct, diff --git a/stress.py b/stress.py index f5e4ce276e63d605bbb7e3f87a2054d0591dc35e..635b351846794249e0394f6a321126d178557918 100644 --- a/stress.py +++ b/stress.py @@ -61,12 +61,12 @@ def test(err): b = fftn(ifftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) err = update_err(err, "cmax", _l2error(a.real, b, axes)) - b = irfftn(rfftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2, - lastsize=lastsize, nthreads=nthreads) - err = update_err(err, "rmax", _l2error(a.real, b, axes)) b = ifftn(fftn(a.astype(np.complex64), axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) err = update_err(err, "cmaxf", _l2error(a.astype(np.complex64), b, axes)) + b = irfftn(rfftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2, + lastsize=lastsize, nthreads=nthreads) + err = update_err(err, "rmax", _l2error(a.real, b, axes)) b = irfftn(rfftn(a.real.astype(np.float32), axes=axes, nthreads=nthreads), axes=axes, inorm=2, lastsize=lastsize, nthreads=nthreads) err = update_err(err, "rmaxf", _l2error(a.real.astype(np.float32), b, axes)) @@ -113,6 +113,10 @@ def test(err): pypocketfft.dct(a_32.real, axes=axes, nthreads=nthreads, type=4), axes=axes, type=4, nthreads=nthreads, inorm=2) err = update_err(err, "c4maxf", _l2error(a_32.real, b, axes)) + b = pypocketfft.dst( + pypocketfft.dst(a.real, axes=axes, nthreads=nthreads, type=1), + axes=axes, type=1, nthreads=nthreads, inorm=2) + err = update_err(err, "s1max", _l2error(a.real, b, axes)) b = pypocketfft.dst( pypocketfft.dst(a_32.real, axes=axes, nthreads=nthreads, type=1), axes=axes, type=1, nthreads=nthreads, inorm=2)