From 5483e02e515ecdfabf42e2e0cc5c86f5d22092b6 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Fri, 19 Jul 2019 10:33:52 +0200 Subject: [PATCH] add (preliminary) type IV transforms --- pocketfft_hdronly.h | 59 ++++++++++++++++++++++++++++++++++++++++++++- test.py | 19 +++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 3128058..1fbb503 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -2416,6 +2416,36 @@ template class T_dct3 size_t length() const { return fftplan.length(); } }; +template class T_dct4 + { + private: + T_dct2 dct2; + arr C; + + public: + POCKETFFT_NOINLINE T_dct4(size_t length) + : dct2(length), C(length) + { + constexpr T0 pi = T0(3.141592653589793238462643383279502884197L); + for (size_t i=0; i POCKETFFT_NOINLINE void exec(T c[], T0 fct) const + { + // FIXME: due to the recurrence below, the algorithm is not very accurate + // Trying to find a better one... + size_t N = length(); + for (size_t i=0; i class T_dst1 { private: @@ -2456,7 +2486,6 @@ template class T_dst2 { size_t N=length(); if (N==1) { c[0]*=2*fct; return; } - for (size_t k=1; k class T_dst3 size_t length() const { return dct.length(); } }; +template class T_dst4 + { + private: + T_dct4 dct; + + public: + POCKETFFT_NOINLINE T_dst4(size_t length) + : dct(length) {} + + template POCKETFFT_NOINLINE void exec(T c[], T0 fct) + { + size_t N=length(); + //if (N==1) { c[0]*=fct; return; } + size_t NS2 = N/2; + for (size_t k=0, kc=N-1; k void r2r_dct(const shape_t &shape, general_dcst>(ain, aout, axes, fct, nthreads); else if (type==3) general_dcst>(ain, aout, axes, fct, nthreads); + else if (type==4) + general_dcst>(ain, aout, axes, fct, nthreads); else throw runtime_error("unsupported DCT type"); } @@ -3275,6 +3330,8 @@ template void r2r_dst(const shape_t &shape, general_dcst>(ain, aout, axes, fct, nthreads); else if (type==3) general_dcst>(ain, aout, axes, fct, nthreads); + else if (type==4) + general_dcst>(ain, aout, axes, fct, nthreads); else throw runtime_error("unsupported DST type"); } diff --git a/test.py b/test.py index 50d15fe..218fbd1 100644 --- a/test.py +++ b/test.py @@ -221,3 +221,22 @@ def testdcst1D(len, inorm, type): _assert_close(a, pypocketfft.r2r_dst(pypocketfft.r2r_dst(c, inorm=inorm, type=type), inorm=2-inorm, type=itype), 2e-18) _assert_close(a, pypocketfft.r2r_dst(pypocketfft.r2r_dst(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), 1.5e-15) _assert_close(b, pypocketfft.r2r_dst(pypocketfft.r2r_dst(b, inorm=inorm, type=type), inorm=2-inorm, type=itype), 6e-7) + + +# TEMPORARY: separate test for DCT/DST IV, since they are less accurate +@pmp("len", len1D) +@pmp("inorm", [0, 1, 2]) +@pmp("type", [4]) +def testdcst1D4(len, inorm, type): + a = np.random.rand(len)-0.5 + b = a.astype(np.float32) + c = a.astype(np.float128) + itp = (0, 1, 3, 2, 4) + itype = itp[type] + if type != 1 or len > 1: + _assert_close(a, pypocketfft.r2r_dct(pypocketfft.r2r_dct(c, inorm=inorm, type=type), inorm=2-inorm, type=itype), 2e-16) + _assert_close(a, pypocketfft.r2r_dct(pypocketfft.r2r_dct(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), 1.5e-13) + _assert_close(b, pypocketfft.r2r_dct(pypocketfft.r2r_dct(b, inorm=inorm, type=type), inorm=2-inorm, type=itype), 6e-5) + _assert_close(a, pypocketfft.r2r_dst(pypocketfft.r2r_dst(c, inorm=inorm, type=type), inorm=2-inorm, type=itype), 2e-16) + _assert_close(a, pypocketfft.r2r_dst(pypocketfft.r2r_dst(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), 1.5e-13) + _assert_close(b, pypocketfft.r2r_dst(pypocketfft.r2r_dst(b, inorm=inorm, type=type), inorm=2-inorm, type=itype), 6e-5) -- GitLab