diff --git a/pypocketfft.cc b/pypocketfft.cc index c6d72bf7a3cd9c345dd25803ec1c8251c030fa14..2bdb3afb4ddb4b3997e92844f9693ff8c114653a 100644 --- a/pypocketfft.cc +++ b/pypocketfft.cc @@ -227,7 +227,7 @@ py::array r2r_fftpack(const py::array &in, const py::object &axes_, } template<typename T> py::array dct_internal(const py::array &in, - const py::object &axes_, int type, int inorm, bool ortho, py::object &out_, + const py::object &axes_, int type, int inorm, py::object &out_, size_t nthreads) { auto axes = makeaxes(in, axes_); @@ -239,10 +239,9 @@ template<typename T> py::array dct_internal(const py::array &in, auto d_out=reinterpret_cast<T *>(res.mutable_data()); { py::gil_scoped_release release; - // override - if (ortho) inorm=1; T fct = (type==1) ? norm_fct<T>(inorm, dims, axes, 2, -1) : norm_fct<T>(inorm, dims, axes, 2); + bool ortho = inorm == 1; pocketfft::dct(dims, s_in, s_out, axes, type, d_in, d_out, fct, ortho, nthreads); } @@ -250,15 +249,15 @@ template<typename T> py::array dct_internal(const py::array &in, } py::array dct(const py::array &in, int type, const py::object &axes_, - int inorm, bool ortho, py::object &out_, size_t nthreads) + int inorm, py::object &out_, size_t nthreads) { if ((type<1) || (type>4)) throw invalid_argument("invalid DCT type"); - DISPATCH(in, f64, f32, flong, dct_internal, (in, axes_, type, inorm, ortho, - out_, nthreads)) + DISPATCH(in, f64, f32, flong, dct_internal, (in, axes_, type, inorm, out_, + nthreads)) } template<typename T> py::array dst_internal(const py::array &in, - const py::object &axes_, int type, int inorm, bool ortho, py::object &out_, + const py::object &axes_, int type, int inorm, py::object &out_, size_t nthreads) { auto axes = makeaxes(in, axes_); @@ -270,10 +269,9 @@ template<typename T> py::array dst_internal(const py::array &in, auto d_out=reinterpret_cast<T *>(res.mutable_data()); { py::gil_scoped_release release; - // override - if (ortho) inorm=1; T fct = (type==1) ? norm_fct<T>(inorm, dims, axes, 2, 1) : norm_fct<T>(inorm, dims, axes, 2); + bool ortho = inorm == 1; pocketfft::dst(dims, s_in, s_out, axes, type, d_in, d_out, fct, ortho, nthreads); } @@ -281,10 +279,10 @@ template<typename T> py::array dst_internal(const py::array &in, } py::array dst(const py::array &in, int type, const py::object &axes_, - int inorm, bool ortho, py::object &out_, size_t nthreads) + int inorm, py::object &out_, size_t nthreads) { if ((type<1) || (type>4)) throw invalid_argument("invalid DST type"); - DISPATCH(in, f64, f32, flong, dst_internal, (in, axes_, type, inorm, ortho, + DISPATCH(in, f64, f32, flong, dst_internal, (in, axes_, type, inorm, out_, nthreads)) } @@ -599,11 +597,18 @@ axes : list of integers inorm : int Normalization type 0 : no normalization - 1 : divide by sqrt(N) + 1 : make transform orthogonal and divide by sqrt(N) 2 : divide by N where N is the product of n_i for every transformed axis i. n_i is 2*(<axis_length>-1 for type 1 and 2*<axis length> for types 2, 3, 4. + Making the transform orthogonal involves the following additional steps + for every 1D sub-transform: + Type 1 : multiply first and last input value by sqrt(2) + divide first and last output value by sqrt(2) + Type 2 : divide first output value by sqrt(2) + Type 3 : multiply first input value by sqrt(2) + Type 4 : nothing out : numpy.ndarray (same shape and data type as `a`) May be identical to `a`, but if it isn't, it must not overlap with `a`. If None, a new array is allocated to store the output. @@ -631,11 +636,17 @@ axes : list of integers inorm : int Normalization type 0 : no normalization - 1 : divide by sqrt(N) + 1 : make transform orthogonal and divide by sqrt(N) 2 : divide by N where N is the product of n_i for every transformed axis i. n_i is 2*(<axis_length>+1 for type 1 and 2*<axis length> for types 2, 3, 4. + Making the transform orthogonal involves the following additional steps + for every 1D sub-transform: + Type 1 : nothing + Type 2 : divide first output value by sqrt(2) + Type 3 : multiply first input value by sqrt(2) + Type 4 : nothing out : numpy.ndarray (same shape and data type as `a`) May be identical to `a`, but if it isn't, it must not overlap with `a`. If None, a new array is allocated to store the output. @@ -669,7 +680,7 @@ PYBIND11_MODULE(pypocketfft, m) m.def("genuine_hartley", genuine_hartley, genuine_hartley_DS, "a"_a, "axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1); m.def("dct", dct, dct_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0, - "ortho"_a=false, "out"_a=None, "nthreads"_a=1); + "out"_a=None, "nthreads"_a=1); m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0, - "ortho"_a=false, "out"_a=None, "nthreads"_a=1); + "out"_a=None, "nthreads"_a=1); } diff --git a/test.py b/test.py index 3d22f1ebb4c744b0ab2b6edfa82ae03e2242331e..7ee4505c0f86bb52b597bdacce1fc5333be1737d 100644 --- a/test.py +++ b/test.py @@ -55,36 +55,27 @@ def irfft_scipy(a, axis, inorm=0, out=None, nthreads=1): forward=False, inorm=inorm, out=out, nthreads=nthreads) +tol = {np.float32: 6e-7, np.float64: 1.5e-15, np.longfloat: 1e-18} +ctype = {np.float32: np.complex64, np.float64: np.complex128, np.longfloat: np.longcomplex} @pmp("len", len1D) @pmp("inorm", [0, 1, 2]) -def test1D(len, inorm): +@pmp("dtype", [np.float32, np.float64, np.longfloat]) +def test1D(len, inorm, dtype): a = np.random.rand(len)-0.5 + 1j*np.random.rand(len)-0.5j - b = a.astype(np.complex64) - c = a.astype(np.complex256) - _assert_close(a, ifftn(fftn(c, inorm=inorm), inorm=2-inorm), 1e-18) - assert_(_l2error(a, ifftn(fftn(a, inorm=inorm), inorm=2-inorm)) < 1.5e-15) + a = a.astype(ctype[dtype]) + eps = tol[dtype] + assert_(_l2error(a, ifftn(fftn(a, inorm=inorm), inorm=2-inorm)) < eps) assert_(_l2error(a.real, ifftn(fftn(a.real, inorm=inorm), inorm=2-inorm)) - < 1.5e-15) + < eps) assert_(_l2error(a.real, fftn(ifftn(a.real, inorm=inorm), inorm=2-inorm)) - < 1.5e-15) + < eps) assert_(_l2error(a.real, irfftn(rfftn(a.real, inorm=inorm), - inorm=2-inorm, lastsize=len)) < 1.5e-15) + inorm=2-inorm, lastsize=len)) < eps) tmp = a.copy() assert_(ifftn(fftn(tmp, out=tmp, inorm=inorm), out=tmp, inorm=2-inorm) is tmp) - assert_(_l2error(tmp, a) < 1.5e-15) - assert_(_l2error(b, ifftn(fftn(b, inorm=inorm), inorm=2-inorm)) < 6e-7) - assert_(_l2error(b.real, ifftn(fftn(b.real, inorm=inorm), inorm=2-inorm)) - < 6e-7) - assert_(_l2error(b.real, fftn(ifftn(b.real, inorm=inorm), inorm=2-inorm)) - < 6e-7) - assert_(_l2error(b.real, irfftn(rfftn(b.real, inorm=inorm), lastsize=len, - inorm=2-inorm)) < 6e-7) - tmp = b.copy() - assert_(ifftn(fftn(tmp, out=tmp, inorm=inorm), out=tmp, inorm=2-inorm) - is tmp) - assert_(_l2error(tmp, b) < 6e-7) + assert_(_l2error(tmp, a) < eps) @pmp("shp", shapes) @@ -206,54 +197,17 @@ def test_genuine_hartley_2D(shp, axes): @pmp("len", len1D) -@pmp("inorm", [0, 1, 2]) -@pmp("type", [1, 2, 3]) -def testdcst1D(len, inorm, type): - a = np.random.rand(len)-0.5 - b = a.astype(np.float32) - c = a.astype(np.float128) - itp = (0, 1, 3, 2, 4) - itype = itp[type] - if type != 1 or len > 1: - _assert_close(a, pypocketfft.dct(pypocketfft.dct(c, inorm=inorm, type=type), inorm=2-inorm, type=itype), 2e-18) - _assert_close(a, pypocketfft.dct(pypocketfft.dct(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), 1.5e-15) - _assert_close(b, pypocketfft.dct(pypocketfft.dct(b, inorm=inorm, type=type), inorm=2-inorm, type=itype), 6e-7) - _assert_close(a, pypocketfft.dst(pypocketfft.dst(c, inorm=inorm, type=type), inorm=2-inorm, type=itype), 2e-18) - _assert_close(a, pypocketfft.dst(pypocketfft.dst(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), 1.5e-15) - _assert_close(b, pypocketfft.dst(pypocketfft.dst(b, inorm=inorm, type=type), inorm=2-inorm, type=itype), 6e-7) - -@pmp("len", len1D) -@pmp("type", [1, 2, 3]) -def testdcst1Dortho(len, type): - a = np.random.rand(len)-0.5 - b = a.astype(np.float32) - c = a.astype(np.float128) - itp = (0, 1, 3, 2, 4) - itype = itp[type] - if type != 1 or len > 1: - _assert_close(a, pypocketfft.dct(pypocketfft.dct(c, ortho=True, type=type), ortho=True, type=itype), 2e-18) - _assert_close(a, pypocketfft.dct(pypocketfft.dct(a, ortho=True, type=type), ortho=True, type=itype), 1.5e-15) - _assert_close(b, pypocketfft.dct(pypocketfft.dct(b, ortho=True, type=type), ortho=True, type=itype), 6e-7) - if type != 1: - _assert_close(a, pypocketfft.dst(pypocketfft.dst(c, ortho=True, type=type), ortho=True, type=itype), 2e-18) - _assert_close(a, pypocketfft.dst(pypocketfft.dst(a, ortho=True, type=type), ortho=True, type=itype), 1.5e-15) - _assert_close(b, pypocketfft.dst(pypocketfft.dst(b, ortho=True, type=type), ortho=True, type=itype), 6e-7) - - -# TEMPORARY: separate test for DCT/DST IV, since they are less accurate -@pmp("len", len1D) -@pmp("inorm", [0, 1, 2]) -@pmp("type", [4]) -def testdcst1D4(len, inorm, type): - a = np.random.rand(len)-0.5 - b = a.astype(np.float32) - c = a.astype(np.float128) +@pmp("inorm", [0, 1]) # inorm==2 not needed, tested via inverse +@pmp("type", [1, 2, 3, 4]) +@pmp("dtype", [np.float32, np.float64, np.longfloat]) +def testdcst1D(len, inorm, type, dtype): + a = (np.random.rand(len)-0.5).astype(dtype) + eps = tol[dtype] itp = (0, 1, 3, 2, 4) + if type==4 and len%2 == 1: # relaxed accuracies for odd-length type 4 transforms + special_tol = {np.float32: 4e-5, np.float64: 6e-14, np.longfloat: 4e-17} + eps = special_tol[dtype] itype = itp[type] - if type != 1 or len > 1: - _assert_close(a, pypocketfft.dct(pypocketfft.dct(c, inorm=inorm, type=type), inorm=2-inorm, type=itype), 2e-16) - _assert_close(a, pypocketfft.dct(pypocketfft.dct(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), 1.5e-13) - _assert_close(b, pypocketfft.dct(pypocketfft.dct(b, inorm=inorm, type=type), inorm=2-inorm, type=itype), 6e-5) - _assert_close(a, pypocketfft.dst(pypocketfft.dst(c, inorm=inorm, type=type), inorm=2-inorm, type=itype), 2e-16) - _assert_close(a, pypocketfft.dst(pypocketfft.dst(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), 1.5e-13) - _assert_close(b, pypocketfft.dst(pypocketfft.dst(b, inorm=inorm, type=type), inorm=2-inorm, type=itype), 6e-5) + if type != 1 or len > 1: # there are no length-1 type 1 DCTs + _assert_close(a, pypocketfft.dct(pypocketfft.dct(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), eps) + _assert_close(a, pypocketfft.dst(pypocketfft.dst(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), eps)