diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 7d8ce9c9d062d4ddfe59074121653861afd55122..77b3d62b473837be87d949669aea878d0e4d85c9 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -152,7 +152,8 @@ template<> struct VLEN<double> { static constexpr size_t val=2; }; #if __cplusplus >= 201703L inline void *aligned_alloc(size_t align, size_t size) { - void *ptr = ::aligned_alloc(align,size); + // aligned_alloc() requires that the requested size is a multiple of "align" + void *ptr = ::aligned_alloc(align,(size+align-1)&(~(align-1))); if (!ptr) throw std::bad_alloc(); return ptr; } @@ -3362,18 +3363,18 @@ template<typename T> POCKETFFT_NOINLINE void general_c2r( struct ExecR2R { - bool r2c, forward; + bool r2h, forward; template <typename T0, typename T, size_t vlen> void operator () ( const multi_iter<vlen> &it, const cndarr<T0> &in, ndarr<T0> &out, T * buf, const pocketfft_r<T0> &plan, T0 fct) const { copy_input(it, in, buf); - if ((!r2c) && forward) + if ((!r2h) && forward) for (size_t i=2; i<it.length_out(); i+=2) buf[i] = -buf[i]; - plan.exec(buf, fct, forward); - if (r2c && (!forward)) + plan.exec(buf, fct, r2h); + if (r2h && (!forward)) for (size_t i=2; i<it.length_out(); i+=2) buf[i] = -buf[i]; copy_output(it, buf, out); diff --git a/test.py b/test.py index 0a59b295dc2b5451601352b7844fa84ce820e518..9a7f66b279c3ec7c00be6907e021f00d5b11f7b4 100644 --- a/test.py +++ b/test.py @@ -55,6 +55,19 @@ def irfft_scipy(a, axis, inorm=0, out=None, nthreads=1): forward=False, inorm=inorm, out=out, nthreads=nthreads) + +def hc2c(inp, otype): + n = inp.shape[0] + n2 = (n-1)//2 + out = np.zeros_like(inp, dtype=otype) + out[0] = inp[0] + if n % 2 == 0: + out[n//2] = inp[-1] + out[1:n2+1] = inp[1:1+2*n2:2] + 1j*inp[2:2+2*n2:2] + out[-1:-n2-1:-1] = inp[1:1+2*n2:2] - 1j*inp[2:2+2*n2:2] + return out + + tol = {np.float32: 6e-7, np.float64: 1.5e-15, np.longfloat: 1e-18} ctype = {np.float32: np.complex64, np.float64: np.complex128, np.longfloat: np.longcomplex} @@ -217,3 +230,18 @@ def testdcst1D(len, inorm, type, dtype): if type != 1 or len > 1: # there are no length-1 type 1 DCTs _assert_close(a, pypocketfft.dct(pypocketfft.dct(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), eps) _assert_close(a, pypocketfft.dst(pypocketfft.dst(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), eps) + + +@pmp("len", (3, 4, 5, 6, 7, 8, 9, 10)) +@pmp("dtype", dtypes) +def testfftpack_extra(len, dtype): + rng = np.random.default_rng(42) + a = (rng.random(len)-0.5).astype(dtype) + eps = tol[dtype] + ref = pypocketfft.c2c(a, forward=False) + test = pypocketfft.r2r_fftpack(a, (0,), real2hermitian=True, forward=False) + testc = hc2c(test, ctype[dtype]) + _assert_close(ref, testc, eps) + ref = pypocketfft.c2c(ref, forward=True) + test = pypocketfft.r2r_fftpack(test, (0,), real2hermitian=False, forward=True) + _assert_close(ref, test, eps)