From d1874b1da90a773ce11542f94819155c3056da37 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Tue, 25 Jun 2019 14:33:06 +0200 Subject: [PATCH] no more problematic macros / r2hc -> real2hermitian --- bench.py | 4 +- pocketfft_hdronly.h | 183 ++++++++++++++++++++++++-------------------- pypocketfft.cc | 19 ++--- test.py | 4 +- 4 files changed, 114 insertions(+), 96 deletions(-) diff --git a/bench.py b/bench.py index 764d5ba..85a3434 100644 --- a/bench.py +++ b/bench.py @@ -108,8 +108,8 @@ def bench_nd(ndim, nmax, ntry, tp, funcs, nrepeat, ttl="", filename=""): plt.show() -funcs = (measure_pypocketfft, measure_fftw_np_interface) -ttl = "pypocketfft/fftw_numpy_interface" +funcs = (measure_pypocketfft, measure_fftw) +ttl = "pypocketfft/FFTW()" bench_nd(1, 8192, 100, "c16", funcs, 10, ttl, "1d.png") bench_nd(2, 2048, 100, "c16", funcs, 2, ttl, "2d.png") bench_nd(3, 256, 100, "c16", funcs, 2, ttl, "3d.png") diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 4baffa7..93eefe9 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -62,14 +62,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__GNUC__) -#define NOINLINE __attribute__((noinline)) -#define RESTRICT __restrict__ +#define POCKETFFT_NOINLINE __attribute__((noinline)) +#define POCKETFFT_RESTRICT __restrict__ #elif defined(_MSC_VER) -#define NOINLINE __declspec(noinline) -#define RESTRICT __restrict +#define POCKETFFT_NOINLINE __declspec(noinline) +#define POCKETFFT_RESTRICT __restrict #else -#define NOINLINE -#define RESTRICT +#define POCKETFFT_NOINLINE +#define POCKETFFT_RESTRICT #endif namespace pocketfft { @@ -242,7 +242,7 @@ template class sincos_2pibyn using Thigh = typename TypeSelectorsizeof(double))>::type; arr data; - void my_sincosm1pi (Thigh a_, Thigh *RESTRICT res) + void my_sincosm1pi (Thigh a_, Thigh *POCKETFFT_RESTRICT res) { if (sizeof(Thigh)>sizeof(double)) // don't have the code for long double { @@ -280,7 +280,8 @@ template class sincos_2pibyn res[1] = s; } - NOINLINE void calc_first_octant(size_t den, T * RESTRICT res) + POCKETFFT_NOINLINE void calc_first_octant(size_t den, + T * POCKETFFT_RESTRICT res) { size_t n = (den+4)>>3; if (n==0) return; @@ -313,9 +314,9 @@ template class sincos_2pibyn } } - void calc_first_quadrant(size_t n, T * RESTRICT res) + void calc_first_quadrant(size_t n, T * POCKETFFT_RESTRICT res) { - T * RESTRICT p = res+n; + T * POCKETFFT_RESTRICT p = res+n; calc_first_octant(n<<1, p); size_t ndone=(n+2)>>2; size_t i=0, idx1=0, idx2=2*ndone-2; @@ -328,7 +329,7 @@ template class sincos_2pibyn { res[idx1] = p[2*i]; res[idx1+1] = p[2*i+1]; } } - void calc_first_half(size_t n, T * RESTRICT res) + void calc_first_half(size_t n, T * POCKETFFT_RESTRICT res) { int ndone=int(n+1)>>1; T * p = res+n-1; @@ -344,7 +345,7 @@ template class sincos_2pibyn { auto xm = 2*in-i4; res[2*i] = -p[2*xm]; res[2*i+1] = p[2*xm+1]; } } - void fill_first_quadrant(size_t n, T * RESTRICT res) + void fill_first_quadrant(size_t n, T * POCKETFFT_RESTRICT res) { constexpr T hsqt2 = T(0.707106781186547524400844362104849L); size_t quart = n>>2; @@ -354,7 +355,7 @@ template class sincos_2pibyn { res[j] = res[i+1]; res[j+1] = res[i]; } } - NOINLINE void fill_first_half(size_t n, T * RESTRICT res) + POCKETFFT_NOINLINE void fill_first_half(size_t n, T * POCKETFFT_RESTRICT res) { size_t half = n>>1; if ((n&3)==0) @@ -365,7 +366,7 @@ template class sincos_2pibyn { res[j] = -res[i]; res[j+1] = res[i+1]; } } - void fill_second_half(size_t n, T * RESTRICT res) + void fill_second_half(size_t n, T * POCKETFFT_RESTRICT res) { if ((n&1)==0) for (size_t i=0; i class sincos_2pibyn { res[j] = res[i]; res[j+1] = -res[i+1]; } } - NOINLINE void sincos_2pibyn_half(size_t n, T * RESTRICT res) + POCKETFFT_NOINLINE void sincos_2pibyn_half(size_t n, T * POCKETFFT_RESTRICT res) { if ((n&3)==0) { @@ -393,7 +394,7 @@ template class sincos_2pibyn } public: - NOINLINE sincos_2pibyn(size_t n, bool half) + POCKETFFT_NOINLINE sincos_2pibyn(size_t n, bool half) : data(2*n) { sincos_2pibyn_half(n, data.data()); @@ -408,7 +409,7 @@ template class sincos_2pibyn struct util // hack to avoid duplicate symbols { - static NOINLINE size_t largest_prime_factor (size_t n) + static POCKETFFT_NOINLINE size_t largest_prime_factor (size_t n) { size_t res=1; while ((n&1)==0) @@ -420,7 +421,7 @@ struct util // hack to avoid duplicate symbols return res; } - static NOINLINE double cost_guess (size_t n) + static POCKETFFT_NOINLINE double cost_guess (size_t n) { constexpr double lfp=1.1; // penalty for non-hardcoded larger factors size_t ni=n; @@ -438,7 +439,7 @@ struct util // hack to avoid duplicate symbols } /* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ - static NOINLINE size_t good_size(size_t n) + static POCKETFFT_NOINLINE size_t good_size(size_t n) { if (n<=12) return n; @@ -460,7 +461,7 @@ struct util // hack to avoid duplicate symbols return res; } - static NOINLINE void sanity_check(const shape_t &shape, + static POCKETFFT_NOINLINE void sanity_check(const shape_t &shape, const stride_t &stride_in, const stride_t &stride_out, bool inplace) { auto ndim = shape.size(); @@ -471,7 +472,7 @@ struct util // hack to avoid duplicate symbols throw runtime_error("stride mismatch"); } - static NOINLINE void sanity_check(const shape_t &shape, + static POCKETFFT_NOINLINE void sanity_check(const shape_t &shape, const stride_t &stride_in, const stride_t &stride_out, bool inplace, const shape_t &axes) { @@ -485,7 +486,7 @@ struct util // hack to avoid duplicate symbols } } - static NOINLINE void sanity_check(const shape_t &shape, + static POCKETFFT_NOINLINE void sanity_check(const shape_t &shape, const stride_t &stride_in, const stride_t &stride_out, bool inplace, size_t axis) { @@ -530,7 +531,8 @@ template class cfftp { fact.push_back({factor, nullptr, nullptr}); } template void pass2 (size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const cmplx * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const cmplx * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=2; @@ -581,7 +583,8 @@ template void pass2 (size_t ido, size_t l1, CH(i,k,u2) = db.template special_mul(WA(u2-1,i)); \ } template void pass3 (size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const cmplx * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const cmplx * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=3; constexpr T0 tw1r=-0.5, @@ -620,7 +623,8 @@ template void pass3 (size_t ido, size_t l1, #undef POCKETFFT_PREP3 template void pass4 (size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const cmplx * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const cmplx * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=4; @@ -697,7 +701,8 @@ template void pass4 (size_t ido, size_t l1, CH(i,k,u2) = db.template special_mul(WA(u2-1,i)); \ } template void pass5 (size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const cmplx * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const cmplx * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=5; constexpr T0 tw1r= T0(0.3090169943749474241022934171828191L), @@ -768,7 +773,8 @@ template void pass5 (size_t ido, size_t l1, } template void pass7(size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const cmplx * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const cmplx * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=7; constexpr T0 tw1r= T0(0.6234898018587335305250048840042398L), @@ -837,7 +843,8 @@ template inline void PMINPLACE(T &a, T &b) { T t = a; a.r+=b.r; a.i+=b.i; b.r=t.r-b.r; b.i=t.i-b.i; } template void pass8 (size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const cmplx * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const cmplx * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=8; @@ -935,7 +942,7 @@ template void pass8 (size_t ido, size_t l1, CH(idx,k,0).r=t1.r+t2.r+t3.r+t4.r+t5.r+t6.r; \ CH(idx,k,0).i=t1.i+t2.i+t3.i+t4.i+t5.i+t6.i; -#define PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,out1,out2) \ +#define POCKETFFT_PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,out1,out2) \ { \ T ca = t1 + t2*x1 + t3*x2 + t4*x3 + t5*x4 +t6*x5, \ cb; \ @@ -943,18 +950,19 @@ template void pass8 (size_t ido, size_t l1, cb.r=-(y1*t11.i y2*t10.i y3*t9.i y4*t8.i y5*t7.i ); \ PMC(out1,out2,ca,cb); \ } -#define PARTSTEP11a(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5) \ - PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,CH(0,k,u1),CH(0,k,u2)) -#define PARTSTEP11(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5) \ +#define POCKETFFT_PARTSTEP11a(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5) \ + POCKETFFT_PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,CH(0,k,u1),CH(0,k,u2)) +#define POCKETFFT_PARTSTEP11(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5) \ { \ T da,db; \ - PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,da,db) \ + POCKETFFT_PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,da,db) \ CH(i,k,u1) = da.template special_mul(WA(u1-1,i)); \ CH(i,k,u2) = db.template special_mul(WA(u2-1,i)); \ } template void pass11 (size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const cmplx * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const cmplx * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=11; constexpr T0 tw1r= T0(0.8412535328311811688618116489193677L), @@ -979,31 +987,31 @@ template void pass11 (size_t ido, size_t l1, for (size_t k=0; k void pass11 (size_t ido, size_t l1, #undef POCKETFFT_PREP11 template void passg (size_t ido, size_t ip, - size_t l1, T * RESTRICT cc, T * RESTRICT ch, const cmplx * RESTRICT wa, - const cmplx * RESTRICT csarr) + size_t l1, T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const cmplx * POCKETFFT_RESTRICT wa, + const cmplx * POCKETFFT_RESTRICT csarr) { const size_t cdim=ip; size_t ipph = (ip+1)/2; @@ -1177,7 +1186,7 @@ template void pass_all(T c[], T0 fct) { pass_all(c, fct); } private: - NOINLINE void factorize() + POCKETFFT_NOINLINE void factorize() { size_t len=length; while ((len&7)==0) @@ -1240,7 +1249,7 @@ template void pass_all(T c[], T0 fct) } public: - NOINLINE cfftp(size_t length_) + POCKETFFT_NOINLINE cfftp(size_t length_) : length(length_) { if (length==0) throw runtime_error("zero length FFT requested"); @@ -1280,7 +1289,8 @@ template inline void MULPM { a=c*e+d*f; b=c*f-d*e; } template void radf2 (size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=2; @@ -1318,7 +1328,8 @@ template void radf2 (size_t ido, size_t l1, } template void radf3(size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=3; constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L); @@ -1357,7 +1368,8 @@ template void radf3(size_t ido, size_t l1, } template void radf4(size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=4; constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L); @@ -1404,7 +1416,8 @@ template void radf4(size_t ido, size_t l1, } template void radf5(size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=5; constexpr T0 tr11= T0(0.3090169943749474241022934171828191L), @@ -1460,8 +1473,8 @@ template void radf5(size_t ido, size_t l1, #undef POCKETFFT_REARRANGE template void radfg(size_t ido, size_t ip, size_t l1, - T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa, - const T0 * RESTRICT csarr) + T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa, const T0 * POCKETFFT_RESTRICT csarr) { const size_t cdim=ip; size_t ipph=(ip+1)/2; @@ -1607,8 +1620,9 @@ template void radfg(size_t ido, size_t ip, size_t l1, } } -template void radb2(size_t ido, size_t l1, const T * RESTRICT cc, - T * RESTRICT ch, const T0 * RESTRICT wa) +template void radb2(size_t ido, size_t l1, + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=2; @@ -1639,7 +1653,8 @@ template void radb2(size_t ido, size_t l1, const T * RESTRICT cc, } template void radb3(size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=3; constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L); @@ -1679,7 +1694,8 @@ template void radb3(size_t ido, size_t l1, } template void radb4(size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=4; constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L); @@ -1731,7 +1747,8 @@ template void radb4(size_t ido, size_t l1, } template void radb5(size_t ido, size_t l1, - const T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa) + const T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa) { constexpr size_t cdim=5; constexpr T0 tr11= T0(0.3090169943749474241022934171828191L), @@ -1790,8 +1807,8 @@ template void radb5(size_t ido, size_t l1, } template void radbg(size_t ido, size_t ip, size_t l1, - T * RESTRICT cc, T * RESTRICT ch, const T0 * RESTRICT wa, - const T0 * RESTRICT csarr) + T * POCKETFFT_RESTRICT cc, T * POCKETFFT_RESTRICT ch, + const T0 * POCKETFFT_RESTRICT wa, const T0 * POCKETFFT_RESTRICT csarr) { const size_t cdim=ip; size_t ipph=(ip+1)/ 2; @@ -2072,7 +2089,7 @@ template void radbg(size_t ido, size_t ip, size_t l1, } public: - NOINLINE rfftp(size_t length_) + POCKETFFT_NOINLINE rfftp(size_t length_) : length(length_) { if (length==0) throw runtime_error("zero-sized FFT"); @@ -2121,7 +2138,7 @@ template class fftblue } public: - NOINLINE fftblue(size_t length) + POCKETFFT_NOINLINE fftblue(size_t length) : n(length), n2(util::good_size(n*2-1)), plan(n2), mem(n+n2), bk(mem.data()), bkf(mem.data()+n) { @@ -2192,7 +2209,7 @@ template class pocketfft_c size_t len; public: - NOINLINE pocketfft_c(size_t length) + POCKETFFT_NOINLINE pocketfft_c(size_t length) : len(length) { if (length==0) throw runtime_error("zero-length FFT requested"); @@ -2211,10 +2228,10 @@ template class pocketfft_c packplan=unique_ptr>(new cfftp(length)); } - template NOINLINE void backward(cmplx c[], T0 fct) + template POCKETFFT_NOINLINE void backward(cmplx c[], T0 fct) { packplan ? packplan->backward(c,fct) : blueplan->backward(c,fct); } - template NOINLINE void forward(cmplx c[], T0 fct) + template POCKETFFT_NOINLINE void forward(cmplx c[], T0 fct) { packplan ? packplan->forward(c,fct) : blueplan->forward(c,fct); } size_t length() const { return len; } @@ -2232,7 +2249,7 @@ template class pocketfft_r size_t len; public: - NOINLINE pocketfft_r(size_t length) + POCKETFFT_NOINLINE pocketfft_r(size_t length) : len(length) { if (length==0) throw runtime_error("zero-length FFT requested"); @@ -2251,13 +2268,13 @@ template class pocketfft_r packplan=unique_ptr>(new rfftp(length)); } - template NOINLINE void backward(T c[], T0 fct) + template POCKETFFT_NOINLINE void backward(T c[], T0 fct) { packplan ? packplan->backward(c,fct) : blueplan->backward_r(c,fct); } - template NOINLINE void forward(T c[], T0 fct) + template POCKETFFT_NOINLINE void forward(T c[], T0 fct) { packplan ? packplan->forward(c,fct) : blueplan->forward_r(c,fct); @@ -2574,7 +2591,7 @@ template arr alloc_tmp(const shape_t &shape, #define POCKETFFT_NTHREADS #endif -template NOINLINE void general_c( +template POCKETFFT_NOINLINE void general_c( const cndarr> &in, ndarr> &out, const shape_t &axes, bool forward, T fct, size_t POCKETFFT_NTHREADS) { @@ -2641,7 +2658,7 @@ template NOINLINE void general_c( } } -template NOINLINE void general_hartley( +template POCKETFFT_NOINLINE void general_hartley( const cndarr &in, ndarr &out, const shape_t &axes, T fct, size_t POCKETFFT_NTHREADS) { @@ -2709,7 +2726,7 @@ template NOINLINE void general_hartley( } } -template NOINLINE void general_r2c( +template POCKETFFT_NOINLINE void general_r2c( const cndarr &in, ndarr> &out, size_t axis, bool forward, T fct, size_t POCKETFFT_NTHREADS) { @@ -2769,7 +2786,7 @@ template NOINLINE void general_r2c( } } // end of parallel region } -template NOINLINE void general_c2r( +template POCKETFFT_NOINLINE void general_c2r( const cndarr> &in, ndarr &out, size_t axis, bool forward, T fct, size_t POCKETFFT_NTHREADS) { @@ -2846,7 +2863,7 @@ template NOINLINE void general_c2r( } // end of parallel region } -template NOINLINE void general_r( +template POCKETFFT_NOINLINE void general_r( const cndarr &in, ndarr &out, const shape_t &axes, bool r2c, bool forward, T fct, size_t POCKETFFT_NTHREADS) { @@ -3027,14 +3044,14 @@ template void c2r(const shape_t &shape_out, template void r2r_fftpack(const shape_t &shape, const stride_t &stride_in, const stride_t &stride_out, const shape_t &axes, - bool r2hc, bool forward, const T *data_in, T *data_out, T fct, + bool real2hermitian, bool forward, const T *data_in, T *data_out, T fct, size_t nthreads=1) { if (util::prod(shape)==0) return; util::sanity_check(shape, stride_in, stride_out, data_in==data_out, axes); cndarr ain(data_in, shape, stride_in); ndarr aout(data_out, shape, stride_out); - general_r(ain, aout, axes, r2hc, forward, fct, nthreads); + general_r(ain, aout, axes, real2hermitian, forward, fct, nthreads); } template void r2r_separable_hartley(const shape_t &shape, @@ -3062,7 +3079,7 @@ using detail::r2r_separable_hartley; } // namespace pocketfft -#undef NOINLINE -#undef RESTRICT +#undef POCKETFFT_NOINLINE +#undef POCKETFFT_RESTRICT #endif // POCKETFFT_HDRONLY_H diff --git a/pypocketfft.cc b/pypocketfft.cc index 29fea98..cf2a424 100644 --- a/pypocketfft.cc +++ b/pypocketfft.cc @@ -200,7 +200,7 @@ py::array r2c(const py::array &in, const py::object &axes_, bool forward, } template py::array r2r_fftpack_internal(const py::array &in, - const py::object &axes_, bool r2hc, bool forward, int inorm, + const py::object &axes_, bool real2hermitian, bool forward, int inorm, py::object &out_, size_t nthreads) { auto axes = makeaxes(in, axes_); @@ -213,18 +213,18 @@ template py::array r2r_fftpack_internal(const py::array &in, { py::gil_scoped_release release; T fct = norm_fct(inorm, dims, axes); - pocketfft::r2r_fftpack(dims, s_in, s_out, axes, r2hc, forward, + pocketfft::r2r_fftpack(dims, s_in, s_out, axes, real2hermitian, forward, d_in, d_out, fct, nthreads); } return res; } py::array r2r_fftpack(const py::array &in, const py::object &axes_, - bool r2hc, bool forward, int inorm, py::object &out_, + bool real2hermitian, bool forward, int inorm, py::object &out_, size_t nthreads) { - DISPATCH(in, f64, f32, flong, r2r_fftpack_internal, (in, axes_, r2hc, - forward, inorm, out_, nthreads)) + DISPATCH(in, f64, f32, flong, r2r_fftpack_internal, (in, axes_, + real2hermitian, forward, inorm, out_, nthreads)) } template py::array c2r_internal(const py::array &in, @@ -437,9 +437,10 @@ a : numpy.ndarray (any real type) axes : list of integers The axes along which the FFT is carried out. If not set, all axes will be transformed. -r2hc : bool - if True, the input is purely real and the output will be halfcomplex, - otherwise the opposite +real2hermitian : bool + if True, the input is purely real and the output will have Hermitian + symmetry and be stored in FFTPACK's halfcomplex ordering, otherwise the + opposite. forward : bool If `True`, a negative sign is used in the exponent, else a positive one. inorm : int @@ -538,7 +539,7 @@ PYBIND11_MODULE(pypocketfft, m) m.def("c2r", c2r, c2r_DS, "a"_a, "axes"_a=None, "lastsize"_a=0, "forward"_a=true, "inorm"_a=0, "out"_a=None, "nthreads"_a=1); m.def("r2r_fftpack", r2r_fftpack, r2r_fftpack_DS, "a"_a, "axes"_a, - "r2hc"_a, "forward"_a, "inorm"_a=0, "out"_a=None, "nthreads"_a=1); + "real2hermitian"_a, "forward"_a, "inorm"_a=0, "out"_a=None, "nthreads"_a=1); m.def("separable_hartley", separable_hartley, separable_hartley_DS, "a"_a, "axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1); m.def("genuine_hartley", genuine_hartley, genuine_hartley_DS, "a"_a, diff --git a/test.py b/test.py index b477358..5c43f82 100644 --- a/test.py +++ b/test.py @@ -38,13 +38,13 @@ def irfftn(a, axes=None, lastsize=0, inorm=0, nthreads=1): def rfft_scipy(a, axis, inorm=0, out=None, nthreads=1): - return pypocketfft.r2r_fftpack(a, axes=(axis,), r2hc=True, + return pypocketfft.r2r_fftpack(a, axes=(axis,), real2hermitian=True, forward=True, inorm=inorm, out=out, nthreads=nthreads) def irfft_scipy(a, axis, inorm=0, out=None, nthreads=1): - return pypocketfft.r2r_fftpack(a, axes=(axis,), r2hc=False, + return pypocketfft.r2r_fftpack(a, axes=(axis,), real2hermitian=False, forward=False, inorm=inorm, out=out, nthreads=nthreads) -- GitLab