From 50eda01cacaad7af05fa45110c01435ee6774647 Mon Sep 17 00:00:00 2001 From: Peter Bell Date: Tue, 30 Jul 2019 16:58:57 +0100 Subject: [PATCH] Improve good_size and export to python - Optimises good_size by reducing search space - Adds real variant for finding 5-smooth numbers - Exports good_size to python --- pocketfft_hdronly.h | 61 +++++++++++++++++++++++++++++++++++++-------- pypocketfft.cc | 23 +++++++++++++++++ 2 files changed, 74 insertions(+), 10 deletions(-) diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 5e3e153..9eeef63 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -456,17 +456,58 @@ struct util // hack to avoid duplicate symbols } /* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ - static POCKETFFT_NOINLINE size_t good_size(size_t n) + static POCKETFFT_NOINLINE size_t good_size_cmplx(size_t n) { if (n<=12) return n; size_t bestfac=2*n; - for (size_t f2=1; f2=n) bestfac=f235711; + for (size_t f11=1; f11n) + { + if (x>=1; + } + else + return n; + } + } + return bestfac; + } + + /* returns the smallest composite of 2, 3, 5 which is >= n */ + static POCKETFFT_NOINLINE size_t good_size_real(size_t n) + { + if (n<=6) return n; + + size_t bestfac=2*n; + for (size_t f5=1; f5n) + { + if (x>=1; + } + else + return n; + } + } return bestfac; } @@ -2154,7 +2195,7 @@ template class fftblue public: POCKETFFT_NOINLINE fftblue(size_t length) - : n(length), n2(util::good_size(n*2-1)), plan(n2), mem(n+n2), + : n(length), n2(util::good_size_cmplx(n*2-1)), plan(n2), mem(n+n2), bk(mem.data()), bkf(mem.data()+n) { /* initialize b_k */ @@ -2235,7 +2276,7 @@ template class pocketfft_c return; } double comp1 = util::cost_guess(length); - double comp2 = 2*util::cost_guess(util::good_size(2*length-1)); + double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1)); comp2*=1.5; /* fudge factor that appears to give good overall performance */ if (comp2>(new fftblue(length)); @@ -2275,7 +2316,7 @@ template class pocketfft_r return; } double comp1 = 0.5*util::cost_guess(length); - double comp2 = 2*util::cost_guess(util::good_size(2*length-1)); + double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1)); comp2*=1.5; /* fudge factor that appears to give good overall performance */ if (comp2>(new fftblue(length)); diff --git a/pypocketfft.cc b/pypocketfft.cc index 1efd2f3..8b43ac4 100644 --- a/pypocketfft.cc +++ b/pypocketfft.cc @@ -377,6 +377,12 @@ py::array genuine_hartley(const py::array &in, const py::object &axes_, DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_)) } +size_t good_size(size_t n, bool real) + { + using namespace pocketfft::detail; + return real ? util::good_size_real(n) : util::good_size_cmplx(n); + } + const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms. This module supports @@ -662,6 +668,22 @@ numpy.ndarray (same shape and data type as `a`) The transformed data )"""; +const char * good_size_DS = R"""(Returns a good length to pad an FFT to. + +Parameters +---------- +n : int + Minimum transform length +real : bool, optional + True if either input or output of FFT should be fully real. + +Returns +------- +out : int + The smallest fast size >= n + +)"""; + } // unnamed namespace PYBIND11_MODULE(pypocketfft, m) @@ -685,4 +707,5 @@ PYBIND11_MODULE(pypocketfft, m) "out"_a=None, "nthreads"_a=1); m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1); + m.def("good_size", good_size, good_size_DS, "n"_a, "real"_a=false); } -- GitLab