Commit 59ea0f62 by Peter Bell Committed by Martin Reinecke

### Improve good_size and export to python

```- Optimises good_size by reducing search space
- Adds real variant for finding 5-smooth numbers
- Exports good_size to python```
parent 598e9765
 ... @@ -456,17 +456,58 @@ struct util // hack to avoid duplicate symbols ... @@ -456,17 +456,58 @@ struct util // hack to avoid duplicate symbols } } /* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ /* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ static POCKETFFT_NOINLINE size_t good_size(size_t n) static POCKETFFT_NOINLINE size_t good_size_cmplx(size_t n) { { if (n<=12) return n; if (n<=12) return n; size_t bestfac=2*n; size_t bestfac=2*n; for (size_t f2=1; f2=n) bestfac=f235711; while (xn) { if (x>=1; } else return n; } } return bestfac; } /* returns the smallest composite of 2, 3, 5 which is >= n */ static POCKETFFT_NOINLINE size_t good_size_real(size_t n) { if (n<=6) return n; size_t bestfac=2*n; for (size_t f5=1; f5n) { if (x>=1; } else return n; } } return bestfac; return bestfac; } } ... @@ -2154,7 +2195,7 @@ template class fftblue ... @@ -2154,7 +2195,7 @@ template class fftblue public: public: POCKETFFT_NOINLINE fftblue(size_t length) POCKETFFT_NOINLINE fftblue(size_t length) : n(length), n2(util::good_size(n*2-1)), plan(n2), mem(n+n2), : n(length), n2(util::good_size_cmplx(n*2-1)), plan(n2), mem(n+n2), bk(mem.data()), bkf(mem.data()+n) bk(mem.data()), bkf(mem.data()+n) { { /* initialize b_k */ /* initialize b_k */ ... @@ -2235,7 +2276,7 @@ template class pocketfft_c ... @@ -2235,7 +2276,7 @@ template class pocketfft_c return; return; } } double comp1 = util::cost_guess(length); double comp1 = util::cost_guess(length); double comp2 = 2*util::cost_guess(util::good_size(2*length-1)); double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1)); comp2*=1.5; /* fudge factor that appears to give good overall performance */ comp2*=1.5; /* fudge factor that appears to give good overall performance */ if (comp2>(new fftblue(length)); blueplan=unique_ptr>(new fftblue(length)); ... @@ -2275,7 +2316,7 @@ template class pocketfft_r ... @@ -2275,7 +2316,7 @@ template class pocketfft_r return; return; } } double comp1 = 0.5*util::cost_guess(length); double comp1 = 0.5*util::cost_guess(length); double comp2 = 2*util::cost_guess(util::good_size(2*length-1)); double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1)); comp2*=1.5; /* fudge factor that appears to give good overall performance */ comp2*=1.5; /* fudge factor that appears to give good overall performance */ if (comp2>(new fftblue(length)); blueplan=unique_ptr>(new fftblue(length)); ... ...
 ... @@ -377,6 +377,12 @@ py::array genuine_hartley(const py::array &in, const py::object &axes_, ... @@ -377,6 +377,12 @@ py::array genuine_hartley(const py::array &in, const py::object &axes_, DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_)) DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_)) } } size_t good_size(size_t n, bool real) { using namespace pocketfft::detail; return real ? util::good_size_real(n) : util::good_size_cmplx(n); } const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms. const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms. This module supports This module supports ... @@ -662,6 +668,22 @@ numpy.ndarray (same shape and data type as `a`) ... @@ -662,6 +668,22 @@ numpy.ndarray (same shape and data type as `a`) The transformed data The transformed data )"""; )"""; const char * good_size_DS = R"""(Returns a good length to pad an FFT to. Parameters ---------- n : int Minimum transform length real : bool, optional True if either input or output of FFT should be fully real. Returns ------- out : int The smallest fast size >= n )"""; } // unnamed namespace } // unnamed namespace PYBIND11_MODULE(pypocketfft, m) PYBIND11_MODULE(pypocketfft, m) ... @@ -685,4 +707,5 @@ PYBIND11_MODULE(pypocketfft, m) ... @@ -685,4 +707,5 @@ PYBIND11_MODULE(pypocketfft, m) "out"_a=None, "nthreads"_a=1); "out"_a=None, "nthreads"_a=1); m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0, m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1); "out"_a=None, "nthreads"_a=1); m.def("good_size", good_size, good_size_DS, "n"_a, "real"_a=false); } }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!