Commit 59ea0f62 authored by Peter Bell's avatar Peter Bell Committed by Martin Reinecke

Improve good_size and export to python

- Optimises good_size by reducing search space
- Adds real variant for finding 5-smooth numbers
- Exports good_size to python
parent 598e9765
...@@ -456,17 +456,58 @@ struct util // hack to avoid duplicate symbols ...@@ -456,17 +456,58 @@ struct util // hack to avoid duplicate symbols
} }
/* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ /* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */
static POCKETFFT_NOINLINE size_t good_size(size_t n) static POCKETFFT_NOINLINE size_t good_size_cmplx(size_t n)
{ {
if (n<=12) return n; if (n<=12) return n;
size_t bestfac=2*n; size_t bestfac=2*n;
for (size_t f2=1; f2<bestfac; f2*=2) for (size_t f11=1; f11<bestfac; f11*=11)
for (size_t f23=f2; f23<bestfac; f23*=3) for (size_t f117=f11; f117<bestfac; f117*=7)
for (size_t f235=f23; f235<bestfac; f235*=5) for (size_t f1175=f117; f1175<bestfac; f1175*=5)
for (size_t f2357=f235; f2357<bestfac; f2357*=7) {
for (size_t f235711=f2357; f235711<bestfac; f235711*=11) size_t x=f1175;
if (f235711>=n) bestfac=f235711; while (x<n) x*=2;
for (;;)
{
if (x<n)
x*=3;
else if (x>n)
{
if (x<bestfac) bestfac=x;
if (x&1) break;
x>>=1;
}
else
return n;
}
}
return bestfac;
}
/* returns the smallest composite of 2, 3, 5 which is >= n */
static POCKETFFT_NOINLINE size_t good_size_real(size_t n)
{
if (n<=6) return n;
size_t bestfac=2*n;
for (size_t f5=1; f5<bestfac; f5*=5)
{
size_t x = f5;
while (x<n) x *= 2;
for (;;)
{
if (x<n)
x*=3;
else if (x>n)
{
if (x<bestfac) bestfac=x;
if (x&1) break;
x>>=1;
}
else
return n;
}
}
return bestfac; return bestfac;
} }
...@@ -2154,7 +2195,7 @@ template<typename T0> class fftblue ...@@ -2154,7 +2195,7 @@ template<typename T0> class fftblue
public: public:
POCKETFFT_NOINLINE fftblue(size_t length) POCKETFFT_NOINLINE fftblue(size_t length)
: n(length), n2(util::good_size(n*2-1)), plan(n2), mem(n+n2), : n(length), n2(util::good_size_cmplx(n*2-1)), plan(n2), mem(n+n2),
bk(mem.data()), bkf(mem.data()+n) bk(mem.data()), bkf(mem.data()+n)
{ {
/* initialize b_k */ /* initialize b_k */
...@@ -2235,7 +2276,7 @@ template<typename T0> class pocketfft_c ...@@ -2235,7 +2276,7 @@ template<typename T0> class pocketfft_c
return; return;
} }
double comp1 = util::cost_guess(length); double comp1 = util::cost_guess(length);
double comp2 = 2*util::cost_guess(util::good_size(2*length-1)); double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1));
comp2*=1.5; /* fudge factor that appears to give good overall performance */ comp2*=1.5; /* fudge factor that appears to give good overall performance */
if (comp2<comp1) // use Bluestein if (comp2<comp1) // use Bluestein
blueplan=unique_ptr<fftblue<T0>>(new fftblue<T0>(length)); blueplan=unique_ptr<fftblue<T0>>(new fftblue<T0>(length));
...@@ -2275,7 +2316,7 @@ template<typename T0> class pocketfft_r ...@@ -2275,7 +2316,7 @@ template<typename T0> class pocketfft_r
return; return;
} }
double comp1 = 0.5*util::cost_guess(length); double comp1 = 0.5*util::cost_guess(length);
double comp2 = 2*util::cost_guess(util::good_size(2*length-1)); double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1));
comp2*=1.5; /* fudge factor that appears to give good overall performance */ comp2*=1.5; /* fudge factor that appears to give good overall performance */
if (comp2<comp1) // use Bluestein if (comp2<comp1) // use Bluestein
blueplan=unique_ptr<fftblue<T0>>(new fftblue<T0>(length)); blueplan=unique_ptr<fftblue<T0>>(new fftblue<T0>(length));
......
...@@ -377,6 +377,12 @@ py::array genuine_hartley(const py::array &in, const py::object &axes_, ...@@ -377,6 +377,12 @@ py::array genuine_hartley(const py::array &in, const py::object &axes_,
DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_)) DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_))
} }
size_t good_size(size_t n, bool real)
{
using namespace pocketfft::detail;
return real ? util::good_size_real(n) : util::good_size_cmplx(n);
}
const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms. const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms.
This module supports This module supports
...@@ -662,6 +668,22 @@ numpy.ndarray (same shape and data type as `a`) ...@@ -662,6 +668,22 @@ numpy.ndarray (same shape and data type as `a`)
The transformed data The transformed data
)"""; )""";
const char * good_size_DS = R"""(Returns a good length to pad an FFT to.
Parameters
----------
n : int
Minimum transform length
real : bool, optional
True if either input or output of FFT should be fully real.
Returns
-------
out : int
The smallest fast size >= n
)""";
} // unnamed namespace } // unnamed namespace
PYBIND11_MODULE(pypocketfft, m) PYBIND11_MODULE(pypocketfft, m)
...@@ -685,4 +707,5 @@ PYBIND11_MODULE(pypocketfft, m) ...@@ -685,4 +707,5 @@ PYBIND11_MODULE(pypocketfft, m)
"out"_a=None, "nthreads"_a=1); "out"_a=None, "nthreads"_a=1);
m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0, m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0,
"out"_a=None, "nthreads"_a=1); "out"_a=None, "nthreads"_a=1);
m.def("good_size", good_size, good_size_DS, "n"_a, "real"_a=false);
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment