Commit 6f1bba7d authored by Martin Reinecke's avatar Martin Reinecke

Merge branch 'good_size' into 'master'

Improve good_size and export to python

See merge request !17
parents 598e9765 59ea0f62
......@@ -456,17 +456,58 @@ struct util // hack to avoid duplicate symbols
}
/* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */
static POCKETFFT_NOINLINE size_t good_size(size_t n)
static POCKETFFT_NOINLINE size_t good_size_cmplx(size_t n)
{
if (n<=12) return n;
size_t bestfac=2*n;
for (size_t f2=1; f2<bestfac; f2*=2)
for (size_t f23=f2; f23<bestfac; f23*=3)
for (size_t f235=f23; f235<bestfac; f235*=5)
for (size_t f2357=f235; f2357<bestfac; f2357*=7)
for (size_t f235711=f2357; f235711<bestfac; f235711*=11)
if (f235711>=n) bestfac=f235711;
for (size_t f11=1; f11<bestfac; f11*=11)
for (size_t f117=f11; f117<bestfac; f117*=7)
for (size_t f1175=f117; f1175<bestfac; f1175*=5)
{
size_t x=f1175;
while (x<n) x*=2;
for (;;)
{
if (x<n)
x*=3;
else if (x>n)
{
if (x<bestfac) bestfac=x;
if (x&1) break;
x>>=1;
}
else
return n;
}
}
return bestfac;
}
/* returns the smallest composite of 2, 3, 5 which is >= n */
static POCKETFFT_NOINLINE size_t good_size_real(size_t n)
{
if (n<=6) return n;
size_t bestfac=2*n;
for (size_t f5=1; f5<bestfac; f5*=5)
{
size_t x = f5;
while (x<n) x *= 2;
for (;;)
{
if (x<n)
x*=3;
else if (x>n)
{
if (x<bestfac) bestfac=x;
if (x&1) break;
x>>=1;
}
else
return n;
}
}
return bestfac;
}
......@@ -2154,7 +2195,7 @@ template<typename T0> class fftblue
public:
POCKETFFT_NOINLINE fftblue(size_t length)
: n(length), n2(util::good_size(n*2-1)), plan(n2), mem(n+n2),
: n(length), n2(util::good_size_cmplx(n*2-1)), plan(n2), mem(n+n2),
bk(mem.data()), bkf(mem.data()+n)
{
/* initialize b_k */
......@@ -2235,7 +2276,7 @@ template<typename T0> class pocketfft_c
return;
}
double comp1 = util::cost_guess(length);
double comp2 = 2*util::cost_guess(util::good_size(2*length-1));
double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1));
comp2*=1.5; /* fudge factor that appears to give good overall performance */
if (comp2<comp1) // use Bluestein
blueplan=unique_ptr<fftblue<T0>>(new fftblue<T0>(length));
......@@ -2275,7 +2316,7 @@ template<typename T0> class pocketfft_r
return;
}
double comp1 = 0.5*util::cost_guess(length);
double comp2 = 2*util::cost_guess(util::good_size(2*length-1));
double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1));
comp2*=1.5; /* fudge factor that appears to give good overall performance */
if (comp2<comp1) // use Bluestein
blueplan=unique_ptr<fftblue<T0>>(new fftblue<T0>(length));
......
......@@ -377,6 +377,12 @@ py::array genuine_hartley(const py::array &in, const py::object &axes_,
DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_))
}
size_t good_size(size_t n, bool real)
{
using namespace pocketfft::detail;
return real ? util::good_size_real(n) : util::good_size_cmplx(n);
}
const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms.
This module supports
......@@ -662,6 +668,22 @@ numpy.ndarray (same shape and data type as `a`)
The transformed data
)""";
const char * good_size_DS = R"""(Returns a good length to pad an FFT to.
Parameters
----------
n : int
Minimum transform length
real : bool, optional
True if either input or output of FFT should be fully real.
Returns
-------
out : int
The smallest fast size >= n
)""";
} // unnamed namespace
PYBIND11_MODULE(pypocketfft, m)
......@@ -685,4 +707,5 @@ PYBIND11_MODULE(pypocketfft, m)
"out"_a=None, "nthreads"_a=1);
m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0,
"out"_a=None, "nthreads"_a=1);
m.def("good_size", good_size, good_size_DS, "n"_a, "real"_a=false);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment