diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h
index 5e3e1539100620fc2358bd2d4f1dc9c2323b1357..9eeef6324f598352c3beb80e418a2e1cbcaaa8cf 100644
--- a/pocketfft_hdronly.h
+++ b/pocketfft_hdronly.h
@@ -456,17 +456,58 @@ struct util // hack to avoid duplicate symbols
}
/* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */
- static POCKETFFT_NOINLINE size_t good_size(size_t n)
+ static POCKETFFT_NOINLINE size_t good_size_cmplx(size_t n)
{
if (n<=12) return n;
size_t bestfac=2*n;
- for (size_t f2=1; f2<bestfac; f2*=2)
- for (size_t f23=f2; f23<bestfac; f23*=3)
- for (size_t f235=f23; f235<bestfac; f235*=5)
- for (size_t f2357=f235; f2357<bestfac; f2357*=7)
- for (size_t f235711=f2357; f235711<bestfac; f235711*=11)
- if (f235711>=n) bestfac=f235711;
+ for (size_t f11=1; f11<bestfac; f11*=11)
+ for (size_t f117=f11; f117<bestfac; f117*=7)
+ for (size_t f1175=f117; f1175<bestfac; f1175*=5)
+ {
+ size_t x=f1175;
+ while (x<n) x*=2;
+ for (;;)
+ {
+ if (x<n)
+ x*=3;
+ else if (x>n)
+ {
+ if (x<bestfac) bestfac=x;
+ if (x&1) break;
+ x>>=1;
+ }
+ else
+ return n;
+ }
+ }
+ return bestfac;
+ }
+
+ /* returns the smallest composite of 2, 3, 5 which is >= n */
+ static POCKETFFT_NOINLINE size_t good_size_real(size_t n)
+ {
+ if (n<=6) return n;
+
+ size_t bestfac=2*n;
+ for (size_t f5=1; f5<bestfac; f5*=5)
+ {
+ size_t x = f5;
+ while (x<n) x *= 2;
+ for (;;)
+ {
+ if (x<n)
+ x*=3;
+ else if (x>n)
+ {
+ if (x<bestfac) bestfac=x;
+ if (x&1) break;
+ x>>=1;
+ }
+ else
+ return n;
+ }
+ }
return bestfac;
}
@@ -2154,7 +2195,7 @@ template<typename T0> class fftblue
public:
POCKETFFT_NOINLINE fftblue(size_t length)
- : n(length), n2(util::good_size(n*2-1)), plan(n2), mem(n+n2),
+ : n(length), n2(util::good_size_cmplx(n*2-1)), plan(n2), mem(n+n2),
bk(mem.data()), bkf(mem.data()+n)
{
/* initialize b_k */
@@ -2235,7 +2276,7 @@ template<typename T0> class pocketfft_c
return;
}
double comp1 = util::cost_guess(length);
- double comp2 = 2*util::cost_guess(util::good_size(2*length-1));
+ double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1));
comp2*=1.5; /* fudge factor that appears to give good overall performance */
if (comp2<comp1) // use Bluestein
blueplan=unique_ptr<fftblue<T0>>(new fftblue<T0>(length));
@@ -2275,7 +2316,7 @@ template<typename T0> class pocketfft_r
return;
}
double comp1 = 0.5*util::cost_guess(length);
- double comp2 = 2*util::cost_guess(util::good_size(2*length-1));
+ double comp2 = 2*util::cost_guess(util::good_size_cmplx(2*length-1));
comp2*=1.5; /* fudge factor that appears to give good overall performance */
if (comp2<comp1) // use Bluestein
blueplan=unique_ptr<fftblue<T0>>(new fftblue<T0>(length));
diff --git a/pypocketfft.cc b/pypocketfft.cc
index 1efd2f3b2abe001b832acd1b749e1af4cf292dfd..8b43ac40f586b486ace25d42f5c24122118606d7 100644
--- a/pypocketfft.cc
+++ b/pypocketfft.cc
@@ -377,6 +377,12 @@ py::array genuine_hartley(const py::array &in, const py::object &axes_,
DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_))
}
+size_t good_size(size_t n, bool real)
+ {
+ using namespace pocketfft::detail;
+ return real ? util::good_size_real(n) : util::good_size_cmplx(n);
+ }
+
const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms.
This module supports
@@ -662,6 +668,22 @@ numpy.ndarray (same shape and data type as `a`)
The transformed data
)""";
+const char * good_size_DS = R"""(Returns a good length to pad an FFT to.
+
+Parameters
+----------
+n : int
+ Minimum transform length
+real : bool, optional
+ True if either input or output of FFT should be fully real.
+
+Returns
+-------
+out : int
+ The smallest fast size >= n
+
+)""";
+
} // unnamed namespace
PYBIND11_MODULE(pypocketfft, m)
@@ -685,4 +707,5 @@ PYBIND11_MODULE(pypocketfft, m)
"out"_a=None, "nthreads"_a=1);
m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0,
"out"_a=None, "nthreads"_a=1);
+ m.def("good_size", good_size, good_size_DS, "n"_a, "real"_a=false);
}