Commit de0d1311 authored by Martin Reinecke
Browse files

prepare for header-only version

parent 8ef32034
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
using namespace std; using namespace std;
namespace { namespace pocketfft_private {
template<typename T> struct arr template<typename T> struct arr
{ {
...@@ -458,6 +458,10 @@ template<bool bwd, typename T> NOINLINE void pass3 (size_t ido, size_t l1, ...@@ -458,6 +458,10 @@ template<bool bwd, typename T> NOINLINE void pass3 (size_t ido, size_t l1,
} }
} }
#undef PARTSTEP3b
#undef PARTSTEP3a
#undef PREP3
template<bool bwd, typename T> NOINLINE void pass4 (size_t ido, size_t l1, template<bool bwd, typename T> NOINLINE void pass4 (size_t ido, size_t l1,
const T * restrict cc, T * restrict ch, const cmplx<T0> * restrict wa) const T * restrict cc, T * restrict ch, const cmplx<T0> * restrict wa)
{ {
...@@ -562,6 +566,10 @@ template<bool bwd, typename T> NOINLINE void pass5 (size_t ido, size_t l1, ...@@ -562,6 +566,10 @@ template<bool bwd, typename T> NOINLINE void pass5 (size_t ido, size_t l1,
} }
} }
#undef PARTSTEP5b
#undef PARTSTEP5a
#undef PREP5
#define PREP7(idx) \ #define PREP7(idx) \
T t1 = CC(idx,0,k), t2, t3, t4, t5, t6, t7; \ T t1 = CC(idx,0,k), t2, t3, t4, t5, t6, t7; \
PMC (t2,t7,CC(idx,1,k),CC(idx,6,k)); \ PMC (t2,t7,CC(idx,1,k),CC(idx,6,k)); \
...@@ -627,6 +635,11 @@ template<bool bwd, typename T> NOINLINE void pass7(size_t ido, size_t l1, ...@@ -627,6 +635,11 @@ template<bool bwd, typename T> NOINLINE void pass7(size_t ido, size_t l1,
} }
} }
#undef PARTSTEP7
#undef PARTSTEP7a0
#undef PARTSTEP7a
#undef PREP7
#define PREP11(idx) \ #define PREP11(idx) \
T t1 = CC(idx,0,k), t2, t3, t4, t5, t6, t7, t8, t9, t10, t11; \ T t1 = CC(idx,0,k), t2, t3, t4, t5, t6, t7, t8, t9, t10, t11; \
PMC (t2,t11,CC(idx,1,k),CC(idx,10,k)); \ PMC (t2,t11,CC(idx,1,k),CC(idx,10,k)); \
...@@ -703,6 +716,11 @@ template<bool bwd, typename T> NOINLINE void pass11 (size_t ido, size_t l1, ...@@ -703,6 +716,11 @@ template<bool bwd, typename T> NOINLINE void pass11 (size_t ido, size_t l1,
} }
} }
#undef PARTSTEP11
#undef PARTSTEP11a0
#undef PARTSTEP11a
#undef PREP11
#define CX(a,b,c) cc[(a)+ido*((b)+l1*(c))] #define CX(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define CX2(a,b) cc[(a)+idl1*(b)] #define CX2(a,b) cc[(a)+idl1*(b)]
#define CH2(a,b) ch[(a)+idl1*(b)] #define CH2(a,b) ch[(a)+idl1*(b)]
...@@ -857,7 +875,6 @@ template<bool bwd, typename T> NOINLINE void pass_all(T c[], T0 fact) ...@@ -857,7 +875,6 @@ template<bool bwd, typename T> NOINLINE void pass_all(T c[], T0 fact)
#undef WA #undef WA
#undef CC #undef CC
#undef CH #undef CH
#undef PMC
public: public:
template<typename T> NOINLINE void forward(T c[], T0 fct) template<typename T> NOINLINE void forward(T c[], T0 fct)
...@@ -1434,8 +1451,8 @@ template<typename T>NOINLINE void radb5(size_t ido, size_t l1, ...@@ -1434,8 +1451,8 @@ template<typename T>NOINLINE void radb5(size_t ido, size_t l1,
} }
} }
#undef CC
#undef CH #undef CH
#undef CC
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))] #define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
...@@ -1572,10 +1589,11 @@ template<typename T> NOINLINE void radbg(size_t ido, size_t ip, size_t l1, ...@@ -1572,10 +1589,11 @@ template<typename T> NOINLINE void radbg(size_t ido, size_t ip, size_t l1,
#undef C2 #undef C2
#undef CH2 #undef CH2
#undef CC
#undef CH #undef CH
#undef PM #undef CC
#undef MULPM #undef MULPM
#undef PM
#undef WA #undef WA
template<typename T> void copy_and_norm(T *c, T *p1, size_t n, T0 fct) template<typename T> void copy_and_norm(T *c, T *p1, size_t n, T0 fct)
...@@ -2362,6 +2380,8 @@ template<typename T> NOINLINE void pocketfft_general_r( ...@@ -2362,6 +2380,8 @@ template<typename T> NOINLINE void pocketfft_general_r(
} }
} }
#undef HAVE_VECSUPPORT
// //
// Python interface // Python interface
// //
...@@ -2598,7 +2618,7 @@ Parameters ...@@ -2598,7 +2618,7 @@ Parameters
a : numpy.ndarray (np.complex64 or np.complex128) a : numpy.ndarray (np.complex64 or np.complex128)
The input data The input data
axes : list of integers axes : list of integers
The axes on which the FFT is carried out. The axes along which the FFT is carried out.
If not set, all axes will be transformed. If not set, all axes will be transformed.
fct : float fct : float
Normalization factor Normalization factor
...@@ -2609,7 +2629,7 @@ inplace : bool ...@@ -2609,7 +2629,7 @@ inplace : bool
Returns Returns
------- -------
np.ndarray (same shape and data type as a) np.ndarray (same shape and data type as a)
the transformed data The transformed data.
)DELIM"; )DELIM";
const char *ifftn_DS = R"DELIM(Performs a backward complex FFT. const char *ifftn_DS = R"DELIM(Performs a backward complex FFT.
...@@ -2619,7 +2639,7 @@ Parameters ...@@ -2619,7 +2639,7 @@ Parameters
a : numpy.ndarray (np.complex64 or np.complex128) a : numpy.ndarray (np.complex64 or np.complex128)
The input data The input data
axes : list of integers axes : list of integers
The axes on which the FFT is carried out. The axes along which the FFT is carried out.
If not set, all axes will be transformed. If not set, all axes will be transformed.
fct : float fct : float
Normalization factor Normalization factor
...@@ -2640,7 +2660,7 @@ Parameters ...@@ -2640,7 +2660,7 @@ Parameters
a : numpy.ndarray (np.float32 or np.float64) a : numpy.ndarray (np.float32 or np.float64)
The input data The input data
axes : list of integers axes : list of integers
The axes on which the FFT is carried out. The axes along which the FFT is carried out.
If not set, all axes will be transformed in ascending order. If not set, all axes will be transformed in ascending order.
fct : float fct : float
Normalization factor Normalization factor
...@@ -2653,6 +2673,28 @@ np.ndarray (np.complex64 or np.complex128) ...@@ -2653,6 +2673,28 @@ np.ndarray (np.complex64 or np.complex128)
was n on input, it is n//2+1 on output. was n on input, it is n//2+1 on output.
)DELIM"; )DELIM";
const char *rfft_scipy_DS = R"DELIM(Performs a forward real-valued FFT.
Parameters
----------
a : numpy.ndarray (np.float32 or np.float64)
The input data
axis : int
The axis along which the FFT is carried out.
fct : float
Normalization factor
inplace : bool
if False, returns the result in a new array and leaves the input unchanged.
if True, stores the result in the input array and returns a handle to it.
Returns
-------
np.ndarray (np.float32 or np.float64)
The transformed data. The shape is identical to that of the input array.
Along the transformed axis, values are arranged in
FFTPACK half-complex order, i.e. `a[0].re, a[1].re, a[1].im, a[2].re ...`.
)DELIM";
const char *irfftn_DS = R"DELIM(Performs a backward real-valued FFT. const char *irfftn_DS = R"DELIM(Performs a backward real-valued FFT.
Parameters Parameters
...@@ -2660,7 +2702,7 @@ Parameters ...@@ -2660,7 +2702,7 @@ Parameters
a : numpy.ndarray (np.complex64 or np.complex128) a : numpy.ndarray (np.complex64 or np.complex128)
The input data The input data
axes : list of integers axes : list of integers
The axes on which the FFT is carried out. The axes along which the FFT is carried out.
If not set, all axes will be transformed in ascending order. If not set, all axes will be transformed in ascending order.
lastsize : the output size of the last axis to be transformed. lastsize : the output size of the last axis to be transformed.
If the corresponding input axis has size n, this can be 2*n-2 or 2*n-1. If the corresponding input axis has size n, this can be 2*n-2 or 2*n-1.
...@@ -2675,6 +2717,27 @@ np.ndarray (np.float32 or np.float64) ...@@ -2675,6 +2717,27 @@ np.ndarray (np.float32 or np.float64)
entries. entries.
)DELIM"; )DELIM";
const char *irfft_scipy_DS = R"DELIM(Performs a backward real-valued FFT.
Parameters
----------
a : numpy.ndarray (np.float32 or np.float64)
The input data. Along the transformed axis, values are expected in
FFTPACK half-complex order, i.e. `a[0].re, a[1].re, a[1].im, a[2].re ...`.
axis : int
The axis along which the FFT is carried out.
fct : float
Normalization factor
inplace : bool
if False, returns the result in a new array and leaves the input unchanged.
if True, stores the result in the input array and returns a handle to it.
Returns
-------
np.ndarray (np.float32 or np.float64)
The transformed data. The shape is identical to that of the input array.
)DELIM";
const char *hartley_DS = R"DELIM(Performs a Hartley transform. const char *hartley_DS = R"DELIM(Performs a Hartley transform.
For every requested axis, a 1D forward Fourier transform is carried out, For every requested axis, a 1D forward Fourier transform is carried out,
and the sum of real and imaginary parts of the result is stored in the output and the sum of real and imaginary parts of the result is stored in the output
...@@ -2685,7 +2748,7 @@ Parameters ...@@ -2685,7 +2748,7 @@ Parameters
a : numpy.ndarray (np.float32 or np.float64) a : numpy.ndarray (np.float32 or np.float64)
The input data The input data
axes : list of integers axes : list of integers
The axes on which the transform is carried out. The axes along which the transform is carried out.
If not set, all axes will be transformed. If not set, all axes will be transformed.
fct : float fct : float
Normalization factor Normalization factor
...@@ -2704,6 +2767,7 @@ np.ndarray (same shape and data type as a) ...@@ -2704,6 +2767,7 @@ np.ndarray (same shape and data type as a)
PYBIND11_MODULE(pypocketfft, m) PYBIND11_MODULE(pypocketfft, m)
{ {
using namespace pybind11::literals; using namespace pybind11::literals;
using namespace pocketfft_private;
m.doc() = pypocketfft_DS; m.doc() = pypocketfft_DS;
m.def("fftn",&fftn, fftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1., m.def("fftn",&fftn, fftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
...@@ -2711,12 +2775,12 @@ PYBIND11_MODULE(pypocketfft, m) ...@@ -2711,12 +2775,12 @@ PYBIND11_MODULE(pypocketfft, m)
m.def("ifftn",&ifftn, ifftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1., m.def("ifftn",&ifftn, ifftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false);
m.def("rfftn",&rfftn, rfftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.); m.def("rfftn",&rfftn, rfftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.);
m.def("rfft_scipy",&rfft_scipy, "a"_a, "axis"_a, "fct"_a=1., m.def("rfft_scipy",&rfft_scipy, rfft_scipy_DS, "a"_a, "axis"_a, "fct"_a=1.,
"inplace"_a=false);
m.def("irfft_scipy",&irfft_scipy, "a"_a, "axis"_a, "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false);
m.def("irfftn",&irfftn, irfftn_DS, "a"_a, "axes"_a=py::none(), "lastsize"_a=0, m.def("irfftn",&irfftn, irfftn_DS, "a"_a, "axes"_a=py::none(), "lastsize"_a=0,
"fct"_a=1.); "fct"_a=1.);
m.def("irfft_scipy",&irfft_scipy, irfft_scipy_DS, "a"_a, "axis"_a, "fct"_a=1.,
"inplace"_a=false);
m.def("hartley",&hartley, hartley_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1., m.def("hartley",&hartley, hartley_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false);
m.def("hartley2",&hartley2, "a"_a, "axes"_a=py::none(), "fct"_a=1., m.def("hartley2",&hartley2, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment