Commit 2970b68c authored by Martin Reinecke

OpenMP support, take 1

parent f7e2dfc1
This diff is collapsed.
......@@ -14,8 +14,6 @@
#include <pybind11/numpy.h>
#include <pybind11/stl.h>
#pragma GCC visibility push(hidden)
#include "pocketfft_hdronly.h"
//
......@@ -78,31 +76,31 @@ shape_t makeaxes(const py::array &in, py::object axes)
throw runtime_error("unsupported data type");
// Complex-to-complex transform of `in` over `axes` with element type
// complex<T>.
//   fct      - factor handed to c2c after conversion to T
//   inplace  - if true, `res` is initialised from `in` itself; otherwise a
//              fresh complex<T> array with the same shape is allocated
//   fwd      - direction flag forwarded unchanged to c2c
//   nthreads - thread count forwarded unchanged to c2c
// Returns the array that received the output.
template<typename T> py::array xfftn_internal(const py::array &in,
  const shape_t &axes, double fct, bool inplace, bool fwd, size_t nthreads)
{
  auto dims(copy_shape(in));
  py::array res = inplace ? in : py::array_t<complex<T>>(dims);
  // Strides are read separately for input and output because `res` may be a
  // newly allocated array rather than `in`.
  c2c(dims, copy_strides(in), copy_strides(res), axes, fwd,
    reinterpret_cast<const complex<T> *>(in.data()),
    reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct), nthreads);
  return res;
}
// Entry point shared by fftn/ifftn: resolves `axes` via makeaxes() and hands
// the work to xfftn_internal through the DISPATCH macro (defined elsewhere in
// the file; presumably it picks the instantiation matching a's dtype among
// c128/c64/c256 - confirm against the macro definition).
//   fwd      - direction flag, passed through
//   nthreads - thread count, passed through
py::array xfftn(const py::array &a, py::object axes, double fct, bool inplace,
  bool fwd, size_t nthreads)
{
  DISPATCH(a, c128, c64, c256, xfftn_internal, (a, makeaxes(a, axes), fct,
    inplace, fwd, nthreads))
}
// Forward complex transform: xfftn with fwd == true.
// `nthreads` is passed straight through to xfftn.
py::array fftn(const py::array &a, py::object axes, double fct, bool inplace,
  size_t nthreads)
{
  return xfftn(a, axes, fct, inplace, true, nthreads);
}
// Backward complex transform: xfftn with fwd == false.
// `nthreads` is passed straight through to xfftn.
py::array ifftn(const py::array &a, py::object axes, double fct, bool inplace,
  size_t nthreads)
{
  return xfftn(a, axes, fct, inplace, false, nthreads);
}
template<typename T> py::array rfftn_internal(const py::array &in,
py::object axes_, T fct)
py::object axes_, T fct, size_t nthreads)
{
auto axes = makeaxes(in, axes_);
auto dims_in(copy_shape(in)), dims_out(dims_in);
......@@ -110,38 +108,38 @@ template<typename T> py::array rfftn_internal(const py::array &in,
py::array res = py::array_t<complex<T>>(dims_out);
r2c(dims_in, copy_strides(in), copy_strides(res), axes,
reinterpret_cast<const T *>(in.data()),
reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct));
reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct), nthreads);
return res;
}
// Real-input transform entry point: forwards to rfftn_internal through the
// DISPATCH macro (defined elsewhere; presumably selects the f64/f32/f128
// instantiation from in's dtype - confirm against the macro definition).
//   axes_    - axes object, interpreted by rfftn_internal
//   fct      - factor, passed through
//   nthreads - thread count, passed through
py::array rfftn(const py::array &in, py::object axes_, double fct,
  size_t nthreads)
{
  DISPATCH(in, f64, f32, f128, rfftn_internal, (in, axes_, fct, nthreads))
}
// Real-to-real transform along a single `axis` via r2r_fftpack, for element
// type T.
//   fct      - factor handed to r2r_fftpack after conversion to T
//   inplace  - if true, `res` is initialised from `in` itself; otherwise a
//              fresh array of T with the same shape is allocated
//   fwd      - direction flag forwarded unchanged to r2r_fftpack
//   nthreads - thread count forwarded unchanged to r2r_fftpack
// Returns the array that received the output.
template<typename T> py::array xrfft_scipy(const py::array &in,
  size_t axis, double fct, bool inplace, bool fwd, size_t nthreads)
{
  auto dims(copy_shape(in));
  py::array res = inplace ? in : py::array_t<T>(dims);
  r2r_fftpack(dims, copy_strides(in), copy_strides(res), axis, fwd,
    reinterpret_cast<const T *>(in.data()),
    reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
  return res;
}
// Forward variant of xrfft_scipy (fwd == true), reached through the DISPATCH
// macro (defined elsewhere; presumably chooses f64/f32/f128 from in's dtype -
// confirm against the macro definition). `nthreads` is passed through.
py::array rfft_scipy(const py::array &in, size_t axis, double fct,
  bool inplace, size_t nthreads)
{
  DISPATCH(in, f64, f32, f128, xrfft_scipy, (in, axis, fct, inplace, true,
    nthreads))
}
// Backward variant of xrfft_scipy (fwd == false), reached through the
// DISPATCH macro (defined elsewhere; presumably chooses f64/f32/f128 from in's
// dtype - confirm against the macro definition). `nthreads` is passed through.
py::array irfft_scipy(const py::array &in, size_t axis, double fct,
  bool inplace, size_t nthreads)
{
  DISPATCH(in, f64, f32, f128, xrfft_scipy, (in, axis, fct, inplace, false,
    nthreads))
}
template<typename T> py::array irfftn_internal(const py::array &in,
py::object axes_, size_t lastsize, T fct)
py::object axes_, size_t lastsize, T fct, size_t nthreads)
{
auto axes = makeaxes(in, axes_);
size_t axis = axes.back();
......@@ -153,31 +151,31 @@ template<typename T> py::array irfftn_internal(const py::array &in,
py::array res = py::array_t<T>(dims_out);
c2r(dims_out, copy_strides(in), copy_strides(res), axes,
reinterpret_cast<const complex<T> *>(in.data()),
reinterpret_cast<T *>(res.mutable_data()), T(fct));
reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
return res;
}
// Complex-to-real transform entry point: forwards to irfftn_internal through
// the DISPATCH macro (defined elsewhere; presumably selects the c128/c64/c256
// instantiation from in's dtype - confirm against the macro definition).
//   axes_    - axes object, interpreted by irfftn_internal
//   lastsize - passed through to irfftn_internal
//   fct      - factor, passed through
//   nthreads - thread count, passed through
py::array irfftn(const py::array &in, py::object axes_, size_t lastsize,
  double fct, size_t nthreads)
{
  DISPATCH(in, c128, c64, c256, irfftn_internal, (in, axes_, lastsize, fct,
    nthreads))
}
// Hartley transform of `in` via r2r_hartley, for element type T.
//   axes_    - axes object, resolved with makeaxes(in, axes_)
//   fct      - factor handed to r2r_hartley after conversion to T
//   inplace  - if true, `res` is initialised from `in` itself; otherwise a
//              fresh array of T with the same shape is allocated
//   nthreads - thread count forwarded unchanged to r2r_hartley
// Returns the array that received the output.
template<typename T> py::array hartley_internal(const py::array &in,
  py::object axes_, double fct, bool inplace, size_t nthreads)
{
  auto dims(copy_shape(in));
  py::array res = inplace ? in : py::array_t<T>(dims);
  r2r_hartley(dims, copy_strides(in), copy_strides(res), makeaxes(in, axes_),
    reinterpret_cast<const T *>(in.data()),
    reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
  return res;
}
// Hartley transform entry point: forwards to hartley_internal through the
// DISPATCH macro (defined elsewhere; presumably selects the f64/f32/f128
// instantiation from in's dtype - confirm against the macro definition).
// `nthreads` is passed through.
py::array hartley(const py::array &in, py::object axes_, double fct,
  bool inplace, size_t nthreads)
{
  DISPATCH(in, f64, f32, f128, hartley_internal, (in, axes_, fct, inplace,
    nthreads))
}
template<typename T>py::array complex2hartley(const py::array &in,
......@@ -230,8 +228,8 @@ py::array mycomplex2hartley(const py::array &in,
}
// Alternative Hartley path: runs rfftn on `in` first and hands that result,
// together with `in`, `axes_` and `inplace`, to mycomplex2hartley.
// `nthreads` only affects the rfftn step; mycomplex2hartley is called without
// a thread count.
py::array hartley2(const py::array &in, py::object axes_, double fct,
  bool inplace, size_t nthreads)
{
  return mycomplex2hartley(in, rfftn(in, axes_, fct, nthreads), axes_,
    inplace);
}
const char *pypocketfft_DS = R"DELIM(Fast Fourier and Hartley transforms.
......@@ -399,28 +397,26 @@ np.ndarray (same shape and data type as a)
} // unnamed namespace
#pragma GCC visibility pop
// Python module definition for `pypocketfft`.
// Each transform is registered once, with keyword arguments and defaults.
// All transforms accept an optional `nthreads` keyword that defaults to 1;
// `complex2hartley` is the only binding without it.
PYBIND11_MODULE(pypocketfft, m)
{
  using namespace pybind11::literals;

  m.doc() = pypocketfft_DS;

  // Complex transforms.
  m.def("fftn",&fftn, fftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
    "inplace"_a=false, "nthreads"_a=1);
  m.def("ifftn",&ifftn, ifftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
    "inplace"_a=false, "nthreads"_a=1);

  // Real-input / real-output transforms.
  m.def("rfftn",&rfftn, rfftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
    "nthreads"_a=1);
  m.def("rfft_scipy",&rfft_scipy, rfft_scipy_DS, "a"_a, "axis"_a, "fct"_a=1.,
    "inplace"_a=false, "nthreads"_a=1);
  m.def("irfftn",&irfftn, irfftn_DS, "a"_a, "axes"_a=py::none(),
    "lastsize"_a=0, "fct"_a=1., "nthreads"_a=1);
  m.def("irfft_scipy",&irfft_scipy, irfft_scipy_DS, "a"_a, "axis"_a,
    "fct"_a=1., "inplace"_a=false, "nthreads"_a=1);

  // Hartley transforms; hartley2 and complex2hartley carry no docstring.
  m.def("hartley",&hartley, hartley_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
    "inplace"_a=false, "nthreads"_a=1);
  m.def("hartley2",&hartley2, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
    "inplace"_a=false, "nthreads"_a=1);
  m.def("complex2hartley",&mycomplex2hartley, "in"_a, "tmp"_a, "axes"_a,
    "inplace"_a=false);
}
......@@ -63,10 +63,11 @@ if sys.platform == 'darwin':
builder = setuptools.command.build_ext.build_ext(Distribution())
base_library_link_args.append('-dynamiclib')
else:
extra_compile_args += ['-march=native', '-O3', '-Wfatal-errors', '-Wno-ignored-attributes']
extra_compile_args += ['-march=native', '-O3', '-Wfatal-errors', '-Wno-ignored-attributes', '-DPOCKETFFT_OPENMP', '-fopenmp']
python_module_link_args += ['-march=native']
extra_cc_compile_args.append('--std=c++11')
python_module_link_args.append("-Wl,-rpath,$ORIGIN")
python_module_link_args.append('-fopenmp')
extra_cc_compile_args = extra_compile_args + extra_cc_compile_args
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment