Commit 9806d64a authored by Martin Reinecke
Browse files

Merge branch 'develop' into 'master'

Develop

See merge request !6
parents f32ddbcc 7342ee68
...@@ -5,6 +5,7 @@ import pypocketfft ...@@ -5,6 +5,7 @@ import pypocketfft
from time import time from time import time
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
nthreads=0
def _l2error(a,b): def _l2error(a,b):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2)) return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))
...@@ -22,7 +23,7 @@ def bench_nd_fftn(ndim, nmax, ntry, tp, nrepeat, filename=""): ...@@ -22,7 +23,7 @@ def bench_nd_fftn(ndim, nmax, ntry, tp, nrepeat, filename=""):
tmin_pp=1e38 tmin_pp=1e38
for i in range(nrepeat): for i in range(nrepeat):
t0=time() t0=time()
b=pypocketfft.fftn(a) b=pypocketfft.fftn(a,nthreads=nthreads)
t1=time() t1=time()
tmin_pp = min(tmin_pp,t1-t0) tmin_pp = min(tmin_pp,t1-t0)
a2=pypocketfft.ifftn(b,fct=1./a.size) a2=pypocketfft.ifftn(b,fct=1./a.size)
......
This diff is collapsed.
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
#include <pybind11/numpy.h> #include <pybind11/numpy.h>
#include <pybind11/stl.h> #include <pybind11/stl.h>
#pragma GCC visibility push(hidden)
#include "pocketfft_hdronly.h" #include "pocketfft_hdronly.h"
// //
...@@ -31,31 +29,24 @@ namespace py = pybind11; ...@@ -31,31 +29,24 @@ namespace py = pybind11;
auto c64 = py::dtype("complex64"); auto c64 = py::dtype("complex64");
auto c128 = py::dtype("complex128"); auto c128 = py::dtype("complex128");
auto c256 = py::dtype("complex256");
auto f32 = py::dtype("float32"); auto f32 = py::dtype("float32");
auto f64 = py::dtype("float64"); auto f64 = py::dtype("float64");
auto f128 = py::dtype("float128");
bool tcheck(const py::array &arr, const py::object &t1, const py::object &t2)
{
if (arr.dtype().is(t1))
return true;
if (arr.dtype().is(t2))
return false;
throw runtime_error("unsupported data type");
}
shape_t copy_shape(const py::array &arr) shape_t copy_shape(const py::array &arr)
{ {
shape_t res(arr.ndim()); shape_t res(size_t(arr.ndim()));
for (size_t i=0; i<res.size(); ++i) for (size_t i=0; i<res.size(); ++i)
res[i] = arr.shape(i); res[i] = size_t(arr.shape(int(i)));
return res; return res;
} }
stride_t copy_strides(const py::array &arr) stride_t copy_strides(const py::array &arr)
{ {
stride_t res(arr.ndim()); stride_t res(size_t(arr.ndim()));
for (size_t i=0; i<res.size(); ++i) for (size_t i=0; i<res.size(); ++i)
res[i] = arr.strides(i); res[i] = arr.strides(int(i));
return res; return res;
} }
...@@ -63,7 +54,7 @@ shape_t makeaxes(const py::array &in, py::object axes) ...@@ -63,7 +54,7 @@ shape_t makeaxes(const py::array &in, py::object axes)
{ {
if (axes.is(py::none())) if (axes.is(py::none()))
{ {
shape_t res(in.ndim()); shape_t res(size_t(in.ndim()));
for (size_t i=0; i<res.size(); ++i) for (size_t i=0; i<res.size(); ++i)
res[i]=i; res[i]=i;
return res; return res;
...@@ -77,31 +68,41 @@ shape_t makeaxes(const py::array &in, py::object axes) ...@@ -77,31 +68,41 @@ shape_t makeaxes(const py::array &in, py::object axes)
return tmp; return tmp;
} }
#define DISPATCH(arr, T1, T2, T3, func, args) \
auto dtype = arr.dtype(); \
if (dtype.is(T1)) return func<double> args; \
if (dtype.is(T2)) return func<float> args; \
if (dtype.is(T3)) return func<long double> args; \
throw runtime_error("unsupported data type");
template<typename T> py::array xfftn_internal(const py::array &in, template<typename T> py::array xfftn_internal(const py::array &in,
const shape_t &axes, double fct, bool inplace, bool fwd) const shape_t &axes, double fct, bool inplace, bool fwd, size_t nthreads)
{ {
auto dims(copy_shape(in)); auto dims(copy_shape(in));
py::array res = inplace ? in : py::array_t<complex<T>>(dims); py::array res = inplace ? in : py::array_t<complex<T>>(dims);
c2c(dims, copy_strides(in), copy_strides(res), axes, fwd, c2c(dims, copy_strides(in), copy_strides(res), axes, fwd,
reinterpret_cast<const complex<T> *>(in.data()), reinterpret_cast<const complex<T> *>(in.data()),
reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct)); reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct), nthreads);
return res; return res;
} }
py::array xfftn(const py::array &a, py::object axes, double fct, bool inplace, py::array xfftn(const py::array &a, py::object axes, double fct, bool inplace,
bool fwd) bool fwd, size_t nthreads)
{ {
return tcheck(a, c128, c64) ? DISPATCH(a, c128, c64, c256, xfftn_internal, (a, makeaxes(a, axes), fct,
xfftn_internal<double>(a, makeaxes(a, axes), fct, inplace, fwd) : inplace, fwd, nthreads))
xfftn_internal<float> (a, makeaxes(a, axes), fct, inplace, fwd);
} }
py::array fftn(const py::array &a, py::object axes, double fct, bool inplace)
{ return xfftn(a, axes, fct, inplace, true); } py::array fftn(const py::array &a, py::object axes, double fct, bool inplace,
py::array ifftn(const py::array &a, py::object axes, double fct, bool inplace) size_t nthreads)
{ return xfftn(a, axes, fct, inplace, false); } { return xfftn(a, axes, fct, inplace, true, nthreads); }
py::array ifftn(const py::array &a, py::object axes, double fct, bool inplace,
size_t nthreads)
{ return xfftn(a, axes, fct, inplace, false, nthreads); }
template<typename T> py::array rfftn_internal(const py::array &in, template<typename T> py::array rfftn_internal(const py::array &in,
py::object axes_, T fct) py::object axes_, T fct, size_t nthreads)
{ {
auto axes = makeaxes(in, axes_); auto axes = makeaxes(in, axes_);
auto dims_in(copy_shape(in)), dims_out(dims_in); auto dims_in(copy_shape(in)), dims_out(dims_in);
...@@ -109,39 +110,42 @@ template<typename T> py::array rfftn_internal(const py::array &in, ...@@ -109,39 +110,42 @@ template<typename T> py::array rfftn_internal(const py::array &in,
py::array res = py::array_t<complex<T>>(dims_out); py::array res = py::array_t<complex<T>>(dims_out);
r2c(dims_in, copy_strides(in), copy_strides(res), axes, r2c(dims_in, copy_strides(in), copy_strides(res), axes,
reinterpret_cast<const T *>(in.data()), reinterpret_cast<const T *>(in.data()),
reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct)); reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct), nthreads);
return res; return res;
} }
py::array rfftn(const py::array &in, py::object axes_, double fct)
py::array rfftn(const py::array &in, py::object axes_, double fct,
size_t nthreads)
{ {
return tcheck(in, f64, f32) ? rfftn_internal<double>(in, axes_, fct) DISPATCH(in, f64, f32, f128, rfftn_internal, (in, axes_, fct, nthreads))
: rfftn_internal<float> (in, axes_, fct);
} }
template<typename T> py::array xrfft_scipy(const py::array &in, template<typename T> py::array xrfft_scipy(const py::array &in,
size_t axis, double fct, bool inplace, bool fwd) size_t axis, double fct, bool inplace, bool fwd, size_t nthreads)
{ {
auto dims(copy_shape(in)); auto dims(copy_shape(in));
py::array res = inplace ? in : py::array_t<T>(dims); py::array res = inplace ? in : py::array_t<T>(dims);
r2r_fftpack(dims, copy_strides(in), copy_strides(res), axis, fwd, r2r_fftpack(dims, copy_strides(in), copy_strides(res), axis, fwd,
reinterpret_cast<const T *>(in.data()), reinterpret_cast<const T *>(in.data()),
reinterpret_cast<T *>(res.mutable_data()), T(fct)); reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
return res; return res;
} }
py::array rfft_scipy(const py::array &in, size_t axis, double fct, bool inplace)
py::array rfft_scipy(const py::array &in, size_t axis, double fct, bool inplace,
size_t nthreads)
{ {
return tcheck(in, f64, f32) ? DISPATCH(in, f64, f32, f128, xrfft_scipy, (in, axis, fct, inplace, true,
xrfft_scipy<double>(in, axis, fct, inplace, true) : nthreads))
xrfft_scipy<float> (in, axis, fct, inplace, true);
} }
py::array irfft_scipy(const py::array &in, size_t axis, double fct, py::array irfft_scipy(const py::array &in, size_t axis, double fct,
bool inplace) bool inplace, size_t nthreads)
{ {
return tcheck(in, f64, f32) ? DISPATCH(in, f64, f32, f128, xrfft_scipy, (in, axis, fct, inplace, false,
xrfft_scipy<double>(in, axis, fct, inplace, false) : nthreads))
xrfft_scipy<float> (in, axis, fct, inplace, false);
} }
template<typename T> py::array irfftn_internal(const py::array &in, template<typename T> py::array irfftn_internal(const py::array &in,
py::object axes_, size_t lastsize, T fct) py::object axes_, size_t lastsize, T fct, size_t nthreads)
{ {
auto axes = makeaxes(in, axes_); auto axes = makeaxes(in, axes_);
size_t axis = axes.back(); size_t axis = axes.back();
...@@ -153,40 +157,40 @@ template<typename T> py::array irfftn_internal(const py::array &in, ...@@ -153,40 +157,40 @@ template<typename T> py::array irfftn_internal(const py::array &in,
py::array res = py::array_t<T>(dims_out); py::array res = py::array_t<T>(dims_out);
c2r(dims_out, copy_strides(in), copy_strides(res), axes, c2r(dims_out, copy_strides(in), copy_strides(res), axes,
reinterpret_cast<const complex<T> *>(in.data()), reinterpret_cast<const complex<T> *>(in.data()),
reinterpret_cast<T *>(res.mutable_data()), T(fct)); reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
return res; return res;
} }
py::array irfftn(const py::array &in, py::object axes_, size_t lastsize, py::array irfftn(const py::array &in, py::object axes_, size_t lastsize,
double fct) double fct, size_t nthreads)
{ {
return tcheck(in, c128, c64) ? DISPATCH(in, c128, c64, c256, irfftn_internal, (in, axes_, lastsize, fct,
irfftn_internal<double>(in, axes_, lastsize, fct) : nthreads))
irfftn_internal<float> (in, axes_, lastsize, fct);
} }
template<typename T> py::array hartley_internal(const py::array &in, template<typename T> py::array hartley_internal(const py::array &in,
py::object axes_, double fct, bool inplace) py::object axes_, double fct, bool inplace, size_t nthreads)
{ {
auto dims(copy_shape(in)); auto dims(copy_shape(in));
py::array res = inplace ? in : py::array_t<T>(dims); py::array res = inplace ? in : py::array_t<T>(dims);
r2r_hartley(dims, copy_strides(in), copy_strides(res), makeaxes(in, axes_), r2r_hartley(dims, copy_strides(in), copy_strides(res), makeaxes(in, axes_),
reinterpret_cast<const T *>(in.data()), reinterpret_cast<const T *>(in.data()),
reinterpret_cast<T *>(res.mutable_data()), T(fct)); reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
return res; return res;
} }
py::array hartley(const py::array &in, py::object axes_, double fct, py::array hartley(const py::array &in, py::object axes_, double fct,
bool inplace) bool inplace, size_t nthreads)
{ {
return tcheck(in, f64, f32) ? DISPATCH(in, f64, f32, f128, hartley_internal, (in, axes_, fct, inplace,
hartley_internal<double>(in, axes_, fct, inplace) : nthreads))
hartley_internal<float> (in, axes_, fct, inplace);
} }
template<typename T>py::array complex2hartley(const py::array &in, template<typename T>py::array complex2hartley(const py::array &in,
const py::array &tmp, py::object axes_, bool inplace) const py::array &tmp, py::object axes_, bool inplace)
{ {
using namespace pocketfft::detail; using namespace pocketfft::detail;
int ndim = in.ndim(); size_t ndim = size_t(in.ndim());
auto dims_out(copy_shape(in)); auto dims_out(copy_shape(in));
py::array out = inplace ? in : py::array_t<T>(dims_out); py::array out = inplace ? in : py::array_t<T>(dims_out);
ndarr<cmplx<T>> atmp(tmp.data(), copy_shape(tmp), copy_strides(tmp)); ndarr<cmplx<T>> atmp(tmp.data(), copy_shape(tmp), copy_strides(tmp));
...@@ -205,10 +209,10 @@ template<typename T>py::array complex2hartley(const py::array &in, ...@@ -205,10 +209,10 @@ template<typename T>py::array complex2hartley(const py::array &in,
{ {
if (i==axis) continue; if (i==axis) continue;
if (!swp[i]) if (!swp[i])
rofs += it.pos[i]*it.oarr.stride(i); rofs += ptrdiff_t(it.pos[i])*it.oarr.stride(i);
else else
{ {
auto x = (it.pos[i]==0) ? 0 : it.iarr.shape(i)-it.pos[i]; auto x = ptrdiff_t((it.pos[i]==0) ? 0 : it.iarr.shape(i)-it.pos[i]);
rofs += x*it.oarr.stride(i); rofs += x*it.oarr.stride(i);
} }
} }
...@@ -217,24 +221,28 @@ template<typename T>py::array complex2hartley(const py::array &in, ...@@ -217,24 +221,28 @@ template<typename T>py::array complex2hartley(const py::array &in,
{ {
auto re = it.in(i).r; auto re = it.in(i).r;
auto im = it.in(i).i; auto im = it.in(i).i;
auto rev_i = (i==0) ? 0 : it.length_out()-i; auto rev_i = ptrdiff_t((i==0) ? 0 : it.length_out()-i);
it.out(i) = re+im; it.out(i) = re+im;
aout[rofs + rev_i*it.stride_out()] = re-im; aout[rofs + rev_i*it.stride_out()] = re-im;
} }
} }
return out; return out;
} }
py::array mycomplex2hartley(const py::array &in, py::array mycomplex2hartley(const py::array &in,
const py::array &tmp, py::object axes_, bool inplace) const py::array &tmp, py::object axes_, bool inplace)
{ {
return tcheck(in, f64, f32) ? complex2hartley<double>(in, tmp, axes_, inplace) DISPATCH(in, f64, f32, f128, complex2hartley, (in, tmp, axes_, inplace))
: complex2hartley<float> (in, tmp, axes_, inplace);
} }
py::array hartley2(const py::array &in, py::object axes_, double fct, py::array hartley2(const py::array &in, py::object axes_, double fct,
bool inplace) bool inplace, size_t nthreads)
{ return mycomplex2hartley(in, rfftn(in, axes_, fct), axes_, inplace); } {
return mycomplex2hartley(in, rfftn(in, axes_, fct, nthreads), axes_,
inplace);
}
const char *pypocketfft_DS = R"DELIM(Fast Fourier and Hartley transforms. const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms.
This module supports This module supports
- single and double precision - single and double precision
...@@ -244,9 +252,9 @@ This module supports ...@@ -244,9 +252,9 @@ This module supports
For two- and higher-dimensional transforms the code will use SSE2 and AVX For two- and higher-dimensional transforms the code will use SSE2 and AVX
vector instructions for faster execution if these are supported by the CPU and vector instructions for faster execution if these are supported by the CPU and
were enabled during compilation. were enabled during compilation.
)DELIM"; )""";
const char *fftn_DS = R"DELIM( const char *fftn_DS = R"""(
Performs a forward complex FFT. Performs a forward complex FFT.
Parameters Parameters
...@@ -266,9 +274,9 @@ Returns ...@@ -266,9 +274,9 @@ Returns
------- -------
np.ndarray (same shape and data type as a) np.ndarray (same shape and data type as a)
The transformed data. The transformed data.
)DELIM"; )""";
const char *ifftn_DS = R"DELIM(Performs a backward complex FFT. const char *ifftn_DS = R"""(Performs a backward complex FFT.
Parameters Parameters
---------- ----------
...@@ -287,9 +295,9 @@ Returns ...@@ -287,9 +295,9 @@ Returns
------- -------
np.ndarray (same shape and data type as a) np.ndarray (same shape and data type as a)
The transformed data The transformed data
)DELIM"; )""";
const char *rfftn_DS = R"DELIM(Performs a forward real-valued FFT. const char *rfftn_DS = R"""(Performs a forward real-valued FFT.
Parameters Parameters
---------- ----------
...@@ -307,9 +315,9 @@ np.ndarray (np.complex64 or np.complex128) ...@@ -307,9 +315,9 @@ np.ndarray (np.complex64 or np.complex128)
The transformed data. The shape is identical to that of the input array, The transformed data. The shape is identical to that of the input array,
except for the axis that was transformed last. If the length of that axis except for the axis that was transformed last. If the length of that axis
was n on input, it is n//2+1 on output. was n on input, it is n//2+1 on output.
)DELIM"; )""";
const char *rfft_scipy_DS = R"DELIM(Performs a forward real-valued FFT. const char *rfft_scipy_DS = R"""(Performs a forward real-valued FFT.
Parameters Parameters
---------- ----------
...@@ -329,9 +337,9 @@ np.ndarray (np.float32 or np.float64) ...@@ -329,9 +337,9 @@ np.ndarray (np.float32 or np.float64)
The transformed data. The shape is identical to that of the input array. The transformed data. The shape is identical to that of the input array.
Along the transformed axis, values are arranged in Along the transformed axis, values are arranged in
FFTPACK half-complex order, i.e. `a[0].re, a[1].re, a[1].im, a[2].re ...`. FFTPACK half-complex order, i.e. `a[0].re, a[1].re, a[1].im, a[2].re ...`.
)DELIM"; )""";
const char *irfftn_DS = R"DELIM(Performs a backward real-valued FFT. const char *irfftn_DS = R"""(Performs a backward real-valued FFT.
Parameters Parameters
---------- ----------
...@@ -351,9 +359,9 @@ np.ndarray (np.float32 or np.float64) ...@@ -351,9 +359,9 @@ np.ndarray (np.float32 or np.float64)
The transformed data. The shape is identical to that of the input array, The transformed data. The shape is identical to that of the input array,
except for the axis that was transformed last, which has now `lastsize` except for the axis that was transformed last, which has now `lastsize`
entries. entries.
)DELIM"; )""";
const char *irfft_scipy_DS = R"DELIM(Performs a backward real-valued FFT. const char *irfft_scipy_DS = R"""(Performs a backward real-valued FFT.
Parameters Parameters
---------- ----------
...@@ -372,9 +380,9 @@ Returns ...@@ -372,9 +380,9 @@ Returns
------- -------
np.ndarray (np.float32 or np.float64) np.ndarray (np.float32 or np.float64)
The transformed data. The shape is identical to that of the input array. The transformed data. The shape is identical to that of the input array.
)DELIM"; )""";
const char *hartley_DS = R"DELIM(Performs a Hartley transform. const char *hartley_DS = R"""(Performs a Hartley transform.
For every requested axis, a 1D forward Fourier transform is carried out, For every requested axis, a 1D forward Fourier transform is carried out,
and the sum of real and imaginary parts of the result is stored in the output and the sum of real and imaginary parts of the result is stored in the output
array. array.
...@@ -396,32 +404,31 @@ Returns ...@@ -396,32 +404,31 @@ Returns
------- -------
np.ndarray (same shape and data type as a) np.ndarray (same shape and data type as a)
The transformed data The transformed data
)DELIM"; )""";
} // unnamed namespace } // unnamed namespace
#pragma GCC visibility pop
PYBIND11_MODULE(pypocketfft, m) PYBIND11_MODULE(pypocketfft, m)
{ {
using namespace pybind11::literals; using namespace pybind11::literals;
m.doc() = pypocketfft_DS; m.doc() = pypocketfft_DS;
m.def("fftn",&fftn, fftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1., m.def("fftn",&fftn, fftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false, "nthreads"_a=1);
m.def("ifftn",&ifftn, ifftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1., m.def("ifftn",&ifftn, ifftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false, "nthreads"_a=1);
m.def("rfftn",&rfftn, rfftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.); m.def("rfftn",&rfftn, rfftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
"nthreads"_a=1);
m.def("rfft_scipy",&rfft_scipy, rfft_scipy_DS, "a"_a, "axis"_a, "fct"_a=1., m.def("rfft_scipy",&rfft_scipy, rfft_scipy_DS, "a"_a, "axis"_a, "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false, "nthreads"_a=1);
m.def("irfftn",&irfftn, irfftn_DS, "a"_a, "axes"_a=py::none(), "lastsize"_a=0, m.def("irfftn",&irfftn, irfftn_DS, "a"_a, "axes"_a=py::none(), "lastsize"_a=0,
"fct"_a=1.); "fct"_a=1., "nthreads"_a=1);
m.def("irfft_scipy",&irfft_scipy, irfft_scipy_DS, "a"_a, "axis"_a, "fct"_a=1., m.def("irfft_scipy",&irfft_scipy, irfft_scipy_DS, "a"_a, "axis"_a, "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false, "nthreads"_a=1);
m.def("hartley",&hartley, hartley_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1., m.def("hartley",&hartley, hartley_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false, "nthreads"_a=1);
m.def("hartley2",&hartley2, "a"_a, "axes"_a=py::none(), "fct"_a=1., m.def("hartley2",&hartley2, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
"inplace"_a=false); "inplace"_a=false, "nthreads"_a=1);
m.def("complex2hartley",&mycomplex2hartley, "in"_a, "tmp"_a, "axes"_a, m.def("complex2hartley",&mycomplex2hartley, "in"_a, "tmp"_a, "axes"_a,
"inplace"_a=false); "inplace"_a=false);
} }
...@@ -63,10 +63,11 @@ if sys.platform == 'darwin': ...@@ -63,10 +63,11 @@ if sys.platform == 'darwin':
builder = setuptools.command.build_ext.build_ext(Distribution()) builder = setuptools.command.build_ext.build_ext(Distribution())
base_library_link_args.append('-dynamiclib') base_library_link_args.append('-dynamiclib')
else: else:
extra_compile_args += ['-march=native', '-O3', '-Wfatal-errors', '-Wno-ignored-attributes'] extra_compile_args += ['-march=native', '-O3', '-Wfatal-errors', '-Wno-ignored-attributes', '-DPOCKETFFT_OPENMP', '-fopenmp', '-Wfloat-conversion' ,'-Wsign-conversion', '-Wconversion' ,'-W', '-Wall']
python_module_link_args += ['-march=native'] python_module_link_args += ['-march=native']
extra_cc_compile_args.append('--std=c++11') extra_cc_compile_args.append('--std=c++11')
python_module_link_args.append("-Wl,-rpath,$ORIGIN") python_module_link_args.append("-Wl,-rpath,$ORIGIN")
python_module_link_args.append('-fopenmp')
extra_cc_compile_args = extra_compile_args + extra_cc_compile_args extra_cc_compile_args = extra_compile_args + extra_cc_compile_args
......
...@@ -4,7 +4,7 @@ import pypocketfft ...@@ -4,7 +4,7 @@ import pypocketfft
def _l2error(a,b): def _l2error(a,b):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2)) return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))
nthreads=0
cmaxerr=0. cmaxerr=0.
fmaxerr=0. fmaxerr=0.
cmaxerrf=0. cmaxerrf=0.
...@@ -23,32 +23,32 @@ def test(): ...@@ -23,32 +23,32 @@ def test():
lastsize = shape[axes[-1]] lastsize = shape[axes[-1]]
fct = 1./np.prod(np.take(shape, axes))