diff --git a/pypocketfft.cc b/pypocketfft.cc index 597d361ddbdc5940ada23a95ef352f9488f05ebd..6fb321563b61ec91efef90b958f521ad430cbb88 100644 --- a/pypocketfft.cc +++ b/pypocketfft.cc @@ -80,9 +80,14 @@ template<typename T> py::array xfftn_internal(const py::array &in, { auto dims(copy_shape(in)); py::array res = inplace ? in : py::array_t<complex<T>>(dims); - c2c(dims, copy_strides(in), copy_strides(res), axes, fwd, - reinterpret_cast<const complex<T> *>(in.data()), - reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct), nthreads); + auto s_in=copy_strides(in); + auto s_out=copy_strides(res); + auto d_in=reinterpret_cast<const complex<T> *>(in.data()); + auto d_out=reinterpret_cast<complex<T> *>(res.mutable_data()); + { + py::gil_scoped_release release; + c2c(dims, s_in, s_out, axes, fwd, d_in, d_out, T(fct), nthreads); + } return res; } @@ -108,9 +113,14 @@ template<typename T> py::array rfftn_internal(const py::array &in, auto dims_in(copy_shape(in)), dims_out(dims_in); dims_out[axes.back()] = (dims_out[axes.back()]>>1)+1; py::array res = py::array_t<complex<T>>(dims_out); - r2c(dims_in, copy_strides(in), copy_strides(res), axes, - reinterpret_cast<const T *>(in.data()), - reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct), nthreads); + auto s_in=copy_strides(in); + auto s_out=copy_strides(res); + auto d_in=reinterpret_cast<const T *>(in.data()); + auto d_out=reinterpret_cast<complex<T> *>(res.mutable_data()); + { + py::gil_scoped_release release; + r2c(dims_in, s_in, s_out, axes, d_in, d_out, T(fct), nthreads); + } return res; } @@ -125,9 +135,14 @@ template<typename T> py::array xrfft_scipy(const py::array &in, { auto dims(copy_shape(in)); py::array res = inplace ? in : py::array_t<T>(dims); - r2r_fftpack(dims, copy_strides(in), copy_strides(res), axis, fwd, - reinterpret_cast<const T *>(in.data()), - reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads); + auto s_in=copy_strides(in); + auto s_out=copy_strides(res); + auto d_in=reinterpret_cast<const T *>(in.data()); + auto d_out=reinterpret_cast<T *>(res.mutable_data()); + { + py::gil_scoped_release release; + r2r_fftpack(dims, s_in, s_out, axis, fwd, d_in, d_out, T(fct), nthreads); + } return res; } @@ -155,9 +170,14 @@ template<typename T> py::array irfftn_internal(const py::array &in, throw runtime_error("bad lastsize"); dims_out[axis] = lastsize; py::array res = py::array_t<T>(dims_out); - c2r(dims_out, copy_strides(in), copy_strides(res), axes, - reinterpret_cast<const complex<T> *>(in.data()), - reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads); + auto s_in=copy_strides(in); + auto s_out=copy_strides(res); + auto d_in=reinterpret_cast<const complex<T> *>(in.data()); + auto d_out=reinterpret_cast<T *>(res.mutable_data()); + { + py::gil_scoped_release release; + c2r(dims_out, s_in, s_out, axes, d_in, d_out, T(fct), nthreads); + } return res; } @@ -173,9 +193,15 @@ template<typename T> py::array hartley_internal(const py::array &in, { auto dims(copy_shape(in)); py::array res = inplace ? in : py::array_t<T>(dims); - r2r_hartley(dims, copy_strides(in), copy_strides(res), makeaxes(in, axes_), - reinterpret_cast<const T *>(in.data()), - reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads); + auto axes = makeaxes(in, axes_); + auto s_in=copy_strides(in); + auto s_out=copy_strides(res); + auto d_in=reinterpret_cast<const T *>(in.data()); + auto d_out=reinterpret_cast<T *>(res.mutable_data()); + { + py::gil_scoped_release release; + r2r_hartley(dims, s_in, s_out, axes, d_in, d_out, T(fct), nthreads); + } return res; } @@ -196,6 +222,8 @@ template<typename T>py::array complex2hartley(const py::array &in, ndarr<cmplx<T>> atmp(tmp.data(), copy_shape(tmp), copy_strides(tmp)); ndarr<T> aout(out.mutable_data(), copy_shape(out), copy_strides(out)); auto axes = makeaxes(in, axes_); + { + py::gil_scoped_release release; size_t axis = axes.back(); multi_iter<1,cmplx<T>,T> it(atmp, aout, axis); vector<bool> swp(ndim,false); @@ -226,6 +254,7 @@ template<typename T>py::array complex2hartley(const py::array &in, aout[rofs + rev_i*it.stride_out()] = re-im; } } + } return out; }