diff --git a/pypocketfft.cc b/pypocketfft.cc
index 597d361ddbdc5940ada23a95ef352f9488f05ebd..6fb321563b61ec91efef90b958f521ad430cbb88 100644
--- a/pypocketfft.cc
+++ b/pypocketfft.cc
@@ -80,9 +80,14 @@ template<typename T> py::array xfftn_internal(const py::array &in,
   {
   auto dims(copy_shape(in));
   py::array res = inplace ? in : py::array_t<complex<T>>(dims);
-  c2c(dims, copy_strides(in), copy_strides(res), axes, fwd,
-    reinterpret_cast<const complex<T> *>(in.data()),
-    reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct), nthreads);
+  auto s_in=copy_strides(in);
+  auto s_out=copy_strides(res);
+  auto d_in=reinterpret_cast<const complex<T> *>(in.data());
+  auto d_out=reinterpret_cast<complex<T> *>(res.mutable_data());
+  {
+  py::gil_scoped_release release;
+  c2c(dims, s_in, s_out, axes, fwd, d_in, d_out, T(fct), nthreads);
+  }
   return res;
   }
 
@@ -108,9 +113,14 @@ template<typename T> py::array rfftn_internal(const py::array &in,
   auto dims_in(copy_shape(in)), dims_out(dims_in);
   dims_out[axes.back()] = (dims_out[axes.back()]>>1)+1;
   py::array res = py::array_t<complex<T>>(dims_out);
-  r2c(dims_in, copy_strides(in), copy_strides(res), axes,
-    reinterpret_cast<const T *>(in.data()),
-    reinterpret_cast<complex<T> *>(res.mutable_data()), T(fct), nthreads);
+  auto s_in=copy_strides(in);
+  auto s_out=copy_strides(res);
+  auto d_in=reinterpret_cast<const T *>(in.data());
+  auto d_out=reinterpret_cast<complex<T> *>(res.mutable_data());
+  {
+  py::gil_scoped_release release;
+  r2c(dims_in, s_in, s_out, axes, d_in, d_out, T(fct), nthreads);
+  }
   return res;
   }
 
@@ -125,9 +135,14 @@ template<typename T> py::array xrfft_scipy(const py::array &in,
   {
   auto dims(copy_shape(in));
   py::array res = inplace ? in : py::array_t<T>(dims);
-  r2r_fftpack(dims, copy_strides(in), copy_strides(res), axis, fwd,
-    reinterpret_cast<const T *>(in.data()),
-    reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
+  auto s_in=copy_strides(in);
+  auto s_out=copy_strides(res);
+  auto d_in=reinterpret_cast<const T *>(in.data());
+  auto d_out=reinterpret_cast<T *>(res.mutable_data());
+  {
+  py::gil_scoped_release release;
+  r2r_fftpack(dims, s_in, s_out, axis, fwd, d_in, d_out, T(fct), nthreads);
+  }
   return res;
   }
 
@@ -155,9 +170,14 @@ template<typename T> py::array irfftn_internal(const py::array &in,
     throw runtime_error("bad lastsize");
   dims_out[axis] = lastsize;
   py::array res = py::array_t<T>(dims_out);
-  c2r(dims_out, copy_strides(in), copy_strides(res), axes,
-    reinterpret_cast<const complex<T> *>(in.data()),
-    reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
+  auto s_in=copy_strides(in);
+  auto s_out=copy_strides(res);
+  auto d_in=reinterpret_cast<const complex<T> *>(in.data());
+  auto d_out=reinterpret_cast<T *>(res.mutable_data());
+  {
+  py::gil_scoped_release release;
+  c2r(dims_out, s_in, s_out, axes, d_in, d_out, T(fct), nthreads);
+  }
   return res;
   }
 
@@ -173,9 +193,15 @@ template<typename T> py::array hartley_internal(const py::array &in,
   {
   auto dims(copy_shape(in));
   py::array res = inplace ? in : py::array_t<T>(dims);
-  r2r_hartley(dims, copy_strides(in), copy_strides(res), makeaxes(in, axes_),
-    reinterpret_cast<const T *>(in.data()),
-    reinterpret_cast<T *>(res.mutable_data()), T(fct), nthreads);
+  auto axes = makeaxes(in, axes_);
+  auto s_in=copy_strides(in);
+  auto s_out=copy_strides(res);
+  auto d_in=reinterpret_cast<const T *>(in.data());
+  auto d_out=reinterpret_cast<T *>(res.mutable_data());
+  {
+  py::gil_scoped_release release;
+  r2r_hartley(dims, s_in, s_out, axes, d_in, d_out, T(fct), nthreads);
+  }
   return res;
   }
 
@@ -196,6 +222,8 @@ template<typename T>py::array complex2hartley(const py::array &in,
   ndarr<cmplx<T>> atmp(tmp.data(), copy_shape(tmp), copy_strides(tmp));
   ndarr<T> aout(out.mutable_data(), copy_shape(out), copy_strides(out));
   auto axes = makeaxes(in, axes_);
+  {
+  py::gil_scoped_release release;
   size_t axis = axes.back();
   multi_iter<1,cmplx<T>,T> it(atmp, aout, axis);
   vector<bool> swp(ndim,false);
@@ -226,6 +254,7 @@ template<typename T>py::array complex2hartley(const py::array &in,
       aout[rofs + rev_i*it.stride_out()] = re-im;
       }
     }
+  }
   return out;
   }