Commit d3732c1b authored by Martin Reinecke's avatar Martin Reinecke
Browse files

more tweaks, tests and benchmarks

parent 8171d7db
# Don't run this benchmark with numpy<1.17 ... it will probably take ages!
import numpy as np
import pypocketfft
from time import time
import matplotlib.pyplot as plt
def bench_nd_fftn(ndim, nmax, ntry, tp, nrepeat, filename=""):
res=[]
for n in range(ntry):
shp = np.random.randint(1,nmax+1,ndim)
a=(np.random.rand(*shp) + 1j*np.random.rand(*shp)).astype(tp)
tmin_np=1e38
for i in range(nrepeat):
t0=time()
b=np.fft.fftn(a)
t1=time()
tmin_np = min(tmin_np,t1-t0)
tmin_pp=1e38
for i in range(nrepeat):
t0=time()
b=pypocketfft.fftn(a)
t1=time()
tmin_pp = min(tmin_pp,t1-t0)
res.append(tmin_pp/tmin_np)
plt.title("t(pypocketfft / numpy 1.17), {}D, {}, max_extent={}".format(ndim, str(tp), nmax))
plt.xlabel("time ratio")
plt.ylabel("counts")
plt.hist(res,bins="auto")
if filename != "":
plt.savefig(filename)
plt.show()
bench_nd_fftn(1, 8192, 1000, "c16", 10, "1d.png")
bench_nd_fftn(2, 2048, 100, "c16", 2, "2d.png")
bench_nd_fftn(3, 256, 100, "c16", 1, "3d.png")
bench_nd_fftn(1, 8192, 1000, "c8", 10, "1d_single.png")
bench_nd_fftn(2, 2048, 100, "c8", 2, "2d_single.png")
bench_nd_fftn(3, 256, 100, "c8", 1, "3d_single.png")
......@@ -50,6 +50,9 @@ template<typename T> struct arr
if ((!p) && (n!=0))
throw bad_alloc();
}
arr(arr &&other)
: p(other.p), sz(other.sz)
{ other.p=nullptr; other.sz=0; }
~arr() { dealloc(p); }
void resize(size_t n)
......@@ -1940,10 +1943,10 @@ class multiarr
vector<diminfo> dim;
public:
multiarr (size_t ndim_, const size_t *n, const int64_t *s)
multiarr (const vector<size_t> &n, const vector<int64_t> &s)
{
dim.reserve(ndim_);
for (size_t i=0; i<ndim_; ++i)
dim.reserve(n.size());
for (size_t i=0; i<n.size(); ++i)
dim.push_back({n[i], s[i]});
}
size_t ndim() const { return dim.size(); }
......@@ -2023,31 +2026,47 @@ template<> struct VTYPE<float>
{ using type = __m128; };
#endif
template<typename T> void pocketfft_general_c(int ndim, const size_t *shape,
const int64_t *stride_in, const int64_t *stride_out, int nax,
const size_t *axes, bool forward, const cmplx<T> *data_in,
cmplx<T> *data_out, T fct)
template<typename T> arr<char> alloc_tmp(const vector<size_t> &shape,
size_t tmpsize, size_t elemsize)
{
// allocate temporary 1D array storage
size_t tmpsize = 0;
for (int iax=0; iax<nax; ++iax)
tmpsize = max(tmpsize, shape[axes[iax]]);
#ifdef HAVE_VECSUPPORT
using vtype = typename VTYPE<T>::type;
constexpr int vlen = sizeof(vtype)/sizeof(T);
return arr<char>(tmpsize*elemsize*vlen);
#endif
return arr<char>(tmpsize*elemsize);
}
template<typename T> arr<char> alloc_tmp(const vector<size_t> &shape,
const vector<size_t> &axes, size_t elemsize)
{
size_t fullsize=1;
size_t ndim = shape.size();
for (size_t i=0; i<ndim; ++i)
fullsize*=shape[i];
size_t tmpsize=0;
for (size_t i=0; i<axes.size(); ++i)
tmpsize = max(tmpsize, shape[axes[i]]);
return alloc_tmp<T>(shape, tmpsize, elemsize);
}
template<typename T> void pocketfft_general_c(const vector<size_t> &shape,
const vector<int64_t> &stride_in, const vector<int64_t> &stride_out,
const vector<size_t> &axes, bool forward, const cmplx<T> *data_in,
cmplx<T> *data_out, T fct)
{
auto storage = alloc_tmp<T>(shape, axes, sizeof(cmplx<T>));
#ifdef HAVE_VECSUPPORT
using vtype = typename VTYPE<T>::type;
auto tdatav = (cmplx<vtype> *)storage.data();
constexpr int vlen = sizeof(vtype)/sizeof(T);
arr<cmplx<vtype>> xdata(tmpsize);
auto tdata = (cmplx<T> *)xdata.data();
auto tdatav = xdata.data();
#else
arr<cmplx<T>> xdata(tmpsize);
auto tdata = xdata.data();
#endif
auto tdata = (cmplx<T> *)storage.data();
multiarr a_in(ndim, shape, stride_in), a_out(ndim, shape, stride_out);
multiarr a_in(shape, stride_in), a_out(shape, stride_out);
unique_ptr<pocketfft_c<T>> plan;
for (int iax=0; iax<nax; ++iax)
for (size_t iax=0; iax<axes.size(); ++iax)
{
int axis = axes[iax];
multi_iter it_in(a_in, axis), it_out(a_out, axis);
......@@ -2097,30 +2116,22 @@ template<typename T> void pocketfft_general_c(int ndim, const size_t *shape,
}
}
template<typename T> void pocketfft_general_hartley(int ndim, const size_t *shape,
const int64_t *stride_in, const int64_t *stride_out, int nax,
const size_t *axes, const T *data_in, T *data_out, T fct)
template<typename T> void pocketfft_general_hartley(const vector<size_t> &shape,
const vector<int64_t> &stride_in, const vector<int64_t> &stride_out,
const vector<size_t> &axes, const T *data_in, T *data_out, T fct)
{
// allocate temporary 1D array storage, if necessary
size_t tmpsize = 0;
for (int iax=0; iax<nax; ++iax)
tmpsize = max(tmpsize, shape[axes[iax]]);
auto storage = alloc_tmp<T>(shape, axes, sizeof(T));
#ifdef HAVE_VECSUPPORT
using vtype = typename VTYPE<T>::type;
auto tdatav = (vtype *)storage.data();
constexpr int vlen = sizeof(vtype)/sizeof(T);
arr<vtype> xdata(tmpsize);
auto tdata = (T *)xdata.data();
auto tdatav = xdata.data();
#else
arr<T> xdata(tmpsize);
auto tdata = xdata.data();
#endif
auto tdata = (T *)storage.data();
multiarr a_in(ndim, shape, stride_in), a_out(ndim, shape, stride_out);
multiarr a_in(shape, stride_in), a_out(shape, stride_out);
unique_ptr<pocketfft_r<T>> plan;
for (int iax=0; iax<nax; ++iax)
for (size_t iax=0; iax<axes.size(); ++iax)
{
int axis = axes[iax];
multi_iter it_in(a_in, axis), it_out(a_out, axis);
......@@ -2179,23 +2190,19 @@ template<typename T> void pocketfft_general_hartley(int ndim, const size_t *shap
}
}
template<typename T> void pocketfft_general_r2c(int ndim, const size_t *shape,
const int64_t *stride_in, const int64_t *stride_out, size_t axis,
template<typename T> void pocketfft_general_r2c(const vector<size_t> &shape,
const vector<int64_t> &stride_in, const vector<int64_t> &stride_out, size_t axis,
const T *data_in, cmplx<T> *data_out, T fct)
{
// allocate temporary 1D array storage
auto storage = alloc_tmp<T>(shape, shape[axis], sizeof(T));
#ifdef HAVE_VECSUPPORT
using vtype = typename VTYPE<T>::type;
auto tdatav = (vtype *)storage.data();
constexpr int vlen = sizeof(vtype)/sizeof(T);
arr<vtype> xdata(shape[axis]);
auto tdata = (T *)xdata.data();
auto tdatav = xdata.data();
#else
arr<T> xdata(shape[axis]);
auto tdata = xdata.data();
#endif
auto tdata = (T *)storage.data();
multiarr a_in(ndim, shape, stride_in), a_out(ndim, shape, stride_out);
multiarr a_in(shape, stride_in), a_out(shape, stride_out);
pocketfft_r<T> plan(shape[axis]);
multi_iter it_in(a_in, axis), it_out(a_out, axis);
size_t len=shape[axis], s_i=it_in.stride(), s_o=it_out.stride();
......@@ -2264,23 +2271,19 @@ template<typename T> void pocketfft_general_r2c(int ndim, const size_t *shape,
it_out.advance();
}
}
template<typename T> void pocketfft_general_c2r(int ndim, const size_t *shape_out,
const int64_t *stride_in, const int64_t *stride_out, size_t axis,
template<typename T> void pocketfft_general_c2r(const vector<size_t> &shape_out,
const vector<int64_t> &stride_in, const vector<int64_t> &stride_out, size_t axis,
const cmplx<T> *data_in, T *data_out, T fct)
{
// allocate temporary 1D array storage
auto storage = alloc_tmp<T>(shape_out, shape_out[axis], sizeof(T));
#ifdef HAVE_VECSUPPORT
using vtype = typename VTYPE<T>::type;
auto tdatav = (vtype *)storage.data();
constexpr int vlen = sizeof(vtype)/sizeof(T);
arr<vtype> xdata(shape_out[axis]);
auto tdata = (T *)xdata.data();
auto tdatav = xdata.data();
#else
arr<T> xdata(shape_out[axis]);
auto tdata = xdata.data();
#endif
auto tdata = (T *)storage.data();
multiarr a_in(ndim, shape_out, stride_in), a_out(ndim, shape_out, stride_out);
multiarr a_in(shape_out, stride_in), a_out(shape_out, stride_out);
pocketfft_r<T> plan(shape_out[axis]);
multi_iter it_in(a_in, axis), it_out(a_out, axis);
size_t len=shape_out[axis], s_i=it_in.stride(), s_o=it_out.stride();
......@@ -2394,14 +2397,12 @@ py::array execute(const py::array &in, vector<size_t> axes, double fct, bool fwd
else throw runtime_error("unsupported data type");
make_strides(in, res, s_i, s_o);
if (in.dtype().is(c128))
pocketfft_general_c<double>(in.ndim(), dims.data(),
s_i.data(), s_o.data(), axes.size(),
axes.data(), fwd, (const cmplx<double> *)in.data(),
pocketfft_general_c<double>(dims,
s_i, s_o, axes, fwd, (const cmplx<double> *)in.data(),
(cmplx<double> *)res.mutable_data(), fct);
else
pocketfft_general_c<float>(in.ndim(), dims.data(),
s_i.data(), s_o.data(), axes.size(),
axes.data(), fwd, (const cmplx<float> *)in.data(),
pocketfft_general_c<float>(dims,
s_i, s_o, axes, fwd, (const cmplx<float> *)in.data(),
(cmplx<float> *)res.mutable_data(), fct);
return res;
}
......@@ -2429,29 +2430,27 @@ py::array rfftn(const py::array &in, py::object axes_, double fct)
else throw runtime_error("unsupported data type");
make_strides(in, res, s_i, s_o);
if (in.dtype().is(f64))
{
pocketfft_general_r2c<double>(in.ndim(), dims_in.data(),
s_i.data(), s_o.data(),
pocketfft_general_r2c<double>(dims_in,
s_i, s_o,
axes.back(), (const double *)in.data(),
(cmplx<double> *)res.mutable_data(), fct);
if (axes.size()>1)
pocketfft_general_c<double>(res.ndim(), dims_out.data(),
s_o.data(), s_o.data(), axes.size()-1,
axes.data(), true, (const cmplx<double> *)res.data(),
(cmplx<double> *)res.mutable_data(), 1.);
}
else
{
pocketfft_general_r2c<float>(in.ndim(), dims_in.data(),
s_i.data(), s_o.data(),
pocketfft_general_r2c<float>(dims_in,
s_i, s_o,
axes.back(), (const float *)in.data(),
(cmplx<float> *)res.mutable_data(), fct);
if (axes.size()>1)
pocketfft_general_c<float>(res.ndim(), dims_out.data(),
s_o.data(), s_o.data(), axes.size()-1,
axes.data(), true, (const cmplx<float> *)res.data(),
(cmplx<float> *)res.mutable_data(), 1.);
}
if (axes.size()==1) return res;
vector<size_t> axes2(axes.size()-1);
for (size_t i=0; i<axes2.size(); ++i)
axes2[i] = axes[i];
if (in.dtype().is(f64))
pocketfft_general_c<double>(dims_out,
s_o, s_o, axes2, true, (const cmplx<double> *)res.data(),
(cmplx<double> *)res.mutable_data(), 1.);
else
pocketfft_general_c<float>(dims_out,
s_o, s_o, axes2, true, (const cmplx<float> *)res.data(),
(cmplx<float> *)res.mutable_data(), 1.);
return res;
}
py::array irfftn(const py::array &in, py::object axes_, size_t lastsize,
......@@ -2489,13 +2488,13 @@ py::array irfftn(const py::array &in, py::object axes_, size_t lastsize,
else throw runtime_error("unsupported data type");
make_strides(inter, res, s_i, s_o);
if (inter.dtype().is(c128))
pocketfft_general_c2r<double>(inter.ndim(), dims_out.data(),
s_i.data(), s_o.data(),
pocketfft_general_c2r<double>(dims_out,
s_i, s_o,
axis, (const cmplx<double> *)inter.data(),
(double *)res.mutable_data(), fct);
else
pocketfft_general_c2r<float>(inter.ndim(), dims_out.data(),
s_i.data(), s_o.data(),
pocketfft_general_c2r<float>(dims_out,
s_i, s_o,
axis, (const cmplx<float> *)inter.data(),
(float *)res.mutable_data(), fct);
return res;
......@@ -2515,14 +2514,12 @@ py::array hartley(const py::array &in, py::object axes_, double fct)
else throw runtime_error("unsupported data type");
make_strides(in, res, s_i, s_o);
if (in.dtype().is(f64))
pocketfft_general_hartley<double>(in.ndim(), dims.data(),
s_i.data(), s_o.data(), axes.size(),
axes.data(), (const double *)in.data(),
pocketfft_general_hartley<double>(dims,
s_i, s_o, axes, (const double *)in.data(),
(double *)res.mutable_data(), 1.);
else
pocketfft_general_hartley<float>(in.ndim(), dims.data(),
s_i.data(), s_o.data(), axes.size(),
axes.data(), (const float *)in.data(),
pocketfft_general_hartley<float>(dims,
s_i, s_o, axes, (const float *)in.data(),
(float *)res.mutable_data(), 1.);
return res;
}
......@@ -2547,20 +2544,15 @@ template<typename T>py::array complex2hartley(const py::array &in,
make_strides(tmp, out, stride_tmp, stride_out);
auto axes = makeaxes(in, axes_);
size_t axis = axes.back();
multiarr a_tmp(ndim, dims_tmp.data(), stride_tmp.data()),
a_out(ndim, dims_out.data(), stride_out.data());
multiarr a_tmp(dims_tmp, stride_tmp),
a_out(dims_out, stride_out);
multi_iter it_tmp(a_tmp, axis), it_out(a_out, axis);
const cmplx<T> *tdata = (const cmplx<T> *)tmp.data();
T *odata = (T *)out.mutable_data();
vector<bool> swp(ndim-1,false);
for (auto i: axes)
{
if (i!=axis)
{
auto i2 = i<axis ? i : i-1;
swp[i2] = true;
}
}
swp[i<axis ? i : i-1] = true;
while(!it_tmp.done())
{
auto tofs = it_tmp.offset();
......@@ -2589,6 +2581,15 @@ template<typename T>py::array complex2hartley(const py::array &in,
}
return out;
}
py::array mycomplex2hartley(const py::array &in,
const py::array &tmp, py::object axes_)
{
if (in.dtype().is(f64))
return complex2hartley<double>(in, tmp, axes_);
else if (in.dtype().is(f32))
return complex2hartley<float>(in, tmp, axes_);
else throw runtime_error("unsupported data type");
}
py::array hartley2(const py::array &in, py::object axes_, double fct)
{
auto tmp = rfftn(in, axes_, fct);
......@@ -2724,4 +2725,5 @@ PYBIND11_MODULE(pypocketfft, m)
m.def("irfftn",&irfftn, irfftn_DS, "a"_a, "axes"_a=py::none(), "lastsize"_a=0, "fct"_a=1.);
m.def("hartley",&hartley, hartley_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.);
m.def("hartley2",&hartley2, "a"_a, "axes"_a=py::none(), "fct"_a=1.);
m.def("complex2hartley",&mycomplex2hartley, "in"_a, "tmp"_a, "axes"_a);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment