Commit 39524f23 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

restructuring; add interface for scipy emulation

parent 2cdcfbca
......@@ -976,8 +976,8 @@ template<typename T0> class rfftp
#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
template<typename T> NOINLINE void radf2 (size_t ido, size_t l1, const T * restrict cc,
T * restrict ch, const T0 * restrict wa)
template<typename T> NOINLINE void radf2 (size_t ido, size_t l1,
const T * restrict cc, T * restrict ch, const T0 * restrict wa)
{
constexpr size_t cdim=2;
......@@ -1001,8 +1001,8 @@ template<typename T> NOINLINE void radf2 (size_t ido, size_t l1, const T * restr
}
}
template<typename T> NOINLINE void radf3(size_t ido, size_t l1, const T * restrict cc,
T * restrict ch, const T0 * restrict wa)
template<typename T> NOINLINE void radf3(size_t ido, size_t l1,
const T * restrict cc, T * restrict ch, const T0 * restrict wa)
{
constexpr size_t cdim=3;
constexpr T0 taur=-0.5, taui=0.86602540378443864676;
......@@ -1035,8 +1035,8 @@ template<typename T> NOINLINE void radf3(size_t ido, size_t l1, const T * restri
}
}
template<typename T> NOINLINE void radf4(size_t ido, size_t l1, const T * restrict cc,
T * restrict ch, const T0 * restrict wa)
template<typename T> NOINLINE void radf4(size_t ido, size_t l1,
const T * restrict cc, T * restrict ch, const T0 * restrict wa)
{
constexpr size_t cdim=4;
constexpr T0 hsqt2=0.70710678118654752440;
......@@ -1076,8 +1076,8 @@ template<typename T> NOINLINE void radf4(size_t ido, size_t l1, const T * restri
}
}
template<typename T> NOINLINE void radf5(size_t ido, size_t l1, const T * restrict cc,
T * restrict ch, const T0 * restrict wa)
template<typename T> NOINLINE void radf5(size_t ido, size_t l1,
const T * restrict cc, T * restrict ch, const T0 * restrict wa)
{
constexpr size_t cdim=5;
constexpr T0 tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
......@@ -1301,8 +1301,8 @@ template<typename T> NOINLINE void radb2(size_t ido, size_t l1, const T * restri
}
}
template<typename T> NOINLINE void radb3(size_t ido, size_t l1, const T * restrict cc,
T * restrict ch, const T0 * restrict wa)
template<typename T> NOINLINE void radb3(size_t ido, size_t l1,
const T * restrict cc, T * restrict ch, const T0 * restrict wa)
{
constexpr size_t cdim=3;
constexpr T0 taur=-0.5, taui=0.86602540378443864676;
......@@ -1336,8 +1336,8 @@ template<typename T> NOINLINE void radb3(size_t ido, size_t l1, const T * restri
}
}
template<typename T> NOINLINE void radb4(size_t ido, size_t l1, const T * restrict cc,
T * restrict ch, const T0 * restrict wa)
template<typename T> NOINLINE void radb4(size_t ido, size_t l1,
const T * restrict cc, T * restrict ch, const T0 * restrict wa)
{
constexpr size_t cdim=4;
constexpr T0 sqrt2=1.41421356237309504880;
......@@ -1382,8 +1382,8 @@ template<typename T> NOINLINE void radb4(size_t ido, size_t l1, const T * restri
}
}
template<typename T>NOINLINE void radb5(size_t ido, size_t l1, const T * restrict cc,
T * restrict ch, const T0 * restrict wa)
template<typename T>NOINLINE void radb5(size_t ido, size_t l1,
const T * restrict cc, T * restrict ch, const T0 * restrict wa)
{
constexpr size_t cdim=5;
constexpr T0 tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
......@@ -1821,19 +1821,15 @@ template<typename T0> class fftblue
template<typename T> NOINLINE void backward_r(T c[], T0 fct)
{
arr<T> tmp(2*n);
tmp[0]=c[0];
tmp[1]=c[0]*0.;
memcpy (tmp.data()+2,c+1, (n-1)*sizeof(T));
if ((n&1)==0) tmp[n+1]=c[0]*0.;
for (size_t m=2; m<n; m+=2)
{
tmp[2*n-m]=tmp[m];
tmp[2*n-m+1]=-tmp[m+1];
}
fft<true>((cmplx<T> *)tmp.data(),fct);
arr<cmplx<T>> tmp(n);
tmp[0].Set(c[0],c[0]*0);
memcpy ((void *)(tmp.data()+1),(void *)(c+1), (n-1)*sizeof(T));
if ((n&1)==0) tmp[n/2].i=c[0]*0.;
for (size_t m=1; 2*m<n; ++m)
tmp[n-m].Set(tmp[m].r, -tmp[m].i);
fft<true>(tmp.data(),fct);
for (size_t m=0; m<n; ++m)
c[m] = tmp[2*m];
c[m] = tmp[m].r;
}
template<typename T> NOINLINE void forward_r(T c[], T0 fct)
......@@ -2056,12 +2052,12 @@ template<typename T> struct VTYPE{};
template<> struct VTYPE<double>
{
using type = double __attribute__ ((vector_size (VBYTELEN)));
static constexpr int vlen=VBYTELEN/8;
static constexpr int vlen=VBYTELEN/sizeof(double);
};
template<> struct VTYPE<float>
{
using type = float __attribute__ ((vector_size (VBYTELEN)));
static constexpr int vlen=VBYTELEN/4;
static constexpr int vlen=VBYTELEN/sizeof(float);
};
#else
template<typename T> struct VTYPE{};
......@@ -2087,7 +2083,8 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape,
{
auto axsize = shape[axes[i]];
auto othersize = fullsize/axsize;
tmpsize = max(tmpsize, axsize*((othersize>=VTYPE<T>::vlen) ? VTYPE<T>::vlen : 1));
tmpsize = max(tmpsize,
axsize*((othersize>=VTYPE<T>::vlen) ? VTYPE<T>::vlen : 1));
}
return arr<char>(tmpsize*elemsize);
}
......@@ -2119,7 +2116,7 @@ template<typename T> NOINLINE void pocketfft_general_c(
tdatav[i].r[j] = it.in(j,i).r;
tdatav[i].i[j] = it.in(j,i).i;
}
forward ? plan->forward((vtype *)tdatav, fct)
forward ? plan->forward ((vtype *)tdatav, fct)
: plan->backward((vtype *)tdatav, fct);
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
......@@ -2131,20 +2128,20 @@ template<typename T> NOINLINE void pocketfft_general_c(
it.advance(1);
auto tdata = (cmplx<T> *)storage.data();
if (it.inplace() && it.contiguous_out()) // fully in-place
forward ? plan->forward((T *)(&it.in(0)), fct)
: plan->backward((T *)(&it.in(0)), fct);
forward ? plan->forward ((T *)(&it.out(0)), fct)
: plan->backward((T *)(&it.out(0)), fct);
else if (it.contiguous_out()) // compute FFT in output location
{
for (size_t i=0; i<len; ++i)
it.out(i) = it.in(i);
forward ? plan->forward((T *)(&it.out(0)), fct)
forward ? plan->forward ((T *)(&it.out(0)), fct)
: plan->backward((T *)(&it.out(0)), fct);
}
else
{
for (size_t i=0; i<len; ++i)
tdata[i] = it.in(i);
forward ? plan->forward((T *)tdata, fct)
forward ? plan->forward ((T *)tdata, fct)
: plan->backward((T *)tdata, fct);
for (size_t i=0; i<len; ++i)
it.out(i) = tdata[i];
......@@ -2313,6 +2310,58 @@ template<typename T> NOINLINE void pocketfft_general_c2r(
}
}
template<typename T> NOINLINE void pocketfft_general_r(
const ndarr<T> &in, ndarr<T> &out, int axis, bool forward, T fct)
{
auto storage = alloc_tmp<T>(in.shape(), in.shape(axis), sizeof(T));
constexpr int vlen = VTYPE<T>::vlen;
multi_iter<vlen, T, T> it(in, out, axis);
size_t len=it.length_in();
pocketfft_r<T> plan(len);
#if defined(HAVE_VECSUPPORT)
if (vlen>1)
while (it.remaining()>=vlen)
{
using vtype = typename VTYPE<T>::type;
it.advance(vlen);
auto tdatav = (vtype *)storage.data();
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = it.in(j,i);
forward ? plan.forward (tdatav, fct)
: plan.backward(tdatav, fct);
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
it.out(j,i) = tdatav[i][j];
}
#endif
while (it.remaining()>0)
{
it.advance(1);
auto tdata = (T *)storage.data();
if (it.inplace() && it.contiguous_out()) // fully in-place
forward ? plan.forward (&it.out(0), fct)
: plan.backward(&it.out(0), fct);
else if (it.contiguous_out()) // compute FFT in output location
{
for (size_t i=0; i<len; ++i)
it.out(i) = it.in(i);
forward ? plan.forward (&it.out(0), fct)
: plan.backward(&it.out(0), fct);
}
else
{
for (size_t i=0; i<len; ++i)
tdata[i] = it.in(i);
forward ? plan.forward (tdata, fct)
: plan.backward(tdata, fct);
for (size_t i=0; i<len; ++i)
it.out(i) = tdata[i];
}
}
}
//
// Python interface
//
......@@ -2410,6 +2459,28 @@ py::array rfftn(const py::array &in, py::object axes_, double fct)
return tcheck(in, f64, f32) ? rfftn_internal<double>(in, axes_, fct)
: rfftn_internal<float> (in, axes_, fct);
}
template<typename T> py::array xrfft_scipy(const py::array &in,
int axis, double fct, bool inplace, bool fwd)
{
auto dims(copy_shape(in));
py::array res = inplace ? in : py::array_t<T>(dims);
ndarr<T> ain(in.data(), dims, copy_strides(in));
ndarr<T> aout(res.mutable_data(), dims, copy_strides(res));
pocketfft_general_r<T>(ain, aout, axis, fwd, fct);
return res;
}
py::array rfft_scipy(const py::array &in, int axis, double fct, bool inplace)
{
return tcheck(in, f64, f32) ?
xrfft_scipy<double>(in, axis, fct, inplace, true) :
xrfft_scipy<float> (in, axis, fct, inplace, true);
}
py::array irfft_scipy(const py::array &in, int axis, double fct, bool inplace)
{
return tcheck(in, f64, f32) ?
xrfft_scipy<double>(in, axis, fct, inplace, false) :
xrfft_scipy<float> (in, axis, fct, inplace, false);
}
template<typename T> py::array irfftn_internal(const py::array &in,
py::object axes_, size_t lastsize, T fct)
{
......@@ -2639,6 +2710,10 @@ PYBIND11_MODULE(pypocketfft, m)
m.def("ifftn",&ifftn, ifftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
"inplace"_a=false);
m.def("rfftn",&rfftn, rfftn_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.);
m.def("rfft_scipy",&rfft_scipy, "a"_a, "axis"_a, "fct"_a=1.,
"inplace"_a=false);
m.def("irfft_scipy",&irfft_scipy, "a"_a, "axis"_a, "fct"_a=1.,
"inplace"_a=false);
m.def("irfftn",&irfftn, irfftn_DS, "a"_a, "axes"_a=py::none(), "lastsize"_a=0,
"fct"_a=1.);
m.def("hartley",&hartley, hartley_DS, "a"_a, "axes"_a=py::none(), "fct"_a=1.,
......
......@@ -32,96 +32,92 @@ def test1D(len):
@pmp("shp", shapes)
def test_fftn(shp):
a=np.random.rand(*shp)-0.5 + 1j*np.random.rand(*shp)-0.5j
vmax = np.max(np.abs(pyfftw.interfaces.numpy_fft.fftn(a)))
assert(_l2error(pyfftw.interfaces.numpy_fft.fftn(a), pypocketfft.fftn(a))<1e-15*vmax)
assert(_l2error(pyfftw.interfaces.numpy_fft.fftn(a), pypocketfft.fftn(a))<1e-15)
a=a.astype(np.complex64)
assert(_l2error(pyfftw.interfaces.numpy_fft.fftn(a), pypocketfft.fftn(a))<5e-7*vmax)
assert(_l2error(pyfftw.interfaces.numpy_fft.fftn(a), pypocketfft.fftn(a))<5e-7)
@pmp("shp", shapes2D)
@pmp("axes", ((0,),(1,),(0,1),(1,0)))
def test_fftn2D(shp, axes):
a=np.random.rand(*shp)-0.5 + 1j*np.random.rand(*shp)-0.5j
vmax = np.max(np.abs(pyfftw.interfaces.numpy_fft.fftn(a, axes=axes)))
assert(_l2error(pyfftw.interfaces.numpy_fft.fftn(a, axes=axes), pypocketfft.fftn(a, axes=axes))<1e-15*vmax)
assert(_l2error(pyfftw.interfaces.numpy_fft.fftn(a, axes=axes), pypocketfft.fftn(a, axes=axes))<1e-15)
a=a.astype(np.complex64)
assert(_l2error(pyfftw.interfaces.numpy_fft.fftn(a, axes=axes), pypocketfft.fftn(a, axes=axes))<5e-7*vmax)
assert(_l2error(pyfftw.interfaces.numpy_fft.fftn(a, axes=axes), pypocketfft.fftn(a, axes=axes))<5e-7)
@pmp("shp", shapes)
def test_rfftn(shp):
a=np.random.rand(*shp)-0.5
vmax = np.max(np.abs(pyfftw.interfaces.numpy_fft.fftn(a)))
assert(_l2error(pyfftw.interfaces.numpy_fft.rfftn(a), pypocketfft.rfftn(a))<1e-15*vmax)
assert(_l2error(pyfftw.interfaces.numpy_fft.rfftn(a), pypocketfft.rfftn(a))<1e-15)
a=a.astype(np.float32)
assert(_l2error(pyfftw.interfaces.numpy_fft.rfftn(a), pypocketfft.rfftn(a))<5e-7*vmax)
assert(_l2error(pyfftw.interfaces.numpy_fft.rfftn(a), pypocketfft.rfftn(a))<5e-7)
@pmp("shp", shapes)
def test_rfft_scipy(shp):
for i in range(len(shp)):
a=np.random.rand(*shp)-0.5
assert(_l2error(pyfftw.interfaces.scipy_fftpack.rfft(a,axis=i), pypocketfft.rfft_scipy(a,axis=i))<1e-15)
assert(_l2error(pyfftw.interfaces.scipy_fftpack.irfft(a,axis=i), pypocketfft.irfft_scipy(a,axis=i,fct=1./a.shape[i]))<1e-15)
@pmp("shp", shapes2D)
@pmp("axes", ((0,),(1,),(0,1),(1,0)))
def test_rfftn2D(shp, axes):
a=np.random.rand(*shp)-0.5
vmax = np.max(np.abs(pyfftw.interfaces.numpy_fft.rfftn(a, axes=axes)))
assert(_l2error(pyfftw.interfaces.numpy_fft.rfftn(a, axes=axes), pypocketfft.rfftn(a, axes=axes))<1e-15*vmax)
assert(_l2error(pyfftw.interfaces.numpy_fft.rfftn(a, axes=axes), pypocketfft.rfftn(a, axes=axes))<1e-15)
a=a.astype(np.float32)
assert(_l2error(pyfftw.interfaces.numpy_fft.rfftn(a, axes=axes), pypocketfft.rfftn(a, axes=axes))<5e-7*vmax)
assert(_l2error(pyfftw.interfaces.numpy_fft.rfftn(a, axes=axes), pypocketfft.rfftn(a, axes=axes))<5e-7)
@pmp("shp", shapes)
def test_identity(shp):
a=np.random.rand(*shp)-0.5 + 1j*np.random.rand(*shp)-0.5j
vmax = np.max(np.abs(a))
assert(_l2error(pypocketfft.ifftn(pypocketfft.fftn(a),fct=1./a.size), a)<1.5e-15*vmax)
assert(_l2error(pypocketfft.ifftn(pypocketfft.fftn(a),fct=1./a.size), a)<1.5e-15)
tmp=a.copy()
assert (pypocketfft.ifftn(pypocketfft.fftn(tmp, inplace=True), fct=1./a.size, inplace=True) is tmp)
assert(_l2error(tmp, a)<1.5e-15*vmax)
assert(_l2error(tmp, a)<1.5e-15)
a=a.astype(np.complex64)
assert(_l2error(pypocketfft.ifftn(pypocketfft.fftn(a),fct=1./a.size), a)<6e-7*vmax)
assert(_l2error(pypocketfft.ifftn(pypocketfft.fftn(a),fct=1./a.size), a)<6e-7)
@pmp("shp", shapes)
def test_identity_r(shp):
a=np.random.rand(*shp)-0.5
b=a.astype(np.float32)
vmax = np.max(np.abs(a))
for ax in range(a.ndim):
n = a.shape[ax]
assert(_l2error(pypocketfft.irfftn(pypocketfft.rfftn(a,(ax,)),(ax,),lastsize=n,fct=1./n), a)<1e-15*vmax)
assert(_l2error(pypocketfft.irfftn(pypocketfft.rfftn(b,(ax,)),(ax,),lastsize=n,fct=1./n), b)<5e-7*vmax)
assert(_l2error(pypocketfft.irfftn(pypocketfft.rfftn(a,(ax,)),(ax,),lastsize=n,fct=1./n), a)<1e-15)
assert(_l2error(pypocketfft.irfftn(pypocketfft.rfftn(b,(ax,)),(ax,),lastsize=n,fct=1./n), b)<5e-7)
@pmp("shp", shapes)
def test_identity_r2(shp):
a=np.random.rand(*shp)-0.5 + 1j*np.random.rand(*shp)-0.5j
a=pypocketfft.rfftn(pypocketfft.irfftn(a))
fct = 1./pypocketfft.irfftn(a).size
vmax = np.max(np.abs(a))
assert(_l2error(pypocketfft.rfftn(pypocketfft.irfftn(a),fct=fct), a)<1e-15*vmax)
assert(_l2error(pypocketfft.rfftn(pypocketfft.irfftn(a),fct=fct), a)<1e-15)
@pmp("shp", shapes2D+shapes3D)
def test_hartley2(shp):
a=np.random.rand(*shp)-0.5
v1 = pypocketfft.hartley2(a)
v2 = pypocketfft.fftn(a.astype(np.complex128))
vmax = np.max(np.abs(v1))
v2 = v2.real+v2.imag
assert(_l2error(v1, v2)<1e-15*vmax)
assert(_l2error(v1, v2)<1e-15)
@pmp("shp", shapes)
def test_hartley_identity(shp):
a=np.random.rand(*shp)-0.5
v1 = pypocketfft.hartley(pypocketfft.hartley(a))/a.size
vmax = np.max(np.abs(a))
assert(_l2error(a, v1)<1e-15*vmax)
assert(_l2error(a, v1)<1e-15)
@pmp("shp", shapes)
def test_hartley2_identity(shp):
a=np.random.rand(*shp)-0.5
v1 = pypocketfft.hartley2(pypocketfft.hartley2(a))/a.size
vmax = np.max(np.abs(a))
assert(_l2error(a, v1)<1e-15*vmax)
assert(_l2error(a, v1)<1e-15)
v1 = a.copy()
assert (pypocketfft.hartley2(pypocketfft.hartley2(v1, inplace=True), fct=1./a.size, inplace=True) is v1)
assert(_l2error(a, v1)<1e-15*vmax)
assert(_l2error(a, v1)<1e-15)
@pmp("shp", shapes2D)
@pmp("axes", ((0,),(1,),(0,1),(1,0)))
def test_hartley2_2D(shp, axes):
a=np.random.rand(*shp)-0.5
fct = 1./np.prod(np.take(shp,axes))
vmax = np.max(np.abs(a))
assert(_l2error(pypocketfft.hartley2(pypocketfft.hartley2(a, axes=axes), axes=axes, fct=fct),a)<1e-15*vmax)
assert(_l2error(pypocketfft.hartley2(pypocketfft.hartley2(a, axes=axes), axes=axes, fct=fct),a)<1e-15)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment