Commit 656b2373 authored by Martin Reinecke's avatar Martin Reinecke

Merge branch 'sincos' into 'master'

DCT/DST support

See merge request !13
parents 0276f55e 20f3b84b
pypocketfft
===========
This package provides Fast Fourier and Hartley transforms with a simple
Python interface.
This package provides Fast Fourier, trigonometric and Hartley transforms with a
simple Python interface.
The central algorithms are derived from Paul Swarztrauber's FFTPACK code
(http://www.netlib.org/fftpack).
......@@ -10,11 +10,11 @@ The central algorithms are derived from Paul Swarztrauber's FFTPACK code
Features
--------
- supports fully complex and half-complex (i.e. complex-to-real and
real-to-complex) FFTs
- supports multidimensional arrays and selection of the axes to be transformed.
- supports single and double precision
real-to-complex) FFTs, discrete sine/cosine transforms and Hartley transforms
- achieves very high accuracy for all transforms
- supports multidimensional arrays and selection of the axes to be transformed
- supports single, double, and long double precision
- makes use of CPU vector instructions when performing 2D and higher-dimensional
transforms
- does not have persistent transform plans, which makes the interface simpler
- supports prime-length transforms without degrading to O(N**2) performance
- Has optional OpenMP support for multidimensional transforms
- has optional OpenMP support for multidimensional transforms
This diff is collapsed.
......@@ -7,7 +7,9 @@
* Python interface.
*
* Copyright (C) 2019 Max-Planck-Society
* Copyright (C) 2019 Peter Bell
* \author Martin Reinecke
* \author Peter Bell
*/
#include <pybind11/pybind11.h>
......@@ -92,12 +94,12 @@ template<typename T> T norm_fct(int inorm, size_t N)
}
template<typename T> T norm_fct(int inorm, const shape_t &shape,
const shape_t &axes)
const shape_t &axes, size_t fct=1, int delta=0)
{
if (inorm==0) return T(1);
size_t N(1);
for (auto a: axes)
N *= shape[a];
N *= fct * size_t(int64_t(shape[a])+delta);
return norm_fct<T>(inorm, N);
}
......@@ -226,6 +228,66 @@ py::array r2r_fftpack(const py::array &in, const py::object &axes_,
real2hermitian, forward, inorm, out_, nthreads))
}
template<typename T> py::array dct_internal(const py::array &in,
const py::object &axes_, int type, int inorm, py::object &out_,
size_t nthreads)
{
auto axes = makeaxes(in, axes_);
auto dims(copy_shape(in));
py::array res = prepare_output<T>(out_, dims);
auto s_in=copy_strides(in);
auto s_out=copy_strides(res);
auto d_in=reinterpret_cast<const T *>(in.data());
auto d_out=reinterpret_cast<T *>(res.mutable_data());
{
py::gil_scoped_release release;
T fct = (type==1) ? norm_fct<T>(inorm, dims, axes, 2, -1)
: norm_fct<T>(inorm, dims, axes, 2);
bool ortho = inorm == 1;
pocketfft::dct(dims, s_in, s_out, axes, type, d_in, d_out, fct, ortho,
nthreads);
}
return res;
}
py::array dct(const py::array &in, int type, const py::object &axes_,
int inorm, py::object &out_, size_t nthreads)
{
if ((type<1) || (type>4)) throw invalid_argument("invalid DCT type");
DISPATCH(in, f64, f32, flong, dct_internal, (in, axes_, type, inorm, out_,
nthreads))
}
template<typename T> py::array dst_internal(const py::array &in,
const py::object &axes_, int type, int inorm, py::object &out_,
size_t nthreads)
{
auto axes = makeaxes(in, axes_);
auto dims(copy_shape(in));
py::array res = prepare_output<T>(out_, dims);
auto s_in=copy_strides(in);
auto s_out=copy_strides(res);
auto d_in=reinterpret_cast<const T *>(in.data());
auto d_out=reinterpret_cast<T *>(res.mutable_data());
{
py::gil_scoped_release release;
T fct = (type==1) ? norm_fct<T>(inorm, dims, axes, 2, 1)
: norm_fct<T>(inorm, dims, axes, 2);
bool ortho = inorm == 1;
pocketfft::dst(dims, s_in, s_out, axes, type, d_in, d_out, fct, ortho,
nthreads);
}
return res;
}
py::array dst(const py::array &in, int type, const py::object &axes_,
int inorm, py::object &out_, size_t nthreads)
{
if ((type<1) || (type>4)) throw invalid_argument("invalid DST type");
DISPATCH(in, f64, f32, flong, dst_internal, (in, axes_, type, inorm,
out_, nthreads))
}
template<typename T> py::array c2r_internal(const py::array &in,
const py::object &axes_, size_t lastsize, bool forward, int inorm,
py::object &out_, size_t nthreads)
......@@ -235,7 +297,7 @@ template<typename T> py::array c2r_internal(const py::array &in,
shape_t dims_in(copy_shape(in)), dims_out=dims_in;
if (lastsize==0) lastsize=2*dims_in[axis]-1;
if ((lastsize/2) + 1 != dims_in[axis])
throw runtime_error("bad lastsize");
throw invalid_argument("bad lastsize");
dims_out[axis] = lastsize;
py::array res = prepare_output<T>(out_, dims_out);
auto s_in=copy_strides(in);
......@@ -297,7 +359,6 @@ template<typename T>py::array complex2hartley(const py::array &in,
py::gil_scoped_release release;
simple_iter iin(atmp);
rev_iter iout(aout, axes);
if (iin.remaining()!=iout.remaining()) throw runtime_error("oops");
while(iin.remaining()>0)
{
auto v = atmp[iin.ofs()];
......@@ -524,6 +585,83 @@ numpy.ndarray (same shape and data type as `a`)
The transformed data
)""";
const char *dct_DS = R"""(Performs a discrete cosine transform.
Parameters
----------
a : numpy.ndarray (any real type)
The input data
type : integer
the type of DCT. Must be in [1; 4].
axes : list of integers
The axes along which the transform is carried out.
If not set, all axes will be transformed.
inorm : int
Normalization type
0 : no normalization
1 : make transform orthogonal and divide by sqrt(N)
2 : divide by N
where N is the product of n_i for every transformed axis i.
n_i is 2*(<axis_length>-1 for type 1 and 2*<axis length>
for types 2, 3, 4.
Making the transform orthogonal involves the following additional steps
for every 1D sub-transform:
Type 1 : multiply first and last input value by sqrt(2)
divide first and last output value by sqrt(2)
Type 2 : divide first output value by sqrt(2)
Type 3 : multiply first input value by sqrt(2)
Type 4 : nothing
out : numpy.ndarray (same shape and data type as `a`)
May be identical to `a`, but if it isn't, it must not overlap with `a`.
If None, a new array is allocated to store the output.
nthreads : int
Number of threads to use. If 0, use the system default (typically governed
by the `OMP_NUM_THREADS` environment variable).
Returns
-------
numpy.ndarray (same shape and data type as `a`)
The transformed data
)""";
const char *dst_DS = R"""(Performs a discrete sine transform.
Parameters
----------
a : numpy.ndarray (any real type)
The input data
type : integer
the type of DST. Must be in [1; 4].
axes : list of integers
The axes along which the transform is carried out.
If not set, all axes will be transformed.
inorm : int
Normalization type
0 : no normalization
1 : make transform orthogonal and divide by sqrt(N)
2 : divide by N
where N is the product of n_i for every transformed axis i.
n_i is 2*(<axis_length>+1 for type 1 and 2*<axis length>
for types 2, 3, 4.
Making the transform orthogonal involves the following additional steps
for every 1D sub-transform:
Type 1 : nothing
Type 2 : divide first output value by sqrt(2)
Type 3 : multiply first input value by sqrt(2)
Type 4 : nothing
out : numpy.ndarray (same shape and data type as `a`)
May be identical to `a`, but if it isn't, it must not overlap with `a`.
If None, a new array is allocated to store the output.
nthreads : int
Number of threads to use. If 0, use the system default (typically governed
by the `OMP_NUM_THREADS` environment variable).
Returns
-------
numpy.ndarray (same shape and data type as `a`)
The transformed data
)""";
} // unnamed namespace
PYBIND11_MODULE(pypocketfft, m)
......@@ -543,4 +681,8 @@ PYBIND11_MODULE(pypocketfft, m)
"axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
m.def("genuine_hartley", genuine_hartley, genuine_hartley_DS, "a"_a,
"axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
m.def("dct", dct, dct_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0,
"out"_a=None, "nthreads"_a=1);
m.def("dst", dst, dst_DS, "a"_a, "type"_a, "axes"_a=None, "inorm"_a=0,
"out"_a=None, "nthreads"_a=1);
}
......@@ -17,6 +17,13 @@ def _l2error(a, b):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))
def _assert_close(a, b, epsilon):
err = _l2error(a, b)
if (err >= epsilon):
print("Error: {} > {}".format(err, epsilon))
assert_(err<epsilon)
def fftn(a, axes=None, inorm=0, out=None, nthreads=1):
return pypocketfft.c2c(a, axes=axes, forward=True, inorm=inorm,
out=out, nthreads=nthreads)
......@@ -48,36 +55,27 @@ def irfft_scipy(a, axis, inorm=0, out=None, nthreads=1):
forward=False, inorm=inorm, out=out,
nthreads=nthreads)
tol = {np.float32: 6e-7, np.float64: 1.5e-15, np.longfloat: 1e-18}
ctype = {np.float32: np.complex64, np.float64: np.complex128, np.longfloat: np.longcomplex}
@pmp("len", len1D)
@pmp("inorm", [0, 1, 2])
def test1D(len, inorm):
@pmp("dtype", [np.float32, np.float64, np.longfloat])
def test1D(len, inorm, dtype):
a = np.random.rand(len)-0.5 + 1j*np.random.rand(len)-0.5j
b = a.astype(np.complex64)
c = a.astype(np.complex256)
assert_(_l2error(a, ifftn(fftn(c, inorm=inorm), inorm=2-inorm)) < 1e-18)
assert_(_l2error(a, ifftn(fftn(a, inorm=inorm), inorm=2-inorm)) < 1.5e-15)
a = a.astype(ctype[dtype])
eps = tol[dtype]
assert_(_l2error(a, ifftn(fftn(a, inorm=inorm), inorm=2-inorm)) < eps)
assert_(_l2error(a.real, ifftn(fftn(a.real, inorm=inorm), inorm=2-inorm))
< 1.5e-15)
< eps)
assert_(_l2error(a.real, fftn(ifftn(a.real, inorm=inorm), inorm=2-inorm))
< 1.5e-15)
< eps)
assert_(_l2error(a.real, irfftn(rfftn(a.real, inorm=inorm),
inorm=2-inorm, lastsize=len)) < 1.5e-15)
inorm=2-inorm, lastsize=len)) < eps)
tmp = a.copy()
assert_(ifftn(fftn(tmp, out=tmp, inorm=inorm), out=tmp, inorm=2-inorm)
is tmp)
assert_(_l2error(tmp, a) < 1.5e-15)
assert_(_l2error(b, ifftn(fftn(b, inorm=inorm), inorm=2-inorm)) < 6e-7)
assert_(_l2error(b.real, ifftn(fftn(b.real, inorm=inorm), inorm=2-inorm))
< 6e-7)
assert_(_l2error(b.real, fftn(ifftn(b.real, inorm=inorm), inorm=2-inorm))
< 6e-7)
assert_(_l2error(b.real, irfftn(rfftn(b.real, inorm=inorm), lastsize=len,
inorm=2-inorm)) < 6e-7)
tmp = b.copy()
assert_(ifftn(fftn(tmp, out=tmp, inorm=inorm), out=tmp, inorm=2-inorm)
is tmp)
assert_(_l2error(tmp, b) < 6e-7)
assert_(_l2error(tmp, a) < eps)
@pmp("shp", shapes)
......@@ -196,3 +194,17 @@ def test_genuine_hartley_2D(shp, axes):
a = np.random.rand(*shp)-0.5
assert_(_l2error(pypocketfft.genuine_hartley(pypocketfft.genuine_hartley(
a, axes=axes), axes=axes, inorm=2), a) < 1e-15)
@pmp("len", len1D)
@pmp("inorm", [0, 1]) # inorm==2 not needed, tested via inverse
@pmp("type", [1, 2, 3, 4])
@pmp("dtype", [np.float32, np.float64, np.longfloat])
def testdcst1D(len, inorm, type, dtype):
a = (np.random.rand(len)-0.5).astype(dtype)
eps = tol[dtype]
itp = (0, 1, 3, 2, 4)
itype = itp[type]
if type != 1 or len > 1: # there are no length-1 type 1 DCTs
_assert_close(a, pypocketfft.dct(pypocketfft.dct(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), eps)
_assert_close(a, pypocketfft.dst(pypocketfft.dst(a, inorm=inorm, type=type), inorm=2-inorm, type=itype), eps)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment