Commit ab552715 authored by Martin Reinecke's avatar Martin Reinecke

inplace -> out

parent ab691f16
......@@ -34,6 +34,7 @@ auto clong = py::dtype("longcomplex");
auto f32 = py::dtype("float32");
auto f64 = py::dtype("float64");
auto flong = py::dtype("longfloat");
auto None = py::none();
shape_t copy_shape(const py::array &arr)
{
......@@ -51,9 +52,9 @@ stride_t copy_strides(const py::array &arr)
return res;
}
shape_t makeaxes(const py::array &in, py::object axes)
shape_t makeaxes(const py::array &in, const py::object &axes)
{
if (axes.is(py::none()))
if (axes.is(None))
{
shape_t res(size_t(in.ndim()));
for (size_t i=0; i<res.size(); ++i)
......@@ -101,11 +102,23 @@ template<typename T> T norm_fct(int inorm, const shape_t &shape,
return norm_fct<T>(inorm, N);
}
template<typename T> py::array_t<T> prepare_output(py::object &out_,
shape_t &dims)
{
if (out_.is(None)) return py::array_t<T>(dims);
auto tmp = out_.cast<py::array_t<T>>();
if (!tmp.is(out_)) // a new object was created during casting
throw runtime_error("unexpected data type for output array");
return tmp;
}
template<typename T> py::array c2c_internal(const py::array &in,
const shape_t &axes, bool forward, int inorm, bool inplace, size_t nthreads)
const py::object &axes_, bool forward, int inorm, py::object &out_,
size_t nthreads)
{
auto axes = makeaxes(in, axes_);
auto dims(copy_shape(in));
py::array res = inplace ? in : py::array_t<complex<T>>(dims);
auto res = prepare_output<complex<T>>(out_, dims);
auto s_in=copy_strides(in);
auto s_out=copy_strides(res);
auto d_in=reinterpret_cast<const complex<T> *>(in.data());
......@@ -119,10 +132,12 @@ template<typename T> py::array c2c_internal(const py::array &in,
}
template<typename T> py::array c2c_sym_internal(const py::array &in,
const shape_t &axes, bool forward, int inorm, size_t nthreads)
const py::object &axes_, bool forward, int inorm, py::object &out_,
size_t nthreads)
{
auto axes = makeaxes(in, axes_);
auto dims(copy_shape(in));
py::array res = py::array_t<complex<T>>(dims);
auto res = prepare_output<complex<T>>(out_, dims);
auto s_in=copy_strides(in);
auto s_out=copy_strides(res);
auto d_in=reinterpret_cast<const T *>(in.data());
......@@ -145,25 +160,25 @@ template<typename T> py::array c2c_sym_internal(const py::array &in,
return res;
}
py::array c2c(const py::array &a, py::object axes, bool forward, int inorm,
bool inplace, size_t nthreads)
py::array c2c(const py::array &a, const py::object &axes_, bool forward,
int inorm, py::object &out_, size_t nthreads)
{
if (a.dtype().kind() == 'c')
DISPATCH(a, c128, c64, clong, c2c_internal, (a, makeaxes(a, axes), forward,
inorm, inplace, nthreads))
DISPATCH(a, c128, c64, clong, c2c_internal, (a, axes_, forward,
inorm, out_, nthreads))
if (inplace) throw runtime_error("cannot do this operation in-place");
DISPATCH(a, f64, f32, flong, c2c_sym_internal, (a, makeaxes(a, axes), forward,
inorm, nthreads))
DISPATCH(a, f64, f32, flong, c2c_sym_internal, (a, axes_, forward,
inorm, out_, nthreads))
}
template<typename T> py::array r2c_internal(const py::array &in,
py::object axes_, bool forward, int inorm, size_t nthreads)
const py::object &axes_, bool forward, int inorm, py::object &out_,
size_t nthreads)
{
auto axes = makeaxes(in, axes_);
auto dims_in(copy_shape(in)), dims_out(dims_in);
dims_out[axes.back()] = (dims_out[axes.back()]>>1)+1;
py::array res = py::array_t<complex<T>>(dims_out);
py::array res = prepare_output<complex<T>>(out_, dims_out);
auto s_in=copy_strides(in);
auto s_out=copy_strides(res);
auto d_in=reinterpret_cast<const T *>(in.data());
......@@ -177,20 +192,20 @@ template<typename T> py::array r2c_internal(const py::array &in,
return res;
}
py::array r2c(const py::array &in, py::object axes_, bool forward, int inorm,
size_t nthreads)
py::array r2c(const py::array &in, const py::object &axes_, bool forward,
int inorm, py::object &out_, size_t nthreads)
{
DISPATCH(in, f64, f32, flong, r2c_internal, (in, axes_, forward, inorm,
DISPATCH(in, f64, f32, flong, r2c_internal, (in, axes_, forward, inorm, out_,
nthreads))
}
template<typename T> py::array r2r_fftpack_internal(const py::array &in,
py::object axes_, bool input_halfcomplex, bool forward, int inorm,
bool inplace, size_t nthreads)
const py::object &axes_, bool input_halfcomplex, bool forward, int inorm,
py::object &out_, size_t nthreads)
{
auto axes = makeaxes(in, axes_);
auto dims(copy_shape(in));
py::array res = inplace ? in : py::array_t<T>(dims);
py::array res = prepare_output<T>(out_, dims);
auto s_in=copy_strides(in);
auto s_out=copy_strides(res);
auto d_in=reinterpret_cast<const T *>(in.data());
......@@ -204,16 +219,17 @@ template<typename T> py::array r2r_fftpack_internal(const py::array &in,
return res;
}
py::array r2r_fftpack(const py::array &in, py::object axes_,
bool input_halfcomplex, bool forward, int inorm, bool inplace,
py::array r2r_fftpack(const py::array &in, const py::object &axes_,
bool input_halfcomplex, bool forward, int inorm, py::object &out_,
size_t nthreads)
{
DISPATCH(in, f64, f32, flong, r2r_fftpack_internal, (in, axes_,
input_halfcomplex, forward, inorm, inplace, nthreads))
input_halfcomplex, forward, inorm, out_, nthreads))
}
template<typename T> py::array c2r_internal(const py::array &in,
py::object axes_, size_t lastsize, bool forward, int inorm, size_t nthreads)
const py::object &axes_, size_t lastsize, bool forward, int inorm,
py::object &out_, size_t nthreads)
{
auto axes = makeaxes(in, axes_);
size_t axis = axes.back();
......@@ -222,7 +238,7 @@ template<typename T> py::array c2r_internal(const py::array &in,
if ((lastsize/2) + 1 != dims_in[axis])
throw runtime_error("bad lastsize");
dims_out[axis] = lastsize;
py::array res = py::array_t<T>(dims_out);
py::array res = prepare_output<T>(out_, dims_out);
auto s_in=copy_strides(in);
auto s_out=copy_strides(res);
auto d_in=reinterpret_cast<const complex<T> *>(in.data());
......@@ -236,18 +252,18 @@ template<typename T> py::array c2r_internal(const py::array &in,
return res;
}
py::array c2r(const py::array &in, py::object axes_, size_t lastsize,
bool forward, int inorm, size_t nthreads)
py::array c2r(const py::array &in, const py::object &axes_, size_t lastsize,
bool forward, int inorm, py::object &out_, size_t nthreads)
{
DISPATCH(in, c128, c64, clong, c2r_internal, (in, axes_, lastsize, forward,
inorm, nthreads))
inorm, out_, nthreads))
}
template<typename T> py::array separable_hartley_internal(const py::array &in,
py::object axes_, int inorm, bool inplace, size_t nthreads)
const py::object &axes_, int inorm, py::object &out_, size_t nthreads)
{
auto dims(copy_shape(in));
py::array res = inplace ? in : py::array_t<T>(dims);
py::array res = prepare_output<T>(out_, dims);
auto axes = makeaxes(in, axes_);
auto s_in=copy_strides(in);
auto s_out=copy_strides(res);
......@@ -261,19 +277,19 @@ template<typename T> py::array separable_hartley_internal(const py::array &in,
return res;
}
py::array separable_hartley(const py::array &in, py::object axes_, int inorm,
bool inplace, size_t nthreads)
py::array separable_hartley(const py::array &in, const py::object &axes_,
int inorm, py::object &out_, size_t nthreads)
{
DISPATCH(in, f64, f32, flong, separable_hartley_internal, (in, axes_, inorm,
inplace, nthreads))
out_, nthreads))
}
template<typename T>py::array complex2hartley(const py::array &in,
const py::array &tmp, py::object axes_, bool inplace)
const py::array &tmp, const py::object &axes_, py::object &out_)
{
using namespace pocketfft::detail;
auto dims_out(copy_shape(in));
py::array out = inplace ? in : py::array_t<T>(dims_out);
py::array out = prepare_output<T>(out_, dims_out);
cndarr<cmplx<T>> atmp(tmp.data(), copy_shape(tmp), copy_strides(tmp));
ndarr<T> aout(out.mutable_data(), copy_shape(out), copy_strides(out));
auto axes = makeaxes(in, axes_);
......@@ -293,11 +309,11 @@ template<typename T>py::array complex2hartley(const py::array &in,
return out;
}
py::array genuine_hartley(const py::array &in, py::object axes_, int inorm,
bool inplace, size_t nthreads)
py::array genuine_hartley(const py::array &in, const py::object &axes_,
int inorm, py::object &out_, size_t nthreads)
{
auto tmp = r2c(in, axes_, true, inorm, nthreads);
DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, inplace))
auto tmp = r2c(in, axes_, true, inorm, None, nthreads);
DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_))
}
const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms.
......@@ -330,18 +346,16 @@ inorm : int
1 : divide by sqrt(N)
2 : divide by N
where N is the product of the lengths of the transformed axes.
inplace : bool
if False, returns the result in a new array and leaves the input unchanged.
if True, stores the result in the input array and returns a handle to it.
NOTE: if `a` is real-valued and `inplace` is `True`, an exception will be
raised!
out : numpy.ndarray (same shape as `a`, complex type with same accuracy as `a`)
May be identical to `a`.
If None, a new array is allocated to store the output.
nthreads : int
Number of threads to use. If 0, use the system default (typically governed
by the `OMP_NUM_THREADS` environment variable).
Returns
-------
np.ndarray (same shape as `a`, complex type with same accuracy as `a`)
numpy.ndarray (same shape as `a`, complex type with same accuracy as `a`)
The transformed data.
)""";
......@@ -362,13 +376,16 @@ inorm : int
1 : divide by sqrt(N)
2 : divide by N
where N is the product of the lengths of the transformed input axes.
out : numpy.ndarray (complex type with same accuracy as `a`)
For the required shape, see the `Returns` section.
If None, a new array is allocated to store the output.
nthreads : int
Number of threads to use. If 0, use the system default (typically governed
by the `OMP_NUM_THREADS` environment variable).
Returns
-------
np.ndarray (complex type with same accuracy as `a`)
numpy.ndarray (complex type with same accuracy as `a`)
The transformed data. The shape is identical to that of the input array,
except for the axis that was transformed last. If the length of that axis
was n on input, it is n//2+1 on output.
......@@ -393,13 +410,16 @@ inorm : int
1 : divide by sqrt(N)
2 : divide by N
where N is the product of the lengths of the transformed output axes.
out : numpy.ndarray (real type with same accuracy as `a`)
For the required shape, see the `Returns` section.
If None, a new array is allocated to store the output.
nthreads : int
Number of threads to use. If 0, use the system default (typically governed
by the `OMP_NUM_THREADS` environment variable).
Returns
-------
np.ndarray (real type with same accuracy as `a`)
numpy.ndarray (real type with same accuracy as `a`)
The transformed data. The shape is identical to that of the input array,
except for the axis that was transformed last, which has now `lastsize`
entries.
......@@ -425,16 +445,16 @@ inorm : int
1 : divide by sqrt(N)
2 : divide by N
where N is the length of `axis`.
inplace : bool
if False, returns the result in a new array and leaves the input unchanged.
if True, stores the result in the input array and returns a handle to it.
out : numpy.ndarray (same shape and data type as `a`)
May be identical to `a`.
If None, a new array is allocated to store the output.
nthreads : int
Number of threads to use. If 0, use the system default (typically governed
by the `OMP_NUM_THREADS` environment variable).
Returns
-------
np.ndarray (same shape and data type as `a`)
numpy.ndarray (same shape and data type as `a`)
The transformed data. The shape is identical to that of the input array.
)""";
......@@ -445,7 +465,7 @@ processed.
Parameters
----------
a : numpy.ndarray (np.float32 or np.float64)
a : numpy.ndarray (any real type)
The input data
axes : list of integers
The axes along which the transform is carried out.
......@@ -456,16 +476,16 @@ inorm : int
1 : divide by sqrt(N)
2 : divide by N
where N is the product of the lengths of the transformed axes.
inplace : bool
if False, returns the result in a new array and leaves the input unchanged.
if True, stores the result in the input array and returns a handle to it.
out : numpy.ndarray (same shape and data type as `a`)
May be identical to `a`.
If None, a new array is allocated to store the output.
nthreads : int
Number of threads to use. If 0, use the system default (typically governed
by the `OMP_NUM_THREADS` environment variable).
Returns
-------
np.ndarray (same shape and data type as `a`)
numpy.ndarray (same shape and data type as `a`)
The transformed data
)""";
......@@ -477,7 +497,7 @@ but when transforming multiple axes, the results are different.
Parameters
----------
a : numpy.ndarray (np.float32 or np.float64)
a : numpy.ndarray (any real type)
The input data
axes : list of integers
The axes along which the transform is carried out.
......@@ -488,16 +508,16 @@ inorm : int
1 : divide by sqrt(N)
2 : divide by N
where N is the product of the lengths of the transformed axes.
inplace : bool
if False, returns the result in a new array and leaves the input unchanged.
if True, stores the result in the input array and returns a handle to it.
out : numpy.ndarray (same shape and data type as `a`)
May be identical to `a`.
If None, a new array is allocated to store the output.
nthreads : int
Number of threads to use. If 0, use the system default (typically governed
by the `OMP_NUM_THREADS` environment variable).
Returns
-------
np.ndarray (same shape and data type as `a`)
numpy.ndarray (same shape and data type as `a`)
The transformed data
)""";
......@@ -508,17 +528,17 @@ PYBIND11_MODULE(pypocketfft, m)
using namespace pybind11::literals;
m.doc() = pypocketfft_DS;
m.def("c2c",&c2c, c2c_DS, "a"_a, "axes"_a=py::none(), "forward"_a=true,
"inorm"_a=0, "inplace"_a=false, "nthreads"_a=1);
m.def("r2c",&r2c, r2c_DS, "a"_a, "axes"_a=py::none(), "forward"_a=true,
"inorm"_a=0, "nthreads"_a=1);
m.def("c2r",&c2r, c2r_DS, "a"_a, "axes"_a=py::none(), "lastsize"_a=0,
"forward"_a=true, "inorm"_a=0, "nthreads"_a=1);
m.def("c2c",&c2c, c2c_DS, "a"_a, "axes"_a=None, "forward"_a=true,
"inorm"_a=0, "out"_a=None, "nthreads"_a=1);
m.def("r2c",&r2c, r2c_DS, "a"_a, "axes"_a=None, "forward"_a=true,
"inorm"_a=0, "out"_a=None, "nthreads"_a=1);
m.def("c2r",&c2r, c2r_DS, "a"_a, "axes"_a=None, "lastsize"_a=0,
"forward"_a=true, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
m.def("r2r_fftpack",&r2r_fftpack, r2r_fftpack_DS, "a"_a, "axes"_a,
"input_halfcomplex"_a, "forward"_a, "inorm"_a=0, "inplace"_a=false,
"input_halfcomplex"_a, "forward"_a, "inorm"_a=0, "out"_a=None,
"nthreads"_a=1);
m.def("separable_hartley",&separable_hartley, separable_hartley_DS, "a"_a,
"axes"_a=py::none(), "inorm"_a=0, "inplace"_a=false, "nthreads"_a=1);
"axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
m.def("genuine_hartley",&genuine_hartley, genuine_hartley_DS, "a"_a,
"axes"_a=py::none(), "inorm"_a=0, "inplace"_a=false, "nthreads"_a=1);
"axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
}
......@@ -4,13 +4,13 @@ import pypocketfft
def _l2error(a,b):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))
def fftn(a, axes=None, inorm=0, inplace=False, nthreads=1):
def fftn(a, axes=None, inorm=0, out=None, nthreads=1):
return pypocketfft.c2c(a, axes=axes, forward=True, inorm=inorm,
inplace=inplace, nthreads=nthreads)
out=out, nthreads=nthreads)
def ifftn(a, axes=None, inorm=0, inplace=False, nthreads=1):
def ifftn(a, axes=None, inorm=0, out=None, nthreads=1):
return pypocketfft.c2c(a, axes=axes, forward=False, inorm=inorm,
inplace=inplace, nthreads=nthreads)
out=out, nthreads=nthreads)
def rfftn(a, axes=None, inorm=0, nthreads=1):
return pypocketfft.r2c(a, axes=axes, forward=True, inorm=inorm,
......
......@@ -15,13 +15,13 @@ len1D=range(1,2048)
def _l2error(a,b):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))
def fftn(a, axes=None, inorm=0, inplace=False, nthreads=1):
def fftn(a, axes=None, inorm=0, out=None, nthreads=1):
return pypocketfft.c2c(a, axes=axes, forward=True, inorm=inorm,
inplace=inplace, nthreads=nthreads)
out=out, nthreads=nthreads)
def ifftn(a, axes=None, inorm=0, inplace=False, nthreads=1):
def ifftn(a, axes=None, inorm=0, out=None, nthreads=1):
return pypocketfft.c2c(a, axes=axes, forward=False, inorm=inorm,
inplace=inplace, nthreads=nthreads)
out=out, nthreads=nthreads)
def rfftn(a, axes=None, inorm=0, nthreads=1):
return pypocketfft.r2c(a, axes=axes, forward=True, inorm=inorm,
......@@ -31,13 +31,13 @@ def irfftn(a, axes=None, lastsize=0, inorm=0, nthreads=1):
return pypocketfft.c2r(a, axes=axes, lastsize=lastsize,forward=False,
inorm=inorm, nthreads=nthreads)
def rfft_scipy(a, axis, inorm=0, inplace=False, nthreads=1):
def rfft_scipy(a, axis, inorm=0, out=None, nthreads=1):
return pypocketfft.r2r_fftpack(a, axes=(axis,), input_halfcomplex=False,
forward=True, inorm=inorm, inplace=inplace,
forward=True, inorm=inorm, out=out,
nthreads=nthreads)
def irfft_scipy(a, axis, inorm=0, inplace=False, nthreads=1):
def irfft_scipy(a, axis, inorm=0, out=None, nthreads=1):
return pypocketfft.r2r_fftpack(a, axes=(axis,), input_halfcomplex=True,
forward=False, inorm=inorm, inplace=inplace,
forward=False, inorm=inorm, out=out,
nthreads=nthreads)
@pmp("len", len1D)
@pmp("inorm", [0,1,2])
......@@ -51,14 +51,14 @@ def test1D(len, inorm):
assert_(_l2error(a.real, fftn(ifftn(a.real,inorm=inorm), inorm=2-inorm))<1.5e-15)
assert_(_l2error(a.real, irfftn(rfftn(a.real,inorm=inorm), inorm=2-inorm,lastsize=len))<1.5e-15)
tmp=a.copy()
assert_ (ifftn(fftn(tmp, inplace=True, inorm=inorm), inplace=True, inorm=2-inorm) is tmp)
assert_ (ifftn(fftn(tmp, out=tmp, inorm=inorm), out=tmp, inorm=2-inorm) is tmp)
assert_(_l2error(tmp, a)<1.5e-15)
assert_(_l2error(b, ifftn(fftn(b, inorm=inorm), inorm=2-inorm))<6e-7)
assert_(_l2error(b.real, ifftn(fftn(b.real,inorm=inorm), inorm=2-inorm))<6e-7)
assert_(_l2error(b.real, fftn(ifftn(b.real,inorm=inorm), inorm=2-inorm))<6e-7)
assert_(_l2error(b.real, irfftn(rfftn(b.real, inorm=inorm), lastsize=len, inorm=2-inorm))<6e-7)
tmp=b.copy()
assert_ (ifftn(fftn(tmp, inplace=True, inorm=inorm), inplace=True, inorm=2-inorm) is tmp)
assert_ (ifftn(fftn(tmp, out=tmp, inorm=inorm), out=tmp, inorm=2-inorm) is tmp)
assert_(_l2error(tmp, b)<6e-7)
@pmp("shp", shapes)
......@@ -106,7 +106,7 @@ def test_identity(shp):
assert_(_l2error(ifftn(fftn(a.real),inorm=2), a.real)<1.5e-15)
assert_(_l2error(fftn(ifftn(a.real),inorm=2), a.real)<1.5e-15)
tmp=a.copy()
assert_ (ifftn(fftn(tmp, inplace=True), inorm=2, inplace=True) is tmp)
assert_ (ifftn(fftn(tmp, out=tmp), inorm=2, out=tmp) is tmp)
assert_(_l2error(tmp, a)<1.5e-15)
a=a.astype(np.complex64)
assert_(_l2error(ifftn(fftn(a),inorm=2), a)<6e-7)
......@@ -146,7 +146,7 @@ def test_genuine_hartley_identity(shp):
v1 = pypocketfft.genuine_hartley(pypocketfft.genuine_hartley(a))/a.size
assert_(_l2error(a, v1)<1e-15)
v1 = a.copy()
assert_(pypocketfft.genuine_hartley(pypocketfft.genuine_hartley(v1, inplace=True), inorm=2, inplace=True) is v1)
assert_(pypocketfft.genuine_hartley(pypocketfft.genuine_hartley(v1, out=v1), inorm=2, out=v1) is v1)
assert_(_l2error(a, v1)<1e-15)
@pmp("shp", shapes2D)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment