pypocketfft.cc 20.1 KB
Newer Older
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
1 2 3 4 5 6
/*
 * This file is part of pocketfft.
 * Licensed under a 3-clause BSD style license - see LICENSE.md
 */

/*
7
 *  Python interface.
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
8
 *
9
 *  Copyright (C) 2019 Max-Planck-Society
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
10 11 12
 *  \author Martin Reinecke
 */

Martin Reinecke's avatar
Martin Reinecke committed
13 14
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
Martin Reinecke's avatar
more  
Martin Reinecke committed
15
#include <pybind11/stl.h>
Martin Reinecke's avatar
Martin Reinecke committed
16

17
#include "pocketfft_hdronly.h"
Martin Reinecke's avatar
Martin Reinecke committed
18

19
namespace {
20

21
using namespace std;
Martin Reinecke's avatar
Martin Reinecke committed
22 23
using pocketfft::shape_t;
using pocketfft::stride_t;
Martin Reinecke's avatar
Martin Reinecke committed
24

Martin Reinecke's avatar
Martin Reinecke committed
25 26
namespace py = pybind11;

27 28 29 30
// Only instantiate long double transforms if they offer more precision
using ldbl_t = typename std::conditional<
  sizeof(long double)==sizeof(double), double, long double>::type;

31 32 33 34 35 36
using c64 = std::complex<float>;
using c128 = std::complex<double>;
using clong = std::complex<ldbl_t>;
using f32 = float;
using f64 = double;
using flong = ldbl_t;
Martin Reinecke's avatar
Martin Reinecke committed
37
auto None = py::none();
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
38 39

shape_t copy_shape(const py::array &arr)
Martin Reinecke's avatar
Martin Reinecke committed
40
  {
41
  shape_t res(size_t(arr.ndim()));
Martin Reinecke's avatar
Martin Reinecke committed
42
  for (size_t i=0; i<res.size(); ++i)
43
    res[i] = size_t(arr.shape(int(i)));
Martin Reinecke's avatar
Martin Reinecke committed
44 45
  return res;
  }
Martin Reinecke's avatar
Martin Reinecke committed
46

Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
47
stride_t copy_strides(const py::array &arr)
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
48
  {
49
  stride_t res(size_t(arr.ndim()));
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
50
  for (size_t i=0; i<res.size(); ++i)
51
    res[i] = arr.strides(int(i));
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
52 53
  return res;
  }
Martin Reinecke's avatar
Martin Reinecke committed
54

Martin Reinecke's avatar
Martin Reinecke committed
55
shape_t makeaxes(const py::array &in, const py::object &axes)
Martin Reinecke's avatar
more  
Martin Reinecke committed
56
  {
57
  if (axes.is_none())
Martin Reinecke's avatar
more  
Martin Reinecke committed
58
    {
59
    shape_t res(size_t(in.ndim()));
Martin Reinecke's avatar
Martin Reinecke committed
60 61 62
    for (size_t i=0; i<res.size(); ++i)
      res[i]=i;
    return res;
Martin Reinecke's avatar
more  
Martin Reinecke committed
63
    }
64 65 66
  auto tmp=axes.cast<std::vector<ptrdiff_t>>();
  auto ndim = in.ndim();
  if ((tmp.size()>size_t(ndim)) || (tmp.size()==0))
Martin Reinecke's avatar
more  
Martin Reinecke committed
67
    throw runtime_error("bad axes argument");
68 69 70 71 72 73 74 75
  for (auto& sz: tmp)
    {
    if (sz<0)
      sz += ndim;
    if ((sz>=ndim) || (sz<0))
      throw invalid_argument("axes exceeds dimensionality of output");
    }
  return shape_t(tmp.begin(), tmp.end());
Martin Reinecke's avatar
more  
Martin Reinecke committed
76 77
  }

78
#define DISPATCH(arr, T1, T2, T3, func, args) \
79
  { \
80 81 82
  if (py::isinstance<py::array_t<T1>>(arr)) return func<double> args; \
  if (py::isinstance<py::array_t<T2>>(arr)) return func<float> args;  \
  if (py::isinstance<py::array_t<T3>>(arr)) return func<ldbl_t> args; \
83 84
  throw runtime_error("unsupported data type"); \
  }
85

86 87 88 89 90 91 92 93 94
template<typename T> T norm_fct(int inorm, size_t N)
  {
  if (inorm==0) return T(1);
  if (inorm==2) return T(1/ldbl_t(N));
  if (inorm==1) return T(1/sqrt(ldbl_t(N)));
  throw invalid_argument("invalid value for inorm (must be 0, 1, or 2)");
  }

template<typename T> T norm_fct(int inorm, const shape_t &shape,
Martin Reinecke's avatar
Martin Reinecke committed
95
  const shape_t &axes, size_t fct=1, int delta=0)
96 97 98 99
  {
  if (inorm==0) return T(1);
  size_t N(1);
  for (auto a: axes)
Martin Reinecke's avatar
Martin Reinecke committed
100
    N *= size_t(int64_t(shape[a]*fct)+delta);
101 102 103
  return norm_fct<T>(inorm, N);
  }

Martin Reinecke's avatar
Martin Reinecke committed
104 105 106
template<typename T> py::array_t<T> prepare_output(py::object &out_,
  shape_t &dims)
  {
107
  if (out_.is_none()) return py::array_t<T>(dims);
Martin Reinecke's avatar
Martin Reinecke committed
108 109 110 111 112 113
  auto tmp = out_.cast<py::array_t<T>>();
  if (!tmp.is(out_)) // a new object was created during casting
    throw runtime_error("unexpected data type for output array");
  return tmp;
  }

Martin Reinecke's avatar
Martin Reinecke committed
114
template<typename T> py::array c2c_internal(const py::array &in,
Martin Reinecke's avatar
Martin Reinecke committed
115 116
  const py::object &axes_, bool forward, int inorm, py::object &out_,
  size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
117
  {
Martin Reinecke's avatar
Martin Reinecke committed
118
  auto axes = makeaxes(in, axes_);
Martin Reinecke's avatar
Martin Reinecke committed
119
  auto dims(copy_shape(in));
Martin Reinecke's avatar
Martin Reinecke committed
120
  auto res = prepare_output<complex<T>>(out_, dims);
Martin Reinecke's avatar
Martin Reinecke committed
121 122 123 124 125 126
  auto s_in=copy_strides(in);
  auto s_out=copy_strides(res);
  auto d_in=reinterpret_cast<const complex<T> *>(in.data());
  auto d_out=reinterpret_cast<complex<T> *>(res.mutable_data());
  {
  py::gil_scoped_release release;
127
  T fct = norm_fct<T>(inorm, dims, axes);
Martin Reinecke's avatar
Martin Reinecke committed
128
  pocketfft::c2c(dims, s_in, s_out, axes, forward, d_in, d_out, fct, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
129
  }
Martin Reinecke's avatar
Martin Reinecke committed
130 131
  return res;
  }
Martin Reinecke's avatar
Martin Reinecke committed
132

Martin Reinecke's avatar
Martin Reinecke committed
133
template<typename T> py::array c2c_sym_internal(const py::array &in,
Martin Reinecke's avatar
Martin Reinecke committed
134 135
  const py::object &axes_, bool forward, int inorm, py::object &out_,
  size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
136
  {
Martin Reinecke's avatar
Martin Reinecke committed
137
  auto axes = makeaxes(in, axes_);
Martin Reinecke's avatar
Martin Reinecke committed
138
  auto dims(copy_shape(in));
Martin Reinecke's avatar
Martin Reinecke committed
139
  auto res = prepare_output<complex<T>>(out_, dims);
Martin Reinecke's avatar
Martin Reinecke committed
140 141 142 143 144 145 146
  auto s_in=copy_strides(in);
  auto s_out=copy_strides(res);
  auto d_in=reinterpret_cast<const T *>(in.data());
  auto d_out=reinterpret_cast<complex<T> *>(res.mutable_data());
  {
  py::gil_scoped_release release;
  T fct = norm_fct<T>(inorm, dims, axes);
Martin Reinecke's avatar
Martin Reinecke committed
147
  pocketfft::r2c(dims, s_in, s_out, axes, forward, d_in, d_out, fct, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
148 149 150
  // now fill in second half
  using namespace pocketfft::detail;
  ndarr<complex<T>> ares(res.mutable_data(), dims, s_out);
151
  rev_iter iter(ares, axes);
Martin Reinecke's avatar
Martin Reinecke committed
152 153
  while(iter.remaining()>0)
    {
154 155
    auto v = ares[iter.ofs()];
    ares[iter.rev_ofs()] = conj(v);
Martin Reinecke's avatar
Martin Reinecke committed
156 157 158 159 160 161
    iter.advance();
    }
  }
  return res;
  }

Martin Reinecke's avatar
Martin Reinecke committed
162 163
py::array c2c(const py::array &a, const py::object &axes_, bool forward,
  int inorm, py::object &out_, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
164
  {
165
  if (a.dtype().kind() == 'c')
Martin Reinecke's avatar
Martin Reinecke committed
166 167
    DISPATCH(a, c128, c64, clong, c2c_internal, (a, axes_, forward,
             inorm, out_, nthreads))
168

Martin Reinecke's avatar
Martin Reinecke committed
169 170
  DISPATCH(a, f64, f32, flong, c2c_sym_internal, (a, axes_, forward,
           inorm, out_, nthreads))
Martin Reinecke's avatar
Martin Reinecke committed
171
  }
172

Martin Reinecke's avatar
Martin Reinecke committed
173
template<typename T> py::array r2c_internal(const py::array &in,
Martin Reinecke's avatar
Martin Reinecke committed
174 175
  const py::object &axes_, bool forward, int inorm, py::object &out_,
  size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
176
  {
Martin Reinecke's avatar
Martin Reinecke committed
177
  auto axes = makeaxes(in, axes_);
Martin Reinecke's avatar
Martin Reinecke committed
178
  auto dims_in(copy_shape(in)), dims_out(dims_in);
Martin Reinecke's avatar
more  
Martin Reinecke committed
179
  dims_out[axes.back()] = (dims_out[axes.back()]>>1)+1;
Martin Reinecke's avatar
Martin Reinecke committed
180
  py::array res = prepare_output<complex<T>>(out_, dims_out);
Martin Reinecke's avatar
Martin Reinecke committed
181 182 183 184 185 186
  auto s_in=copy_strides(in);
  auto s_out=copy_strides(res);
  auto d_in=reinterpret_cast<const T *>(in.data());
  auto d_out=reinterpret_cast<complex<T> *>(res.mutable_data());
  {
  py::gil_scoped_release release;
187
  T fct = norm_fct<T>(inorm, dims_in, axes);
188 189
  pocketfft::r2c(dims_in, s_in, s_out, axes, forward, d_in, d_out, fct,
    nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
190
  }
Martin Reinecke's avatar
more  
Martin Reinecke committed
191 192
  return res;
  }
193

Martin Reinecke's avatar
Martin Reinecke committed
194 195
py::array r2c(const py::array &in, const py::object &axes_, bool forward,
  int inorm, py::object &out_, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
196
  {
Martin Reinecke's avatar
Martin Reinecke committed
197
  DISPATCH(in, f64, f32, flong, r2c_internal, (in, axes_, forward, inorm, out_,
Martin Reinecke's avatar
Martin Reinecke committed
198
    nthreads))
Martin Reinecke's avatar
Martin Reinecke committed
199
  }
200

Martin Reinecke's avatar
Martin Reinecke committed
201
template<typename T> py::array r2r_fftpack_internal(const py::array &in,
202
  const py::object &axes_, bool real2hermitian, bool forward, int inorm,
Martin Reinecke's avatar
Martin Reinecke committed
203
  py::object &out_, size_t nthreads)
204
  {
Martin Reinecke's avatar
Martin Reinecke committed
205
  auto axes = makeaxes(in, axes_);
206
  auto dims(copy_shape(in));
Martin Reinecke's avatar
Martin Reinecke committed
207
  py::array res = prepare_output<T>(out_, dims);
Martin Reinecke's avatar
Martin Reinecke committed
208 209 210 211 212 213
  auto s_in=copy_strides(in);
  auto s_out=copy_strides(res);
  auto d_in=reinterpret_cast<const T *>(in.data());
  auto d_out=reinterpret_cast<T *>(res.mutable_data());
  {
  py::gil_scoped_release release;
Martin Reinecke's avatar
Martin Reinecke committed
214
  T fct = norm_fct<T>(inorm, dims, axes);
215
  pocketfft::r2r_fftpack(dims, s_in, s_out, axes, real2hermitian, forward,
216
    d_in, d_out, fct, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
217
  }
218 219
  return res;
  }
220

Martin Reinecke's avatar
Martin Reinecke committed
221
py::array r2r_fftpack(const py::array &in, const py::object &axes_,
222
  bool real2hermitian, bool forward, int inorm, py::object &out_,
Martin Reinecke's avatar
Martin Reinecke committed
223
  size_t nthreads)
224
  {
225 226
  DISPATCH(in, f64, f32, flong, r2r_fftpack_internal, (in, axes_,
    real2hermitian, forward, inorm, out_, nthreads))
227
  }
228

Martin Reinecke's avatar
Martin Reinecke committed
229 230
template<typename T> py::array r2r_dct_internal(const py::array &in,
  const py::object &axes_, int type, int inorm, py::object &out_,
231 232 233 234 235 236 237 238 239 240 241
  size_t nthreads)
  {
  auto axes = makeaxes(in, axes_);
  auto dims(copy_shape(in));
  py::array res = prepare_output<T>(out_, dims);
  auto s_in=copy_strides(in);
  auto s_out=copy_strides(res);
  auto d_in=reinterpret_cast<const T *>(in.data());
  auto d_out=reinterpret_cast<T *>(res.mutable_data());
  {
  py::gil_scoped_release release;
Martin Reinecke's avatar
Martin Reinecke committed
242 243 244 245 246 247
  if ((type==2)||(type==3))
    {
    T fct = norm_fct<T>(inorm, dims, axes, 2);
    pocketfft::r2r_dct(dims, s_in, s_out, axes, type, d_in, d_out, fct,
      nthreads);
    }
248 249 250 251
  }
  return res;
  }

Martin Reinecke's avatar
Martin Reinecke committed
252 253
py::array r2r_dct(const py::array &in, int type, const py::object &axes_,
  int inorm, py::object &out_, size_t nthreads)
254
  {
Martin Reinecke's avatar
Martin Reinecke committed
255 256 257 258
  if ((type<1) || (type>4)) throw runtime_error("invalid DCT type");
  if ((type==1) || (type==4)) throw runtime_error("DCT type not yet supported ");
  DISPATCH(in, f64, f32, flong, r2r_dct_internal, (in, axes_, type, inorm,
    out_, nthreads))
259 260
  }

Martin Reinecke's avatar
Martin Reinecke committed
261 262
template<typename T> py::array r2r_dst_internal(const py::array &in,
  const py::object &axes_, int type, int inorm, py::object &out_,
263 264 265 266 267 268 269 270 271 272 273
  size_t nthreads)
  {
  auto axes = makeaxes(in, axes_);
  auto dims(copy_shape(in));
  py::array res = prepare_output<T>(out_, dims);
  auto s_in=copy_strides(in);
  auto s_out=copy_strides(res);
  auto d_in=reinterpret_cast<const T *>(in.data());
  auto d_out=reinterpret_cast<T *>(res.mutable_data());
  {
  py::gil_scoped_release release;
Martin Reinecke's avatar
Martin Reinecke committed
274 275 276 277 278 279
  if ((type==2)||(type==3))
    {
    T fct = norm_fct<T>(inorm, dims, axes, 2);
    pocketfft::r2r_dst(dims, s_in, s_out, axes, type, d_in, d_out, fct,
      nthreads);
    }
280 281 282 283
  }
  return res;
  }

Martin Reinecke's avatar
Martin Reinecke committed
284 285
py::array r2r_dst(const py::array &in, int type, const py::object &axes_,
  int inorm, py::object &out_, size_t nthreads)
286
  {
Martin Reinecke's avatar
Martin Reinecke committed
287 288 289 290
  if ((type<1) || (type>4)) throw runtime_error("invalid DST type");
  if ((type==1) || (type==4)) throw runtime_error("DST type not yet supported ");
  DISPATCH(in, f64, f32, flong, r2r_dst_internal, (in, axes_, type, inorm,
    out_, nthreads))
291 292
  }

Martin Reinecke's avatar
Martin Reinecke committed
293
template<typename T> py::array c2r_internal(const py::array &in,
Martin Reinecke's avatar
Martin Reinecke committed
294 295
  const py::object &axes_, size_t lastsize, bool forward, int inorm,
  py::object &out_, size_t nthreads)
Martin Reinecke's avatar
more  
Martin Reinecke committed
296
  {
Martin Reinecke's avatar
Martin Reinecke committed
297
  auto axes = makeaxes(in, axes_);
Martin Reinecke's avatar
more  
Martin Reinecke committed
298
  size_t axis = axes.back();
Martin Reinecke's avatar
Martin Reinecke committed
299 300 301
  shape_t dims_in(copy_shape(in)), dims_out=dims_in;
  if (lastsize==0) lastsize=2*dims_in[axis]-1;
  if ((lastsize/2) + 1 != dims_in[axis])
Martin Reinecke's avatar
more  
Martin Reinecke committed
302 303
    throw runtime_error("bad lastsize");
  dims_out[axis] = lastsize;
Martin Reinecke's avatar
Martin Reinecke committed
304
  py::array res = prepare_output<T>(out_, dims_out);
Martin Reinecke's avatar
Martin Reinecke committed
305 306 307 308 309 310
  auto s_in=copy_strides(in);
  auto s_out=copy_strides(res);
  auto d_in=reinterpret_cast<const complex<T> *>(in.data());
  auto d_out=reinterpret_cast<T *>(res.mutable_data());
  {
  py::gil_scoped_release release;
311
  T fct = norm_fct<T>(inorm, dims_out, axes);
312 313
  pocketfft::c2r(dims_out, s_in, s_out, axes, forward, d_in, d_out, fct,
    nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
314
  }
Martin Reinecke's avatar
more  
Martin Reinecke committed
315 316
  return res;
  }
317

Martin Reinecke's avatar
Martin Reinecke committed
318 319
py::array c2r(const py::array &in, const py::object &axes_, size_t lastsize,
  bool forward, int inorm, py::object &out_, size_t nthreads)
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
320
  {
Martin Reinecke's avatar
Martin Reinecke committed
321
  DISPATCH(in, c128, c64, clong, c2r_internal, (in, axes_, lastsize, forward,
Martin Reinecke's avatar
Martin Reinecke committed
322
    inorm, out_, nthreads))
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
323
  }
Martin Reinecke's avatar
Martin Reinecke committed
324

325
template<typename T> py::array separable_hartley_internal(const py::array &in,
Martin Reinecke's avatar
Martin Reinecke committed
326
  const py::object &axes_, int inorm, py::object &out_, size_t nthreads)
Martin Reinecke's avatar
more  
Martin Reinecke committed
327
  {
Martin Reinecke's avatar
Martin Reinecke committed
328
  auto dims(copy_shape(in));
Martin Reinecke's avatar
Martin Reinecke committed
329
  py::array res = prepare_output<T>(out_, dims);
Martin Reinecke's avatar
Martin Reinecke committed
330 331 332 333 334 335 336
  auto axes = makeaxes(in, axes_);
  auto s_in=copy_strides(in);
  auto s_out=copy_strides(res);
  auto d_in=reinterpret_cast<const T *>(in.data());
  auto d_out=reinterpret_cast<T *>(res.mutable_data());
  {
  py::gil_scoped_release release;
337
  T fct = norm_fct<T>(inorm, dims, axes);
338 339
  pocketfft::r2r_separable_hartley(dims, s_in, s_out, axes, d_in, d_out, fct,
    nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
340
  }
Martin Reinecke's avatar
Martin Reinecke committed
341 342
  return res;
  }
343

Martin Reinecke's avatar
Martin Reinecke committed
344 345
py::array separable_hartley(const py::array &in, const py::object &axes_,
  int inorm, py::object &out_, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
346
  {
347
  DISPATCH(in, f64, f32, flong, separable_hartley_internal, (in, axes_, inorm,
Martin Reinecke's avatar
Martin Reinecke committed
348
    out_, nthreads))
Martin Reinecke's avatar
Martin Reinecke committed
349
  }
350

Martin Reinecke's avatar
fixes  
Martin Reinecke committed
351
template<typename T>py::array complex2hartley(const py::array &in,
Martin Reinecke's avatar
Martin Reinecke committed
352
  const py::array &tmp, const py::object &axes_, py::object &out_)
Martin Reinecke's avatar
fixes  
Martin Reinecke committed
353
  {
Martin Reinecke's avatar
Martin Reinecke committed
354
  using namespace pocketfft::detail;
Martin Reinecke's avatar
Martin Reinecke committed
355
  auto dims_out(copy_shape(in));
Martin Reinecke's avatar
Martin Reinecke committed
356
  py::array out = prepare_output<T>(out_, dims_out);
357
  cndarr<cmplx<T>> atmp(tmp.data(), copy_shape(tmp), copy_strides(tmp));
Martin Reinecke's avatar
Martin Reinecke committed
358
  ndarr<T> aout(out.mutable_data(), copy_shape(out), copy_strides(out));
Martin Reinecke's avatar
fixes  
Martin Reinecke committed
359
  auto axes = makeaxes(in, axes_);
Martin Reinecke's avatar
Martin Reinecke committed
360 361
  {
  py::gil_scoped_release release;
362 363
  simple_iter iin(atmp);
  rev_iter iout(aout, axes);
364 365
  if (iin.remaining()!=iout.remaining()) throw runtime_error("oops");
  while(iin.remaining()>0)
Martin Reinecke's avatar
fixes  
Martin Reinecke committed
366
    {
367 368 369
    auto v = atmp[iin.ofs()];
    aout[iout.ofs()] = v.r+v.i;
    aout[iout.rev_ofs()] = v.r-v.i;
370
    iin.advance(); iout.advance();
Martin Reinecke's avatar
fixes  
Martin Reinecke committed
371
    }
Martin Reinecke's avatar
Martin Reinecke committed
372
  }
Martin Reinecke's avatar
fixes  
Martin Reinecke committed
373 374
  return out;
  }
375

Martin Reinecke's avatar
Martin Reinecke committed
376 377
py::array genuine_hartley(const py::array &in, const py::object &axes_,
  int inorm, py::object &out_, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
378
  {
Martin Reinecke's avatar
Martin Reinecke committed
379 380
  auto tmp = r2c(in, axes_, true, inorm, None, nthreads);
  DISPATCH(in, f64, f32, flong, complex2hartley, (in, tmp, axes_, out_))
Martin Reinecke's avatar
Martin Reinecke committed
381
  }
Martin Reinecke's avatar
fixes  
Martin Reinecke committed
382

Martin Reinecke's avatar
Martin Reinecke committed
383
const char *pypocketfft_DS = R"""(Fast Fourier and Hartley transforms.
Martin Reinecke's avatar
Martin Reinecke committed
384 385

This module supports
386
- single, double, and long double precision
Martin Reinecke's avatar
Martin Reinecke committed
387 388 389 390 391 392
- complex and real-valued transforms
- multi-dimensional transforms

For two- and higher-dimensional transforms the code will use SSE2 and AVX
vector instructions for faster execution if these are supported by the CPU and
were enabled during compilation.
Martin Reinecke's avatar
Martin Reinecke committed
393
)""";
Martin Reinecke's avatar
Martin Reinecke committed
394

Martin Reinecke's avatar
Martin Reinecke committed
395
const char *c2c_DS = R"""(Performs a complex FFT.
396 397 398

Parameters
----------
399 400 401
a : numpy.ndarray (any complex or real type)
    The input data. If its type is real, a more efficient real-to-complex
    transform will be used.
402
axes : list of integers
403
    The axes along which the FFT is carried out.
404
    If not set, all axes will be transformed.
Martin Reinecke's avatar
Martin Reinecke committed
405 406
forward : bool
    If `True`, a negative sign is used in the exponent, else a positive one.
407 408 409 410 411 412
inorm : int
    Normalization type
      0 : no normalization
      1 : divide by sqrt(N)
      2 : divide by N
    where N is the product of the lengths of the transformed axes.
Martin Reinecke's avatar
Martin Reinecke committed
413
out : numpy.ndarray (same shape as `a`, complex type with same accuracy as `a`)
Martin Reinecke's avatar
Martin Reinecke committed
414
    May be identical to `a`, but if it isn't, it must not overlap with `a`.
Martin Reinecke's avatar
Martin Reinecke committed
415
    If None, a new array is allocated to store the output.
Martin Reinecke's avatar
Martin Reinecke committed
416 417 418
nthreads : int
    Number of threads to use. If 0, use the system default (typically governed
    by the `OMP_NUM_THREADS` environment variable).
419 420 421

Returns
-------
Martin Reinecke's avatar
Martin Reinecke committed
422
numpy.ndarray (same shape as `a`, complex type with same accuracy as `a`)
423
    The transformed data.
Martin Reinecke's avatar
Martin Reinecke committed
424
)""";
425

Martin Reinecke's avatar
Martin Reinecke committed
426
const char *r2c_DS = R"""(Performs an FFT whose input is strictly real.
Martin Reinecke's avatar
Martin Reinecke committed
427 428 429

Parameters
----------
430
a : numpy.ndarray (any real type)
Martin Reinecke's avatar
Martin Reinecke committed
431 432
    The input data
axes : list of integers
433
    The axes along which the FFT is carried out.
Martin Reinecke's avatar
Martin Reinecke committed
434
    If not set, all axes will be transformed in ascending order.
Martin Reinecke's avatar
Martin Reinecke committed
435 436
forward : bool
    If `True`, a negative sign is used in the exponent, else a positive one.
437 438 439 440 441 442
inorm : int
    Normalization type
      0 : no normalization
      1 : divide by sqrt(N)
      2 : divide by N
    where N is the product of the lengths of the transformed input axes.
Martin Reinecke's avatar
Martin Reinecke committed
443 444
out : numpy.ndarray (complex type with same accuracy as `a`)
    For the required shape, see the `Returns` section.
Martin Reinecke's avatar
Martin Reinecke committed
445
    Must not overlap with `a`.
Martin Reinecke's avatar
Martin Reinecke committed
446
    If None, a new array is allocated to store the output.
Martin Reinecke's avatar
Martin Reinecke committed
447 448 449
nthreads : int
    Number of threads to use. If 0, use the system default (typically governed
    by the `OMP_NUM_THREADS` environment variable).
Martin Reinecke's avatar
Martin Reinecke committed
450 451 452

Returns
-------
Martin Reinecke's avatar
Martin Reinecke committed
453
numpy.ndarray (complex type with same accuracy as `a`)
Martin Reinecke's avatar
Martin Reinecke committed
454
    The transformed data. The shape is identical to that of the input array,
Martin Reinecke's avatar
Martin Reinecke committed
455 456
    except for the axis that was transformed last. If the length of that axis
    was n on input, it is n//2+1 on output.
Martin Reinecke's avatar
Martin Reinecke committed
457
)""";
Martin Reinecke's avatar
Martin Reinecke committed
458

Martin Reinecke's avatar
Martin Reinecke committed
459
const char *c2r_DS = R"""(Performs an FFT whose output is strictly real.
Martin Reinecke's avatar
Martin Reinecke committed
460 461 462

Parameters
----------
463
a : numpy.ndarray (any complex type)
Martin Reinecke's avatar
Martin Reinecke committed
464 465
    The input data
axes : list of integers
466
    The axes along which the FFT is carried out.
Martin Reinecke's avatar
Martin Reinecke committed
467 468 469
    If not set, all axes will be transformed in ascending order.
lastsize : the output size of the last axis to be transformed.
    If the corresponding input axis has size n, this can be 2*n-2 or 2*n-1.
Martin Reinecke's avatar
Martin Reinecke committed
470 471
forward : bool
    If `True`, a negative sign is used in the exponent, else a positive one.
472 473 474 475 476 477
inorm : int
    Normalization type
      0 : no normalization
      1 : divide by sqrt(N)
      2 : divide by N
    where N is the product of the lengths of the transformed output axes.
Martin Reinecke's avatar
Martin Reinecke committed
478 479
out : numpy.ndarray (real type with same accuracy as `a`)
    For the required shape, see the `Returns` section.
Martin Reinecke's avatar
Martin Reinecke committed
480
    Must not overlap with `a`.
Martin Reinecke's avatar
Martin Reinecke committed
481
    If None, a new array is allocated to store the output.
Martin Reinecke's avatar
Martin Reinecke committed
482 483 484
nthreads : int
    Number of threads to use. If 0, use the system default (typically governed
    by the `OMP_NUM_THREADS` environment variable).
Martin Reinecke's avatar
Martin Reinecke committed
485 486 487

Returns
-------
Martin Reinecke's avatar
Martin Reinecke committed
488
numpy.ndarray (real type with same accuracy as `a`)
Martin Reinecke's avatar
Martin Reinecke committed
489 490
    The transformed data. The shape is identical to that of the input array,
    except for the axis that was transformed last, which has now `lastsize`
Martin Reinecke's avatar
Martin Reinecke committed
491
    entries.
Martin Reinecke's avatar
Martin Reinecke committed
492
)""";
Martin Reinecke's avatar
Martin Reinecke committed
493

Martin Reinecke's avatar
Martin Reinecke committed
494
const char *r2r_fftpack_DS = R"""(Performs a real-valued FFT using the FFTPACK storage scheme.
495 496 497

Parameters
----------
498
a : numpy.ndarray (any real type)
Martin Reinecke's avatar
Martin Reinecke committed
499 500 501 502
    The input data
axes : list of integers
    The axes along which the FFT is carried out.
    If not set, all axes will be transformed.
503 504 505 506
real2hermitian : bool
    if True, the input is purely real and the output will have Hermitian
    symmetry and be stored in FFTPACK's halfcomplex ordering, otherwise the
    opposite.
Martin Reinecke's avatar
Martin Reinecke committed
507 508
forward : bool
    If `True`, a negative sign is used in the exponent, else a positive one.
509 510 511 512 513 514
inorm : int
    Normalization type
      0 : no normalization
      1 : divide by sqrt(N)
      2 : divide by N
    where N is the length of `axis`.
Martin Reinecke's avatar
Martin Reinecke committed
515
out : numpy.ndarray (same shape and data type as `a`)
Martin Reinecke's avatar
Martin Reinecke committed
516
    May be identical to `a`, but if it isn't, it must not overlap with `a`.
Martin Reinecke's avatar
Martin Reinecke committed
517
    If None, a new array is allocated to store the output.
Martin Reinecke's avatar
Martin Reinecke committed
518 519 520
nthreads : int
    Number of threads to use. If 0, use the system default (typically governed
    by the `OMP_NUM_THREADS` environment variable).
521 522 523

Returns
-------
Martin Reinecke's avatar
Martin Reinecke committed
524
numpy.ndarray (same shape and data type as `a`)
525
    The transformed data. The shape is identical to that of the input array.
Martin Reinecke's avatar
Martin Reinecke committed
526
)""";
527

528 529 530 531 532 533 534
const char *separable_hartley_DS = R"""(Performs a separable Hartley transform.
For every requested axis, a 1D forward Fourier transform is carried out, and
the real and imaginary parts of the result are added before the next axis is
processed.

Parameters
----------
Martin Reinecke's avatar
Martin Reinecke committed
535
a : numpy.ndarray (any real type)
536 537 538 539 540 541 542 543 544 545
    The input data
axes : list of integers
    The axes along which the transform is carried out.
    If not set, all axes will be transformed.
inorm : int
    Normalization type
      0 : no normalization
      1 : divide by sqrt(N)
      2 : divide by N
    where N is the product of the lengths of the transformed axes.
Martin Reinecke's avatar
Martin Reinecke committed
546
out : numpy.ndarray (same shape and data type as `a`)
Martin Reinecke's avatar
Martin Reinecke committed
547
    May be identical to `a`, but if it isn't, it must not overlap with `a`.
Martin Reinecke's avatar
Martin Reinecke committed
548
    If None, a new array is allocated to store the output.
549 550 551 552 553 554
nthreads : int
    Number of threads to use. If 0, use the system default (typically governed
    by the `OMP_NUM_THREADS` environment variable).

Returns
-------
Martin Reinecke's avatar
Martin Reinecke committed
555
numpy.ndarray (same shape and data type as `a`)
556 557 558 559 560 561 562 563
    The transformed data
)""";

const char *genuine_hartley_DS = R"""(Performs a full Hartley transform.
A full Fourier transform is carried out over the requested axes, and the
sum of real and imaginary parts of the result is stored in the output
array. For a single transformed axis, this is identical to `separable_hartley`,
but when transforming multiple axes, the results are different.
564 565 566

Parameters
----------
Martin Reinecke's avatar
Martin Reinecke committed
567
a : numpy.ndarray (any real type)
568 569
    The input data
axes : list of integers
570
    The axes along which the transform is carried out.
571
    If not set, all axes will be transformed.
572 573 574 575 576 577
inorm : int
    Normalization type
      0 : no normalization
      1 : divide by sqrt(N)
      2 : divide by N
    where N is the product of the lengths of the transformed axes.
Martin Reinecke's avatar
Martin Reinecke committed
578
out : numpy.ndarray (same shape and data type as `a`)
Martin Reinecke's avatar
Martin Reinecke committed
579
    May be identical to `a`, but if it isn't, it must not overlap with `a`.
Martin Reinecke's avatar
Martin Reinecke committed
580
    If None, a new array is allocated to store the output.
Martin Reinecke's avatar
Martin Reinecke committed
581 582 583
nthreads : int
    Number of threads to use. If 0, use the system default (typically governed
    by the `OMP_NUM_THREADS` environment variable).
584 585 586

Returns
-------
Martin Reinecke's avatar
Martin Reinecke committed
587
numpy.ndarray (same shape and data type as `a`)
Martin Reinecke's avatar
Martin Reinecke committed
588
    The transformed data
Martin Reinecke's avatar
Martin Reinecke committed
589
)""";
590

Martin Reinecke's avatar
Martin Reinecke committed
591 592 593 594
} // unnamed namespace

PYBIND11_MODULE(pypocketfft, m)
  {
Martin Reinecke's avatar
more  
Martin Reinecke committed
595 596
  using namespace pybind11::literals;

Martin Reinecke's avatar
Martin Reinecke committed
597
  m.doc() = pypocketfft_DS;
598
  m.def("c2c", c2c, c2c_DS, "a"_a, "axes"_a=None, "forward"_a=true,
Martin Reinecke's avatar
Martin Reinecke committed
599
    "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
600
  m.def("r2c", r2c, r2c_DS, "a"_a, "axes"_a=None, "forward"_a=true,
Martin Reinecke's avatar
Martin Reinecke committed
601
    "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
602
  m.def("c2r", c2r, c2r_DS, "a"_a, "axes"_a=None, "lastsize"_a=0,
Martin Reinecke's avatar
Martin Reinecke committed
603
    "forward"_a=true, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
604
  m.def("r2r_fftpack", r2r_fftpack, r2r_fftpack_DS, "a"_a, "axes"_a,
605
    "real2hermitian"_a, "forward"_a, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
606
  m.def("separable_hartley", separable_hartley, separable_hartley_DS, "a"_a,
Martin Reinecke's avatar
Martin Reinecke committed
607
    "axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
608
  m.def("genuine_hartley", genuine_hartley, genuine_hartley_DS, "a"_a,
Martin Reinecke's avatar
Martin Reinecke committed
609
    "axes"_a=None, "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
Martin Reinecke's avatar
Martin Reinecke committed
610 611 612 613
  m.def("r2r_dct", r2r_dct, /*r2r_dct_DS,*/ "a"_a, "type"_a, "axes"_a=None,
    "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
  m.def("r2r_dst", r2r_dst, /*r2r_dst_DS,*/ "a"_a, "type"_a, "axes"_a=None,
    "inorm"_a=0, "out"_a=None, "nthreads"_a=1);
Martin Reinecke's avatar
Martin Reinecke committed
614
  }