Commit ac891a30 authored by Peter Bell

Fix hartley transforms by disallowing inplace FFT output

parent 1971f449
...@@ -2902,7 +2902,8 @@ template <typename T> using add_vec_t = typename add_vec<T>::type; ...@@ -2902,7 +2902,8 @@ template <typename T> using add_vec_t = typename add_vec<T>::type;
template<typename Tplan, typename T, typename T0, typename Exec> template<typename Tplan, typename T, typename T0, typename Exec>
POCKETFFT_NOINLINE void general_nd(const cndarr<T> &in, ndarr<T> &out, POCKETFFT_NOINLINE void general_nd(const cndarr<T> &in, ndarr<T> &out,
const shape_t &axes, T0 fct, size_t POCKETFFT_NTHREADS, const Exec & exec) const shape_t &axes, T0 fct, size_t POCKETFFT_NTHREADS, const Exec & exec,
const bool allow_inplace=true)
{ {
shared_ptr<Tplan> plan; shared_ptr<Tplan> plan;
...@@ -2932,7 +2933,7 @@ POCKETFFT_NOINLINE void general_nd(const cndarr<T> &in, ndarr<T> &out, ...@@ -2932,7 +2933,7 @@ POCKETFFT_NOINLINE void general_nd(const cndarr<T> &in, ndarr<T> &out,
while (it.remaining()>0) while (it.remaining()>0)
{ {
it.advance(1); it.advance(1);
auto buf = it.stride_out() == sizeof(T) ? auto buf = allow_inplace && it.stride_out() == sizeof(T) ?
&out[it.oofs(0)] : reinterpret_cast<T *>(storage.data()); &out[it.oofs(0)] : reinterpret_cast<T *>(storage.data());
exec(it, in, out, buf, *plan, fct); exec(it, in, out, buf, *plan, fct);
} }
...@@ -2979,8 +2980,8 @@ template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it, ...@@ -2979,8 +2980,8 @@ template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it,
size_t i=1, i1=1, i2=it.length_out()-1; size_t i=1, i1=1, i2=it.length_out()-1;
for (i=1; i<it.length_out()-1; i+=2, ++i1, --i2) for (i=1; i<it.length_out()-1; i+=2, ++i1, --i2)
{ {
dst[it.oofs(i1)] = src[i]+src[i+1]; dst[it.oofs(i1)] = src[i]+src[i+1];
dst[it.oofs(i2)] = src[i]-src[i+1]; dst[it.oofs(i2)] = src[i]-src[i+1];
} }
if (i<it.length_out()) if (i<it.length_out())
dst[it.oofs(i1)] = src[i]; dst[it.oofs(i1)] = src[i];
...@@ -3294,7 +3295,8 @@ template<typename T> void r2r_separable_hartley(const shape_t &shape, ...@@ -3294,7 +3295,8 @@ template<typename T> void r2r_separable_hartley(const shape_t &shape,
util::sanity_check(shape, stride_in, stride_out, data_in==data_out, axes); util::sanity_check(shape, stride_in, stride_out, data_in==data_out, axes);
cndarr<T> ain(data_in, shape, stride_in); cndarr<T> ain(data_in, shape, stride_in);
ndarr<T> aout(data_out, shape, stride_out); ndarr<T> aout(data_out, shape, stride_out);
general_nd<pocketfft_r<T>>(ain, aout, axes, fct, nthreads, ExecHartley{}); general_nd<pocketfft_r<T>>(ain, aout, axes, fct, nthreads, ExecHartley{},
false);
} }
} // namespace detail } // namespace detail
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment