Commit ac891a30 authored by Peter Bell

Fix Hartley transforms by disallowing in-place FFT output

parent 1971f449
......@@ -2902,7 +2902,8 @@ template <typename T> using add_vec_t = typename add_vec<T>::type;
template<typename Tplan, typename T, typename T0, typename Exec>
POCKETFFT_NOINLINE void general_nd(const cndarr<T> &in, ndarr<T> &out,
const shape_t &axes, T0 fct, size_t POCKETFFT_NTHREADS, const Exec & exec)
const shape_t &axes, T0 fct, size_t POCKETFFT_NTHREADS, const Exec & exec,
const bool allow_inplace=true)
{
shared_ptr<Tplan> plan;
......@@ -2932,7 +2933,7 @@ POCKETFFT_NOINLINE void general_nd(const cndarr<T> &in, ndarr<T> &out,
while (it.remaining()>0)
{
it.advance(1);
auto buf = it.stride_out() == sizeof(T) ?
auto buf = allow_inplace && it.stride_out() == sizeof(T) ?
&out[it.oofs(0)] : reinterpret_cast<T *>(storage.data());
exec(it, in, out, buf, *plan, fct);
}
......@@ -2979,8 +2980,8 @@ template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it,
size_t i=1, i1=1, i2=it.length_out()-1;
for (i=1; i<it.length_out()-1; i+=2, ++i1, --i2)
{
dst[it.oofs(i1)] = src[i]+src[i+1];
dst[it.oofs(i2)] = src[i]-src[i+1];
dst[it.oofs(i1)] = src[i]+src[i+1];
dst[it.oofs(i2)] = src[i]-src[i+1];
}
if (i<it.length_out())
dst[it.oofs(i1)] = src[i];
......@@ -3294,7 +3295,8 @@ template<typename T> void r2r_separable_hartley(const shape_t &shape,
util::sanity_check(shape, stride_in, stride_out, data_in==data_out, axes);
cndarr<T> ain(data_in, shape, stride_in);
ndarr<T> aout(data_out, shape, stride_out);
general_nd<pocketfft_r<T>>(ain, aout, axes, fct, nthreads, ExecHartley{});
general_nd<pocketfft_r<T>>(ain, aout, axes, fct, nthreads, ExecHartley{},
false);
}
} // namespace detail
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment