Commit 68a9c113, authored by Peter Bell, committed by Martin Reinecke

Simplify the general_* functions' handling of in-place transforms

parent 50476533
......@@ -2902,25 +2902,16 @@ template<typename T> POCKETFFT_NOINLINE void general_c(
while (it.remaining()>0)
{
it.advance(1);
auto tdata = reinterpret_cast<cmplx<T> *>(storage.data());
if ((&tin[0]==&out[0]) && (it.stride_out()==sizeof(cmplx<T>))) // fully in-place
forward ? plan->forward (&out[it.oofs(0)], fct)
: plan->backward(&out[it.oofs(0)], fct);
else if (it.stride_out()==sizeof(cmplx<T>)) // compute FFT in output location
{
for (size_t i=0; i<len; ++i)
out[it.oofs(i)] = tin[it.iofs(i)];
forward ? plan->forward (&out[it.oofs(0)], fct)
: plan->backward(&out[it.oofs(0)], fct);
}
else
{
auto buf = it.stride_out() == sizeof(cmplx<T>) ?
&out[it.oofs(0)] : reinterpret_cast<cmplx<T> *>(storage.data());
if (buf != &tin[it.iofs(0)])
for (size_t i=0; i<len; ++i)
tdata[i] = tin[it.iofs(i)];
forward ? plan->forward (tdata, fct) : plan->backward(tdata, fct);
buf[i] = tin[it.iofs(i)];
forward ? plan->forward (buf, fct) : plan->backward(buf, fct);
if (buf != &out[it.oofs(0)])
for (size_t i=0; i<len; ++i)
out[it.oofs(i)] = tdata[i];
}
out[it.oofs(i)] = buf[i];
}
} // end of parallel region
fct = T(1); // factor has been applied, use 1 for remaining axes
......@@ -3034,23 +3025,16 @@ template<typename Trafo, typename T> POCKETFFT_NOINLINE void general_dcst(
while (it.remaining()>0)
{
it.advance(1);
auto tdata = reinterpret_cast<T *>(storage.data());
if ((&tin[0]==&out[0]) && (it.stride_out()==sizeof(T))) // fully in-place
plan->exec(&out[it.oofs(0)], fct, ortho, type, cosine);
else if (it.stride_out()==sizeof(T)) // compute FFT in output location
{
for (size_t i=0; i<len; ++i)
out[it.oofs(i)] = tin[it.iofs(i)];
plan->exec(&out[it.oofs(0)], fct, ortho, type, cosine);
}
else
{
auto buf = it.stride_out() == sizeof(T) ? &out[it.oofs(0)]
: reinterpret_cast<T *>(storage.data());
if (buf != &tin[it.iofs(0)])
for (size_t i=0; i<len; ++i)
tdata[i] = tin[it.iofs(i)];
plan->exec(tdata, fct, ortho, type, cosine);
buf[i] = tin[it.iofs(i)];
plan->exec(buf, fct, ortho, type, cosine);
if (buf != &out[it.oofs(0)])
for (size_t i=0; i<len; ++i)
out[it.oofs(i)] = tdata[i];
}
out[it.oofs(i)] = buf[i];
}
} // end of parallel region
fct = T(1); // factor has been applied, use 1 for remaining axes
......@@ -3242,45 +3226,22 @@ template<typename T> POCKETFFT_NOINLINE void general_r(
while (it.remaining()>0)
{
it.advance(1);
auto tdata = reinterpret_cast<T *>(storage.data());
if ((&tin[0]==&out[0]) && (it.stride_out()==sizeof(T))) // fully in-place
{
if ((!r2c) && forward)
for (size_t i=2; i<len; i+=2)
out[it.oofs(i)] = -out[it.oofs(i)];
forward ? plan->forward (&out[it.oofs(0)], fct)
: plan->backward(&out[it.oofs(0)], fct);
if (r2c && (!forward))
for (size_t i=2; i<len; i+=2)
out[it.oofs(i)] = -out[it.oofs(i)];
}
else if (it.stride_out()==sizeof(T)) // compute FFT in output location
{
for (size_t i=0; i<len; ++i)
out[it.oofs(i)] = tin[it.iofs(i)];
if ((!r2c) && forward)
for (size_t i=2; i<len; i+=2)
out[it.oofs(i)] = -out[it.oofs(i)];
forward ? plan->forward (&out[it.oofs(0)], fct)
: plan->backward(&out[it.oofs(0)], fct);
if (r2c && (!forward))
for (size_t i=2; i<len; i+=2)
out[it.oofs(i)] = -out[it.oofs(i)];
}
else
{
auto buf = it.stride_out() == sizeof(T) ?
&out[it.oofs(0)] : reinterpret_cast<T *>(storage.data());
if (buf != &tin[it.iofs(0)])
for (size_t i=0; i<len; ++i)
tdata[i] = tin[it.iofs(i)];
if ((!r2c) && forward)
for (size_t i=2; i<len; i+=2)
tdata[i] = -tdata[i];
forward ? plan->forward (tdata, fct) : plan->backward(tdata, fct);
if (r2c && (!forward))
for (size_t i=2; i<len; i+=2)
tdata[i] = -tdata[i];
buf[i] = tin[it.iofs(i)];
if ((!r2c) && forward)
for (size_t i=2; i<len; i+=2)
buf[i] = -buf[i];
forward ? plan->forward(buf, fct) : plan->backward(buf, fct);
if (r2c && (!forward))
for (size_t i=2; i<len; i+=2)
buf[i] = -buf[i];
if (buf != &out[it.oofs(0)])
for (size_t i=0; i<len; ++i)
out[it.oofs(i)] = tdata[i];
}
out[it.oofs(i)] = buf[i];
}
} // end of parallel region
fct = T(1); // factor has been applied, use 1 for remaining axes
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment