Commit 4e511317 authored by Martin Reinecke's avatar Martin Reinecke

try to write rearrangement code in a clearer fashion

parent 5a83309a
......@@ -2752,8 +2752,6 @@ template<typename T0> class T_dcst23
template<typename T0> class T_dcst4
{
// even length algorithm from
// https://www.appletonaudio.com/blog/2013/derivation-of-fast-dct-4-algorithm-based-on-dft/
private:
size_t N;
unique_ptr<pocketfft_c<T0>> fft;
......@@ -2789,9 +2787,9 @@ template<typename T0> class T_dcst4
{
// The following code is derived from the FFTW3 function apply_re11()
// and is released under the 3-clause BSD license with friendly
// permission of Matteo Frigo.
// permission of Matteo Frigo and Steven G. Johnson.
auto SGN_SET = [](T x, size_t i) {return (i%2) ? -x : x;};
auto SGN = [](size_t i) {return (i&2) ? -sqrt2 : sqrt2;};
arr<T> y(N);
{
size_t i=0, m=n2;
......@@ -2803,30 +2801,26 @@ template<typename T0> class T_dcst4
y[i] = -c[m-2*N];
for (; m<4*N; ++i, m+=4)
y[i] = c[4*N-m-1];
m -= 4*N;
for (; i<N; ++i, m+=4)
y[i] = c[m];
y[i] = c[m-4*N];
}
rfft->exec(y.data(), fct, true);
{
size_t i=0;
for (; i+i+1<n2; ++i)
c[n2] = y[0]*SGN(n2+1);
size_t i=0, i1=1, k=1;
for (; k<n2; ++i, ++i1, k+=2)
{
size_t k = i+i+1;
T c1=y[2*k-1], s1=y[2*k], c2=y[2*k+1], s2=y[2*k+2];
c[i] = sqrt2 * (SGN_SET(c1, (i+1)/2) + SGN_SET(s1, i/2));
c[N-(i+1)] = sqrt2 * (SGN_SET(c1, (N-i)/2) - SGN_SET(s1, (N-(i+1))/2));
c[n2-(i+1)] = sqrt2 * (SGN_SET(c2, (n2-i)/2) - SGN_SET(s2, (n2-(i+1))/2));
c[n2+(i+1)] = sqrt2 * (SGN_SET(c2, (n2+i+2)/2) + SGN_SET(s2, (n2+(i+1))/2));
c[i ] = y[2*k-1]*SGN(i1) + y[2*k ]*SGN(i);
c[N -i1] = y[2*k-1]*SGN(N -i) - y[2*k ]*SGN(N -i1);
c[n2-i1] = y[2*k+1]*SGN(n2-i) - y[2*k+2]*SGN(n2-i1);
c[n2+i1] = y[2*k+1]*SGN(n2+i+2) + y[2*k+2]*SGN(n2+i1);
}
if (i+i+1 == n2)
if (k == n2)
{
T cx=y[2*n2-1], sx=y[2*n2];
c[i] = sqrt2 * (SGN_SET(cx, (i+1)/2) + SGN_SET(sx, i/2));
c[N-(i+1)] = sqrt2 * (SGN_SET(cx, (i+2)/2) + SGN_SET(sx, (i+1)/2));
c[i ] = y[2*k-1]*SGN(i+1) + y[2*k]*SGN(i);
c[N-i1] = y[2*k-1]*SGN(i+2) + y[2*k]*SGN(i1);
}
}
c[n2] = sqrt2 * SGN_SET(y[0], (n2+1)/2);
// FFTW-derived code ends here
}
......
......@@ -37,7 +37,7 @@ def get_extension_modules():
return [Extension('pypocketfft',
language='c++',
sources=['pypocketfft.cc'],
depends=['pocketfft_hdronly.h'],
depends=['pocketfft_hdronly.h', 'setup.py'],
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment