Commit 1a1f6477 authored by Martin Reinecke's avatar Martin Reinecke

more tweaks

parent b87f51f5
......@@ -270,7 +270,7 @@ template<typename T> class sincos_2pibyn
{
private:
using Thigh = typename conditional<(sizeof(T)>sizeof(double)), T, double>::type;
size_t mask, shift;
size_t N, mask, shift;
arr<cmplx<Thigh>> v1, v2;
static cmplx<Thigh> calc(size_t x, size_t n, Thigh ang)
......@@ -309,17 +309,19 @@ template<typename T> class sincos_2pibyn
public:
POCKETFFT_NOINLINE sincos_2pibyn(size_t n)
: N(n)
{
constexpr auto pi = 3.141592653589793238462643383279502884197L;
Thigh ang = Thigh(0.25L*pi/n);
size_t nval = (n+2)/2;
shift = 1;
while((size_t(1)<<shift)*(size_t(1)<<shift) < n) ++shift;
while((size_t(1)<<shift)*(size_t(1)<<shift) < nval) ++shift;
mask = (size_t(1)<<shift)-1;
v1.resize(mask+1);
v1[0].Set(Thigh(1), Thigh(0));
for (size_t i=1; i<v1.size(); ++i)
v1[i]=calc(i,n,ang);
v2.resize((n+mask)/(mask+1));
v2.resize((nval+mask)/(mask+1));
v2[0].Set(Thigh(1), Thigh(0));
for (size_t i=1; i<v2.size(); ++i)
v2[i]=calc(i*(mask+1),n,ang);
......@@ -327,8 +329,14 @@ template<typename T> class sincos_2pibyn
cmplx<T> operator[](size_t idx) const
{
if (2*idx<=N)
{
auto x1=v1[idx&mask], x2=v2[idx>>shift];
return cmplx<T>(T(x1.r*x2.r-x1.i*x2.i), T(x1.r*x2.i+x1.i*x2.r));
}
idx = N-idx;
auto x1=v1[idx&mask], x2=v2[idx>>shift];
return cmplx<T>(T(x1.r*x2.r-x1.i*x2.i), T(x1.r*x2.i+x1.i*x2.r));
return cmplx<T>(T(x1.r*x2.r-x1.i*x2.i), -T(x1.r*x2.i+x1.i*x2.r));
}
};
......@@ -2484,10 +2492,9 @@ template<typename T0> class T_dcst23
POCKETFFT_NOINLINE T_dcst23(size_t length)
: fftplan(length), twiddle(length)
{
constexpr auto pi = T0(3.141592653589793238462643383279502884197L);
const auto oo2n = pi*T0(0.5)/T0(length);
sincos_2pibyn<T0> tw(4*length);
for (size_t i=0; i<length; ++i)
twiddle[i] = cos(oo2n*T0(i+1));
twiddle[i] = tw[i+1].r;
}
template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho,
......@@ -2560,14 +2567,12 @@ template<typename T0> class T_dcst4
rfft((N&1)? new pocketfft_r<T0>(N) : nullptr),
C2((N&1) ? 0 : N/2)
{
const auto oon = -T0(1.)/T0(N);
if ((N&1)==0)
{
sincos_2pibyn<T0> tw(8*N);
for (size_t i=0; i<N/2; ++i)
{
constexpr auto pi = T0(3.141592653589793238462643383279502884197L);
T0 ang = pi*oon*(T0(i)+T0(0.125));
C2[i].Set(cos(ang), sin(ang));
}
C2[i] = tw[8*N-8*i-1];
}
}
template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct,
......
......@@ -61,12 +61,12 @@ def test(err):
b = fftn(ifftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2,
nthreads=nthreads)
err = update_err(err, "cmax", _l2error(a.real, b, axes))
b = irfftn(rfftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2,
lastsize=lastsize, nthreads=nthreads)
err = update_err(err, "rmax", _l2error(a.real, b, axes))
b = ifftn(fftn(a.astype(np.complex64), axes=axes, nthreads=nthreads),
axes=axes, inorm=2, nthreads=nthreads)
err = update_err(err, "cmaxf", _l2error(a.astype(np.complex64), b, axes))
b = irfftn(rfftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2,
lastsize=lastsize, nthreads=nthreads)
err = update_err(err, "rmax", _l2error(a.real, b, axes))
b = irfftn(rfftn(a.real.astype(np.float32), axes=axes, nthreads=nthreads),
axes=axes, inorm=2, lastsize=lastsize, nthreads=nthreads)
err = update_err(err, "rmaxf", _l2error(a.real.astype(np.float32), b, axes))
......@@ -113,6 +113,10 @@ def test(err):
pypocketfft.dct(a_32.real, axes=axes, nthreads=nthreads, type=4),
axes=axes, type=4, nthreads=nthreads, inorm=2)
err = update_err(err, "c4maxf", _l2error(a_32.real, b, axes))
b = pypocketfft.dst(
pypocketfft.dst(a.real, axes=axes, nthreads=nthreads, type=1),
axes=axes, type=1, nthreads=nthreads, inorm=2)
err = update_err(err, "s1max", _l2error(a.real, b, axes))
b = pypocketfft.dst(
pypocketfft.dst(a_32.real, axes=axes, nthreads=nthreads, type=1),
axes=axes, type=1, nthreads=nthreads, inorm=2)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment