Commit 18adaa57 authored by Martin Reinecke's avatar Martin Reinecke

more complex arrays

parent 7e4b3513
......@@ -88,21 +88,15 @@ template<typename T> struct cmplx {
{ return cmplx(r-other.r, i-other.i); }
template<typename T2> auto operator* (const T2 &other) const
-> cmplx<decltype(r*other)>
{
return {r*other, i*other};
}
{ return {r*other, i*other}; }
template<typename T2> auto operator* (const cmplx<T2> &other) const
-> cmplx<decltype(r+other.r)>
{
return {r*other.r-i*other.i, r*other.i + i*other.r};
}
{ return {r*other.r-i*other.i, r*other.i + i*other.r}; }
template<bool bwd, typename T2> auto special_mul (const cmplx<T2> &other) const
-> cmplx<decltype(r+other.r)>
{
if (bwd)
return {r*other.r-i*other.i, r*other.i + i*other.r};
else
return {r*other.r+i*other.i, i*other.r - r*other.i};
return bwd ? cmplx(r*other.r-i*other.i, r*other.i + i*other.r)
: cmplx(r*other.r+i*other.i, i*other.r - r*other.i);
}
};
template<typename T> void PMC(cmplx<T> &a, cmplx<T> &b,
......@@ -303,6 +297,9 @@ template<typename T> class sincos_2pibyn
}
T operator[](size_t idx) const { return data[idx]; }
const T *rdata() const { return data; }
const cmplx<T> *cdata() const
{ return reinterpret_cast<const cmplx<T> *>(data.data()); }
};
NOINLINE size_t largest_prime_factor (size_t n)
......@@ -914,6 +911,7 @@ template<bool bwd, typename T> NOINLINE void pass_all(T c[], T0 fact)
NOINLINE void comp_twiddle()
{
sincos_2pibyn<T0> twid(length, false);
auto twiddle = twid.cdata();
size_t l1=1;
size_t memofs=0;
for (size_t k=0; k<nfct; ++k)
......@@ -923,19 +921,13 @@ template<bool bwd, typename T> NOINLINE void pass_all(T c[], T0 fact)
memofs+=(ip-1)*(ido-1);
for (size_t j=1; j<ip; ++j)
for (size_t i=1; i<ido; ++i)
{
fct[k].tw[(j-1)*(ido-1)+i-1].r = twid[2*j*l1*i];
fct[k].tw[(j-1)*(ido-1)+i-1].i = twid[2*j*l1*i+1];
}
fct[k].tw[(j-1)*(ido-1)+i-1] = twiddle[j*l1*i];
if (ip>11)
{
fct[k].tws=mem.data()+memofs;
memofs+=ip;
for (size_t j=0; j<ip; ++j)
{
fct[k].tws[j].r = twid[2*j*l1*ido];
fct[k].tws[j].i = twid[2*j*l1*ido+1];
}
fct[k].tws[j] = twiddle[j*l1*ido];
}
l1*=ip;
}
......@@ -1746,7 +1738,7 @@ NOINLINE void comp_twiddle()
NOINLINE rfftp(size_t length_)
: length(length_)
{
if (length==0) throw runtime_error("zero_sized FFT");
if (length==0) throw runtime_error("zero-sized FFT");
nfct=0;
if (length==1) return;
factorize();
......@@ -1765,82 +1757,66 @@ template<typename T0> class fftblue
private:
size_t n, n2;
cfftp<T0> plan;
arr<T0> mem;
T0 *bk, *bkf;
arr<cmplx<T0>> mem;
cmplx<T0> *bk, *bkf;
template<bool bwd, typename T> NOINLINE
void fft(T c[], T0 fct)
void fft(cmplx<T> c[], T0 fct)
{
constexpr T0 isign = bwd ? 1 : -1;
arr<T> akf(2*n2);
arr<cmplx<T>> akf(n2);
/* initialize a_k and FFT it */
for (size_t m=0; m<2*n; m+=2)
{
akf[m] = c[m]*bk[m] -isign*c[m+1]*bk[m+1];
akf[m+1] = isign*c[m]*bk[m+1] + c[m+1]*bk[m];
}
for (size_t m=2*n; m<2*n2; ++m)
akf[m]=0.*c[0];
for (size_t m=0; m<n; ++m)
akf[m] = c[m].template special_mul<bwd>(bk[m]);
for (size_t m=n; m<n2; ++m)
akf[m]=akf[0]*T0(0);
plan.forward ((cmplx<T> *)akf.data(),1.);
plan.forward (akf.data(),1.);
/* do the convolution */
for (size_t m=0; m<2*n2; m+=2)
{
T im = -isign*akf[m]*bkf[m+1] + akf[m+1]*bkf[m];
akf[m ] = akf[m]*bkf[m] + isign*akf[m+1]*bkf[m+1];
akf[m+1] = im;
}
for (size_t m=0; m<n2; ++m)
akf[m] = akf[m].template special_mul<!bwd>(bkf[m]);
/* inverse FFT */
plan.backward ((cmplx<T> *)akf.data(),1.);
plan.backward (akf.data(),1.);
/* multiply by b_k */
for (size_t m=0; m<2*n; m+=2)
{
c[m] = fct*(bk[m] *akf[m] - isign*bk[m+1]*akf[m+1]);
c[m+1] = fct*(isign*bk[m+1]*akf[m] + bk[m] *akf[m+1]);
}
for (size_t m=0; m<n; ++m)
c[m] = akf[m].template special_mul<bwd>(bk[m])*fct;
}
public:
NOINLINE fftblue(size_t length)
: n(length), n2(good_size(n*2-1)), plan(n2), mem(2*(n+n2)),
bk(mem.data()), bkf(mem.data()+2*n)
: n(length), n2(good_size(n*2-1)), plan(n2), mem(n+n2),
bk(mem.data()), bkf(mem.data()+n)
{
/* initialize b_k */
sincos_2pibyn<T0> tmp(2*n, false);
bk[0] = 1;
bk[1] = 0;
sincos_2pibyn<T0> tmp_(2*n, false);
auto tmp = tmp_.cdata();
bk[0].Set(1, 0);
size_t coeff=0;
for (size_t m=1; m<n; ++m)
{
coeff+=2*m-1;
if (coeff>=2*n) coeff-=2*n;
bk[2*m ] = tmp[2*coeff ];
bk[2*m+1] = tmp[2*coeff+1];
bk[m] = tmp[coeff];
}
/* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
T0 xn2 = 1./n2;
bkf[0] = bk[0]*xn2;
bkf[1] = bk[1]*xn2;
for (size_t m=2; m<2*n; m+=2)
{
bkf[m] = bkf[2*n2-m] = bk[m] *xn2;
bkf[m+1] = bkf[2*n2-m+1] = bk[m+1] *xn2;
}
for (size_t m=2*n;m<=(2*n2-2*n+1);++m)
bkf[m]=0.;
plan.forward((cmplx<T0> *)bkf,1.);
for (size_t m=1; m<n; ++m)
bkf[m] = bkf[n2-m] = bk[m]*xn2;
for (size_t m=n;m<=(n2-n);++m)
bkf[m].Set(0.,0.);
plan.forward(bkf,1.);
}
template<typename T> NOINLINE void backward(T c[], T0 fct)
template<typename T> NOINLINE void backward(cmplx<T> c[], T0 fct)
{ fft<true>(c,fct); }
template<typename T> NOINLINE void forward(T c[], T0 fct)
template<typename T> NOINLINE void forward(cmplx<T> c[], T0 fct)
{ fft<false>(c,fct); }
template<typename T> NOINLINE void backward_r(T c[], T0 fct)
......@@ -1855,22 +1831,19 @@ template<typename T0> class fftblue
tmp[2*n-m]=tmp[m];
tmp[2*n-m+1]=-tmp[m+1];
}
fft<true>(tmp.data(),fct);
fft<true>((cmplx<T> *)tmp.data(),fct);
for (size_t m=0; m<n; ++m)
c[m] = tmp[2*m];
}
template<typename T> NOINLINE void forward_r(T c[], T0 fct)
{
arr<T> tmp(2*n);
arr<cmplx<T>> tmp(n);
for (size_t m=0; m<n; ++m)
{
tmp[2*m] = c[m];
tmp[2*m+1] = c[m]*0.;
}
tmp[m].Set(c[m], 0.*c[m]);
fft<false>(tmp.data(),fct);
c[0] = tmp[0];
memcpy (c+1, tmp.data()+2, (n-1)*sizeof(T));
c[0] = tmp[0].r;
memcpy (c+1, tmp.data()+1, (n-1)*sizeof(T));
}
};
......@@ -1907,13 +1880,13 @@ template<typename T0> class pocketfft_c
template<typename T> NOINLINE void backward(T c[], T0 fct)
{
packplan ? packplan->backward((cmplx<T> *)c,fct)
: blueplan->backward(c,fct);
: blueplan->backward((cmplx<T> *)c,fct);
}
template<typename T> NOINLINE void forward(T c[], T0 fct)
{
packplan ? packplan->forward((cmplx<T> *)c,fct)
: blueplan->forward(c,fct);
: blueplan->forward((cmplx<T> *)c,fct);
}
size_t length() const { return len; }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment