Commit 87832dd2 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

cleaner design of constant and nonconstant arrays

parent 4e1f2c3b
......@@ -2200,43 +2200,51 @@ template<typename T0> class pocketfft_r
// multi-D infrastructure
//
template<typename T> class ndarr
class arr_info
{
private:
char *d;
const char *cd;
protected:
shape_t shp;
stride_t str;
public:
ndarr(const void *data_, const shape_t &shape_, const stride_t &stride_)
: d(nullptr), cd(reinterpret_cast<const char *>(data_)), shp(shape_),
str(stride_) {}
ndarr(void *data_, const shape_t &shape_, const stride_t &stride_)
: d(reinterpret_cast<char *>(data_)),
cd(reinterpret_cast<const char *>(data_)), shp(shape_), str(stride_) {}
arr_info(const shape_t &shape_, const stride_t &stride_)
: shp(shape_), str(stride_) {}
size_t ndim() const { return shp.size(); }
size_t size() const { return util::prod(shp); }
const shape_t &shape() const { return shp; }
size_t shape(size_t i) const { return shp[i]; }
const stride_t &stride() const { return str; }
const ptrdiff_t &stride(size_t i) const { return str[i]; }
T &operator[](ptrdiff_t ofs)
{ return *reinterpret_cast<T *>(d+ofs); }
};
template<typename T> class cndarr: public arr_info
{
protected:
const char *d;
public:
cndarr(const void *data_, const shape_t &shape_, const stride_t &stride_)
: arr_info(shape_, stride_),
d(reinterpret_cast<const char *>(data_)) {}
const T &operator[](ptrdiff_t ofs) const
{ return *reinterpret_cast<const T *>(cd+ofs); }
T get(ptrdiff_t ofs) const
{ return *reinterpret_cast<const T *>(cd+ofs); }
void set(ptrdiff_t ofs, const T &val) const
{ *reinterpret_cast<T *>(d+ofs) = val; }
{ return *reinterpret_cast<const T *>(d+ofs); }
};
template<typename T> class ndarr: public cndarr<T>
{
public:
ndarr(void *data_, const shape_t &shape_, const stride_t &stride_)
: cndarr<T>::cndarr(const_cast<const void *>(data_), shape_, stride_)
{}
T &operator[](ptrdiff_t ofs)
{ return *reinterpret_cast<T *>(const_cast<char *>(cndarr<T>::d+ofs)); }
};
template<size_t N, typename Ti, typename To> class multi_iter
template<size_t N> class multi_iter
{
private:
shape_t pos;
const ndarr<Ti> &iarr;
ndarr<To> &oarr;
const arr_info &iarr, &oarr;
ptrdiff_t p_ii, p_i[N], str_i, p_oi, p_o[N], str_o;
size_t idim, rem;
......@@ -2257,7 +2265,7 @@ template<size_t N, typename Ti, typename To> class multi_iter
}
public:
multi_iter(const ndarr<Ti> &iarr_, ndarr<To> &oarr_, size_t idim_)
multi_iter(const arr_info &iarr_, const arr_info &oarr_, size_t idim_)
: pos(iarr_.ndim(), 0), iarr(iarr_), oarr(oarr_), p_ii(0),
str_i(iarr.stride(idim_)), p_oi(0), str_o(oarr.stride(idim_)),
idim(idim_), rem(iarr.size()/iarr.shape(idim))
......@@ -2297,30 +2305,27 @@ template<size_t N, typename Ti, typename To> class multi_iter
}
rem -= n;
}
const Ti &in (size_t i) const { return iarr[p_i[0] + ptrdiff_t(i)*str_i]; }
const Ti &in (size_t j, size_t i) const { return iarr[p_i[j] + ptrdiff_t(i)*str_i]; }
To &out (size_t i) { return oarr[p_o[0] + ptrdiff_t(i)*str_o]; }
To &out (size_t j, size_t i) { return oarr[p_o[j] + ptrdiff_t(i)*str_o]; }
ptrdiff_t iofs(size_t i) const { return p_i[0] + ptrdiff_t(i)*str_i; }
ptrdiff_t iofs(size_t j, size_t i) const { return p_i[j] + ptrdiff_t(i)*str_i; }
ptrdiff_t oofs(size_t i) const { return p_o[0] + ptrdiff_t(i)*str_o; }
ptrdiff_t oofs(size_t j, size_t i) const { return p_o[j] + ptrdiff_t(i)*str_o; }
size_t length_in() const { return iarr.shape(idim); }
size_t length_out() const { return oarr.shape(idim); }
ptrdiff_t stride_in() const { return str_i; }
ptrdiff_t stride_out() const { return str_o; }
size_t remaining() const { return rem; }
bool inplace() const { return &iarr[0]==&oarr[0]; }
bool contiguous_in() const { return str_i==sizeof(Ti); }
bool contiguous_out() const { return str_o==sizeof(To); }
};
template<typename T> class simple_iter
class simple_iter
{
private:
shape_t pos;
const ndarr<T> &arr;
const arr_info &arr;
ptrdiff_t p;
size_t rem;
public:
simple_iter(const ndarr<T> &arr_)
simple_iter(const arr_info &arr_)
: pos(arr_.ndim(), 0), arr(arr_), p(0), rem(arr_.size()) {}
void advance()
{
......@@ -2339,11 +2344,11 @@ template<typename T> class simple_iter
size_t remaining() const { return rem; }
};
template<typename T> class rev_iter
class rev_iter
{
private:
shape_t pos;
ndarr<T> &arr;
const arr_info &arr;
vector<char> rev_axis;
vector<char> rev_jump;
size_t last_axis, last_size;
......@@ -2352,7 +2357,7 @@ template<typename T> class rev_iter
size_t rem;
public:
rev_iter(ndarr<T> &arr_, const shape_t &axes)
rev_iter(const arr_info &arr_, const shape_t &axes)
: pos(arr_.ndim(), 0), arr(arr_), rev_axis(arr_.ndim(), 0),
rev_jump(arr_.ndim(), 1), p(0), rp(0)
{
......@@ -2447,7 +2452,7 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape,
#endif
template<typename T> NOINLINE void general_c(
const ndarr<cmplx<T>> &in, ndarr<cmplx<T>> &out,
const cndarr<cmplx<T>> &in, ndarr<cmplx<T>> &out,
const shape_t &axes, bool forward, T fct, size_t POCKETFFT_NTHREADS)
{
unique_ptr<pocketfft_c<T>> plan;
......@@ -2464,7 +2469,8 @@ template<typename T> NOINLINE void general_c(
#endif
{
auto storage = alloc_tmp<T>(in.shape(), len, sizeof(cmplx<T>));
multi_iter<vlen, cmplx<T>, cmplx<T>> it(iax==0? in : out, out, axes[iax]);
const auto &tin(iax==0? in : out);
multi_iter<vlen> it(tin, out, axes[iax]);
#ifndef POCKETFFT_NO_VECTORS
if (vlen>1)
while (it.remaining()>=vlen)
......@@ -2474,34 +2480,37 @@ template<typename T> NOINLINE void general_c(
auto tdatav = reinterpret_cast<cmplx<vtype> *>(storage.data());
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
{ tdatav[i].r[j] = it.in(j,i).r; tdatav[i].i[j] = it.in(j,i).i; }
{
tdatav[i].r[j] = tin[it.iofs(j,i)].r;
tdatav[i].i[j] = tin[it.iofs(j,i)].i;
}
forward ? plan->forward (tdatav, fct) : plan->backward(tdatav, fct);
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
it.out(j,i).Set(tdatav[i].r[j],tdatav[i].i[j]);
out[it.oofs(j,i)].Set(tdatav[i].r[j],tdatav[i].i[j]);
}
#endif
while (it.remaining()>0)
{
it.advance(1);
auto tdata = reinterpret_cast<cmplx<T> *>(storage.data());
if (it.inplace() && it.contiguous_out()) // fully in-place
forward ? plan->forward ((&it.out(0)), fct)
: plan->backward((&it.out(0)), fct);
else if (it.contiguous_out()) // compute FFT in output location
if ((&tin[0]==&out[0]) && (it.stride_out()==sizeof(cmplx<T>))) // fully in-place
forward ? plan->forward (&out[it.oofs(0)], fct)
: plan->backward(&out[it.oofs(0)], fct);
else if (it.stride_out()==sizeof(cmplx<T>)) // compute FFT in output location
{
for (size_t i=0; i<len; ++i)
it.out(i) = it.in(i);
forward ? plan->forward ((&it.out(0)), fct)
: plan->backward((&it.out(0)), fct);
out[it.oofs(i)] = tin[it.iofs(i)];
forward ? plan->forward (&out[it.oofs(0)], fct)
: plan->backward(&out[it.oofs(0)], fct);
}
else
{
for (size_t i=0; i<len; ++i)
tdata[i] = it.in(i);
tdata[i] = tin[it.iofs(i)];
forward ? plan->forward (tdata, fct) : plan->backward(tdata, fct);
for (size_t i=0; i<len; ++i)
it.out(i) = tdata[i];
out[it.oofs(i)] = tdata[i];
}
}
} // end of parallel region
......@@ -2510,7 +2519,7 @@ template<typename T> NOINLINE void general_c(
}
template<typename T> NOINLINE void general_hartley(
const ndarr<T> &in, ndarr<T> &out, const shape_t &axes, T fct,
const cndarr<T> &in, ndarr<T> &out, const shape_t &axes, T fct,
size_t POCKETFFT_NTHREADS)
{
unique_ptr<pocketfft_r<T>> plan;
......@@ -2527,7 +2536,8 @@ template<typename T> NOINLINE void general_hartley(
#endif
{
auto storage = alloc_tmp<T>(in.shape(), len, sizeof(T));
multi_iter<vlen, T, T> it(iax==0 ? in : out, out, axes[iax]);
const auto &tin(iax==0 ? in : out);
multi_iter<vlen> it(tin, out, axes[iax]);
#ifndef POCKETFFT_NO_VECTORS
if (vlen>1)
while (it.remaining()>=vlen)
......@@ -2537,20 +2547,20 @@ template<typename T> NOINLINE void general_hartley(
auto tdatav = reinterpret_cast<vtype *>(storage.data());
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = it.in(j,i);
tdatav[i][j] = tin[it.iofs(j,i)];
plan->forward(tdatav, fct);
for (size_t j=0; j<vlen; ++j)
it.out(j,0) = tdatav[0][j];
out[it.oofs(j,0)] = tdatav[0][j];
size_t i=1, i1=1, i2=len-1;
for (i=1; i<len-1; i+=2, ++i1, --i2)
for (size_t j=0; j<vlen; ++j)
{
it.out(j,i1) = tdatav[i][j]+tdatav[i+1][j];
it.out(j,i2) = tdatav[i][j]-tdatav[i+1][j];
out[it.oofs(j,i1)] = tdatav[i][j]+tdatav[i+1][j];
out[it.oofs(j,i2)] = tdatav[i][j]-tdatav[i+1][j];
}
if (i<len)
for (size_t j=0; j<vlen; ++j)
it.out(j,i1) = tdatav[i][j];
out[it.oofs(j,i1)] = tdatav[i][j];
}
#endif
while (it.remaining()>0)
......@@ -2558,15 +2568,18 @@ template<typename T> NOINLINE void general_hartley(
it.advance(1);
auto tdata = reinterpret_cast<T *>(storage.data());
for (size_t i=0; i<len; ++i)
tdata[i] = it.in(i);
tdata[i] = tin[it.iofs(i)];
plan->forward(tdata, fct);
// Hartley order
it.out(0) = tdata[0];
out[it.oofs(0)] = tdata[0];
size_t i=1, i1=1, i2=len-1;
for (i=1; i<len-1; i+=2, ++i1, --i2)
{ it.out(i1) = tdata[i]+tdata[i+1]; it.out(i2) = tdata[i]-tdata[i+1]; }
{
out[it.oofs(i1)] = tdata[i]+tdata[i+1];
out[it.oofs(i2)] = tdata[i]-tdata[i+1];
}
if (i<len)
it.out(i1) = tdata[i];
out[it.oofs(i1)] = tdata[i];
}
} // end of parallel region
fct = T(1); // factor has been applied, use 1 for remaining axes
......@@ -2574,7 +2587,7 @@ template<typename T> NOINLINE void general_hartley(
}
template<typename T> NOINLINE void general_r2c(
const ndarr<T> &in, ndarr<cmplx<T>> &out, size_t axis, T fct,
const cndarr<T> &in, ndarr<cmplx<T>> &out, size_t axis, T fct,
size_t POCKETFFT_NTHREADS)
{
pocketfft_r<T> plan(in.shape(axis));
......@@ -2585,7 +2598,7 @@ template<typename T> NOINLINE void general_r2c(
#endif
{
auto storage = alloc_tmp<T>(in.shape(), len, sizeof(T));
multi_iter<vlen, T, cmplx<T>> it(in, out, axis);
multi_iter<vlen> it(in, out, axis);
#ifndef POCKETFFT_NO_VECTORS
if (vlen>1)
while (it.remaining()>=vlen)
......@@ -2595,17 +2608,17 @@ template<typename T> NOINLINE void general_r2c(
auto tdatav = reinterpret_cast<vtype *>(storage.data());
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = it.in(j,i);
tdatav[i][j] = in[it.iofs(j,i)];
plan.forward(tdatav, fct);
for (size_t j=0; j<vlen; ++j)
it.out(j,0).Set(tdatav[0][j]);
out[it.oofs(j,0)].Set(tdatav[0][j]);
size_t i=1, ii=1;
for (; i<len-1; i+=2, ++ii)
for (size_t j=0; j<vlen; ++j)
it.out(j,ii).Set(tdatav[i][j], tdatav[i+1][j]);
out[it.oofs(j,ii)].Set(tdatav[i][j], tdatav[i+1][j]);
if (i<len)
for (size_t j=0; j<vlen; ++j)
it.out(j,ii).Set(tdatav[i][j]);
out[it.oofs(j,ii)].Set(tdatav[i][j]);
}
#endif
while (it.remaining()>0)
......@@ -2613,19 +2626,19 @@ template<typename T> NOINLINE void general_r2c(
it.advance(1);
auto tdata = reinterpret_cast<T *>(storage.data());
for (size_t i=0; i<len; ++i)
tdata[i] = it.in(i);
tdata[i] = in[it.iofs(i)];
plan.forward(tdata, fct);
it.out(0).Set(tdata[0]);
out[it.oofs(0)].Set(tdata[0]);
size_t i=1, ii=1;
for (; i<len-1; i+=2, ++ii)
it.out(ii).Set(tdata[i], tdata[i+1]);
out[it.oofs(ii)].Set(tdata[i], tdata[i+1]);
if (i<len)
it.out(ii).Set(tdata[i]);
out[it.oofs(ii)].Set(tdata[i]);
}
} // end of parallel region
}
template<typename T> NOINLINE void general_c2r(
const ndarr<cmplx<T>> &in, ndarr<T> &out, size_t axis, T fct,
const cndarr<cmplx<T>> &in, ndarr<T> &out, size_t axis, T fct,
size_t POCKETFFT_NTHREADS)
{
pocketfft_r<T> plan(out.shape(axis));
......@@ -2636,7 +2649,7 @@ template<typename T> NOINLINE void general_c2r(
#endif
{
auto storage = alloc_tmp<T>(out.shape(), len, sizeof(T));
multi_iter<vlen, cmplx<T>, T> it(in, out, axis);
multi_iter<vlen> it(in, out, axis);
#ifndef POCKETFFT_NO_VECTORS
if (vlen>1)
while (it.remaining()>=vlen)
......@@ -2645,43 +2658,49 @@ template<typename T> NOINLINE void general_c2r(
it.advance(vlen);
auto tdatav = reinterpret_cast<vtype *>(storage.data());
for (size_t j=0; j<vlen; ++j)
tdatav[0][j]=it.in(j,0).r;
tdatav[0][j]=in[it.iofs(j,0)].r;
{
size_t i=1, ii=1;
for (; i<len-1; i+=2, ++ii)
for (size_t j=0; j<vlen; ++j)
{ tdatav[i][j] = it.in(j,ii).r; tdatav[i+1][j] = it.in(j,ii).i; }
{
tdatav[i ][j] = in[it.iofs(j,ii)].r;
tdatav[i+1][j] = in[it.iofs(j,ii)].i;
}
if (i<len)
for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = it.in(j,ii).r;
tdatav[i][j] = in[it.iofs(j,ii)].r;
}
plan.backward(tdatav, fct);
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
it.out(j,i) = tdatav[i][j];
out[it.oofs(j,i)] = tdatav[i][j];
}
#endif
while (it.remaining()>0)
{
it.advance(1);
auto tdata = reinterpret_cast<T *>(storage.data());
tdata[0]=it.in(0).r;
tdata[0]=in[it.iofs(0)].r;
{
size_t i=1, ii=1;
for (; i<len-1; i+=2, ++ii)
{ tdata[i] = it.in(ii).r; tdata[i+1] = it.in(ii).i; }
{
tdata[i ] = in[it.iofs(ii)].r;
tdata[i+1] = in[it.iofs(ii)].i;
}
if (i<len)
tdata[i] = it.in(ii).r;
tdata[i] = in[it.iofs(ii)].r;
}
plan.backward(tdata, fct);
for (size_t i=0; i<len; ++i)
it.out(i) = tdata[i];
out[it.oofs(i)] = tdata[i];
}
} // end of parallel region
}
template<typename T> NOINLINE void general_r(
const ndarr<T> &in, ndarr<T> &out, size_t axis, bool forward, T fct,
const cndarr<T> &in, ndarr<T> &out, size_t axis, bool forward, T fct,
size_t POCKETFFT_NTHREADS)
{
constexpr auto vlen = VLEN<T>::val;
......@@ -2692,7 +2711,7 @@ template<typename T> NOINLINE void general_r(
#endif
{
auto storage = alloc_tmp<T>(in.shape(), len, sizeof(T));
multi_iter<vlen, T, T> it(in, out, axis);
multi_iter<vlen> it(in, out, axis);
#ifndef POCKETFFT_NO_VECTORS
if (vlen>1)
while (it.remaining()>=vlen)
......@@ -2702,34 +2721,35 @@ template<typename T> NOINLINE void general_r(
auto tdatav = reinterpret_cast<vtype *>(storage.data());
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = it.in(j,i);
tdatav[i][j] = in[it.iofs(j,i)];
forward ? plan.forward (tdatav, fct)
: plan.backward(tdatav, fct);
for (size_t i=0; i<len; ++i)
for (size_t j=0; j<vlen; ++j)
it.out(j,i) = tdatav[i][j];
out[it.oofs(j,i)] = tdatav[i][j];
}
#endif
while (it.remaining()>0)
{
it.advance(1);
auto tdata = reinterpret_cast<T *>(storage.data());
if (it.inplace() && it.contiguous_out()) // fully in-place
forward ? plan.forward (&it.out(0), fct)
: plan.backward(&it.out(0), fct);
else if (it.contiguous_out()) // compute FFT in output location
if ((&in[0]==&out[0]) && (it.stride_out()==sizeof(T))) // fully in-place
forward ? plan.forward (&out[it.oofs(0)], fct)
: plan.backward(&out[it.oofs(0)], fct);
else if (it.stride_out()==sizeof(T)) // compute FFT in output location
{
for (size_t i=0; i<len; ++i)
it.out(i) = it.in(i);
forward ? plan.forward (&it.out(0), fct) : plan.backward(&it.out(0), fct);
out[it.oofs(i)] = in[it.iofs(i)];
forward ? plan.forward (&out[it.oofs(0)], fct)
: plan.backward(&out[it.oofs(0)], fct);
}
else
{
for (size_t i=0; i<len; ++i)
tdata[i] = it.in(i);
tdata[i] = in[it.iofs(i)];
forward ? plan.forward (tdata, fct) : plan.backward(tdata, fct);
for (size_t i=0; i<len; ++i)
it.out(i) = tdata[i];
out[it.oofs(i)] = tdata[i];
}
}
} // end of parallel region
......@@ -2744,8 +2764,8 @@ template<typename T> void c2c(const shape_t &shape, const stride_t &stride_in,
{
if (util::prod(shape)==0) return;
util::sanity_check(shape, stride_in, stride_out, data_in==data_out, axes);
ndarr<cmplx<T>> ain(data_in, shape, stride_in),
aout(data_out, shape, stride_out);
cndarr<cmplx<T>> ain(data_in, shape, stride_in);
ndarr<cmplx<T>> aout(data_out, shape, stride_out);
general_c(ain, aout, axes, forward, fct, nthreads);
}
......@@ -2755,7 +2775,7 @@ template<typename T> void r2c(const shape_t &shape_in,
{
if (util::prod(shape_in)==0) return;
util::sanity_check(shape_in, stride_in, stride_out, false, axis);
ndarr<T> ain(data_in, shape_in, stride_in);
cndarr<T> ain(data_in, shape_in, stride_in);
shape_t shape_out(shape_in);
shape_out[axis] = shape_in[axis]/2 + 1;
ndarr<cmplx<T>> aout(data_out, shape_out, stride_out);
......@@ -2787,7 +2807,7 @@ template<typename T> void c2r(const shape_t &shape_out,
util::sanity_check(shape_out, stride_in, stride_out, false, axis);
shape_t shape_in(shape_out);
shape_in[axis] = shape_out[axis]/2 + 1;
ndarr<cmplx<T>> ain(data_in, shape_in, stride_in);
cndarr<cmplx<T>> ain(data_in, shape_in, stride_in);
ndarr<T> aout(data_out, shape_out, stride_out);
general_c2r(ain, aout, axis, fct, nthreads);
}
......@@ -2822,7 +2842,8 @@ template<typename T> void r2r_fftpack(const shape_t &shape,
{
if (util::prod(shape)==0) return;
util::sanity_check(shape, stride_in, stride_out, data_in==data_out, axis);
ndarr<T> ain(data_in, shape, stride_in), aout(data_out, shape, stride_out);
cndarr<T> ain(data_in, shape, stride_in);
ndarr<T> aout(data_out, shape, stride_out);
general_r(ain, aout, axis, forward, fct, nthreads);
}
......@@ -2832,7 +2853,8 @@ template<typename T> void r2r_hartley(const shape_t &shape,
{
if (util::prod(shape)==0) return;
util::sanity_check(shape, stride_in, stride_out, data_in==data_out, axes);
ndarr<T> ain(data_in, shape, stride_in), aout(data_out, shape, stride_out);
cndarr<T> ain(data_in, shape, stride_in);
ndarr<T> aout(data_out, shape, stride_out);
general_hartley(ain, aout, axes, fct, nthreads);
}
......
......@@ -138,11 +138,11 @@ template<typename T> py::array sym_rfftn_internal(const py::array &in,
// now fill in second half
using namespace pocketfft::detail;
ndarr<complex<T>> ares(res.mutable_data(), dims, s_out);
rev_iter<complex<T>> iter(ares, axes);
rev_iter iter(ares, axes);
while(iter.remaining()>0)
{
auto v = ares.get(iter.ofs());
ares.set(iter.rev_ofs(), conj(v));
auto v = ares[iter.ofs()];
ares[iter.rev_ofs()] = conj(v);
iter.advance();
}
}
......@@ -282,19 +282,19 @@ template<typename T>py::array complex2hartley(const py::array &in,
using namespace pocketfft::detail;
auto dims_out(copy_shape(in));
py::array out = inplace ? in : py::array_t<T>(dims_out);
ndarr<cmplx<T>> atmp(tmp.data(), copy_shape(tmp), copy_strides(tmp));
cndarr<cmplx<T>> atmp(tmp.data(), copy_shape(tmp), copy_strides(tmp));
ndarr<T> aout(out.mutable_data(), copy_shape(out), copy_strides(out));
auto axes = makeaxes(in, axes_);
{
py::gil_scoped_release release;
simple_iter<cmplx<T>> iin(atmp);
rev_iter<T> iout(aout, axes);
simple_iter iin(atmp);
rev_iter iout(aout, axes);
if (iin.remaining()!=iout.remaining()) throw runtime_error("oops");
while(iin.remaining()>0)
{
auto v = atmp.get(iin.ofs());
aout.set(iout.ofs(), v.r+v.i);
aout.set(iout.rev_ofs(), v.r-v.i);
auto v = atmp[iin.ofs()];
aout[iout.ofs()] = v.r+v.i;
aout[iout.rev_ofs()] = v.r-v.i;
iin.advance(); iout.advance();
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment