Commit 09e1d7c9 authored by Martin Reinecke

tweaks

parent 84bcc100
......@@ -1992,9 +1992,10 @@ class multi_iter
public:
vector<diminfo> dim;
shape_t pos;
size_t ofs_, len;
int64_t ofs_;
size_t len;
int64_t str;
int64_t rem;
size_t rem;
bool done_;
public:
......@@ -2013,7 +2014,7 @@ class multi_iter
}
void advance()
{
if (--rem<=0) {done_=true; return; }
if (--rem==0) {done_=true; return; }
for (int i=pos.size()-1; i>=0; --i)
{
++pos[i];
......@@ -2026,10 +2027,10 @@ class multi_iter
done_ = true;
}
bool done() const { return done_; }
size_t offset() const { return ofs_; }
int64_t offset() const { return ofs_; }
size_t length() const { return len; }
int64_t stride() const { return str; }
int64_t remaining() const { return rem; }
size_t remaining() const { return rem; }
};
......@@ -2099,6 +2100,17 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape,
return arr<char>(tmpsize*elemsize);
}
// Snapshot of `vlen` consecutive offsets taken from a multi_iter.
//
// Construction reads it.offset() and then calls it.advance(), once per
// slot, so the iterator ends up vlen steps further along. The constructor
// itself does not check how many steps the iterator has left.
template<size_t vlen> struct multioffset
{
  int64_t ofs[vlen];

  // Fill every slot in order; `it` is modified (advanced vlen times).
  multioffset(multi_iter &it)
  {
    for (int64_t &slot : ofs)
    {
      slot = it.offset();
      it.advance();
    }
  }

  // Offset stored in slot i (no bounds checking).
  int64_t operator[](size_t i) const { return ofs[i]; }
};
template<typename T> void pocketfft_general_c(const shape_t &shape,
const stride_t &stride_in, const stride_t &stride_out,
const shape_t &axes, bool forward, const cmplx<T> *data_in,
......@@ -2124,12 +2136,7 @@ template<typename T> void pocketfft_general_c(const shape_t &shape,
#ifdef HAVE_VECSUPPORT
while (it_in.remaining()>=vlen)
{
size_t p_i[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_i[i] = it_in.offset(); it_in.advance(); }
size_t p_o[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_o[i] = it_out.offset(); it_out.advance(); }
multioffset<vlen> p_i(it_in), p_o(it_out);
for (size_t i=0; i<it_in.length(); ++i)
for (size_t j=0; j<vlen; ++j)
{
......@@ -2186,12 +2193,7 @@ template<typename T> void pocketfft_general_hartley(const shape_t &shape,
#ifdef HAVE_VECSUPPORT
while (it_in.remaining()>=vlen)
{
size_t p_i[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_i[i] = it_in.offset(); it_in.advance(); }
size_t p_o[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_o[i] = it_out.offset(); it_out.advance(); }
multioffset<vlen> p_i(it_in), p_o(it_out);
for (size_t i=0; i<it_in.length(); ++i)
for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = data_in[p_i[j]+i*it_in.stride()];
......@@ -2255,12 +2257,7 @@ template<typename T> void pocketfft_general_r2c(const shape_t &shape,
#ifdef HAVE_VECSUPPORT
while (it_in.remaining()>=vlen)
{
size_t p_i[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_i[i] = it_in.offset(); it_in.advance(); }
size_t p_o[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_o[i] = it_out.offset(); it_out.advance(); }
multioffset<vlen> p_i(it_in), p_o(it_out);
for (size_t i=0; i<it_in.length(); ++i)
for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = data_in[p_i[j]+i*it_in.stride()];
......@@ -2318,12 +2315,7 @@ template<typename T> void pocketfft_general_c2r(const shape_t &shape_out,
#ifdef HAVE_VECSUPPORT
while (it_in.remaining()>=vlen)
{
size_t p_i[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_i[i] = it_in.offset(); it_in.advance(); }
size_t p_o[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_o[i] = it_out.offset(); it_out.advance(); }
multioffset<vlen> p_i(it_in), p_o(it_out);
for (size_t j=0; j<vlen; ++j)
tdatav[0][j]=data_in[p_i[j]].r;
size_t i;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment