Commit 09e1d7c9 authored by Martin Reinecke
Browse files

tweaks

parent 84bcc100
...@@ -1992,9 +1992,10 @@ class multi_iter ...@@ -1992,9 +1992,10 @@ class multi_iter
public: public:
vector<diminfo> dim; vector<diminfo> dim;
shape_t pos; shape_t pos;
size_t ofs_, len; int64_t ofs_;
size_t len;
int64_t str; int64_t str;
int64_t rem; size_t rem;
bool done_; bool done_;
public: public:
...@@ -2013,7 +2014,7 @@ class multi_iter ...@@ -2013,7 +2014,7 @@ class multi_iter
} }
void advance() void advance()
{ {
if (--rem<=0) {done_=true; return; } if (--rem==0) {done_=true; return; }
for (int i=pos.size()-1; i>=0; --i) for (int i=pos.size()-1; i>=0; --i)
{ {
++pos[i]; ++pos[i];
...@@ -2026,10 +2027,10 @@ class multi_iter ...@@ -2026,10 +2027,10 @@ class multi_iter
done_ = true; done_ = true;
} }
bool done() const { return done_; } bool done() const { return done_; }
size_t offset() const { return ofs_; } int64_t offset() const { return ofs_; }
size_t length() const { return len; } size_t length() const { return len; }
int64_t stride() const { return str; } int64_t stride() const { return str; }
int64_t remaining() const { return rem; } size_t remaining() const { return rem; }
}; };
...@@ -2099,6 +2100,17 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape, ...@@ -2099,6 +2100,17 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape,
return arr<char>(tmpsize*elemsize); return arr<char>(tmpsize*elemsize);
} }
// Snapshot of the offsets of `vlen` consecutive positions of a multi_iter.
//
// Constructing a multioffset reads the iterator's current offset, advances
// the iterator, and repeats until `vlen` offsets have been stored. The
// iterator passed in is therefore left `vlen` steps further along.
template<size_t vlen> struct multioffset
{
// Stored offsets, in the order in which the iterator produced them.
int64_t ofs[vlen];
// Fills `ofs` from `it`; each slot takes the offset *before* the matching
// advance() call, so ofs[0] is the position `it` had on entry.
multioffset(multi_iter &it)
{
for (int64_t &slot : ofs)
{
slot = it.offset();
it.advance();
}
}
// Returns the i-th recorded offset (no bounds checking is performed).
int64_t operator[](size_t i) const { return ofs[i]; }
};
template<typename T> void pocketfft_general_c(const shape_t &shape, template<typename T> void pocketfft_general_c(const shape_t &shape,
const stride_t &stride_in, const stride_t &stride_out, const stride_t &stride_in, const stride_t &stride_out,
const shape_t &axes, bool forward, const cmplx<T> *data_in, const shape_t &axes, bool forward, const cmplx<T> *data_in,
...@@ -2124,12 +2136,7 @@ template<typename T> void pocketfft_general_c(const shape_t &shape, ...@@ -2124,12 +2136,7 @@ template<typename T> void pocketfft_general_c(const shape_t &shape,
#ifdef HAVE_VECSUPPORT #ifdef HAVE_VECSUPPORT
while (it_in.remaining()>=vlen) while (it_in.remaining()>=vlen)
{ {
size_t p_i[vlen]; multioffset<vlen> p_i(it_in), p_o(it_out);
for (size_t i=0; i<vlen; ++i)
{ p_i[i] = it_in.offset(); it_in.advance(); }
size_t p_o[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_o[i] = it_out.offset(); it_out.advance(); }
for (size_t i=0; i<it_in.length(); ++i) for (size_t i=0; i<it_in.length(); ++i)
for (size_t j=0; j<vlen; ++j) for (size_t j=0; j<vlen; ++j)
{ {
...@@ -2186,12 +2193,7 @@ template<typename T> void pocketfft_general_hartley(const shape_t &shape, ...@@ -2186,12 +2193,7 @@ template<typename T> void pocketfft_general_hartley(const shape_t &shape,
#ifdef HAVE_VECSUPPORT #ifdef HAVE_VECSUPPORT
while (it_in.remaining()>=vlen) while (it_in.remaining()>=vlen)
{ {
size_t p_i[vlen]; multioffset<vlen> p_i(it_in), p_o(it_out);
for (size_t i=0; i<vlen; ++i)
{ p_i[i] = it_in.offset(); it_in.advance(); }
size_t p_o[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_o[i] = it_out.offset(); it_out.advance(); }
for (size_t i=0; i<it_in.length(); ++i) for (size_t i=0; i<it_in.length(); ++i)
for (size_t j=0; j<vlen; ++j) for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = data_in[p_i[j]+i*it_in.stride()]; tdatav[i][j] = data_in[p_i[j]+i*it_in.stride()];
...@@ -2255,12 +2257,7 @@ template<typename T> void pocketfft_general_r2c(const shape_t &shape, ...@@ -2255,12 +2257,7 @@ template<typename T> void pocketfft_general_r2c(const shape_t &shape,
#ifdef HAVE_VECSUPPORT #ifdef HAVE_VECSUPPORT
while (it_in.remaining()>=vlen) while (it_in.remaining()>=vlen)
{ {
size_t p_i[vlen]; multioffset<vlen> p_i(it_in), p_o(it_out);
for (size_t i=0; i<vlen; ++i)
{ p_i[i] = it_in.offset(); it_in.advance(); }
size_t p_o[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_o[i] = it_out.offset(); it_out.advance(); }
for (size_t i=0; i<it_in.length(); ++i) for (size_t i=0; i<it_in.length(); ++i)
for (size_t j=0; j<vlen; ++j) for (size_t j=0; j<vlen; ++j)
tdatav[i][j] = data_in[p_i[j]+i*it_in.stride()]; tdatav[i][j] = data_in[p_i[j]+i*it_in.stride()];
...@@ -2318,12 +2315,7 @@ template<typename T> void pocketfft_general_c2r(const shape_t &shape_out, ...@@ -2318,12 +2315,7 @@ template<typename T> void pocketfft_general_c2r(const shape_t &shape_out,
#ifdef HAVE_VECSUPPORT #ifdef HAVE_VECSUPPORT
while (it_in.remaining()>=vlen) while (it_in.remaining()>=vlen)
{ {
size_t p_i[vlen]; multioffset<vlen> p_i(it_in), p_o(it_out);
for (size_t i=0; i<vlen; ++i)
{ p_i[i] = it_in.offset(); it_in.advance(); }
size_t p_o[vlen];
for (size_t i=0; i<vlen; ++i)
{ p_o[i] = it_out.offset(); it_out.advance(); }
for (size_t j=0; j<vlen; ++j) for (size_t j=0; j<vlen; ++j)
tdatav[0][j]=data_in[p_i[j]].r; tdatav[0][j]=data_in[p_i[j]].r;
size_t i; size_t i;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment