Commit 07b7644b authored by Martin Reinecke
Browse files

more tests

parent 1c168d5b
...@@ -639,18 +639,49 @@ template<typename T, typename T0> aligned_array<T> alloc_tmp ...@@ -639,18 +639,49 @@ template<typename T, typename T0> aligned_array<T> alloc_tmp
return aligned_array<T>(tmpsize); return aligned_array<T>(tmpsize);
} }
#define MRFFT_PREFETCH template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input_j1(const multi_iter<vlen> &it,
#define MRUTIL_PREFETCH_R(addr) __builtin_prefetch(addr);
#define MRUTIL_PREFETCH_W(addr) __builtin_prefetch(addr,1);
template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_iter<vlen> &it,
const fmav<Cmplx<T>> &src, Cmplx<native_simd<T>> *MRUTIL_RESTRICT dst) const fmav<Cmplx<T>> &src, Cmplx<native_simd<T>> *MRUTIL_RESTRICT dst)
{ {
if (it.uniform_i()) auto ptr = &src[it.iofs_uni(0,0)];
auto istr = it.stride_in();
size_t i=0;
for (; i<it.length_in(); ++i)
{ {
auto ptr = &src[it.iofs_uni(0,0)]; Cmplx<native_simd<T>> stmp;
auto jstr = it.unistride_i(); for (size_t j=0; j<vlen; ++j)
auto istr = it.stride_in(); {
if (istr==1) auto tmp = ptr[j+i*istr];
stmp.r[j] = tmp.r;
stmp.i[j] = tmp.i;
}
dst[i] = stmp;
}
}
// Packs scalar complex input into SIMD-lane form, assuming 16-byte aligned input.
//
// For every position p in [0, it.length_in()), the vlen source elements at
// offsets p*it.stride_in() + 0 .. p*it.stride_in() + (vlen-1) from the start
// pointer are written into lanes 0 .. vlen-1 of the real and imaginary parts
// of one Cmplx<native_simd<T>>, which is then stored to dst[p].
//
// Precondition: the address &src[it.iofs_uni(0,0)] must be 16-byte aligned;
// the alignment hint below makes a violation undefined behaviour.
// NOTE(review): the dispatch in copy_input tests src.data(), not this offset
// address -- confirm the offset can never break the alignment.
template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input_j1_a16(const multi_iter<vlen> &it,
const fmav<Cmplx<T>> &src, Cmplx<native_simd<T>> *MRUTIL_RESTRICT dst)
{
// Start of the data addressed by the iterator.
auto base = &src[it.iofs_uni(0,0)];
// Tell the optimizer that base is 16-byte aligned (GCC/Clang builtin).
base = static_cast<decltype(base)>(__builtin_assume_aligned(base,16));
const auto step = it.stride_in();
const auto count = it.length_in();
for (size_t pos=0; pos<count; ++pos)
{
Cmplx<native_simd<T>> packed;
for (size_t lane=0; lane<vlen; ++lane)
{
// Copy one scalar element, then scatter its parts into the matching lane.
const auto elem = base[lane+pos*step];
packed.r[lane] = elem.r;
packed.i[lane] = elem.i;
}
dst[pos] = packed;
}
}
template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input_i1(const multi_iter<vlen> &it,
const fmav<Cmplx<T>> &src, Cmplx<native_simd<T>> *MRUTIL_RESTRICT dst)
{
auto ptr = &src[it.iofs_uni(0,0)];
auto jstr = it.unistride_i();
auto istr = it.stride_in();
for (size_t i=0; i<it.length_in(); ++i) for (size_t i=0; i<it.length_in(); ++i)
{ {
Cmplx<native_simd<T>> stmp; Cmplx<native_simd<T>> stmp;
...@@ -662,18 +693,49 @@ template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_i ...@@ -662,18 +693,49 @@ template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_i
} }
dst[i] = stmp; dst[i] = stmp;
} }
}
// Defined with no value; its consumers are outside this view -- presumably it
// switches on use of the prefetch helpers below (TODO confirm).
#define MRFFT_PREFETCH
// Prefetch hint for addr using the builtin's default access type.
// Note: the expansion already ends in ';'.
#define MRUTIL_PREFETCH_R(addr) __builtin_prefetch(addr);
// Prefetch hint for addr with the builtin's second argument set to 1, which
// the compiler documentation describes as preparing for a write access.
// Note: the expansion already ends in ';'.
#define MRUTIL_PREFETCH_W(addr) __builtin_prefetch(addr,1);
template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_iter<vlen> &it,
const fmav<Cmplx<T>> &src, Cmplx<native_simd<T>> *MRUTIL_RESTRICT dst)
{
if (it.uniform_i())
{
auto ptr = &src[it.iofs_uni(0,0)];
auto jstr = it.unistride_i();
auto istr = it.stride_in();
if (istr==1)
copy_input_i1(it, src, dst);
// for (size_t i=0; i<it.length_in(); ++i)
// {
// Cmplx<native_simd<T>> stmp;
// for (size_t j=0; j<vlen; ++j)
// {
// auto tmp = ptr[j*jstr+i];
// stmp.r[j] = tmp.r;
// stmp.i[j] = tmp.i;
// }
// dst[i] = stmp;
// }
else if (jstr==1) else if (jstr==1)
for (size_t i=0; i<it.length_in(); ++i) {
{ if ((reinterpret_cast<uintptr_t>(src.data())&15)==0)
Cmplx<native_simd<T>> stmp; copy_input_j1_a16(it, src, dst);
for (size_t j=0; j<vlen; ++j) else
{ copy_input_j1(it, src, dst);
auto tmp = ptr[j+i*istr]; }
stmp.r[j] = tmp.r; // for (size_t i=0; i<it.length_in(); ++i)
stmp.i[j] = tmp.i; // {
} // Cmplx<native_simd<T>> stmp;
dst[i] = stmp; // for (size_t j=0; j<vlen; ++j)
} // {
// auto tmp = ptr[j+i*istr];
// stmp.r[j] = tmp.r;
// stmp.i[j] = tmp.i;
// }
// dst[i] = stmp;
// }
else else
for (size_t i=0; i<it.length_in(); ++i) for (size_t i=0; i<it.length_in(); ++i)
{ {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment