From 5a190ae6bdd2b7cc29060d276f1a56690c7172e9 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Fri, 24 May 2019 11:45:33 +0200 Subject: [PATCH] more flexible vector lengths --- pocketfft_hdronly.h | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 474a7a8..fd146c7 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -95,15 +95,21 @@ constexpr bool FORWARD = true, #endif #endif +template struct VLEN { static constexpr size_t val=1; }; + #ifndef POCKETFFT_NO_VECTORS #if (defined(__AVX512F__)) -constexpr int VBYTELEN=64; +template<> struct VLEN { static constexpr size_t val=16; }; +template<> struct VLEN { static constexpr size_t val=8; }; #elif (defined(__AVX__)) -constexpr int VBYTELEN=32; +template<> struct VLEN { static constexpr size_t val=8; }; +template<> struct VLEN { static constexpr size_t val=4; }; #elif (defined(__SSE2__)) -constexpr int VBYTELEN=16; +template<> struct VLEN { static constexpr size_t val=4; }; +template<> struct VLEN { static constexpr size_t val=2; }; #elif (defined(__VSX__)) -constexpr int VBYTELEN=16; +template<> struct VLEN { static constexpr size_t val=4; }; +template<> struct VLEN { static constexpr size_t val=2; }; #else #define POCKETFFT_NO_VECTORS #endif @@ -2314,31 +2320,25 @@ template class multi_iter #ifndef POCKETFFT_NO_VECTORS template struct VTYPE {}; -template<> struct VTYPE +template<> struct VTYPE { - using type = long double __attribute__ ((vector_size (sizeof(long double)))); - static constexpr size_t vlen=1; + using type = float __attribute__ ((vector_size (VLEN::val*sizeof(float)))); }; template<> struct VTYPE { - using type = double __attribute__ ((vector_size (VBYTELEN))); - static constexpr size_t vlen=VBYTELEN/sizeof(double); + using type = double __attribute__ ((vector_size (VLEN::val*sizeof(double)))); }; -template<> struct VTYPE +template<> struct VTYPE { - using type = float __attribute__ ((vector_size (VBYTELEN))); - static constexpr size_t vlen=VBYTELEN/sizeof(float); + using type = long double __attribute__ ((vector_size (VLEN::val*sizeof(long double)))); }; -#else -template struct VTYPE - { static constexpr size_t vlen=1; }; #endif template arr alloc_tmp(const shape_t &shape, size_t axsize, size_t elemsize) { auto othersize = util::prod(shape)/axsize; - auto tmpsize = axsize*((othersize>=VTYPE::vlen) ? VTYPE::vlen : 1); + auto tmpsize = axsize*((othersize>=VLEN::val) ? VLEN::val : 1); return arr(tmpsize*elemsize); } template arr alloc_tmp(const shape_t &shape, @@ -2350,7 +2350,7 @@ template arr alloc_tmp(const shape_t &shape, { auto axsize = shape[axes[i]]; auto othersize = fullsize/axsize; - auto sz = axsize*((othersize>=VTYPE::vlen) ? VTYPE::vlen : 1); + auto sz = axsize*((othersize>=VLEN::val) ? VLEN::val : 1); if (sz>tmpsize) tmpsize=sz; } return arr(tmpsize*elemsize); @@ -2370,7 +2370,7 @@ template NOINLINE void general_c( for (size_t iax=0; iax::vlen; + constexpr auto vlen = VLEN::val; size_t len=in.shape(axes[iax]); if ((!plan) || (len!=plan->length())) plan.reset(new pocketfft_c(len)); @@ -2433,7 +2433,7 @@ template NOINLINE void general_hartley( for (size_t iax=0; iax::vlen; + constexpr auto vlen = VLEN::val; size_t len=in.shape(axes[iax]); if ((!plan) || (len!=plan->length())) plan.reset(new pocketfft_r(len)); @@ -2494,7 +2494,7 @@ template NOINLINE void general_r2c( size_t POCKETFFT_NTHREADS) { pocketfft_r plan(in.shape(axis)); - constexpr auto vlen = VTYPE::vlen; + constexpr auto vlen = VLEN::val; size_t len=in.shape(axis); #ifdef POCKETFFT_OPENMP #pragma omp parallel num_threads(util::thread_count(nthreads, in.shape(), axis)) @@ -2545,7 +2545,7 @@ template NOINLINE void general_c2r( size_t POCKETFFT_NTHREADS) { pocketfft_r plan(out.shape(axis)); - constexpr auto vlen = VTYPE::vlen; + constexpr auto vlen = VLEN::val; size_t len=out.shape(axis); #ifdef POCKETFFT_OPENMP #pragma omp parallel num_threads(util::thread_count(nthreads, in.shape(), axis)) @@ -2600,7 +2600,7 @@ template NOINLINE void general_r( const ndarr &in, ndarr &out, size_t axis, bool forward, T fct, size_t POCKETFFT_NTHREADS) { - constexpr auto vlen = VTYPE::vlen; + constexpr auto vlen = VLEN::val; size_t len=in.shape(axis); pocketfft_r plan(len); #ifdef POCKETFFT_OPENMP -- GitLab