From 4ea60b74ba9707d8604a195202003cf26d6aa56a Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Sun, 11 Aug 2019 12:01:01 +0200 Subject: [PATCH] add switch to disable multithreading; fix lambda capture for MSVC --- README.md | 2 +- pocketfft_hdronly.h | 50 ++++++++++++++++++++++++++++++++------------- 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 9948ff1..a54e6bb 100644 --- a/README.md +++ b/README.md @@ -17,4 +17,4 @@ Features - makes use of CPU vector instructions when performing 2D and higher-dimensional transforms - supports prime-length transforms without degrading to O(N**2) performance -- has optional OpenMP support for multidimensional transforms +- has optional multi-threading support for multidimensional transforms diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 8d9db14..a90bd40 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -63,6 +63,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #endif +#ifndef POCKETFFT_NO_MULTITHREADING #include #include #include @@ -73,7 +74,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef POCKETFFT_PTHREADS # include #endif - +#endif #if defined(__GNUC__) #define POCKETFFT_NOINLINE __attribute__((noinline)) @@ -694,21 +695,39 @@ struct util // hack to avoid duplicate symbols if (axis>=shape.size()) throw invalid_argument("bad axis number"); } - static size_t thread_count (size_t nthreads, const shape_t &shape, - size_t axis, size_t vlen) - { - if (nthreads==1) return 1; - size_t size = prod(shape); - size_t parallel = size / (shape[axis] * vlen); - if (shape[axis] < 1000) - parallel /= 4; - size_t max_threads = nthreads == 0 ? - thread::hardware_concurrency() : nthreads; - return max(size_t(1), min(parallel, max_threads)); - } +#ifdef POCKETFFT_NO_MULTITHREADING + static size_t thread_count (size_t /*nthreads*/, const shape_t &/*shape*/, + size_t /*axis*/, size_t /*vlen*/) + { return 1; } +#else + static size_t thread_count (size_t nthreads, const shape_t &shape, + size_t axis, size_t vlen) + { + if (nthreads==1) return 1; + size_t size = prod(shape); + size_t parallel = size / (shape[axis] * vlen); + if (shape[axis] < 1000) + parallel /= 4; + size_t max_threads = nthreads == 0 ? + thread::hardware_concurrency() : nthreads; + return max(size_t(1), min(parallel, max_threads)); + } +#endif }; namespace threading { + +#ifdef POCKETFFT_NO_MULTITHREADING + +constexpr size_t thread_id = 0; +constexpr size_t num_threads = 1; + +template +void thread_map(size_t /* nthreads */, Func f) + { f(); } + +#else + thread_local size_t thread_id = 0; thread_local size_t num_threads = 1; @@ -892,6 +911,9 @@ void thread_map(size_t nthreads, Func f) if (ex) rethrow_exception(ex); } + +#endif + } // @@ -2789,7 +2811,6 @@ template class T_dcst4 // and is released under the 3-clause BSD license with friendly // permission of Matteo Frigo and Steven G. Johnson. - auto SGN = [](size_t i) {return (i&2) ? -sqrt2 : sqrt2;}; arr y(N); { size_t i=0, m=n2; @@ -2806,6 +2827,7 @@ template class T_dcst4 } rfft->exec(y.data(), fct, true); { + auto SGN = [sqrt2](size_t i) {return (i&2) ? -sqrt2 : sqrt2;}; c[n2] = y[0]*SGN(n2+1); size_t i=0, i1=1, k=1; for (; k