Commit 5e630a15 authored by Martin Reinecke's avatar Martin Reinecke

cleanup

parent db4cc7cc
...@@ -44,13 +44,13 @@ MRUTIL_NOINLINE void general_convolve(const fmav<T> &in, fmav<T> &out, ...@@ -44,13 +44,13 @@ MRUTIL_NOINLINE void general_convolve(const fmav<T> &in, fmav<T> &out,
const size_t axis, const vector<T0> &kernel, size_t nthreads, const size_t axis, const vector<T0> &kernel, size_t nthreads,
const Exec &exec) const Exec &exec)
{ {
std::shared_ptr<Tplan> plan1, plan2; std::unique_ptr<Tplan> plan1, plan2;
size_t l_in=in.shape(axis), l_out=out.shape(axis); size_t l_in=in.shape(axis), l_out=out.shape(axis);
size_t l_min=std::min(l_in, l_out), l_max=std::max(l_in, l_out); size_t l_min=std::min(l_in, l_out), l_max=std::max(l_in, l_out);
MR_assert(kernel.size()==l_min/2+1, "bad kernel size"); MR_assert(kernel.size()==l_min/2+1, "bad kernel size");
plan1 = get_plan<Tplan>(l_in); plan1 = std::make_unique<Tplan>(l_in);
plan2 = get_plan<Tplan>(l_out); plan2 = std::make_unique<Tplan>(l_out);
execParallel( execParallel(
util::thread_count(nthreads, in, axis, native_simd<T0>::size()), util::thread_count(nthreads, in, axis, native_simd<T0>::size()),
......
...@@ -41,10 +41,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ...@@ -41,10 +41,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "mr_util/math/fft1d.h" #include "mr_util/math/fft1d.h"
#ifndef POCKETFFT_CACHE_SIZE
#define POCKETFFT_CACHE_SIZE 16
#endif
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <stdexcept> #include <stdexcept>
...@@ -52,9 +48,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ...@@ -52,9 +48,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <vector> #include <vector>
#include <complex> #include <complex>
#include <algorithm> #include <algorithm>
#if POCKETFFT_CACHE_SIZE!=0
#include <array>
#endif
#include "mr_util/infra/threading.h" #include "mr_util/infra/threading.h"
#include "mr_util/infra/simd.h" #include "mr_util/infra/simd.h"
#include "mr_util/infra/mav.h" #include "mr_util/infra/mav.h"
...@@ -366,73 +359,6 @@ template<typename T0> class T_dcst4 ...@@ -366,73 +359,6 @@ template<typename T0> class T_dcst4
// multi-D infrastructure // multi-D infrastructure
// //
template<typename T> std::shared_ptr<T> get_plan(size_t length)
{
#if POCKETFFT_CACHE_SIZE==0
return std::make_shared<T>(length);
#else
constexpr size_t nmax=POCKETFFT_CACHE_SIZE;
static std::array<std::shared_ptr<T>, nmax> cache;
static std::array<size_t, nmax> last_access{{0}};
static size_t access_counter = 0;
auto find_in_cache = [&]() -> std::shared_ptr<T>
{
for (size_t i=0; i<nmax; ++i)
if (cache[i] && (cache[i]->length()==length))
{
// no need to update if this is already the most recent entry
if (last_access[i]!=access_counter)
{
last_access[i] = ++access_counter;
// Guard against overflow
if (access_counter == 0)
last_access.fill(0);
}
return cache[i];
}
return nullptr;
};
#ifdef MRUTIL_NO_THREADING
auto p = find_in_cache();
if (p) return p;
auto plan = std::make_shared<T>(length);
size_t lru = 0;
for (size_t i=1; i<nmax; ++i)
if (last_access[i] < last_access[lru])
lru = i;
cache[lru] = plan;
last_access[lru] = ++access_counter;
return plan;
#else
static std::mutex mut;
{
std::lock_guard<std::mutex> lock(mut);
auto p = find_in_cache();
if (p) return p;
}
auto plan = std::make_shared<T>(length);
{
std::lock_guard<std::mutex> lock(mut);
auto p = find_in_cache();
if (p) return p;
size_t lru = 0;
for (size_t i=1; i<nmax; ++i)
if (last_access[i] < last_access[lru])
lru = i;
cache[lru] = plan;
last_access[lru] = ++access_counter;
}
return plan;
#endif
#endif
}
template<size_t N> class multi_iter template<size_t N> class multi_iter
{ {
private: private:
...@@ -627,7 +553,7 @@ class rev_iter ...@@ -627,7 +553,7 @@ class rev_iter
size_t remaining() const { return rem; } size_t remaining() const { return rem; }
}; };
template<typename T, typename T0> aligned_array<T> alloc_tmp template<typename T, typename T0> MRUTIL_NOINLINE aligned_array<T> alloc_tmp
(const fmav_info &info, size_t axsize) (const fmav_info &info, size_t axsize)
{ {
auto othersize = info.size()/axsize; auto othersize = info.size()/axsize;
...@@ -636,7 +562,7 @@ template<typename T, typename T0> aligned_array<T> alloc_tmp ...@@ -636,7 +562,7 @@ template<typename T, typename T0> aligned_array<T> alloc_tmp
return aligned_array<T>(tmpsize); return aligned_array<T>(tmpsize);
} }
template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it, template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_iter<vlen> &it,
const fmav<Cmplx<T>> &src, Cmplx<native_simd<T>> *MRUTIL_RESTRICT dst) const fmav<Cmplx<T>> &src, Cmplx<native_simd<T>> *MRUTIL_RESTRICT dst)
{ {
if (it.uniform_i()) if (it.uniform_i())
...@@ -695,7 +621,7 @@ template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it, ...@@ -695,7 +621,7 @@ template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it,
} }
} }
template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it, template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_iter<vlen> &it,
const fmav<T> &src, native_simd<T> *MRUTIL_RESTRICT dst) const fmav<T> &src, native_simd<T> *MRUTIL_RESTRICT dst)
{ {
if (it.uniform_i()) if (it.uniform_i())
...@@ -722,7 +648,7 @@ template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it, ...@@ -722,7 +648,7 @@ template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it,
dst[i][j] = src[it.iofs(j,i)]; dst[i][j] = src[it.iofs(j,i)];
} }
template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it, template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_iter<vlen> &it,
const fmav<T> &src, T *MRUTIL_RESTRICT dst) const fmav<T> &src, T *MRUTIL_RESTRICT dst)
{ {
if (dst == &src[it.iofs(0)]) return; // in-place if (dst == &src[it.iofs(0)]) return; // in-place
...@@ -730,7 +656,7 @@ template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it, ...@@ -730,7 +656,7 @@ template <typename T, size_t vlen> void copy_input(const multi_iter<vlen> &it,
dst[i] = src[it.iofs(i)]; dst[i] = src[it.iofs(i)];
} }
template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it, template<typename T, size_t vlen> MRUTIL_NOINLINE void copy_output(const multi_iter<vlen> &it,
const Cmplx<native_simd<T>> *MRUTIL_RESTRICT src, fmav<Cmplx<T>> &dst) const Cmplx<native_simd<T>> *MRUTIL_RESTRICT src, fmav<Cmplx<T>> &dst)
{ {
if (it.uniform_o()) if (it.uniform_o())
...@@ -760,7 +686,7 @@ template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it, ...@@ -760,7 +686,7 @@ template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it,
} }
} }
template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it, template<typename T, size_t vlen> MRUTIL_NOINLINE void copy_output(const multi_iter<vlen> &it,
const native_simd<T> *MRUTIL_RESTRICT src, fmav<T> &dst) const native_simd<T> *MRUTIL_RESTRICT src, fmav<T> &dst)
{ {
if (it.uniform_o()) if (it.uniform_o())
...@@ -790,7 +716,7 @@ template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it, ...@@ -790,7 +716,7 @@ template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it,
} }
} }
template<typename T, size_t vlen> void copy_output(const multi_iter<vlen> &it, template<typename T, size_t vlen> MRUTIL_NOINLINE void copy_output(const multi_iter<vlen> &it,
const T *MRUTIL_RESTRICT src, fmav<T> &dst) const T *MRUTIL_RESTRICT src, fmav<T> &dst)
{ {
auto ptr=dst.vdata(); auto ptr=dst.vdata();
...@@ -809,13 +735,13 @@ MRUTIL_NOINLINE void general_nd(const fmav<T> &in, fmav<T> &out, ...@@ -809,13 +735,13 @@ MRUTIL_NOINLINE void general_nd(const fmav<T> &in, fmav<T> &out,
const shape_t &axes, T0 fct, size_t nthreads, const Exec & exec, const shape_t &axes, T0 fct, size_t nthreads, const Exec & exec,
const bool allow_inplace=true) const bool allow_inplace=true)
{ {
std::shared_ptr<Tplan> plan; std::unique_ptr<Tplan> plan;
for (size_t iax=0; iax<axes.size(); ++iax) for (size_t iax=0; iax<axes.size(); ++iax)
{ {
size_t len=in.shape(axes[iax]); size_t len=in.shape(axes[iax]);
if ((!plan) || (len!=plan->length())) if ((!plan) || (len!=plan->length()))
plan = get_plan<Tplan>(len); plan = std::make_unique<Tplan>(len);
execParallel( execParallel(
util::thread_count(nthreads, in, axes[iax], native_simd<T0>::size()), util::thread_count(nthreads, in, axes[iax], native_simd<T0>::size()),
...@@ -849,7 +775,7 @@ struct ExecC2C ...@@ -849,7 +775,7 @@ struct ExecC2C
{ {
bool forward; bool forward;
template <typename T0, typename T, size_t vlen> void operator() ( template <typename T0, typename T, size_t vlen> MRUTIL_NOINLINE void operator() (
const multi_iter<vlen> &it, const fmav<Cmplx<T0>> &in, const multi_iter<vlen> &it, const fmav<Cmplx<T0>> &in,
fmav<Cmplx<T0>> &out, T *buf, const pocketfft_c<T0> &plan, T0 fct) const fmav<Cmplx<T0>> &out, T *buf, const pocketfft_c<T0> &plan, T0 fct) const
{ {
...@@ -859,7 +785,7 @@ struct ExecC2C ...@@ -859,7 +785,7 @@ struct ExecC2C
} }
}; };
template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it, template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_hartley(const multi_iter<vlen> &it,
const native_simd<T> *MRUTIL_RESTRICT src, fmav<T> &dst) const native_simd<T> *MRUTIL_RESTRICT src, fmav<T> &dst)
{ {
if (it.uniform_o()) if (it.uniform_o())
...@@ -931,7 +857,7 @@ template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it, ...@@ -931,7 +857,7 @@ template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it,
} }
} }
template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it, template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_hartley(const multi_iter<vlen> &it,
const T *MRUTIL_RESTRICT src, fmav<T> &dst) const T *MRUTIL_RESTRICT src, fmav<T> &dst)
{ {
auto ptr = dst.vdata(); auto ptr = dst.vdata();
...@@ -948,7 +874,7 @@ template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it, ...@@ -948,7 +874,7 @@ template <typename T, size_t vlen> void copy_hartley(const multi_iter<vlen> &it,
struct ExecHartley struct ExecHartley
{ {
template <typename T0, typename T, size_t vlen> void operator () ( template <typename T0, typename T, size_t vlen> MRUTIL_NOINLINE void operator () (
const multi_iter<vlen> &it, const fmav<T0> &in, fmav<T0> &out, const multi_iter<vlen> &it, const fmav<T0> &in, fmav<T0> &out,
T * buf, const pocketfft_r<T0> &plan, T0 fct) const T * buf, const pocketfft_r<T0> &plan, T0 fct) const
{ {
...@@ -965,7 +891,7 @@ struct ExecDcst ...@@ -965,7 +891,7 @@ struct ExecDcst
bool cosine; bool cosine;
template <typename T0, typename T, typename Tplan, size_t vlen> template <typename T0, typename T, typename Tplan, size_t vlen>
void operator () (const multi_iter<vlen> &it, const fmav<T0> &in, MRUTIL_NOINLINE void operator () (const multi_iter<vlen> &it, const fmav<T0> &in,
fmav <T0> &out, T * buf, const Tplan &plan, T0 fct) const fmav <T0> &out, T * buf, const Tplan &plan, T0 fct) const
{ {
copy_input(it, in, buf); copy_input(it, in, buf);
...@@ -978,7 +904,7 @@ template<typename T> MRUTIL_NOINLINE void general_r2c( ...@@ -978,7 +904,7 @@ template<typename T> MRUTIL_NOINLINE void general_r2c(
const fmav<T> &in, fmav<Cmplx<T>> &out, size_t axis, bool forward, T fct, const fmav<T> &in, fmav<Cmplx<T>> &out, size_t axis, bool forward, T fct,
size_t nthreads) size_t nthreads)
{ {
auto plan = get_plan<pocketfft_r<T>>(in.shape(axis)); auto plan = std::make_unique<pocketfft_r<T>>(in.shape(axis));
size_t len=in.shape(axis); size_t len=in.shape(axis);
execParallel( execParallel(
util::thread_count(nthreads, in, axis, native_simd<T>::size()), util::thread_count(nthreads, in, axis, native_simd<T>::size()),
...@@ -1035,7 +961,7 @@ template<typename T> MRUTIL_NOINLINE void general_c2r( ...@@ -1035,7 +961,7 @@ template<typename T> MRUTIL_NOINLINE void general_c2r(
const fmav<Cmplx<T>> &in, fmav<T> &out, size_t axis, bool forward, T fct, const fmav<Cmplx<T>> &in, fmav<T> &out, size_t axis, bool forward, T fct,
size_t nthreads) size_t nthreads)
{ {
auto plan = get_plan<pocketfft_r<T>>(out.shape(axis)); auto plan = std::make_unique<pocketfft_r<T>>(out.shape(axis));
size_t len=out.shape(axis); size_t len=out.shape(axis);
execParallel( execParallel(
util::thread_count(nthreads, in, axis, native_simd<T>::size()), util::thread_count(nthreads, in, axis, native_simd<T>::size()),
...@@ -1107,7 +1033,7 @@ struct ExecR2R ...@@ -1107,7 +1033,7 @@ struct ExecR2R
{ {
bool r2c, forward; bool r2c, forward;
template <typename T0, typename T, size_t vlen> void operator () ( template <typename T0, typename T, size_t vlen> MRUTIL_NOINLINE void operator () (
const multi_iter<vlen> &it, const fmav<T0> &in, fmav<T0> &out, T * buf, const multi_iter<vlen> &it, const fmav<T0> &in, fmav<T0> &out, T * buf,
const pocketfft_r<T0> &plan, T0 fct) const const pocketfft_r<T0> &plan, T0 fct) const
{ {
...@@ -1123,7 +1049,7 @@ struct ExecR2R ...@@ -1123,7 +1049,7 @@ struct ExecR2R
} }
}; };
template<typename T> void c2c(const fmav<std::complex<T>> &in, template<typename T> MRUTIL_NOINLINE void c2c(const fmav<std::complex<T>> &in,
fmav<std::complex<T>> &out, const shape_t &axes, bool forward, fmav<std::complex<T>> &out, const shape_t &axes, bool forward,
T fct, size_t nthreads=1) T fct, size_t nthreads=1)
{ {
...@@ -1134,7 +1060,7 @@ template<typename T> void c2c(const fmav<std::complex<T>> &in, ...@@ -1134,7 +1060,7 @@ template<typename T> void c2c(const fmav<std::complex<T>> &in,
general_nd<pocketfft_c<T>>(in2, out2, axes, fct, nthreads, ExecC2C{forward}); general_nd<pocketfft_c<T>>(in2, out2, axes, fct, nthreads, ExecC2C{forward});
} }
template<typename T> void dct(const fmav<T> &in, fmav<T> &out, template<typename T> MRUTIL_NOINLINE void dct(const fmav<T> &in, fmav<T> &out,
const shape_t &axes, int type, T fct, bool ortho, size_t nthreads=1) const shape_t &axes, int type, T fct, bool ortho, size_t nthreads=1)
{ {
if ((type<1) || (type>4)) throw std::invalid_argument("invalid DCT type"); if ((type<1) || (type>4)) throw std::invalid_argument("invalid DCT type");
...@@ -1149,7 +1075,7 @@ template<typename T> void dct(const fmav<T> &in, fmav<T> &out, ...@@ -1149,7 +1075,7 @@ template<typename T> void dct(const fmav<T> &in, fmav<T> &out,
general_nd<T_dcst23<T>>(in, out, axes, fct, nthreads, exec); general_nd<T_dcst23<T>>(in, out, axes, fct, nthreads, exec);
} }
template<typename T> void dst(const fmav<T> &in, fmav<T> &out, template<typename T> MRUTIL_NOINLINE void dst(const fmav<T> &in, fmav<T> &out,
const shape_t &axes, int type, T fct, bool ortho, size_t nthreads=1) const shape_t &axes, int type, T fct, bool ortho, size_t nthreads=1)
{ {
if ((type<1) || (type>4)) throw std::invalid_argument("invalid DST type"); if ((type<1) || (type>4)) throw std::invalid_argument("invalid DST type");
...@@ -1163,7 +1089,7 @@ template<typename T> void dst(const fmav<T> &in, fmav<T> &out, ...@@ -1163,7 +1089,7 @@ template<typename T> void dst(const fmav<T> &in, fmav<T> &out,
general_nd<T_dcst23<T>>(in, out, axes, fct, nthreads, exec); general_nd<T_dcst23<T>>(in, out, axes, fct, nthreads, exec);
} }
template<typename T> void r2c(const fmav<T> &in, template<typename T> MRUTIL_NOINLINE void r2c(const fmav<T> &in,
fmav<std::complex<T>> &out, size_t axis, bool forward, T fct, fmav<std::complex<T>> &out, size_t axis, bool forward, T fct,
size_t nthreads=1) size_t nthreads=1)
{ {
...@@ -1173,7 +1099,7 @@ template<typename T> void r2c(const fmav<T> &in, ...@@ -1173,7 +1099,7 @@ template<typename T> void r2c(const fmav<T> &in,
general_r2c(in, out2, axis, forward, fct, nthreads); general_r2c(in, out2, axis, forward, fct, nthreads);
} }
template<typename T> void r2c(const fmav<T> &in, template<typename T> MRUTIL_NOINLINE void r2c(const fmav<T> &in,
fmav<std::complex<T>> &out, const shape_t &axes, fmav<std::complex<T>> &out, const shape_t &axes,
bool forward, T fct, size_t nthreads=1) bool forward, T fct, size_t nthreads=1)
{ {
...@@ -1186,7 +1112,7 @@ template<typename T> void r2c(const fmav<T> &in, ...@@ -1186,7 +1112,7 @@ template<typename T> void r2c(const fmav<T> &in,
c2c(out, out, newaxes, forward, T(1), nthreads); c2c(out, out, newaxes, forward, T(1), nthreads);
} }
template<typename T> void c2r(const fmav<std::complex<T>> &in, template<typename T> MRUTIL_NOINLINE void c2r(const fmav<std::complex<T>> &in,
fmav<T> &out, size_t axis, bool forward, T fct, size_t nthreads=1) fmav<T> &out, size_t axis, bool forward, T fct, size_t nthreads=1)
{ {
util::sanity_check_cr(in, out, axis); util::sanity_check_cr(in, out, axis);
...@@ -1195,7 +1121,7 @@ template<typename T> void c2r(const fmav<std::complex<T>> &in, ...@@ -1195,7 +1121,7 @@ template<typename T> void c2r(const fmav<std::complex<T>> &in,
general_c2r(in2, out, axis, forward, fct, nthreads); general_c2r(in2, out, axis, forward, fct, nthreads);
} }
template<typename T> void c2r(const fmav<std::complex<T>> &in, template<typename T> MRUTIL_NOINLINE void c2r(const fmav<std::complex<T>> &in,
fmav<T> &out, const shape_t &axes, bool forward, T fct, fmav<T> &out, const shape_t &axes, bool forward, T fct,
size_t nthreads=1) size_t nthreads=1)
{ {
...@@ -1209,7 +1135,7 @@ template<typename T> void c2r(const fmav<std::complex<T>> &in, ...@@ -1209,7 +1135,7 @@ template<typename T> void c2r(const fmav<std::complex<T>> &in,
c2r(atmp, out, axes.back(), forward, fct, nthreads); c2r(atmp, out, axes.back(), forward, fct, nthreads);
} }
template<typename T> void r2r_fftpack(const fmav<T> &in, template<typename T> MRUTIL_NOINLINE void r2r_fftpack(const fmav<T> &in,
fmav<T> &out, const shape_t &axes, bool real2hermitian, bool forward, fmav<T> &out, const shape_t &axes, bool real2hermitian, bool forward,
T fct, size_t nthreads=1) T fct, size_t nthreads=1)
{ {
...@@ -1219,7 +1145,7 @@ template<typename T> void r2r_fftpack(const fmav<T> &in, ...@@ -1219,7 +1145,7 @@ template<typename T> void r2r_fftpack(const fmav<T> &in,
ExecR2R{real2hermitian, forward}); ExecR2R{real2hermitian, forward});
} }
template<typename T> void r2r_separable_hartley(const fmav<T> &in, template<typename T> MRUTIL_NOINLINE void r2r_separable_hartley(const fmav<T> &in,
fmav<T> &out, const shape_t &axes, T fct, size_t nthreads=1) fmav<T> &out, const shape_t &axes, T fct, size_t nthreads=1)
{ {
util::sanity_check_onetype(in, out, in.data()==out.data(), axes); util::sanity_check_onetype(in, out, in.data()==out.data(), axes);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment