From e61cfd4d108f3f0577d42d46cefd2d299d6a4a80 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Sat, 30 May 2020 17:50:50 +0200 Subject: [PATCH] various improvements --- pypocketfft/demos/bench.py | 2 +- pypocketfft/demos/stress.py | 47 +++++++++++------------ setup.py | 13 ++++++- src/mr_util/math/fft.h | 74 ++++++++++++++++++++++++++++++++----- 4 files changed, 101 insertions(+), 35 deletions(-) diff --git a/pypocketfft/demos/bench.py b/pypocketfft/demos/bench.py index 44d0a4a..4a46d14 100644 --- a/pypocketfft/demos/bench.py +++ b/pypocketfft/demos/bench.py @@ -42,7 +42,7 @@ def measure_fftw_np_interface(a, nrepeat, nthr): def measure_pypocketfft(a, nrepeat, nthr): - import pypocketfft as ppf + import ducc_0_1.pypocketfft as ppf tmin = 1e38 b = a.copy() for i in range(nrepeat): diff --git a/pypocketfft/demos/stress.py b/pypocketfft/demos/stress.py index b2c4b35..6c668f8 100644 --- a/pypocketfft/demos/stress.py +++ b/pypocketfft/demos/stress.py @@ -29,10 +29,11 @@ def irfftn(a, axes=None, lastsize=0, inorm=0, nthreads=1): nthreads = 0 -def update_err(err, name, value): +def update_err(err, name, value, shape): if name in err and err[name] >= value: return err err[name] = value + print(shape) for (nm, v) in err.items(): print("{}: {}".format(nm, v)) print() @@ -52,89 +53,89 @@ def test(err): a_32 = a.astype(np.complex64) b = ifftn(fftn(a, axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) - err = update_err(err, "cmax", _l2error(a, b, axes)) + err = update_err(err, "cmax", _l2error(a, b, axes), shape) b = ifftn(fftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) - err = update_err(err, "cmax", _l2error(a.real, b, axes)) + err = update_err(err, "cmax", _l2error(a.real, b, axes), shape) b = fftn(ifftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) - err = update_err(err, "cmax", _l2error(a.real, b, axes)) + err = update_err(err, "cmax", _l2error(a.real, b, axes), shape) b = ifftn(fftn(a.astype(np.complex64), axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) - err = update_err(err, "cmaxf", _l2error(a.astype(np.complex64), b, axes)) + err = update_err(err, "cmaxf", _l2error(a.astype(np.complex64), b, axes), shape) b = irfftn(rfftn(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2, lastsize=lastsize, nthreads=nthreads) - err = update_err(err, "rmax", _l2error(a.real, b, axes)) + err = update_err(err, "rmax", _l2error(a.real, b, axes), shape) b = irfftn(rfftn(a.real.astype(np.float32), axes=axes, nthreads=nthreads), axes=axes, inorm=2, lastsize=lastsize, nthreads=nthreads) - err = update_err(err, "rmaxf", _l2error(a.real.astype(np.float32), b, axes)) + err = update_err(err, "rmaxf", _l2error(a.real.astype(np.float32), b, axes), shape) b = pypocketfft.separable_hartley( pypocketfft.separable_hartley(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) - err = update_err(err, "hmax", _l2error(a.real, b, axes)) + err = update_err(err, "hmax", _l2error(a.real, b, axes), shape) b = pypocketfft.genuine_hartley( pypocketfft.genuine_hartley(a.real, axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) - err = update_err(err, "hmax", _l2error(a.real, b, axes)) + err = update_err(err, "hmax", _l2error(a.real, b, axes), shape) b = pypocketfft.separable_hartley( pypocketfft.separable_hartley( a.real.astype(np.float32), axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) - err = update_err(err, "hmaxf", _l2error(a.real.astype(np.float32), b, axes)) + err = update_err(err, "hmaxf", _l2error(a.real.astype(np.float32), b, axes), shape) b = pypocketfft.genuine_hartley( pypocketfft.genuine_hartley(a.real.astype(np.float32), axes=axes, nthreads=nthreads), axes=axes, inorm=2, nthreads=nthreads) - err = update_err(err, "hmaxf", _l2error(a.real.astype(np.float32), b, axes)) + err = update_err(err, "hmaxf", _l2error(a.real.astype(np.float32), b, axes), shape) if all(a.shape[i] > 1 for i in axes): b = pypocketfft.dct( pypocketfft.dct(a.real, axes=axes, nthreads=nthreads, type=1), axes=axes, type=1, nthreads=nthreads, inorm=2) - err = update_err(err, "c1max", _l2error(a.real, b, axes)) + err = update_err(err, "c1max", _l2error(a.real, b, axes), shape) b = pypocketfft.dct( pypocketfft.dct(a_32.real, axes=axes, nthreads=nthreads, type=1), axes=axes, type=1, nthreads=nthreads, inorm=2) - err = update_err(err, "c1maxf", _l2error(a_32.real, b, axes)) + err = update_err(err, "c1maxf", _l2error(a_32.real, b, axes), shape) b = pypocketfft.dct( pypocketfft.dct(a.real, axes=axes, nthreads=nthreads, type=2), axes=axes, type=3, nthreads=nthreads, inorm=2) - err = update_err(err, "c23max", _l2error(a.real, b, axes)) + err = update_err(err, "c23max", _l2error(a.real, b, axes), shape) b = pypocketfft.dct( pypocketfft.dct(a_32.real, axes=axes, nthreads=nthreads, type=2), axes=axes, type=3, nthreads=nthreads, inorm=2) - err = update_err(err, "c23maxf", _l2error(a_32.real, b, axes)) + err = update_err(err, "c23maxf", _l2error(a_32.real, b, axes), shape) b = pypocketfft.dct( pypocketfft.dct(a.real, axes=axes, nthreads=nthreads, type=4), axes=axes, type=4, nthreads=nthreads, inorm=2) - err = update_err(err, "c4max", _l2error(a.real, b, axes)) + err = update_err(err, "c4max", _l2error(a.real, b, axes), shape) b = pypocketfft.dct( pypocketfft.dct(a_32.real, axes=axes, nthreads=nthreads, type=4), axes=axes, type=4, nthreads=nthreads, inorm=2) - err = update_err(err, "c4maxf", _l2error(a_32.real, b, axes)) + err = update_err(err, "c4maxf", _l2error(a_32.real, b, axes), shape) b = pypocketfft.dst( pypocketfft.dst(a.real, axes=axes, nthreads=nthreads, type=1), axes=axes, type=1, nthreads=nthreads, inorm=2) - err = update_err(err, "s1max", _l2error(a.real, b, axes)) + err = update_err(err, "s1max", _l2error(a.real, b, axes), shape) b = pypocketfft.dst( pypocketfft.dst(a_32.real, axes=axes, nthreads=nthreads, type=1), axes=axes, type=1, nthreads=nthreads, inorm=2) - err = update_err(err, "s1maxf", _l2error(a_32.real, b, axes)) + err = update_err(err, "s1maxf", _l2error(a_32.real, b, axes), shape) b = pypocketfft.dst( pypocketfft.dst(a.real, axes=axes, nthreads=nthreads, type=2), axes=axes, type=3, nthreads=nthreads, inorm=2) - err = update_err(err, "s23max", _l2error(a.real, b, axes)) + err = update_err(err, "s23max", _l2error(a.real, b, axes), shape) b = pypocketfft.dst( pypocketfft.dst(a_32.real, axes=axes, nthreads=nthreads, type=2), axes=axes, type=3, nthreads=nthreads, inorm=2) - err = update_err(err, "s23maxf", _l2error(a_32.real, b, axes)) + err = update_err(err, "s23maxf", _l2error(a_32.real, b, axes), shape) b = pypocketfft.dst( pypocketfft.dst(a.real, axes=axes, nthreads=nthreads, type=4), axes=axes, type=4, nthreads=nthreads, inorm=2) - err = update_err(err, "s4max", _l2error(a.real, b, axes)) + err = update_err(err, "s4max", _l2error(a.real, b, axes), shape) b = pypocketfft.dst( pypocketfft.dst(a_32.real, axes=axes, nthreads=nthreads, type=4), axes=axes, type=4, nthreads=nthreads, inorm=2) - err = update_err(err, "s4maxf", _l2error(a_32.real, b, axes)) + err = update_err(err, "s4maxf", _l2error(a_32.real, b, axes), shape) err = dict() diff --git a/setup.py b/setup.py index 9aa158d..296f5dc 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,8 @@ from setuptools import setup, Extension import sys +import os.path +import itertools +from glob import iglob pkgname = 'ducc_0_1' @@ -12,6 +15,13 @@ class _deferred_pybind11_include(object): return pybind11.get_include(self.user) +def _get_files_by_suffix(directory, suffix): + path = directory + iterable_sources = (iglob(os.path.join(root, '*.'+suffix)) + for root, dirs, files in os.walk(path)) + return list(itertools.chain.from_iterable(iterable_sources)) + + include_dirs = ['./src/', _deferred_pybind11_include(True), _deferred_pybind11_include()] @@ -34,10 +44,11 @@ else: # if you don't want debugging info, add "-s" to python_module_link_args def get_extension_modules(): + depfiles = _get_files_by_suffix('.', 'h') + _get_files_by_suffix('.', 'cc') + ['setup.py'] return [Extension(pkgname, language='c++', sources=['module.cc'], - depends=[], + depends=depfiles, include_dirs=include_dirs, define_macros=define_macros, extra_compile_args=extra_compile_args, diff --git a/src/mr_util/math/fft.h b/src/mr_util/math/fft.h index 7010e08..fd778a5 100644 --- a/src/mr_util/math/fft.h +++ b/src/mr_util/math/fft.h @@ -862,19 +862,73 @@ struct ExecC2C template void copy_hartley(const multi_iter &it, const native_simd *MRUTIL_RESTRICT src, fmav &dst) { - auto ptr = dst.vdata(); - for (size_t j=0; j void copy_hartley(const multi_iter &it, -- GitLab