Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
On Thursday, 7th July from 1 to 3 pm there will be a maintenance with a short downtime of GitLab.
Open sidebar
Simon Perkins
ducc
Commits
e266167d
Commit
e266167d
authored
May 18, 2020
by
Martin Reinecke
Browse files
slowly return to sanity part 1/n
parent
07b7644b
Changes
4
Hide whitespace changes
Inline
Side-by-side
pypocketfft/demos/bench.py
View file @
e266167d
...
...
@@ -118,26 +118,26 @@ def bench_nd(ndim, nmax, nthr, ntry, tp, funcs, nrepeat, ttl="", filename="",
tmp
=
func
(
a
,
nrepeat
,
nthr
)
res
.
append
(
tmp
[
0
])
output
.
append
(
tmp
[
1
])
#
print("{0:5.2e}/{1:5.2e} = {2:5.2f} L2 error={3}".format(results[0][n],results[1][n],results[0][n]/results[1][n],_l2error(output[0],output[1])))
#
results = np.array(results)
#
plt.title("{}: {}D, {}, max_extent={}".format(
#
ttl, ndim, str(tp), nmax))
#
plt.xlabel("time ratio")
#
plt.ylabel("counts")
#
plt.hist(results[0, :]/results[1, :], bins="auto")
#
if filename != "":
#
plt.savefig(filename)
#
plt.show()
funcs
=
(
measure_pypocketfft
,)
print
(
"{0:5.2e}/{1:5.2e} = {2:5.2f} L2 error={3}"
.
format
(
results
[
0
][
n
],
results
[
1
][
n
],
results
[
0
][
n
]
/
results
[
1
][
n
],
_l2error
(
output
[
0
],
output
[
1
])))
results
=
np
.
array
(
results
)
plt
.
title
(
"{}: {}D, {}, max_extent={}"
.
format
(
ttl
,
ndim
,
str
(
tp
),
nmax
))
plt
.
xlabel
(
"time ratio"
)
plt
.
ylabel
(
"counts"
)
plt
.
hist
(
results
[
0
,
:]
/
results
[
1
,
:],
bins
=
"auto"
)
if
filename
!=
""
:
plt
.
savefig
(
filename
)
plt
.
show
()
funcs
=
(
measure_pypocketfft
,
measure_fftw
)
ttl
=
"pypocketfft/FFTW()"
ntry
=
100
nthr
=
1
nice_sizes
=
True
#
bench_nd(1, 8192, nthr, ntry, "c16", funcs, 10, ttl, "1d.png", nice_sizes)
bench_nd
(
1
,
8192
,
nthr
,
ntry
,
"c16"
,
funcs
,
10
,
ttl
,
"1d.png"
,
nice_sizes
)
bench_nd
(
2
,
2048
,
nthr
,
ntry
,
"c16"
,
funcs
,
2
,
ttl
,
"2d.png"
,
nice_sizes
)
#
bench_nd(3, 256, nthr, ntry, "c16", funcs, 2, ttl, "3d.png", nice_sizes)
#
bench_nd(1, 8192, nthr, ntry, "c8", funcs, 10, ttl, "1d_single.png", nice_sizes)
#
bench_nd(2, 2048, nthr, ntry, "c8", funcs, 2, ttl, "2d_single.png", nice_sizes)
#
bench_nd(3, 256, nthr, ntry, "c8", funcs, 2, ttl, "3d_single.png", nice_sizes)
bench_nd
(
3
,
256
,
nthr
,
ntry
,
"c16"
,
funcs
,
2
,
ttl
,
"3d.png"
,
nice_sizes
)
bench_nd
(
1
,
8192
,
nthr
,
ntry
,
"c8"
,
funcs
,
10
,
ttl
,
"1d_single.png"
,
nice_sizes
)
bench_nd
(
2
,
2048
,
nthr
,
ntry
,
"c8"
,
funcs
,
2
,
ttl
,
"2d_single.png"
,
nice_sizes
)
bench_nd
(
3
,
256
,
nthr
,
ntry
,
"c8"
,
funcs
,
2
,
ttl
,
"3d_single.png"
,
nice_sizes
)
src/mr_util/infra/simd.h
View file @
e266167d
...
...
@@ -52,10 +52,6 @@ namespace mr {
namespace
detail_simd
{
template
<
typename
T
>
T
myexp
(
T
);
// {return -42;}
template
<
>
inline
double
myexp
(
double
v
)
{
return
std
::
exp
(
v
);}
template
<
>
inline
float
myexp
(
float
v
)
{
return
std
::
exp
(
v
);}
template
<
typename
T
>
constexpr
inline
bool
vectorizable
=
false
;
template
<
>
constexpr
inline
bool
vectorizable
<
float
>
=
true
;
template
<
>
constexpr
inline
bool
vectorizable
<
double
>
=
true
;
...
...
@@ -199,8 +195,6 @@ template<typename Op, typename T, size_t len> T reduce(const vtp<T, len> &v, Op
res
=
op
(
res
,
v
[
i
]);
return
res
;
}
template
<
typename
T
,
size_t
len
>
vtp
<
T
,
len
>
exp
(
const
vtp
<
T
,
len
>
&
v
)
{
return
v
.
apply
(
myexp
<
T
>
);
}
template
<
typename
T
>
class
pseudoscalar
{
private:
...
...
@@ -420,7 +414,6 @@ using detail_simd::native_simd;
using
detail_simd
::
reduce
;
using
detail_simd
::
max
;
using
detail_simd
::
abs
;
using
detail_simd
::
exp
;
using
detail_simd
::
sqrt
;
using
detail_simd
::
any_of
;
using
detail_simd
::
none_of
;
...
...
src/mr_util/infra/useful_macros.h
View file @
e266167d
...
...
@@ -5,14 +5,20 @@
#define MRUTIL_NOINLINE __attribute__((noinline))
#define MRUTIL_RESTRICT __restrict__
#define MRUTIL_ALIGNED(align) __attribute__ ((aligned(align)))
#define MRUTIL_PREFETCH_R(addr) __builtin_prefetch(addr);
#define MRUTIL_PREFETCH_W(addr) __builtin_prefetch(addr,1);
#elif defined(_MSC_VER)
#define MRUTIL_NOINLINE __declspec(noinline)
#define MRUTIL_RESTRICT __restrict
#define MRUTIL_ALIGNED(align)
#define MRUTIL_PREFETCH_R(addr)
#define MRUTIL_PREFETCH_W(addr)
#else
#define MRUTIL_NOINLINE
#define MRUTIL_RESTRICT
#define MRUTIL_ALIGNED(align)
#define MRUTIL_PREFETCH_R(addr)
#define MRUTIL_PREFETCH_W(addr)
#endif
#endif
src/mr_util/math/fft.h
View file @
e266167d
...
...
@@ -38,7 +38,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef MRUTIL_FFT_H
#define MRUTIL_FFT_H
#include <iostream>
#include "mr_util/math/fft1d.h"
#ifndef POCKETFFT_CACHE_SIZE
...
...
@@ -500,7 +500,6 @@ template<size_t N> class multi_iter
shp
.
erase
(
shp
.
begin
()
+
ptrdiff_t
(
i
));
pos
.
pop_back
();
done
=
false
;
// std::cout << "reduced dims" << std::endl;
}
}
if
(
pos
.
size
()
>
0
)
...
...
@@ -543,11 +542,9 @@ template<size_t N> class multi_iter
uni_i
=
uni_o
=
true
;
for
(
size_t
i
=
1
;
i
<
n
;
++
i
)
{
// std::cout << (p_i[i]-p_i[i-1]) << " " << sstr_i << std::endl;
uni_i
=
uni_i
&&
(
p_i
[
i
]
-
p_i
[
i
-
1
]
==
sstr_i
);
uni_o
=
uni_o
&&
(
p_o
[
i
]
-
p_o
[
i
-
1
]
==
sstr_o
);
}
// for (size_t i=0; i<n; ++i)
rem
-=
n
;
}
ptrdiff_t
iofs
(
size_t
i
)
const
{
return
p_i
[
0
]
+
ptrdiff_t
(
i
)
*
cstr_i
;
}
...
...
@@ -657,25 +654,6 @@ template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input_j1(const mult
dst
[
i
]
=
stmp
;
}
}
template
<
typename
T
,
size_t
vlen
>
MRUTIL_NOINLINE
void
copy_input_j1_a16
(
const
multi_iter
<
vlen
>
&
it
,
const
fmav
<
Cmplx
<
T
>>
&
src
,
Cmplx
<
native_simd
<
T
>>
*
MRUTIL_RESTRICT
dst
)
{
auto
ptr
=
&
src
[
it
.
iofs_uni
(
0
,
0
)];
ptr
=
reinterpret_cast
<
Cmplx
<
T
>
*>
(
__builtin_assume_aligned
(
ptr
,
16
));
auto
istr
=
it
.
stride_in
();
size_t
i
=
0
;
for
(;
i
<
it
.
length_in
();
++
i
)
{
Cmplx
<
native_simd
<
T
>>
stmp
;
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
auto
tmp
=
ptr
[
j
+
i
*
istr
];
stmp
.
r
[
j
]
=
tmp
.
r
;
stmp
.
i
[
j
]
=
tmp
.
i
;
}
dst
[
i
]
=
stmp
;
}
}
template
<
typename
T
,
size_t
vlen
>
MRUTIL_NOINLINE
void
copy_input_i1
(
const
multi_iter
<
vlen
>
&
it
,
const
fmav
<
Cmplx
<
T
>>
&
src
,
Cmplx
<
native_simd
<
T
>>
*
MRUTIL_RESTRICT
dst
)
{
...
...
@@ -694,9 +672,6 @@ template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input_i1(const mult
dst
[
i
]
=
stmp
;
}
}
#define MRFFT_PREFETCH
#define MRUTIL_PREFETCH_R(addr) __builtin_prefetch(addr);
#define MRUTIL_PREFETCH_W(addr) __builtin_prefetch(addr,1);
template
<
typename
T
,
size_t
vlen
>
MRUTIL_NOINLINE
void
copy_input
(
const
multi_iter
<
vlen
>
&
it
,
const
fmav
<
Cmplx
<
T
>>
&
src
,
Cmplx
<
native_simd
<
T
>>
*
MRUTIL_RESTRICT
dst
)
{
...
...
@@ -706,36 +681,29 @@ template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_i
auto
jstr
=
it
.
unistride_i
();
auto
istr
=
it
.
stride_in
();
if
(
istr
==
1
)
copy_input_i1
(
it
,
src
,
dst
);
// for (size_t i=0; i<it.length_in(); ++i)
// {
// Cmplx<native_simd<T>> stmp;
// for (size_t j=0; j<vlen; ++j)
// {
// auto tmp = ptr[j*jstr+i];
// stmp.r[j] = tmp.r;
// stmp.i[j] = tmp.i;
// }
// dst[i] = stmp;
// }
for
(
ptrdiff_t
i
=
0
;
i
<
it
.
length_in
();
++
i
)
{
Cmplx
<
native_simd
<
T
>>
stmp
;
for
(
ptrdiff_t
j
=
0
;
j
<
vlen
;
++
j
)
{
auto
tmp
=
ptr
[
j
*
jstr
+
i
];
stmp
.
r
[
j
]
=
tmp
.
r
;
stmp
.
i
[
j
]
=
tmp
.
i
;
}
dst
[
i
]
=
stmp
;
}
else
if
(
jstr
==
1
)
{
if
((
reinterpret_cast
<
uintptr_t
>
(
src
.
data
())
&
15
)
==
0
)
copy_input_j1_a16
(
it
,
src
,
dst
);
else
copy_input_j1
(
it
,
src
,
dst
);
}
// for (size_t i=0; i<it.length_in(); ++i)
// {
// Cmplx<native_simd<T>> stmp;
// for (size_t j=0; j<vlen; ++j)
// {
// auto tmp = ptr[j+i*istr];
// stmp.r[j] = tmp.r;
// stmp.i[j] = tmp.i;
// }
// dst[i] = stmp;
// }
for
(
ptrdiff_t
i
=
0
;
i
<
it
.
length_in
();
++
i
)
{
Cmplx
<
native_simd
<
T
>>
stmp
;
for
(
ptrdiff_t
j
=
0
;
j
<
vlen
;
++
j
)
{
auto
tmp
=
ptr
[
j
+
i
*
istr
];
stmp
.
r
[
j
]
=
tmp
.
r
;
stmp
.
i
[
j
]
=
tmp
.
i
;
}
dst
[
i
]
=
stmp
;
}
else
for
(
size_t
i
=
0
;
i
<
it
.
length_in
();
++
i
)
{
...
...
@@ -766,36 +734,12 @@ template <typename T, size_t vlen> MRUTIL_NOINLINE void copy_input(const multi_i
template
<
typename
T
,
size_t
vlen
>
MRUTIL_NOINLINE
void
copy_input
(
const
multi_iter
<
vlen
>
&
it
,
const
fmav
<
T
>
&
src
,
native_simd
<
T
>
*
MRUTIL_RESTRICT
dst
)
{
size_t
i
=
0
;
#ifdef MRFFT_PREFETCH
constexpr
size_t
dist
=
32
;
if
(
it
.
uniform_i
())
for
(;
i
+
dist
<
it
.
length_in
();
++
i
)
{
native_simd
<
T
>
stmp
;
MRUTIL_PREFETCH_W
(
&
dst
[
i
+
dist
]);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
MRUTIL_PREFETCH_R
(
&
src
[
it
.
iofs_uni
(
j
,
i
+
dist
)]);
stmp
[
j
]
=
src
[
it
.
iofs_uni
(
j
,
i
)];
}
dst
[
i
]
=
stmp
;
}
else
for
(;
i
+
dist
<
it
.
length_in
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
MRUTIL_PREFETCH_R
(
&
src
[
it
.
iofs
(
j
,
i
+
dist
)]);
MRUTIL_PREFETCH_W
(
&
dst
[
i
+
dist
]);
dst
[
i
][
j
]
=
src
[
it
.
iofs
(
j
,
i
)];
}
#endif
if
(
it
.
uniform_i
())
for
(;
i
<
it
.
length_in
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
length_in
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
dst
[
i
][
j
]
=
src
[
it
.
iofs_uni
(
j
,
i
)];
else
for
(;
i
<
it
.
length_in
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
length_in
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
dst
[
i
][
j
]
=
src
[
it
.
iofs
(
j
,
i
)];
}
...
...
@@ -812,30 +756,12 @@ template<typename T, size_t vlen> MRUTIL_NOINLINE void copy_output(const multi_i
const
Cmplx
<
native_simd
<
T
>>
*
MRUTIL_RESTRICT
src
,
fmav
<
Cmplx
<
T
>>
&
dst
)
{
auto
ptr
=
dst
.
vdata
();
size_t
i
=
0
;
#ifdef MRFFT_PREFETCH
constexpr
size_t
dist
=
32
;
if
(
it
.
uniform_o
())
for
(
;
i
+
dist
<
it
.
length_out
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
length_out
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
MRUTIL_PREFETCH_W
(
&
ptr
[
it
.
oofs_uni
(
j
,
i
+
dist
)]);
ptr
[
it
.
oofs_uni
(
j
,
i
)].
Set
(
src
[
i
].
r
[
j
],
src
[
i
].
i
[
j
]);
}
else
for
(;
i
+
dist
<
it
.
length_out
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
MRUTIL_PREFETCH_W
(
&
ptr
[
it
.
oofs
(
j
,
i
+
dist
)]);
ptr
[
it
.
oofs
(
j
,
i
)].
Set
(
src
[
i
].
r
[
j
],
src
[
i
].
i
[
j
]);
}
#endif
if
(
it
.
uniform_o
())
for
(;
i
<
it
.
length_out
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
ptr
[
it
.
oofs_uni
(
j
,
i
)].
Set
(
src
[
i
].
r
[
j
],
src
[
i
].
i
[
j
]);
else
for
(;
i
<
it
.
length_out
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
length_out
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
ptr
[
it
.
oofs
(
j
,
i
)].
Set
(
src
[
i
].
r
[
j
],
src
[
i
].
i
[
j
]);
}
...
...
@@ -844,30 +770,12 @@ template<typename T, size_t vlen> MRUTIL_NOINLINE void copy_output(const multi_i
const
native_simd
<
T
>
*
MRUTIL_RESTRICT
src
,
fmav
<
T
>
&
dst
)
{
auto
ptr
=
dst
.
vdata
();
size_t
i
=
0
;
#ifdef MRFFT_PREFETCH
constexpr
size_t
dist
=
32
;
if
(
it
.
uniform_o
())
for
(;
i
+
dist
<
it
.
length_out
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
MRUTIL_PREFETCH_W
(
&
ptr
[
it
.
oofs_uni
(
j
,
i
+
dist
)]);
ptr
[
it
.
oofs_uni
(
j
,
i
)]
=
src
[
i
][
j
];
}
else
for
(;
i
+
dist
<
it
.
length_out
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
MRUTIL_PREFETCH_W
(
&
ptr
[
it
.
oofs
(
j
,
i
+
dist
)]);
ptr
[
it
.
oofs
(
j
,
i
)]
=
src
[
i
][
j
];
}
#endif
if
(
it
.
uniform_o
())
for
(;
i
<
it
.
length_out
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
length_out
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
ptr
[
it
.
oofs_uni
(
j
,
i
)]
=
src
[
i
][
j
];
else
for
(;
i
<
it
.
length_out
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
length_out
();
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
ptr
[
it
.
oofs
(
j
,
i
)]
=
src
[
i
][
j
];
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment