Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ift
nifty_gridder
Commits
3dcf3ba0
Commit
3dcf3ba0
authored
Aug 31, 2019
by
Martin Reinecke
Browse files
WIP: better vectorization for kernel evaluations
parent
043a57c1
Changes
1
Hide whitespace changes
Inline
Side-by-side
gridder_cxx.h
View file @
3dcf3ba0
...
...
@@ -38,6 +38,22 @@ namespace detail {
using
namespace
std
;
template
<
typename
T
>
struct
VLEN
{
static
constexpr
size_t
val
=
1
;
};
#if (defined(__AVX512F__))
template
<
>
struct
VLEN
<
float
>
{
static
constexpr
size_t
val
=
16
;
};
template
<
>
struct
VLEN
<
double
>
{
static
constexpr
size_t
val
=
8
;
};
#elif (defined(__AVX__))
template
<
>
struct
VLEN
<
float
>
{
static
constexpr
size_t
val
=
8
;
};
template
<
>
struct
VLEN
<
double
>
{
static
constexpr
size_t
val
=
4
;
};
#elif (defined(__SSE2__))
template
<
>
struct
VLEN
<
float
>
{
static
constexpr
size_t
val
=
4
;
};
template
<
>
struct
VLEN
<
double
>
{
static
constexpr
size_t
val
=
2
;
};
#elif (defined(__VSX__))
template
<
>
struct
VLEN
<
float
>
{
static
constexpr
size_t
val
=
4
;
};
template
<
>
struct
VLEN
<
double
>
{
static
constexpr
size_t
val
=
2
;
};
#endif
template
<
typename
T
,
size_t
ndim
>
class
mav
{
static_assert
((
ndim
>
0
)
&&
(
ndim
<
3
),
"only supports 1D and 2D arrays"
);
...
...
@@ -577,6 +593,7 @@ template<typename T, typename T2=complex<T>> class Helper
int
bu0
,
bv0
;
// start index of the current buffer
vector
<
T2
>
rbuf
,
wbuf
;
size_t
nvecs
;
void
dump
()
const
{
...
...
@@ -618,7 +635,7 @@ template<typename T, typename T2=complex<T>> class Helper
public:
const
T2
*
p0r
;
T2
*
p0w
;
vector
<
T
>
kernel
;
T
kernel
[
64
]
__attribute__
((
aligned
(
64
)))
;
Helper
(
const
GridderConfig
<
T
>
&
gconf_
,
const
T2
*
grid_r_
,
T2
*
grid_w_
)
:
gconf
(
gconf_
),
nu
(
gconf
.
Nu
()),
nv
(
gconf
.
Nv
()),
nsafe
(
gconf
.
Nsafe
()),
...
...
@@ -627,7 +644,7 @@ template<typename T, typename T2=complex<T>> class Helper
bu0
(
-
1000000
),
bv0
(
-
1000000
),
rbuf
(
su
*
sv
*
(
grid_r
!=
nullptr
),
T
(
0
)),
wbuf
(
su
*
sv
*
(
grid_w
!=
nullptr
),
T
(
0
)),
kernel
(
2
*
supp
)
nvecs
(
VLEN
<
T
>::
val
*
((
2
*
supp
+
VLEN
<
T
>::
val
-
1
)
/
VLEN
<
T
>::
val
)
)
{}
~
Helper
()
{
if
(
grid_w
)
dump
();
}
...
...
@@ -645,8 +662,12 @@ template<typename T, typename T2=complex<T>> class Helper
kernel
[
i
]
=
x0
+
i
*
xsupp
;
kernel
[
i
+
supp
]
=
y0
+
i
*
xsupp
;
}
for
(
auto
&
k
:
kernel
)
k
=
exp
(
beta
*
sqrt
(
T
(
1
)
-
k
*
k
));
for
(
size_t
i
=
2
*
supp
;
i
<
nvecs
;
++
i
)
kernel
[
i
]
=
0
;
for
(
size_t
i
=
0
;
i
<
nvecs
;
++
i
)
kernel
[
i
]
=
exp
(
beta
*
sqrt
(
T
(
1
)
-
kernel
[
i
]
*
kernel
[
i
]));
// for (auto &k : kernel)
// k = exp(beta*sqrt(T(1)-k*k));
if
((
iu0
<
bu0
)
||
(
iv0
<
bv0
)
||
(
iu0
+
supp
>
bu0
+
su
)
||
(
iv0
+
supp
>
bv0
+
sv
))
{
if
(
grid_w
)
{
dump
();
fill
(
wbuf
.
begin
(),
wbuf
.
end
(),
T
(
0
));
}
...
...
@@ -751,8 +772,8 @@ template<typename T, typename Serv> void x2grid_c
Helper
<
T
>
hlp
(
gconf
,
nullptr
,
grid
.
data
());
T
emb
=
exp
(
-
2
*
beta
);
int
jump
=
hlp
.
lineJump
();
const
T
*
ku
=
hlp
.
kernel
.
data
()
;
const
T
*
kv
=
hlp
.
kernel
.
data
()
+
supp
;
const
T
*
ku
=
hlp
.
kernel
;
const
T
*
kv
=
hlp
.
kernel
+
supp
;
size_t
np
=
srv
.
Nvis
();
// Loop over sampling points
...
...
@@ -808,8 +829,8 @@ template<typename T, typename Serv> void grid2x_c
Helper
<
T
>
hlp
(
gconf
,
grid
.
data
(),
nullptr
);
T
emb
=
exp
(
-
2
*
beta
);
int
jump
=
hlp
.
lineJump
();
const
T
*
ku
=
hlp
.
kernel
.
data
()
;
const
T
*
kv
=
hlp
.
kernel
.
data
()
+
supp
;
const
T
*
ku
=
hlp
.
kernel
;
const
T
*
kv
=
hlp
.
kernel
+
supp
;
size_t
np
=
srv
.
Nvis
();
#pragma omp for schedule(guided,100)
...
...
@@ -873,8 +894,8 @@ template<typename T> void apply_holo
Helper
<
T
>
hlp
(
gconf
,
grid
.
data
(),
ogrid
.
data
());
T
emb
=
exp
(
-
2
*
beta
);
int
jump
=
hlp
.
lineJump
();
const
T
*
ku
=
hlp
.
kernel
.
data
()
;
const
T
*
kv
=
hlp
.
kernel
.
data
()
+
supp
;
const
T
*
ku
=
hlp
.
kernel
;
const
T
*
kv
=
hlp
.
kernel
+
supp
;
#pragma omp for schedule(guided,100)
for
(
size_t
ipart
=
0
;
ipart
<
nvis
;
++
ipart
)
...
...
@@ -944,8 +965,8 @@ template<typename T> void get_correlations
Helper
<
T
,
T
>
hlp
(
gconf
,
nullptr
,
ogrid
.
data
());
T
emb
=
exp
(
-
2
*
beta
);
int
jump
=
hlp
.
lineJump
();
const
T
*
ku
=
hlp
.
kernel
.
data
()
;
const
T
*
kv
=
hlp
.
kernel
.
data
()
+
supp
;
const
T
*
ku
=
hlp
.
kernel
;
const
T
*
kv
=
hlp
.
kernel
+
supp
;
#pragma omp for schedule(guided,100)
for
(
size_t
ipart
=
0
;
ipart
<
nvis
;
++
ipart
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment