Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ift
nifty_gridder
Commits
6b4f68f3
Commit
6b4f68f3
authored
Sep 05, 2019
by
Martin Reinecke
Browse files
experiment with OpenMP locks
parent
2812ffad
Changes
1
Hide whitespace changes
Inline
Side-by-side
gridder_cxx.h
View file @
6b4f68f3
...
...
@@ -29,6 +29,10 @@
#include
<vector>
#include
<array>
#ifdef _OPENMP
#include
<omp.h>
#endif
#include
"pocketfft_hdronly.h"
#if defined(__GNUC__)
...
...
@@ -715,6 +719,32 @@ class GridderConfig
// Base-2 logarithm of the tile edge length used by Helper: the Helper
// constructor sizes its buffer extents as su = sv = 2*nsafe + (1<<logsquare).
constexpr
int
logsquare
=
4
;
#ifdef _OPENMP
/// Owning wrapper around an OpenMP simple lock (omp_lock_t).
///
/// The lock is initialised in the constructor and destroyed in the
/// destructor. Copying is disabled because the wrapped omp_lock_t is owned
/// by exactly one object. lock() blocks until the lock is acquired;
/// unlock() releases it.
class Lock
  {
  private:
    omp_lock_t lck;

  public:
    Lock() { omp_init_lock(&lck); }
    ~Lock() { omp_destroy_lock(&lck); }

    Lock(const Lock &) = delete;
    Lock &operator=(const Lock &) = delete;

    /// Acquire the lock, waiting until it becomes available.
    void lock() { omp_set_lock(&lck); }
    /// Release the lock.
    void unlock() { omp_unset_lock(&lck); }
  };
#else
/// No-op stand-in used when the code is compiled without OpenMP.
///
/// Offers the same interface as the OpenMP-backed variant so that calling
/// code does not need any conditional compilation. Copying is disabled here
/// as well, so code that builds without OpenMP also builds with it.
class Lock
  {
  public:
    Lock() {}
    ~Lock() {}

    Lock(const Lock &) = delete;
    Lock &operator=(const Lock &) = delete;

    /// Does nothing: there is no concurrency without OpenMP.
    void lock() {}
    /// Does nothing. The original declaration lacked its parameter list
    /// ("void unlock {}"), which failed to compile in non-OpenMP builds.
    void unlock() {}
  };
#endif
template
<
typename
T
,
typename
T2
=
complex
<
T
>
>
class
Helper
{
private:
...
...
@@ -732,6 +762,7 @@ template<typename T, typename T2=complex<T>> class Helper
T
w0
,
xdw
;
size_t
nexp
;
size_t
nvecs
;
vector
<
Lock
>
&
locks
;
void
dump
()
const
{
...
...
@@ -744,11 +775,13 @@ template<typename T, typename T2=complex<T>> class Helper
for
(
int
iu
=
0
;
iu
<
su
;
++
iu
)
{
int
idxv
=
idxv0
;
locks
[
idxu
].
lock
();
for
(
int
iv
=
0
;
iv
<
sv
;
++
iv
)
{
grid_w
[
idxu
*
nv
+
idxv
]
+=
wbuf
[
iu
*
sv
+
iv
];
if
(
++
idxv
>=
nv
)
idxv
=
0
;
}
locks
[
idxu
].
unlock
();
if
(
++
idxu
>=
nu
)
idxu
=
0
;
}
}
...
...
@@ -776,7 +809,7 @@ template<typename T, typename T2=complex<T>> class Helper
T
kernel
[
64
]
ALIGNED
(
64
);
Helper
(
const
GridderConfig
&
gconf_
,
const
T2
*
grid_r_
,
T2
*
grid_w_
,
T
w0_
=-
1
,
T
dw_
=-
1
)
vector
<
Lock
>
&
locks_
,
T
w0_
=-
1
,
T
dw_
=-
1
)
:
gconf
(
gconf_
),
nu
(
gconf
.
Nu
()),
nv
(
gconf
.
Nv
()),
nsafe
(
gconf
.
Nsafe
()),
supp
(
gconf
.
Supp
()),
beta
(
gconf
.
Beta
()),
grid_r
(
grid_r_
),
grid_w
(
grid_w_
),
su
(
2
*
nsafe
+
(
1
<<
logsquare
)),
sv
(
2
*
nsafe
+
(
1
<<
logsquare
)),
...
...
@@ -787,7 +820,8 @@ template<typename T, typename T2=complex<T>> class Helper
w0
(
w0_
),
xdw
(
T
(
1
)
/
dw_
),
nexp
(
2
*
supp
+
do_w_gridding
),
nvecs
(
VLEN
<
T
>::
val
*
((
nexp
+
VLEN
<
T
>::
val
-
1
)
/
VLEN
<
T
>::
val
))
nvecs
(
VLEN
<
T
>::
val
*
((
nexp
+
VLEN
<
T
>::
val
-
1
)
/
VLEN
<
T
>::
val
)),
locks
(
locks_
)
{}
~
Helper
()
{
if
(
grid_w
)
dump
();
}
...
...
@@ -921,10 +955,11 @@ template<typename T, typename Serv> void x2grid_c
size_t
supp
=
gconf
.
Supp
();
size_t
nthreads
=
gconf
.
Nthreads
();
bool
do_w_gridding
=
dw
>
0
;
vector
<
Lock
>
locks
(
gconf
.
Nu
());
#pragma omp parallel num_threads(nthreads)
{
Helper
<
T
>
hlp
(
gconf
,
nullptr
,
grid
.
data
(),
w0
,
dw
);
Helper
<
T
>
hlp
(
gconf
,
nullptr
,
grid
.
data
(),
locks
,
w0
,
dw
);
int
jump
=
hlp
.
lineJump
();
const
T
*
RESTRICT
ku
=
hlp
.
kernel
;
const
T
*
RESTRICT
kv
=
hlp
.
kernel
+
supp
;
...
...
@@ -981,11 +1016,12 @@ template<typename T, typename Serv> void grid2x_c
size_t
supp
=
gconf
.
Supp
();
size_t
nthreads
=
gconf
.
Nthreads
();
bool
do_w_gridding
=
dw
>
0
;
vector
<
Lock
>
locks
(
gconf
.
Nu
());
// Loop over sampling points
#pragma omp parallel num_threads(nthreads)
{
Helper
<
T
>
hlp
(
gconf
,
grid
.
data
(),
nullptr
,
w0
,
dw
);
Helper
<
T
>
hlp
(
gconf
,
grid
.
data
(),
nullptr
,
locks
,
w0
,
dw
);
int
jump
=
hlp
.
lineJump
();
const
T
*
RESTRICT
ku
=
hlp
.
kernel
;
const
T
*
RESTRICT
kv
=
hlp
.
kernel
+
supp
;
...
...
@@ -1044,11 +1080,12 @@ template<typename T> void apply_holo
checkShape
(
ogrid
.
shape
(),
grid
.
shape
());
ogrid
.
fill
(
0
);
size_t
supp
=
gconf
.
Supp
();
vector
<
Lock
>
locks
(
gconf
.
Nu
());
// Loop over sampling points
#pragma omp parallel num_threads(nthreads)
{
Helper
<
T
>
hlp
(
gconf
,
grid
.
data
(),
ogrid
.
data
());
Helper
<
T
>
hlp
(
gconf
,
grid
.
data
(),
ogrid
.
data
()
,
locks
);
int
jump
=
hlp
.
lineJump
();
const
T
*
RESTRICT
ku
=
hlp
.
kernel
;
const
T
*
RESTRICT
kv
=
hlp
.
kernel
+
supp
;
...
...
@@ -1114,6 +1151,7 @@ template<typename T> void get_correlations
myassert
(
size_t
(
abs
(
dv
))
<
supp
,
"|dv| must be smaller than Supp"
);
size_t
nthreads
=
gconf
.
Nthreads
();
ogrid
.
fill
(
0
);
vector
<
Lock
>
locks
(
gconf
.
Nu
());
size_t
u0
,
u1
,
v0
,
v1
;
if
(
du
>=
0
)
...
...
@@ -1128,7 +1166,7 @@ template<typename T> void get_correlations
// Loop over sampling points
#pragma omp parallel num_threads(nthreads)
{
Helper
<
T
,
T
>
hlp
(
gconf
,
nullptr
,
ogrid
.
data
());
Helper
<
T
,
T
>
hlp
(
gconf
,
nullptr
,
ogrid
.
data
()
,
locks
);
int
jump
=
hlp
.
lineJump
();
const
T
*
RESTRICT
ku
=
hlp
.
kernel
;
const
T
*
RESTRICT
kv
=
hlp
.
kernel
+
supp
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment