Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
ift
nifty_gridder
Commits
9728e8e8
Commit
9728e8e8
authored
Sep 01, 2019
by
Martin Reinecke
Browse files
speedup kernel application
parent
b88f9d5c
Changes
1
Hide whitespace changes
Inline
Side-by-side
gridder_cxx.h
View file @
9728e8e8
...
...
@@ -34,9 +34,11 @@
#if defined(__GNUC__)
#define NOINLINE __attribute__((noinline))
#define ALIGNED(align) __attribute__ ((aligned(align)))
#define RESTRICT __restrict__
#else
#define NOINLINE
#define ALIGNED(align)
#define RESTRICT
#endif
namespace
gridder
{
...
...
@@ -785,8 +787,8 @@ template<typename T, typename Serv> void x2grid_c
{
Helper
<
T
>
hlp
(
gconf
,
nullptr
,
grid
.
data
(),
w0
,
dw
);
int
jump
=
hlp
.
lineJump
();
const
T
*
ku
=
hlp
.
kernel
;
const
T
*
kv
=
hlp
.
kernel
+
supp
;
const
T
*
RESTRICT
ku
=
hlp
.
kernel
;
const
T
*
RESTRICT
kv
=
hlp
.
kernel
+
supp
;
size_t
np
=
srv
.
Nvis
();
// Loop over sampling points
...
...
@@ -796,14 +798,22 @@ template<typename T, typename Serv> void x2grid_c
UVW
<
T
>
coord
=
srv
.
getCoord
(
ipart
);
auto
flip
=
coord
.
FixW
();
hlp
.
prep
(
coord
);
auto
*
ptr
=
hlp
.
p0w
;
auto
*
RESTRICT
ptr
=
hlp
.
p0w
;
auto
v
(
srv
.
getVis
(
ipart
));
if
(
do_w_gridding
)
v
*=
hlp
.
Wfac
();
if
(
flip
)
v
=
conj
(
v
);
for
(
size_t
cu
=
0
;
cu
<
supp
;
++
cu
)
{
complex
<
T
>
tmp
(
v
*
ku
[
cu
]);
for
(
size_t
cv
=
0
;
cv
<
supp
;
++
cv
)
size_t
cv
=
0
;
for
(;
cv
<
supp
-
3
;
cv
+=
4
)
{
ptr
[
cv
]
+=
tmp
*
kv
[
cv
];
ptr
[
cv
+
1
]
+=
tmp
*
kv
[
cv
+
1
];
ptr
[
cv
+
2
]
+=
tmp
*
kv
[
cv
+
2
];
ptr
[
cv
+
3
]
+=
tmp
*
kv
[
cv
+
3
];
}
for
(;
cv
<
supp
;
++
cv
)
ptr
[
cv
]
+=
tmp
*
kv
[
cv
];
ptr
+=
jump
;
}
...
...
@@ -843,8 +853,8 @@ template<typename T, typename Serv> void grid2x_c
{
Helper
<
T
>
hlp
(
gconf
,
grid
.
data
(),
nullptr
,
w0
,
dw
);
int
jump
=
hlp
.
lineJump
();
const
T
*
ku
=
hlp
.
kernel
;
const
T
*
kv
=
hlp
.
kernel
+
supp
;
const
T
*
RESTRICT
ku
=
hlp
.
kernel
;
const
T
*
RESTRICT
kv
=
hlp
.
kernel
+
supp
;
size_t
np
=
srv
.
Nvis
();
#pragma omp for schedule(guided,100)
...
...
@@ -854,11 +864,17 @@ template<typename T, typename Serv> void grid2x_c
auto
flip
=
coord
.
FixW
();
hlp
.
prep
(
coord
);
complex
<
T
>
r
=
0
;
const
auto
*
ptr
=
hlp
.
p0r
;
const
auto
*
RESTRICT
ptr
=
hlp
.
p0r
;
for
(
size_t
cu
=
0
;
cu
<
supp
;
++
cu
)
{
complex
<
T
>
tmp
(
0
);
for
(
size_t
cv
=
0
;
cv
<
supp
;
++
cv
)
size_t
cv
=
0
;
for
(;
cv
<
supp
-
3
;
cv
+=
4
)
tmp
+=
ptr
[
cv
]
*
kv
[
cv
]
+
ptr
[
cv
+
1
]
*
kv
[
cv
+
1
]
+
ptr
[
cv
+
2
]
*
kv
[
cv
+
2
]
+
ptr
[
cv
+
3
]
*
kv
[
cv
+
3
];
for
(;
cv
<
supp
;
++
cv
)
tmp
+=
ptr
[
cv
]
*
kv
[
cv
];
r
+=
tmp
*
ku
[
cu
];
ptr
+=
jump
;
...
...
@@ -1115,6 +1131,7 @@ template<typename T, typename Serv> void wstack_common(
wmin
=
T
(
1e38
);
T
wmax
=
T
(
-
1e38
);
// FIXME maybe this can be done more intelligently
#pragma omp parallel for num_threads(nthreads) reduction(min:wmin) reduction(max:wmax)
for
(
size_t
ipart
=
0
;
ipart
<
nvis
;
++
ipart
)
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment