Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
c111ef44
Commit
c111ef44
authored
Nov 23, 2017
by
Andreas Marek
Browse files
Make double precision real Power8 kernels available
parent
44098022
Changes
5
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
c111ef44
...
...
@@ -687,6 +687,8 @@ EXTRA_DIST = \
src/elpa2/kernels/real_avx512_4hv_template.c
\
src/elpa2/kernels/real_avx512_6hv_template.c
\
src/elpa2/kernels/real_vsx_2hv_template.c
\
src/elpa2/kernels/real_vsx_4hv_template.c
\
src/elpa2/kernels/real_vsx_6hv_template.c
\
src/elpa2/kernels/real_sse_2hv_template.c
\
src/elpa2/kernels/real_sse_4hv_template.c
\
src/elpa2/kernels/real_sse_6hv_template.c
\
...
...
configure.ac
View file @
c111ef44
...
...
@@ -478,8 +478,6 @@ m4_define(elpa_m4_vsx_kernels, [
real_vsx_block2
real_vsx_block4
real_vsx_block6
complex_vsx_block1
complex_vsx_block2
])
m4_define(elpa_m4_avx_kernels, [
...
...
elpa/elpa_constants.h.in
View file @
c111ef44
...
...
@@ -74,9 +74,7 @@ enum ELPA_REAL_KERNELS {
X(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2, 13, @ELPA_2STAGE_COMPLEX_AVX512_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_GPU, 14, @ELPA_2STAGE_COMPLEX_GPU_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_SPARC64_BLOCK1, 15, @ELPA_2STAGE_COMPLEX_SPARC64_BLOCK1_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_SPARC64_BLOCK2, 16, @ELPA_2STAGE_COMPLEX_SPARC64_BLOCK2_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_VSX_BLOCK1, 17, @ELPA_2STAGE_COMPLEX_VSX_BLOCK1_COMPILED@, __VA_ARGS__) \
X(ELPA_2STAGE_COMPLEX_VSX_BLOCK2, 18, @ELPA_2STAGE_COMPLEX_VSX_BLOCK2_COMPILED@, __VA_ARGS__)
X(ELPA_2STAGE_COMPLEX_SPARC64_BLOCK2, 16, @ELPA_2STAGE_COMPLEX_SPARC64_BLOCK2_COMPILED@, __VA_ARGS__)
#define ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS_AND_DEFAULT(X) \
ELPA_FOR_ALL_2STAGE_COMPLEX_KERNELS(X) \
...
...
src/elpa2/compute_hh_trafo.F90
View file @
c111ef44
...
...
@@ -663,13 +663,13 @@
! vsx block1 complex kernel
#if defined(WITH_COMPLEX_VSX_BLOCK1_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL
if
(
kernel
.eq.
ELPA_2STAGE_COMPLEX_VSX_BLOCK1
)
then
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
#if (!defined(WITH_FIXED_COMPLEX_KERNEL)) || (defined(WITH_FIXED_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_VSX_BLOCK2_KERNEL))
ttt
=
mpi_wtime
()
do
j
=
ncols
,
1
,
-1
!
#ifndef WITH_FIXED_COMPLEX_KERNEL
!
if (kernel .eq. ELPA_2STAGE_COMPLEX_VSX_BLOCK1) then
!
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
!
!
#if (!defined(WITH_FIXED_COMPLEX_KERNEL)) || (defined(WITH_FIXED_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_VSX_BLOCK2_KERNEL))
!
ttt = mpi_wtime()
!
do j = ncols, 1, -1
!#ifdef WITH_OPENMP
! call single_hh_trafo_&
! &MATH_DATATYPE&
...
...
@@ -683,12 +683,12 @@
! &PRECISION&
! & (c_loc(a(1,j+off+a_off,istripe)), bcast_buffer(1,j+off),nbw,nl,stripe_width)
!#endif
enddo
#endif /* (!defined(WITH_FIXED_COMPLEX_KERNEL)) || (defined(WITH_FIXED_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_VSX_BLOCK2_KERNEL)) */
#ifndef WITH_FIXED_COMPLEX_KERNEL
endif
! (kernel .eq. ELPA_2STAGE_COMPLEX_VSX_BLOCK1)
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
!
enddo
!
#endif /* (!defined(WITH_FIXED_COMPLEX_KERNEL)) || (defined(WITH_FIXED_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_VSX_BLOCK2_KERNEL)) */
!
!
#ifndef WITH_FIXED_COMPLEX_KERNEL
!
endif ! (kernel .eq. ELPA_2STAGE_COMPLEX_VSX_BLOCK1)
!
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_VSX_BLOCK1_KERNEL */
#endif /* COMPLEXCASE */
...
...
@@ -965,14 +965,14 @@
! implementation of vsx block 2 complex case
#if defined(WITH_COMPLEX_VSX_BLOCK2_KERNEL)
#ifndef WITH_FIXED_COMPLEX_KERNEL
if
(
kernel
.eq.
ELPA_2STAGE_COMPLEX_VSX_BLOCK2
)
then
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
ttt
=
mpi_wtime
()
do
j
=
ncols
,
2
,
-2
w
(:,
1
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
)
w
(:,
2
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-1
)
!
#ifndef WITH_FIXED_COMPLEX_KERNEL
!
if (kernel .eq. ELPA_2STAGE_COMPLEX_VSX_BLOCK2) then
!
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
!
!
ttt = mpi_wtime()
!
do j = ncols, 2, -2
!
w(:,1) = bcast_buffer(1:nbw,j+off)
!
w(:,2) = bcast_buffer(1:nbw,j+off-1)
!#ifdef WITH_OPENMP
! call double_hh_trafo_&
! &MATH_DATATYPE&
...
...
@@ -986,7 +986,7 @@
! &PRECISION&
! & (c_loc(a(1,j+off+a_off-1,istripe)), w, nbw, nl, stripe_width, nbw)
!#endif
enddo
!
enddo
!#ifdef WITH_OPENMP
! if (j==1) call single_hh_trafo_&
! &MATH_DATATYPE&
...
...
@@ -1000,10 +1000,10 @@
! &PRECISION&
! & (c_loc(a(1,1+off+a_off,istripe)), bcast_buffer(1,off+1), nbw, nl, stripe_width)
!#endif
#ifndef WITH_FIXED_COMPLEX_KERNEL
endif
! (kernel .eq. ELPA_2STAGE_COMPLEX_VSX_BLOCK2)
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
!
!
#ifndef WITH_FIXED_COMPLEX_KERNEL
!
endif ! (kernel .eq. ELPA_2STAGE_COMPLEX_VSX_BLOCK2)
!
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_VSX_BLOCK2_KERNEL */
#endif /* COMPLEXCASE == 1 */
...
...
src/elpa2/elpa2_template.F90
View file @
c111ef44
...
...
@@ -172,7 +172,7 @@
if
(
gpu
==
1
)
then
if
(
kernel
.ne.
ELPA_2STAGE_REAL_GPU
)
then
write
(
error_unit
,
*
)
"ELPA: Warning, GPU usage has been requested but compute kernel is defined as non-GPU!"
write
(
error_unit
,
*
)
"The compute kernel will be executed on CPUs!"
write
(
error_unit
,
*
)
"The compute kernel will be executed on CPUs!"
else
if
(
nblk
.ne.
128
)
then
kernel
=
ELPA_2STAGE_REAL_GENERIC
endif
...
...
@@ -182,6 +182,18 @@
write
(
error_unit
,
*
)
"ELPA: Warning, GPU usage has been requested but compute kernel is defined as non-GPU!"
endif
endif
#ifdef SINGLE_PRECISION_REAL
! special case at the moment NO single precision kernels on POWER 8 -> set GENERIC for now
if
(
kernel
.eq.
ELPA_2STAGE_REAL_VSX_BLOCK2
.or.
&
kernel
.eq.
ELPA_2STAGE_REAL_VSX_BLOCK4
.or.
&
kernel
.eq.
ELPA_2STAGE_REAL_VSX_BLOCK6
)
then
write
(
error_unit
,
*
)
"ELPA: At the moment there exist no specific SINGLE precision kernels for POWER8"
write
(
error_unit
,
*
)
"The GENERIC kernel will be used at the moment"
kernel
=
ELPA_2STAGE_REAL_GENERIC
endif
#endif
#endif
#if COMPLEXCASE == 1
...
...
@@ -191,7 +203,7 @@
if
(
gpu
==
1
)
then
if
(
kernel
.ne.
ELPA_2STAGE_COMPLEX_GPU
)
then
write
(
error_unit
,
*
)
"ELPA: Warning, GPU usage has been requested but compute kernel is defined as non-GPU!"
write
(
error_unit
,
*
)
"The compute kernel will be executed on CPUs!"
write
(
error_unit
,
*
)
"The compute kernel will be executed on CPUs!"
else
if
(
nblk
.ne.
128
)
then
kernel
=
ELPA_2STAGE_COMPLEX_GENERIC
endif
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment