Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
0735c1a6
Commit
0735c1a6
authored
May 04, 2018
by
Pavel Kus
Browse files
reordering elpa2_template to bring gpu handling logic together
parent
8225bb5b
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/elpa2/elpa2_template.F90
View file @
0735c1a6
...
...
@@ -105,7 +105,10 @@
logical
::
wantDebug
integer
(
kind
=
c_int
)
::
istat
,
gpu
,
debug
,
qr
character
(
200
)
::
errorMessage
logical
::
do_useGPU
,
do_useGPU_trans_ev_tridi
logical
::
do_useGPU
,
do_useGPU_bandred
,
&
do_useGPU_tridi_band
,
do_useGPU_solve_tridi
,
&
do_useGPU_trans_ev_tridi_to_band
,
&
do_useGPU_trans_ev_band_to_full
integer
(
kind
=
c_int
)
::
numberOfGPUDevices
integer
(
kind
=
c_intptr_t
),
parameter
::
size_of_datatype
=
size_of_
&
&
PRECISION
&
...
...
@@ -156,6 +159,32 @@
nblk
=
obj
%
nblk
matrixCols
=
obj
%
local_ncols
call
obj
%
get
(
"mpi_comm_rows"
,
mpi_comm_rows
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
call
obj
%
get
(
"mpi_comm_cols"
,
mpi_comm_cols
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
call
obj
%
get
(
"mpi_comm_parent"
,
mpi_comm_all
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
call
obj
%
timer
%
start
(
"mpi_communication"
)
call
mpi_comm_rank
(
mpi_comm_all
,
my_pe
,
mpierr
)
call
mpi_comm_size
(
mpi_comm_all
,
n_pes
,
mpierr
)
call
mpi_comm_rank
(
mpi_comm_rows
,
my_prow
,
mpierr
)
call
mpi_comm_size
(
mpi_comm_rows
,
np_rows
,
mpierr
)
call
mpi_comm_rank
(
mpi_comm_cols
,
my_pcol
,
mpierr
)
call
mpi_comm_size
(
mpi_comm_cols
,
np_cols
,
mpierr
)
call
obj
%
timer
%
stop
(
"mpi_communication"
)
! special case na = 1
if
(
na
.eq.
1
)
then
#if REALCASE == 1
...
...
@@ -208,6 +237,48 @@
endif
endif
if
(
gpu
.eq.
1
)
then
useGPU
=
.true.
else
useGPU
=
.false.
endif
do_useGPU
=
.false.
do_useGPU_trans_ev_tridi_to_band
=
.false.
if
(
useGPU
)
then
if
(
check_for_gpu
(
my_pe
,
numberOfGPUDevices
,
wantDebug
=
wantDebug
))
then
do_useGPU
=
.true.
! set the necessary parameters
cudaMemcpyHostToDevice
=
cuda_memcpyHostToDevice
()
cudaMemcpyDeviceToHost
=
cuda_memcpyDeviceToHost
()
cudaMemcpyDeviceToDevice
=
cuda_memcpyDeviceToDevice
()
cudaHostRegisterPortable
=
cuda_hostRegisterPortable
()
cudaHostRegisterMapped
=
cuda_hostRegisterMapped
()
else
print
*
,
"GPUs are requested but not detected! Aborting..."
success
=
.false.
return
endif
endif
! check consistency between request for GPUs and defined kernel
if
(
do_useGPU
)
then
if
(
nblk
.ne.
128
)
then
! cannot run on GPU with this blocksize
! disable GPU usage for trans_ev_tridi
do_useGPU_trans_ev_tridi_to_band
=
.false.
else
if
(
kernel
.eq.
GPU_KERNEL
)
then
do_useGPU_trans_ev_tridi_to_band
=
.true.
else
do_useGPU_trans_ev_tridi_to_band
=
.false.
endif
endif
endif
#if REALCASE == 1
#ifdef SINGLE_PRECISION_REAL
! special case at the moment NO single precision kernels on POWER 8 -> set GENERIC for now
...
...
@@ -230,27 +301,6 @@
#endif
call
obj
%
get
(
"mpi_comm_rows"
,
mpi_comm_rows
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
call
obj
%
get
(
"mpi_comm_cols"
,
mpi_comm_cols
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
call
obj
%
get
(
"mpi_comm_parent"
,
mpi_comm_all
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
if
(
gpu
.eq.
1
)
then
useGPU
=
.true.
else
useGPU
=
.false.
endif
#if REALCASE == 1
call
obj
%
get
(
"qr"
,
qr
,
error
)
...
...
@@ -265,15 +315,6 @@
endif
#endif
call
obj
%
timer
%
start
(
"mpi_communication"
)
call
mpi_comm_rank
(
mpi_comm_all
,
my_pe
,
mpierr
)
call
mpi_comm_size
(
mpi_comm_all
,
n_pes
,
mpierr
)
call
mpi_comm_rank
(
mpi_comm_rows
,
my_prow
,
mpierr
)
call
mpi_comm_size
(
mpi_comm_rows
,
np_rows
,
mpierr
)
call
mpi_comm_rank
(
mpi_comm_cols
,
my_pcol
,
mpierr
)
call
mpi_comm_size
(
mpi_comm_cols
,
np_cols
,
mpierr
)
call
obj
%
timer
%
stop
(
"mpi_communication"
)
call
obj
%
get
(
"debug"
,
debug
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
...
...
@@ -282,8 +323,6 @@
endif
wantDebug
=
debug
==
1
do_useGPU
=
.false.
do_useGPU_trans_ev_tridi
=
.false.
#if REALCASE == 1
...
...
@@ -304,39 +343,6 @@
endif
#endif /* REALCASE */
if
(
useGPU
)
then
if
(
check_for_gpu
(
my_pe
,
numberOfGPUDevices
,
wantDebug
=
wantDebug
))
then
do_useGPU
=
.true.
! set the necessary parameters
cudaMemcpyHostToDevice
=
cuda_memcpyHostToDevice
()
cudaMemcpyDeviceToHost
=
cuda_memcpyDeviceToHost
()
cudaMemcpyDeviceToDevice
=
cuda_memcpyDeviceToDevice
()
cudaHostRegisterPortable
=
cuda_hostRegisterPortable
()
cudaHostRegisterMapped
=
cuda_hostRegisterMapped
()
else
print
*
,
"GPUs are requested but not detected! Aborting..."
success
=
.false.
return
endif
endif
! check consistency between request for GPUs and defined kernel
if
(
do_useGPU
)
then
if
(
nblk
.ne.
128
)
then
! cannot run on GPU with this blocksize
! disable GPU usage for trans_ev_tridi
do_useGPU_trans_ev_tridi
=
.false.
else
if
(
kernel
.eq.
GPU_KERNEL
)
then
do_useGPU_trans_ev_tridi
=
.true.
else
do_useGPU_trans_ev_tridi
=
.false.
endif
endif
endif
if
(
.not.
obj
%
eigenvalues_only
)
then
...
...
@@ -569,7 +575,7 @@
&
PRECISION
&
(
obj
,
na
,
nev
,
nblk
,
nbw
,
q
,
&
q_dev
,
&
ldq
,
matrixCols
,
hh_trans
,
mpi_comm_rows
,
mpi_comm_cols
,
wantDebug
,
do_useGPU_trans_ev_tridi
,
&
ldq
,
matrixCols
,
hh_trans
,
mpi_comm_rows
,
mpi_comm_cols
,
wantDebug
,
do_useGPU_trans_ev_tridi
_to_band
,
&
success
=
success
,
kernel
=
kernel
)
call
obj
%
timer
%
stop
(
"trans_ev_to_band"
)
...
...
@@ -588,7 +594,7 @@
if
(
do_trans_to_full
)
then
call
obj
%
timer
%
start
(
"trans_ev_to_full"
)
if
(
(
do_useGPU
)
.and.
.not.
(
do_useGPU_trans_ev_tridi
)
)
then
if
(
(
do_useGPU
)
.and.
.not.
(
do_useGPU_trans_ev_tridi
_to_band
)
)
then
! copy to device if we want to continue on GPU
successCUDA
=
cuda_malloc
(
q_dev
,
ldq
*
matrixCols
*
size_of_datatype
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment