Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
c3b2f590
Commit
c3b2f590
authored
Dec 08, 2021
by
Soheil Soltani
Browse files
Fix high residual error
The CUDA device must be synchronized before CUDA-aware MPI calls.
parent
5659ad6f
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/cholesky/elpa_cholesky_template.F90
View file @
c3b2f590
...
...
@@ -54,8 +54,8 @@
use elpa_gpu
use mod_check_for_gpu
use invert_trm_cuda, only : copy_PRECISION_tmp1_tmp2, &
copy_PRECISION_a_tmp1
copy_PRECISION_a_tmp1, &
device_synchronize
use cholesky_cuda
implicit none
#include "../general/precision_kinds.F90"
...
...
@@ -499,7 +499,7 @@
tmp1_mpi_dev = transfer(tmp1_dev, tmp1_mpi_dev)
! and associate a fortran pointer
call c_f_pointer(tmp1_mpi_dev, tmp1_mpi_fortran_ptr, [nblk,nblk])
call device_synchronize()
call obj%timer%start("mpi_cuda_communication")
call MPI_Bcast(tmp1_mpi_fortran_ptr, int(nblk*(nblk+1)/2,kind=MPI_KIND), &
...
...
@@ -602,6 +602,8 @@
tmatc_mpi_dev = transfer(tmatc_dev, tmatc_mpi_dev)
! and associate a fortran pointer
call c_f_pointer(tmatc_mpi_dev, tmatc_mpi_fortran_ptr, [l_cols,nblk])
call device_synchronize()
do i=1,nblk
call obj%timer%start("mpi_cuda_communication")
...
...
src/invert_trm/elpa_invert_trm_template.F90
View file @
c3b2f590
...
...
@@ -94,13 +94,13 @@
integer(kind=c_int) :: gpu, numGPU
integer(kind=c_intptr_t) :: tmat1_dev, tmat2_dev, a_dev, tmp1_dev, tmp2_dev, zero_dev
type(c_ptr) :: tmp1_mpi_dev
MATH_DATATYPE(kind=rck), pointer :: tmp1_mpi_fortran_ptr(:,:)
MATH_DATATYPE(kind=rck), pointer :: tmp1_mpi_fortran_ptr(:)
type(c_ptr) :: tmat1_mpi_dev, tmat2_mpi_dev
MATH_DATATYPE(kind=rck), pointer :: tmat1_mpi_fortran_ptr(:,:), tmat2_mpi_fortran_ptr(:,:)
type(c_ptr) :: tmp2_mpi_dev, a_mpi_dev
integer(kind=c_intptr_t) :: a_off, tmat2_off, tmp1_off, tmp2_off
MATH_DATATYPE(kind=rck), pointer :: a_mpi_deviceptr(:,:)
MATH_DATATYPE(kind=rck), pointer :: a_mpi_deviceptr(:,:), initializer_ptr(:)
!DEB
integer(kind=c_intptr_t) :: num
integer(kind=c_int) :: gpu_invert_trm
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
...
...
@@ -399,9 +399,11 @@
int(pcol(n, nblk, np_cols),kind=MPI_KIND), int(mpi_comm_cols,kind=MPI_KIND), mpierr)
call obj%timer%stop("mpi_communication")
#else
tmp1_mpi_dev = transfer(tmp1_dev, tmp1_mpi_dev)
tmp1_mpi_dev = transfer(tmp1_dev, tmp1_mpi_dev)
! and associate a fortran pointer
call c_f_pointer(tmp1_mpi_dev, tmp1_mpi_fortran_ptr, [nblk,nblk])
call c_f_pointer(tmp1_mpi_dev, tmp1_mpi_fortran_ptr, [nblk*nblk])
call device_synchronize()
if (wantDebug) call obj%timer%start("cuda_mpi_communication")
call MPI_Bcast(tmp1_mpi_fortran_ptr, int(nb*(nb+1)/2,kind=MPI_KIND), MPI_MATH_DATATYPE_PRECISION, &
int(pcol(n, nblk, np_cols),kind=MPI_KIND), int(mpi_comm_cols,kind=MPI_KIND), mpierr)
...
...
@@ -497,7 +499,7 @@
tmat1_mpi_dev = transfer(tmat1_dev, tmat1_mpi_dev)
! and associate a fortran pointer
call c_f_pointer(tmat1_mpi_dev, tmat1_mpi_fortran_ptr, [l_rows,nblk])
call device_synchronize()
call obj%timer%start("mpi_cuda_communication")
do i=1,nb
call MPI_Bcast(tmat1_mpi_fortran_ptr(1,i), int(l_row1-1,kind=MPI_KIND), MPI_MATH_DATATYPE_PRECISION, &
...
...
@@ -554,6 +556,7 @@
tmat2_mpi_dev = transfer(tmat2_dev, tmat2_mpi_dev)
call c_f_pointer(tmat2_mpi_dev, tmat2_mpi_fortran_ptr, [nblk,l_cols])
call device_synchronize()
call obj%timer%start("mpi_cuda_communication")
if (l_cols-l_col1+1 > 0) &
call MPI_Bcast(tmat2_mpi_fortran_ptr(1,l_col1), int((l_cols-l_col1+1)*nblk,kind=MPI_KIND), &
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment