Commit 6b96dfde authored by Andreas Marek

Fix compilation

parent c3b2f590
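
Every call site in this commit follows the same pattern: the hand-rolled device_synchronize() wrapper is replaced by ELPA's gpu_devicesynchronize() abstraction, whose return status is checked before a CUDA-aware MPI_Bcast is issued on a device buffer. Below is a minimal self-contained sketch of that synchronize-before-broadcast pattern (an assumption-laden illustration: it requires a CUDA-aware MPI build, and the CUDA runtime bindings are declared inline here rather than through ELPA's elpa_gpu module):

program sync_before_bcast
  ! Sketch only: mirrors the diff's pattern of synchronizing the device
  ! before handing a device buffer to a CUDA-aware MPI_Bcast.
  use iso_c_binding
  use mpi
  implicit none

  interface
    ! CUDA runtime entry points, bound directly for this sketch
    integer(c_int) function cudaDeviceSynchronize() bind(C, name="cudaDeviceSynchronize")
      import :: c_int
    end function
    integer(c_int) function cudaMalloc(devPtr, bytes) bind(C, name="cudaMalloc")
      import :: c_ptr, c_size_t, c_int
      type(c_ptr), intent(out)  :: devPtr
      integer(c_size_t), value  :: bytes
    end function
  end interface

  integer, parameter :: nblk = 16
  type(c_ptr) :: tmp1_dev
  real(c_double), pointer :: tmp1_mpi_fortran_ptr(:,:)
  integer :: mpierr, istat

  call MPI_Init(mpierr)

  ! Device allocation; 8 bytes per double-precision element
  istat = cudaMalloc(tmp1_dev, int(nblk*nblk*8, c_size_t))
  if (istat /= 0) error stop "cudaMalloc failed"

  ! Associate a Fortran pointer with the device address so the
  ! CUDA-aware MPI_Bcast can be handed the device buffer directly
  ! (the pointer must never be dereferenced on the host)
  call c_f_pointer(tmp1_dev, tmp1_mpi_fortran_ptr, [nblk, nblk])

  ! Ensure all kernels writing tmp1_dev have finished before MPI reads it
  istat = cudaDeviceSynchronize()
  if (istat /= 0) error stop "cudaDeviceSynchronize failed"

  call MPI_Bcast(tmp1_mpi_fortran_ptr, nblk*nblk, MPI_DOUBLE_PRECISION, 0, &
                 MPI_COMM_WORLD, mpierr)

  call MPI_Finalize(mpierr)
end program
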
@@ -54,8 +54,7 @@
   use elpa_gpu
   use mod_check_for_gpu
   use invert_trm_cuda, only : copy_PRECISION_tmp1_tmp2, &
-                              copy_PRECISION_a_tmp1, &
-                              device_synchronize
+                              copy_PRECISION_a_tmp1
   use cholesky_cuda
   implicit none
 #include "../general/precision_kinds.F90"
@@ -499,7 +498,10 @@
       tmp1_mpi_dev = transfer(tmp1_dev, tmp1_mpi_dev)
       ! and associate a fortran pointer
       call c_f_pointer(tmp1_mpi_dev, tmp1_mpi_fortran_ptr, [nblk,nblk])
-      call device_synchronize()
+      if (wantDebug) call obj%timer%start("cuda_aware_device_synchronize")
+      successGPU = gpu_devicesynchronize()
+      check_memcpy_gpu("cholesky: device_synchronize", successGPU)
+      if (wantDebug) call obj%timer%stop("cuda_aware_device_synchronize")
       call obj%timer%start("mpi_cuda_communication")
       call MPI_Bcast(tmp1_mpi_fortran_ptr, int(nblk*(nblk+1)/2,kind=MPI_KIND), &
@@ -603,7 +605,10 @@
       ! and associate a fortran pointer
       call c_f_pointer(tmatc_mpi_dev, tmatc_mpi_fortran_ptr, [l_cols,nblk])
-      call device_synchronize()
+      if (wantDebug) call obj%timer%start("cuda_aware_device_synchronize")
+      successGPU = gpu_devicesynchronize()
+      check_memcpy_gpu("cholesky: device_synchronize", successGPU)
+      if (wantDebug) call obj%timer%stop("cuda_aware_device_synchronize")
       do i=1,nblk
         call obj%timer%start("mpi_cuda_communication")
@@ -615,6 +615,3 @@ extern "C" void copy_float_complex_a_tmp1_FromC(float _Complex *a_dev, float _Co
   }
 }
-extern "C" void device_synchronizeFromC() {
-  cudaDeviceSynchronize();
-}
@@ -402,7 +402,12 @@
       tmp1_mpi_dev = transfer(tmp1_dev, tmp1_mpi_dev)
       ! and associate a fortran pointer
       call c_f_pointer(tmp1_mpi_dev, tmp1_mpi_fortran_ptr, [nblk*nblk])
-      call device_synchronize()
+      if (wantDebug) call obj%timer%start("cuda_aware_device_synchronize")
+      successGPU = gpu_devicesynchronize()
+      check_memcpy_gpu("invert_trm: device_synchronize", successGPU)
+      if (wantDebug) call obj%timer%stop("cuda_aware_device_synchronize")
       if (wantDebug) call obj%timer%start("cuda_mpi_communication")
       call MPI_Bcast(tmp1_mpi_fortran_ptr, int(nb*(nb+1)/2,kind=MPI_KIND), MPI_MATH_DATATYPE_PRECISION, &
@@ -439,7 +444,6 @@
         call gpublas_PRECISION_TRMM('L', 'U', 'N', 'N', nb, l_cols-l_colx+1, ONE, tmp2_dev, &
                                     nblk, a_dev+a_off, matrixRows)
-        !successGPU = gpu_devicesynchronize()
       endif
       call obj%timer%stop("gpublas")
@@ -499,7 +503,10 @@
       tmat1_mpi_dev = transfer(tmat1_dev, tmat1_mpi_dev)
       ! and associate a fortran pointer
       call c_f_pointer(tmat1_mpi_dev, tmat1_mpi_fortran_ptr, [l_rows,nblk])
-      call device_synchronize()
+      if (wantDebug) call obj%timer%start("cuda_aware_device_synchronize")
+      successGPU = gpu_devicesynchronize()
+      check_memcpy_gpu("invert_trm: device_synchronize", successGPU)
+      if (wantDebug) call obj%timer%stop("cuda_aware_device_synchronize")
       call obj%timer%start("mpi_cuda_communication")
       do i=1,nb
         call MPI_Bcast(tmat1_mpi_fortran_ptr(1,i), int(l_row1-1,kind=MPI_KIND), MPI_MATH_DATATYPE_PRECISION, &
@@ -556,7 +563,10 @@
       tmat2_mpi_dev = transfer(tmat2_dev, tmat2_mpi_dev)
       call c_f_pointer(tmat2_mpi_dev, tmat2_mpi_fortran_ptr, [nblk,l_cols])
-      call device_synchronize()
+      if (wantDebug) call obj%timer%start("cuda_aware_device_synchronize")
+      successGPU = gpu_devicesynchronize()
+      check_memcpy_gpu("invert_trm: device_synchronize", successGPU)
+      if (wantDebug) call obj%timer%stop("cuda_aware_device_synchronize")
       call obj%timer%start("mpi_cuda_communication")
       if (l_cols-l_col1+1 > 0) &
         call MPI_Bcast(tmat2_mpi_fortran_ptr(1,l_col1), int((l_cols-l_col1+1)*nblk,kind=MPI_KIND), &
@@ -50,12 +50,6 @@ module invert_trm_cuda
   implicit none
   public
   interface
-    subroutine device_synchronize_c() &
-               bind(C, name="device_synchronizeFromC")
-    end subroutine
-  end interface
-
-  interface
     subroutine copy_double_a_tmat2_c(a_dev, tmat2_dev, nblk, matrixRows, l_cols, l_colx, l_row1, nb)&
                bind(C, name="copy_double_a_tmat2_FromC")
@@ -498,9 +492,5 @@ module invert_trm_cuda
 #endif
   end subroutine
-
-  subroutine device_synchronize()
-    call device_synchronize_c()
-  end subroutine device_synchronize
 end module
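
Note a behavioral difference beyond fixing compilation: the removed device_synchronizeFromC discarded the return code of cudaDeviceSynchronize(), whereas the gpu_devicesynchronize() call sites above feed successGPU into check_memcpy_gpu. A minimal sketch of a wrapper in that error-reporting style (gpu_sync_sketch and gpu_devicesynchronize_sketch are illustrative names, not ELPA's):

module gpu_sync_sketch
  use iso_c_binding
  implicit none
  interface
    ! cudaDeviceSynchronize returns a cudaError_t status (0 == cudaSuccess)
    integer(c_int) function cudaDeviceSynchronize() bind(C, name="cudaDeviceSynchronize")
      import :: c_int
    end function
  end interface
contains
  ! Returns .true. on success so callers can act on failures,
  ! instead of dropping the status as device_synchronizeFromC did
  function gpu_devicesynchronize_sketch() result(success)
    logical :: success
    success = (cudaDeviceSynchronize() == 0_c_int)
  end function
end module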