Commit e8e452cb authored by Andreas Marek
Browse files

Workaround for blocksize problem on GPU

parent f4da35c4
...@@ -59,13 +59,17 @@ ...@@ -59,13 +59,17 @@
real(kind=c_double) :: ttt0, ttt1, ttts ! MPI_WTIME always needs double real(kind=c_double) :: ttt0, ttt1, ttts ! MPI_WTIME always needs double
integer(kind=c_int) :: i integer(kind=c_int) :: i
logical :: success logical :: success, successCUDA
logical, save :: firstCall = .true. logical, save :: firstCall = .true.
logical :: wantDebug logical :: wantDebug
integer(kind=c_int) :: istat integer(kind=c_int) :: istat
character(200) :: errorMessage character(200) :: errorMessage
logical :: do_useGPU logical :: do_useGPU, do_useGPU_trans_ev_tridi
integer(kind=c_int) :: numberOfGPUDevices integer(kind=c_int) :: numberOfGPUDevices
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
call timer%start("solve_evp_& call timer%start("solve_evp_&
&MATH_DATATYPE& &MATH_DATATYPE&
...@@ -93,6 +97,8 @@ ...@@ -93,6 +97,8 @@
success = .true. success = .true.
do_useGPU = .false. do_useGPU = .false.
do_useGPU_trans_ev_tridi =.false.
#if REALCASE == 1 #if REALCASE == 1
useQRActual = .false. useQRActual = .false.
...@@ -124,6 +130,7 @@ ...@@ -124,6 +130,7 @@
if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
do_useGPU = .true. do_useGPU = .true.
! set the necessary parameters ! set the necessary parameters
cudaMemcpyHostToDevice = cuda_memcpyHostToDevice() cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost() cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost()
...@@ -223,6 +230,7 @@ ...@@ -223,6 +230,7 @@
! check consistency between request for GPUs and defined kernel ! check consistency between request for GPUs and defined kernel
if (do_useGPU) then if (do_useGPU) then
do_useGPU_trans_ev_tridi = .true.
if (THIS_ELPA_KERNEL .ne. & if (THIS_ELPA_KERNEL .ne. &
&MATH_DATATYPE& &MATH_DATATYPE&
&_ELPA_KERNEL_GPU) then &_ELPA_KERNEL_GPU) then
...@@ -232,14 +240,27 @@ ...@@ -232,14 +240,27 @@
endif endif
endif endif
! if (do_useGPU) then
! if (nblk .ne. 128) then
! write(error_unit,*) "In case of GPU usage the blocksize for ELPA 2stage has to be 128"
! success = .false.
! return
! endif
! endif
if (do_useGPU) then if (do_useGPU) then
if (nblk .ne. 128) then if (nblk .ne. 128) then
write(error_unit,*) "In case of GPU usage the blocksize for ELPA 2stage has to be 128" ! cannot run on GPU with this blocksize
success = .false. ! disable GPU usage for trans_ev_tridi
return
do_useGPU_trans_ev_tridi = .false.
THIS_ELPA_KERNEL = MATH_DATATYPE&
&_ELPA_KERNEL_GENERIC
! no data transfer to GPU needed
endif endif
endif endif
if(present(bandwidth)) then if(present(bandwidth)) then
nbw = bandwidth nbw = bandwidth
...@@ -407,7 +428,7 @@ ...@@ -407,7 +428,7 @@
&PRECISION & &PRECISION &
(na, nev, nblk, nbw, q, & (na, nev, nblk, nbw, q, &
q_dev, & q_dev, &
ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU, & ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU_trans_ev_tridi, &
success, THIS_ELPA_KERNEL) success, THIS_ELPA_KERNEL)
if (.not.(success)) return if (.not.(success)) return
...@@ -431,6 +452,14 @@ ...@@ -431,6 +452,14 @@
if(present(bandwidth)) then if(present(bandwidth)) then
time_evp_back = ttt1-ttts time_evp_back = ttt1-ttts
else else
if ( (do_useGPU) .and. .not.(do_useGPU_trans_ev_tridi) ) then
! copy to device if we want to continue on GPU
successCUDA = cuda_malloc(q_dev, ldq*matrixCols*size_of_datatype)
successCUDA = cuda_memcpy(q_dev, loc(q), ldq*matrixCols* size_of_datatype, cudaMemcpyHostToDevice)
endif
! Backtransform stage 2 ! Backtransform stage 2
ttt0 = MPI_Wtime() ttt0 = MPI_Wtime()
call trans_ev_band_to_full_& call trans_ev_band_to_full_&
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment