Commit d5ccaf8a authored by Pavel Kus

individual routines of elpa 1 can be run on GPU or not

parent 50de1abc
......@@ -81,8 +81,15 @@ subroutine solve_tridi_&
integer(kind=ik) :: istat
character(200) :: errorMessage
character(20) :: gpuString
call obj%timer%start("solve_tridi" // PRECISION_SUFFIX)
if(useGPU) then
gpuString = "_gpu"
else
gpuString = ""
endif
call obj%timer%start("solve_tridi" // PRECISION_SUFFIX // gpuString)
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
......@@ -232,7 +239,7 @@ subroutine solve_tridi_&
stop 1
endif
call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX // gpuString)
return
contains
......
......@@ -96,7 +96,8 @@ function elpa_solve_evp_&
logical :: useGPU
logical :: success
logical :: do_useGPU
logical :: do_useGPU, do_useGPU_tridiag, &
do_useGPU_solve_tridi, do_useGPU_trans_ev
integer(kind=ik) :: numberOfGPUDevices
integer(kind=c_int) :: my_pe, n_pes, my_prow, my_pcol, mpierr
......@@ -174,7 +175,7 @@ function elpa_solve_evp_&
call obj%get("gpu",gpu,error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
print *,"Problem getting option. Aborting..."
stop
endif
if (gpu .eq. 1) then
......@@ -224,6 +225,39 @@ function elpa_solve_evp_&
endif
endif
do_useGPU_tridiag = do_useGPU
do_useGPU_solve_tridi = do_useGPU
do_useGPU_trans_ev = do_useGPU
! Only if we want (and are able) to use the GPU in general do we look at the
! requirements for the individual routines. By default they are all set to 1, so
! unless specified otherwise by the user, the GPU versions of all individual
! routines will be used.
if(do_useGPU) then
call obj%get("gpu_tridiag", gpu, error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
do_useGPU_tridiag = (gpu == 1)
call obj%get("gpu_solve_tridi", gpu, error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
do_useGPU_solve_tridi = (gpu == 1)
call obj%get("gpu_trans_ev", gpu, error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
do_useGPU_trans_ev = (gpu == 1)
endif
! For ELPA1 the convenient property is that the individual phases of the
! algorithm do not share any data on the GPU.
! allocate a dummy q_intern if eigenvectors should not be computed and thus q is NOT present
if (.not.(obj%eigenvalues_only)) then
q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
......@@ -275,7 +309,7 @@ function elpa_solve_evp_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU, wantDebug)
& (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU_tridiag, wantDebug)
call obj%timer%stop("forward")
endif !do_bandred
......@@ -290,7 +324,7 @@ function elpa_solve_evp_&
#if COMPLEXCASE == 1
q_real, l_rows, &
#endif
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU, wantDebug, success)
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, success)
call obj%timer%stop("solve")
if (.not.(success)) return
endif !do_solve
......@@ -330,7 +364,7 @@ function elpa_solve_evp_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
& (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
call obj%timer%stop("back")
endif ! do_trans_ev
......
......@@ -155,6 +155,7 @@
#endif
integer(kind=ik) :: istat
character(200) :: errorMessage
character(20) :: gpuString
integer(kind=C_intptr_T) :: q_dev, tmp_dev, hvm_dev, tmat_dev
logical :: successCUDA
......@@ -162,12 +163,17 @@
&PRECISION&
&_&
&MATH_DATATYPE
if(useGPU) then
gpuString = "_gpu"
else
gpuString = ""
endif
call obj%timer%start("trans_ev_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX &
)
&PRECISION_SUFFIX //&
gpuString)
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
......@@ -538,8 +544,8 @@
call obj%timer%stop("trans_ev_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX&
)
&PRECISION_SUFFIX // &
gpuString )
end subroutine trans_ev_&
&MATH_DATATYPE&
......
......@@ -203,15 +203,22 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
#endif
integer(kind=ik) :: istat
character(200) :: errorMessage
character(20) :: gpuString
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
if(useGPU) then
gpuString = "_gpu"
else
gpuString = ""
endif
call obj%timer%start("tridiag_&
&MATH_DATATYPE&
&" // &
PRECISION_SUFFIX &
)
PRECISION_SUFFIX // &
gpuString )
if (wantDebug) call obj%timer%start("mpi_communication")
......@@ -1048,8 +1055,9 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
call obj%timer%stop("tridiag_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX &
)
PRECISION_SUFFIX // &
gpuString )
end subroutine tridiag_&
&MATH_DATATYPE&
&_&
......
......@@ -144,6 +144,14 @@ static const elpa_index_int_entry_t int_entries[] = {
number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name),
INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
cardinality_bool, enumerate_identity, gpu_is_valid, NULL),
// The default GPU usage for the individual phases is 1. However, it is only evaluated if the GPU is used at all, which first has to be
// determined by the parameter "gpu" and the presence of a device.
INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
cardinality_bool, enumerate_identity, NULL, NULL),
INT_ENTRY("gpu_solve_tridi", "Use GPU acceleration for ELPA solve tridi", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
cardinality_bool, enumerate_identity, NULL, NULL),
INT_ENTRY("gpu_trans_ev", "Use GPU acceleration for ELPA1 trans ev", 1, ELPA_AUTOTUNE_NOT_TUNABLE, ELPA_AUTOTUNE_DOMAIN_ANY,
cardinality_bool, enumerate_identity, NULL, NULL),
INT_ENTRY("real_kernel", "Real kernel to use if 'solver' is set to ELPA_SOLVER_2STAGE", ELPA_2STAGE_REAL_DEFAULT, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_REAL, \
number_of_real_kernels, real_kernel_enumerate, \
real_kernel_is_valid, real_kernel_name),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment