Commit 03ef1ee4 authored by Andreas Marek
Browse files

Remove some unnecessary cuda_memcpy in real case

The same should be done for the complex case
parent 29f31c21
...@@ -182,7 +182,8 @@ contains ...@@ -182,7 +182,8 @@ contains
integer(kind=ik) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr integer(kind=ik) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: nbw, num_blocks integer(kind=ik) :: nbw, num_blocks
real(kind=rk8), allocatable :: tmat(:,:,:), e(:) real(kind=rk8), allocatable :: tmat(:,:,:), e(:)
integer(kind=c_intptr_t) :: tmat_dev, q_dev, a_dev
real(kind=c_double) :: ttt0, ttt1, ttts ! MPI_WTIME always needs double real(kind=c_double) :: ttt0, ttt1, ttts ! MPI_WTIME always needs double
integer(kind=ik) :: i integer(kind=ik) :: i
logical :: success logical :: success
...@@ -322,11 +323,11 @@ contains ...@@ -322,11 +323,11 @@ contains
ttt0 = MPI_Wtime() ttt0 = MPI_Wtime()
ttts = ttt0 ttts = ttt0
#ifdef DOUBLE_PRECISION_REAL #ifdef DOUBLE_PRECISION_REAL
call bandred_real_double(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, & call bandred_real_double(na, a, a_dev, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
tmat, wantDebug, useGPU, success, useQRActual) tmat, tmat_dev, wantDebug, useGPU, success, useQRActual)
#else #else
call bandred_real_single(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, & call bandred_real_single(na, a, a_dev, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
tmat, wantDebug, useGPU, success, useQRActual) tmat, tmat_dev, wantDebug, useGPU, success, useQRActual)
#endif #endif
if (.not.(success)) return if (.not.(success)) return
ttt1 = MPI_Wtime() ttt1 = MPI_Wtime()
...@@ -395,11 +396,11 @@ contains ...@@ -395,11 +396,11 @@ contains
ttt0 = MPI_Wtime() ttt0 = MPI_Wtime()
#ifdef DOUBLE_PRECISION_REAL #ifdef DOUBLE_PRECISION_REAL
call trans_ev_tridi_to_band_real_double(na, nev, nblk, nbw, q, ldq, matrixCols, hh_trans_real, & call trans_ev_tridi_to_band_real_double(na, nev, nblk, nbw, q, q_dev, ldq, matrixCols, hh_trans_real, &
mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, & mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, &
THIS_REAL_ELPA_KERNEL) THIS_REAL_ELPA_KERNEL)
#else #else
call trans_ev_tridi_to_band_real_single(na, nev, nblk, nbw, q, ldq, matrixCols, hh_trans_real, & call trans_ev_tridi_to_band_real_single(na, nev, nblk, nbw, q, q_dev, ldq, matrixCols, hh_trans_real, &
mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, & mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, &
THIS_REAL_ELPA_KERNEL) THIS_REAL_ELPA_KERNEL)
#endif #endif
...@@ -421,10 +422,10 @@ contains ...@@ -421,10 +422,10 @@ contains
print *,"useGPU== ",useGPU print *,"useGPU== ",useGPU
ttt0 = MPI_Wtime() ttt0 = MPI_Wtime()
#ifdef DOUBLE_PRECISION_REAL #ifdef DOUBLE_PRECISION_REAL
call trans_ev_band_to_full_real_double(na, nev, nblk, nbw, a, lda, tmat, q, ldq, matrixCols, num_blocks, mpi_comm_rows, & call trans_ev_band_to_full_real_double(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, num_blocks, mpi_comm_rows, &
mpi_comm_cols, useGPU, useQRActual) mpi_comm_cols, useGPU, useQRActual)
#else #else
call trans_ev_band_to_full_real_single(na, nev, nblk, nbw, a, lda, tmat, q, ldq, matrixCols, num_blocks, mpi_comm_rows, & call trans_ev_band_to_full_real_single(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, num_blocks, mpi_comm_rows, &
mpi_comm_cols, useGPU, useQRActual) mpi_comm_cols, useGPU, useQRActual)
#endif #endif
...@@ -533,7 +534,8 @@ contains ...@@ -533,7 +534,8 @@ contains
integer(kind=ik) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr integer(kind=ik) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: nbw, num_blocks integer(kind=ik) :: nbw, num_blocks
real(kind=rk4), allocatable :: tmat(:,:,:), e(:) real(kind=rk4), allocatable :: tmat(:,:,:), e(:)
integer(kind=c_intptr_t) :: tmat_dev, q_dev, a_dev
real(kind=c_double) :: ttt0, ttt1, ttts ! MPI_WTIME always needs double real(kind=c_double) :: ttt0, ttt1, ttts ! MPI_WTIME always needs double
integer(kind=ik) :: i integer(kind=ik) :: i
logical :: success logical :: success
...@@ -672,11 +674,11 @@ contains ...@@ -672,11 +674,11 @@ contains
ttt0 = MPI_Wtime() ttt0 = MPI_Wtime()
ttts = ttt0 ttts = ttt0
#ifdef DOUBLE_PRECISION_REAL #ifdef DOUBLE_PRECISION_REAL
call bandred_real_double(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, & call bandred_real_double(na, a, a_dev, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
tmat, wantDebug, useGPU, success, useQRActual) tmat, tmat_dev, wantDebug, useGPU, success, useQRActual)
#else #else
call bandred_real_single(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, & call bandred_real_single(na, a, a_dev, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
tmat, wantDebug, useGPU, success, useQRActual) tmat, tmat_dev, wantDebug, useGPU, success, useQRActual)
#endif #endif
if (.not.(success)) return if (.not.(success)) return
ttt1 = MPI_Wtime() ttt1 = MPI_Wtime()
...@@ -745,11 +747,11 @@ contains ...@@ -745,11 +747,11 @@ contains
ttt0 = MPI_Wtime() ttt0 = MPI_Wtime()
#ifdef DOUBLE_PRECISION_REAL #ifdef DOUBLE_PRECISION_REAL
call trans_ev_tridi_to_band_real_double(na, nev, nblk, nbw, q, ldq, matrixCols, hh_trans_real, & call trans_ev_tridi_to_band_real_double(na, nev, nblk, nbw, q, q_dev, ldq, matrixCols, hh_trans_real, &
mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, & mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, &
THIS_REAL_ELPA_KERNEL) THIS_REAL_ELPA_KERNEL)
#else #else
call trans_ev_tridi_to_band_real_single(na, nev, nblk, nbw, q, ldq, matrixCols, hh_trans_real, & call trans_ev_tridi_to_band_real_single(na, nev, nblk, nbw, q, q_dev, ldq, matrixCols, hh_trans_real, &
mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, & mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, &
THIS_REAL_ELPA_KERNEL) THIS_REAL_ELPA_KERNEL)
#endif #endif
...@@ -771,10 +773,10 @@ contains ...@@ -771,10 +773,10 @@ contains
print *,"useGPU== ",useGPU print *,"useGPU== ",useGPU
ttt0 = MPI_Wtime() ttt0 = MPI_Wtime()
#ifdef DOUBLE_PRECISION_REAL #ifdef DOUBLE_PRECISION_REAL
call trans_ev_band_to_full_real_double(na, nev, nblk, nbw, a, lda, tmat, q, ldq, matrixCols, num_blocks, mpi_comm_rows, & call trans_ev_band_to_full_real_double(na, nev, nblk, nbw, a, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, num_blocks, mpi_comm_rows, &
mpi_comm_cols, useGPU, useQRActual) mpi_comm_cols, useGPU, useQRActual)
#else #else
call trans_ev_band_to_full_real_single(na, nev, nblk, nbw, a, lda, tmat, q, ldq, matrixCols, num_blocks, mpi_comm_rows, & call trans_ev_band_to_full_real_single(na, nev, nblk, nbw, a, lda, tmat, tmat_dev, q, q_dev, ldq, matrixCols, num_blocks, mpi_comm_rows, &
mpi_comm_cols, useGPU, useQRActual) mpi_comm_cols, useGPU, useQRActual)
#endif #endif
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment