Commit c52e3557 authored by Andreas Marek
Browse files

Pass q_dev to _band_to_full

parent 98fb8a3e
......@@ -405,9 +405,7 @@
&_&
&PRECISION &
(na, nev, nblk, nbw, q, &
#if REALCASE == 1
q_dev, &
#endif
ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU, &
success, THIS_ELPA_KERNEL)
......@@ -440,9 +438,7 @@
&PRECISION &
(na, nev, nblk, nbw, a, &
a_dev, lda, tmat, tmat_dev, q, &
#if REALCASE == 1
q_dev, &
#endif
ldq, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, do_useGPU &
#if REALCASE == 1
, useQRActual &
......
......@@ -55,9 +55,7 @@
&_&
&PRECISION &
(na, nqc, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, &
#if REALCASE == 1
q_dev, &
#endif
ldq, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols, useGPU &
#if REALCASE == 1
,useQr)
......@@ -313,17 +311,17 @@
! endif
#endif
#if COMPLEXCASE == 1
successCUDA = cuda_malloc(q_dev, ldq*matrixCols*size_of_PRECISION_complex)
if (.not.(successCUDA)) then
print *,"trans_ev_band_to_full_complex: error in cudaMalloc"
stop
endif
successCUDA = cuda_memcpy(q_dev, loc(q),ldq*matrixCols*size_of_PRECISION_complex, cudaMemcpyHostToDevice)
if (.not.(successCUDA)) then
print *,"trans_ev_band_to_full_complex: error in cudaMemcpy"
stop
endif
! successCUDA = cuda_malloc(q_dev, ldq*matrixCols*size_of_PRECISION_complex)
! if (.not.(successCUDA)) then
! print *,"trans_ev_band_to_full_complex: error in cudaMalloc"
! stop
! endif
!
! successCUDA = cuda_memcpy(q_dev, loc(q),ldq*matrixCols*size_of_PRECISION_complex, cudaMemcpyHostToDevice)
! if (.not.(successCUDA)) then
! print *,"trans_ev_band_to_full_complex: error in cudaMemcpy"
! stop
! endif
#endif
! if MPI is NOT used the following steps could be done on the GPU and memory transfers could be avoided
......
......@@ -3,9 +3,7 @@
&_&
&PRECISION &
(na, nev, nblk, nbw, q, &
#if REALCASE == 1
q_dev, &
#endif
ldq, matrixCols, &
#if REALCASE == 1
hh_trans_real, &
......@@ -87,8 +85,9 @@
#endif
real(kind=REAL_DATATYPE), intent(in) :: hh_trans_real(:,:)
integer(kind=c_intptr_t) :: q_dev
#endif
integer(kind=c_intptr_t) :: q_dev
#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: q(ldq,*)
......@@ -2657,21 +2656,38 @@
#endif /* WITH_MPI */
#if REALCASE == 1
!#if REALCASE == 1
! copy q to q_dev needed in trans_ev_band_to_full
successCUDA = cuda_malloc(q_dev, ldq*matrixCols*size_of_PRECISION_real)
successCUDA = cuda_malloc(q_dev, ldq*matrixCols* &
#if REALCASE == 1
size_of_PRECISION_real)
#endif
#if COMPLEXCASE == 1
size_of_PRECISION_complex)
#endif
if (.not.(successCUDA)) then
print *,"trans_ev_tridi_to_band_real: error in cudaMalloc"
print *,"trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&: error in cudaMalloc"
stop
endif
! copy q_dev to device, maybe this can be avoided if q_dev can be kept on device in trans_ev_tridi_to_band
successCUDA = cuda_memcpy(q_dev, loc(q), (ldq)*(matrixCols)*size_of_PRECISION_real, cudaMemcpyHostToDevice)
successCUDA = cuda_memcpy(q_dev, loc(q), (ldq)*(matrixCols)* &
#if REALCASE == 1
size_of_PRECISION_real, &
#endif
#if COMPLEXCASE == 1
size_of_PRECISION_complex, &
#endif
cudaMemcpyHostToDevice)
if (.not.(successCUDA)) then
print *,"trans_ev_tridi_to_band_real: error in cudaMalloc"
print *,"trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&: error in cudaMemcpy"
stop
endif
#endif
!#endif
! deallocate all working space
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment