Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
11
Issues
11
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
c21d968a
Commit
c21d968a
authored
Feb 06, 2017
by
Andreas Marek
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Keep aMatrix on device until redist_band
parent
f64b038b
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
75 additions
and
38 deletions
+75
-38
src/elpa2_bandred_template.X90
src/elpa2_bandred_template.X90
+31
-31
src/elpa2_template.X90
src/elpa2_template.X90
+1
-1
src/elpa2_tridiag_band_template.X90
src/elpa2_tridiag_band_template.X90
+4
-3
src/redist_band.X90
src/redist_band.X90
+39
-3
No files found.
src/elpa2_bandred_template.X90
View file @
c21d968a
...
...
@@ -1937,37 +1937,37 @@
enddo ! istep
if (useGPU) then
! this is not needed since a_dev is passed along from one subroutine to the other
successCUDA = cuda_memcpy ( &
#if REALCASE == 1
loc(a), &
#endif
#if COMPLEXCASE == 1
loc(a(1,1)), &
#endif
a_dev, lda*na_cols* &
#if REALCASE == 1
size_of_PRECISION_real, &
#endif
#if COMPLEXCASE ==1
size_of_PRECISION_complex,&
#endif
cudaMemcpyDeviceToHost)
if (.not.(successCUDA)) then
print *,"bandred_&
&MATH_DATATYPE&
&: error in cudaMemcpy"
stop
endif
successCUDA = cuda_free(a_dev)
if (.not.(successCUDA)) then
print *,"bandred_&
&MATH_DATATYPE&
&: error in cudaFree"
stop
endif
!
! this is not needed since a_dev is passed along from one subroutine to the other
!
!
successCUDA = cuda_memcpy ( &
!
#if REALCASE == 1
!
loc(a), &
!
#endif
!
#if COMPLEXCASE == 1
!
loc(a(1,1)), &
!
#endif
!
a_dev, lda*na_cols* &
!
#if REALCASE == 1
!
size_of_PRECISION_real, &
!
#endif
!
#if COMPLEXCASE ==1
!
size_of_PRECISION_complex,&
!
#endif
!
cudaMemcpyDeviceToHost)
!
if (.not.(successCUDA)) then
!
print *,"bandred_&
!
&MATH_DATATYPE&
!
&: error in cudaMemcpy"
!
stop
!
endif
!
!
successCUDA = cuda_free(a_dev)
!
if (.not.(successCUDA)) then
!
print *,"bandred_&
!
&MATH_DATATYPE&
!
&: error in cudaFree"
!
stop
!
endif
!#ifdef WITH_MPI
!! it should be possible to keep tmat dev on the device and not copy it around
...
...
src/elpa2_template.X90
View file @
c21d968a
...
...
@@ -323,7 +323,7 @@
&MATH_DATATYPE&
&_&
&PRECISION&
(na, nbw, nblk, a, a_dev, lda, ev, e, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, mpi_comm_all)
(na, nbw, nblk, a, a_dev, lda, ev, e, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, mpi_comm_all
, useGPU
)
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
...
...
src/elpa2_tridiag_band_template.X90
View file @
c21d968a
...
...
@@ -63,7 +63,7 @@
#if COMPLEXCASE == 1
hh_trans_complex, &
#endif
mpi_comm_rows, mpi_comm_cols, mpi_comm)
mpi_comm_rows, mpi_comm_cols, mpi_comm
, useGPU
)
!-------------------------------------------------------------------------------
! tridiag_band_real/complex:
! Reduces a real symmetric band matrix to tridiagonal form
...
...
@@ -101,7 +101,8 @@
use iso_c_binding
implicit none
integer(kind=ik), intent(in) :: na, nb, nblk, lda, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm
logical, intent(in) :: useGPU
integer(kind=ik), intent(in) :: na, nb, nblk, lda, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
real(kind=REAL_DATATYPE), intent(in) :: aMatrix(lda,*)
...
...
@@ -269,7 +270,7 @@
&MATH_DATATYPE&
&_&
&PRECISION&
&(aMatrix, a_dev, lda, na, nblk, nb, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm, ab)
&(aMatrix, a_dev, lda, na, nblk, nb, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm, ab
, useGPU
)
! Calculate the workload for each sweep in the back transformation
! and the space requirements to hold the HH vectors
...
...
src/redist_band.X90
View file @
c21d968a
...
...
@@ -60,12 +60,12 @@ subroutine redist_band_&
#endif
a_dev, lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm, &
#if REALCASE == 1
r_ab
)
r_ab
, &
#endif
#if COMPLEXCASE == 1
c_ab
)
c_ab
, &
#endif
useGPU)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
...
...
@@ -74,8 +74,10 @@ subroutine redist_band_&
use elpa2_workload
use precision
use iso_c_binding
use cuda_functions
implicit none
logical, intent(in) :: useGPU
integer(kind=ik), intent(in) :: lda, na, nblk, nbw, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(in) :: r_a(lda, matrixCols)
...
...
@@ -105,12 +107,46 @@ subroutine redist_band_&
nfact, np, npr, npc, mpierr, is, js
integer(kind=ik) :: nblocks_total, il, jl, l_rows, l_cols, n_off
logical :: successCUDA
call timer%start("redist_band_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX &
)
if (useGPU) then
! copy a_dev to aMatrix
successCUDA = cuda_memcpy ( &
#if REALCASE == 1
loc(r_a), &
#endif
#if COMPLEXCASE == 1
loc(c_a(1,1)), &
#endif
a_dev, lda*matrixCols* &
#if REALCASE == 1
#ifdef DOUBLE_PRECISION_REAL
size_of_double_real_datatype, &
#else
size_of_single_real_datatype, &
#endif
#endif
#if COMPLEXCASE ==1
#ifdef DOUBLE_PRECISION_COMPLEX
size_of_double_complex_datatype,&
#else
size_of_single_complex_datatype,&
#endif
#endif
cudaMemcpyDeviceToHost)
if (.not.(successCUDA)) then
print *,"redist_band_&
&MATH_DATATYPE&
&: error in cudaMemcpy"
stop
endif
endif ! useGPU
call timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm,my_pe,mpierr)
call mpi_comm_size(mpi_comm,n_pes,mpierr)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment