elpa / Commit 98e3e884
Authored May 03, 2018 by Pavel Kus

renaming a and q to a_mat and q_mat in elpa2_trans_ev_band_to_full_template

Parent: 830b54aa
Changes: 2 / Pipelines: 1

Showing 2 changed files with 29 additions and 29 deletions:

  src/elpa2/elpa2_trans_ev_band_to_full_template.F90   +28  -28
  src/elpa2/redist_band.F90                             +1   -1
src/elpa2/elpa2_trans_ev_band_to_full_template.F90  (view file @ 98e3e884)
@@ -55,7 +55,7 @@
     &MATH_DATATYPE&
     &_&
     &PRECISION&
-    (obj, na, nqc, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, q, &
+    (obj, na, nqc, nblk, nbw, a_mat, a_dev, lda, tmat, tmat_dev, q_mat, &
     q_dev, ldq, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols, useGPU &
#if REALCASE == 1
     , useQr)
@@ -70,27 +70,27 @@
 !
 ! Parameters
 !
-! na              Order of matrix a, number of rows of matrix q
+! na              Order of matrix a_mat, number of rows of matrix q_mat
 !
-! nqc             Number of columns of matrix q
+! nqc             Number of columns of matrix q_mat
 !
 ! nblk            blocksize of cyclic distribution, must be the same in both directions!
 !
 ! nbw             semi bandwith
 !
-! a(lda,matrixCols)     Matrix containing the Householder vectors (i.e. matrix a after bandred_real/complex)
+! a_mat(lda,matrixCols) Matrix containing the Householder vectors (i.e. matrix a_mat after bandred_real/complex)
 !                 Distribution is like in Scalapack.
 !
-! lda             Leading dimension of a
-! matrixCols      local columns of matrix a and q
+! lda             Leading dimension of a_mat
+! matrixCols      local columns of matrix a_mat and q_mat
 !
 ! tmat(nbw,nbw,numBlocks) Factors returned by bandred_real/complex
 !
-! q               On input: Eigenvectors of band matrix
+! q_mat           On input: Eigenvectors of band matrix
 !                 On output: Transformed eigenvectors
 !                 Distribution is like in Scalapack.
 !
-! ldq             Leading dimension of q
+! ldq             Leading dimension of q_mat
 !
 ! mpi_comm_rows
 ! mpi_comm_cols
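[Editor's note] The parameter list above is essentially the routine's API. Below is a minimal, self-contained Fortran sketch of the array shapes it documents; all numeric values are illustrative, and the instantiated routine name in the comment (trans_ev_band_to_full_<type>_<precision>) is an assumption about how the template expands, not something stated in this commit.

  ! Hedged sketch of the documented array shapes; values are made up for illustration.
  program shapes_sketch
    implicit none
    integer, parameter :: dp = kind(1.0d0)
    integer, parameter :: na = 1000, nqc = 1000, nblk = 32, nbw = 64
    integer, parameter :: lda = 256, ldq = 256, matrixCols = 256, numBlocks = 16
    real(dp), allocatable :: a_mat(:,:), q_mat(:,:), tmat(:,:,:)
    allocate(a_mat(lda, matrixCols))     ! Householder vectors from bandred_real/complex
    allocate(q_mat(ldq, matrixCols))     ! eigenvectors of the band matrix, transformed in place
    allocate(tmat(nbw, nbw, numBlocks))  ! triangular factors from bandred_real/complex
    ! A call such as trans_ev_band_to_full_<type>_<precision>(obj, na, nqc, nblk, nbw, &
    !   a_mat, a_dev, lda, tmat, tmat_dev, q_mat, q_dev, ldq, matrixCols, numBlocks, &
    !   mpi_comm_rows, mpi_comm_cols, useGPU, useQr) would follow here (name assumed).
    deallocate(a_mat, q_mat, tmat)
  end program shapes_sketch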
@@ -110,9 +110,9 @@
 #endif
      integer(kind=ik)              :: na, nqc, lda, ldq, nblk, nbw, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols
 #ifdef USE_ASSUMED_SIZE
-     MATH_DATATYPE(kind=rck)       :: a(lda,*), q(ldq,*), tmat(nbw,nbw,*)
+     MATH_DATATYPE(kind=rck)       :: a_mat(lda,*), q_mat(ldq,*), tmat(nbw,nbw,*)
 #else
-     MATH_DATATYPE(kind=rck)       :: a(lda,matrixCols), q(ldq,matrixCols), tmat(nbw,nbw,numBlocks)
+     MATH_DATATYPE(kind=rck)       :: a_mat(lda,matrixCols), q_mat(ldq,matrixCols), tmat(nbw,nbw,numBlocks)
 #endif
      integer(kind=C_intptr_T)      :: a_dev ! passed from bandred_real at the moment not used since copied in bandred_real
@@ -124,7 +124,7 @@
      MATH_DATATYPE(kind=rck), allocatable :: tmp1(:), tmp2(:), hvb(:), hvm(:,:)

      ! hvm_dev is fist used and set in this routine
-     ! q is changed in trans_ev_tridi on the host, copied to device and passed here. this can be adapted
+     ! q_mat is changed in trans_ev_tridi on the host, copied to device and passed here. this can be adapted
      ! tmp_dev is first used in this routine
      ! tmat_dev is passed along from bandred_real
      integer(kind=C_intptr_T)             :: hvm_dev, q_dev, tmp_dev, tmat_dev
@@ -165,8 +165,8 @@
call
obj
%
timer
%
stop
(
"mpi_communication"
)
max_blocks_row
=
((
na
-1
)/
nblk
)/
np_rows
+
1
! Rows of
A
max_blocks_col
=
((
nqc
-1
)/
nblk
)/
np_cols
+
1
! Columns of q!
max_blocks_row
=
((
na
-1
)/
nblk
)/
np_rows
+
1
! Rows of
a_mat
max_blocks_col
=
((
nqc
-1
)/
nblk
)/
np_cols
+
1
! Columns of q
_mat
!
max_local_rows
=
max_blocks_row
*
nblk
max_local_cols
=
max_blocks_col
*
nblk
...
...
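[Editor's note] A worked example of the bound above, with illustrative numbers not taken from the source: for na = 1000, nblk = 32 and np_rows = 4, Fortran integer division gives max_blocks_row = ((1000-1)/32)/4 + 1 = 31/4 + 1 = 7 + 1 = 8, hence max_local_rows = 8*32 = 256 local rows. The block-cyclic layout deals out ceil(1000/32) = 32 row blocks round-robin over 4 process rows, i.e. at most 8 blocks per process row, so the reserved storage always suffices.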
@@ -256,10 +256,10 @@
 !       stop 1
 !     endif

 !     q_temp(:,:) = 0.0
-!     q_temp(1:ldq,1:na_cols) = q(1:ldq,1:na_cols)
+!     q_temp(1:ldq,1:na_cols) = q_mat(1:ldq,1:na_cols)

 !     ! copy q_dev to device, maybe this can be avoided if q_dev can be kept on device in trans_ev_tridi_to_band
-!     successCUDA = cuda_memcpy(q_dev, loc(q), (ldq)*(matrixCols)*size_of_PRECISION_real, cudaMemcpyHostToDevice)
+!     successCUDA = cuda_memcpy(q_dev, loc(q_mat), (ldq)*(matrixCols)*size_of_PRECISION_real, cudaMemcpyHostToDevice)
 !     if (.not.(successCUDA)) then
 !       print *,"trans_ev_band_to_full_real: error in cudaMalloc"
 !       stop 1
@@ -272,7 +272,7 @@
 !       stop 1
 !     endif
 !
-!     successCUDA = cuda_memcpy(q_dev, loc(q),ldq*matrixCols*size_of_PRECISION_complex, cudaMemcpyHostToDevice)
+!     successCUDA = cuda_memcpy(q_dev, loc(q_mat),ldq*matrixCols*size_of_PRECISION_complex, cudaMemcpyHostToDevice)
 !     if (.not.(successCUDA)) then
 !       print *,"trans_ev_band_to_full_complex: error in cudaMemcpy"
 !       stop 1
@@ -290,7 +290,7 @@
      hvm = 0.0_rck   ! Must be set to 0 !!!
      hvb = 0.0_rck   ! Safety only

-     l_cols = local_index(nqc, my_pcol, np_cols, nblk, -1) ! Local columns of q
+     l_cols = local_index(nqc, my_pcol, np_cols, nblk, -1) ! Local columns of q_mat

      do istep = 1, (na-1)/nbw
@@ -308,7 +308,7 @@
        l_rows = local_index(nrow-1, my_prow, np_rows, nblk, -1) ! row length for bcast
        l_colh = local_index(ncol, my_pcol, np_cols, nblk, -1)   ! HV local column number

-       if (my_pcol == pcol(ncol, nblk, np_cols)) hvb(nb+1:nb+l_rows) = a(1:l_rows, l_colh)
+       if (my_pcol == pcol(ncol, nblk, np_cols)) hvb(nb+1:nb+l_rows) = a_mat(1:l_rows, l_colh)

        nb = nb+l_rows
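[Editor's note] The guard my_pcol == pcol(ncol, nblk, np_cols) selects the process column that owns global column ncol of a_mat in the block-cyclic layout, so only the owner fills its slice of hvb before the broadcast. Below is a self-contained sketch of the owner formula this logic assumes; it is illustrative only, and ELPA's actual pcol helper may differ in naming and integer kinds.

  ! Hedged sketch of a block-cyclic column-owner formula; not ELPA's own helper.
  program pcol_sketch
    implicit none
    integer :: gcol
    ! With nblk = 4 and np_cols = 3: columns 1-4 -> owner 0, 5-8 -> 1, 9-12 -> 2, 13-16 -> 0, ...
    do gcol = 1, 16
      print '(a,i0,a,i0)', 'global column ', gcol, ' -> process column ', owner(gcol, 4, 3)
    end do
  contains
    integer function owner(gcol, nblk, np_cols)
      integer, intent(in) :: gcol, nblk, np_cols
      ! Columns are dealt out in contiguous blocks of nblk, round-robin over the
      ! np_cols process columns; (gcol-1)/nblk is the 0-based block index.
      owner = mod((gcol-1)/nblk, np_cols)
    end function owner
  end program pcol_sketch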
@@ -544,7 +544,7 @@
      hvm = 0.0_rck ! Must be set to 0 !!!
      hvb = 0.0_rck ! Safety only

-     l_cols = local_index(nqc, my_pcol, np_cols, nblk, -1) ! Local columns of q
+     l_cols = local_index(nqc, my_pcol, np_cols, nblk, -1) ! Local columns of q_mat

 !      if ( na >= ((t_blocking+1)*nbw) ) then
@@ -586,7 +586,7 @@
        l_rows = local_index(nrow-1, my_prow, np_rows, nblk, -1) ! row length for bcast
        l_colh = local_index(ncol, my_pcol, np_cols, nblk, -1)   ! HV local column number

-       if (my_pcol == pcol(ncol, nblk, np_cols)) hvb(nb+1:nb+l_rows) = a(1:l_rows, l_colh)
+       if (my_pcol == pcol(ncol, nblk, np_cols)) hvb(nb+1:nb+l_rows) = a_mat(1:l_rows, l_colh)

        nb = nb+l_rows
@@ -681,7 +681,7 @@
            call PRECISION_GEMM(BLAS_TRANS_OR_CONJ, 'N', &
                                n_cols, l_cols, l_rows, ONE, hvm, ubound(hvm,dim=1), &
-                               q, ldq, ZERO, tmp1, n_cols)
+                               q_mat, ldq, ZERO, tmp1, n_cols)
            call obj%timer%stop("blas")
          else ! l_rows>0
@@ -701,14 +701,14 @@
            call PRECISION_TRMM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', &
                                n_cols, l_cols, ONE, tmat_complete, cwy_blocking, tmp2, n_cols)
-           call PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm, ubound(hvm,dim=1), tmp2, n_cols, ONE, q, ldq)
+           call PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm, ubound(hvm,dim=1), tmp2, n_cols, ONE, q_mat, ldq)
 #else /* BAND_TO_FULL_BLOCKING */
            call PRECISION_TRMM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', &
                                n_cols, l_cols, ONE, tmat(1,1,istep), ubound(tmat,dim=1), tmp2, n_cols)
            call PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm, ubound(hvm,dim=1), &
-                               tmp2, n_cols, ONE, q, ldq)
+                               tmp2, n_cols, ONE, q_mat, ldq)
 #endif /* BAND_TO_FULL_BLOCKING */
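[Editor's note] Read together with the GEMM in the hunk above, which forms tmp1 = hvm^T * q_mat (hvm^H * q_mat in the complex case), this TRMM/GEMM pair completes the compact-WY application of one block of Householder reflectors:

  tmp2  = T^T * tmp1           (TRMM with the triangular factor tmat or tmat_complete;
                                in the MPI build tmp2 is presumably tmp1 after a reduction
                                across process rows, which is not visible in this excerpt)
  q_mat = q_mat - hvm * tmp2   (GEMM with alpha = -ONE, beta = ONE)

i.e. q_mat <- (I - V * T^T * V^T) * q_mat with V = hvm, which is why the q_mat argument of the trailing GEMMs is all that this commit has to touch.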
@@ -721,13 +721,13 @@
 #ifdef BAND_TO_FULL_BLOCKING
          call PRECISION_TRMM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', &
                              n_cols, l_cols, ONE, tmat_complete, cwy_blocking, tmp1, n_cols)
-         call PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm, ubound(hvm,dim=1), tmp1, n_cols, ONE, q, ldq)
+         call PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm, ubound(hvm,dim=1), tmp1, n_cols, ONE, q_mat, ldq)
 #else /* BAND_TO_FULL_BLOCKING */
          call PRECISION_TRMM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', &
                              n_cols, l_cols, ONE, tmat(1,1,istep), ubound(tmat,dim=1), tmp1, n_cols)
          call PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm, ubound(hvm,dim=1), &
-                             tmp1, n_cols, ONE, q, ldq)
+                             tmp1, n_cols, ONE, q_mat, ldq)
 #endif /* BAND_TO_FULL_BLOCKING */
        endif
@@ -736,7 +736,7 @@
 !       if (l_rows>0) then
 !         call PRECISION_TRMM('L', 'U', 'T', 'N', n_cols, l_cols, ONE, tmat_complete, cwy_blocking, tmp2, n_cols)
-!         call PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm, ubound(hvm,dim=1), tmp2, n_cols, ONE, q, ldq)
+!         call PRECISION_GEMM('N', 'N', l_rows, l_cols, n_cols, -ONE, hvm, ubound(hvm,dim=1), tmp2, n_cols, ONE, q_mat, ldq)
 !       endif
      enddo ! istep
@@ -777,7 +777,7 @@
endif
! final transfer of q_dev
successCUDA
=
cuda_memcpy
(
loc
(
q
),
q_dev
,
ldq
*
matrixCols
*
size_of_datatype
,
cudaMemcpyDeviceToHost
)
successCUDA
=
cuda_memcpy
(
loc
(
q
_mat
),
q_dev
,
ldq
*
matrixCols
*
size_of_datatype
,
cudaMemcpyDeviceToHost
)
if
(
.not.
(
successCUDA
))
then
print
*
,
"trans_ev_band_to_full_&
...
...
@@ -786,7 +786,7 @@
        stop 1
      endif

-     ! q(1:ldq,1:na_cols) = q_temp(1:ldq,1:na_cols)
+     ! q_mat(1:ldq,1:na_cols) = q_temp(1:ldq,1:na_cols)

      successCUDA = cuda_free(q_dev)
      if (.not.(successCUDA)) then
src/elpa2/redist_band.F90  (view file @ 98e3e884)
@@ -90,7 +90,7 @@ subroutine redist_band_&
   )
   if (useGPU) then
-    ! copy a_dev to aMatrix
+    ! copy a_dev to a_mat
     successCUDA = cuda_memcpy(loc(a_mat), int(a_dev,kind=c_intptr_t), int(lda*matrixCols*size_of_datatype, kind=c_intptr_t), &
                               cudaMemcpyDeviceToHost)
     if (.not.(successCUDA)) then