Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
fffcad08
Commit
fffcad08
authored
Sep 22, 2017
by
Andreas Marek
Browse files
Merge branch 'master_pre_stage' of
https://gitlab.mpcdf.mpg.de/elpa/elpa
into master_pre_stage
parents
a490a3f4
ec8bc696
Changes
12
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
src/elpa2/elpa2_bandred_template.F90
View file @
fffcad08
...
...
@@ -111,42 +111,19 @@
use
precision
use
elpa_abstract_impl
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
)
::
na
,
lda
,
nblk
,
nbw
,
matrixCols
,
numBlocks
,
mpi_comm_rows
,
mpi_comm_cols
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
real
(
kind
=
REAL_DATATYPE
)
::
a
(
lda
,
*
),
tmat
(
nbw
,
nbw
,
*
)
#else
real
(
kind
=
REAL_DATATYPE
)
::
a
(
lda
,
matrixCols
),
tmat
(
nbw
,
nbw
,
numBlocks
)
#endif
#endif
#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
complex
(
kind
=
COMPLEX_DATATYPE
)
::
a
(
lda
,
*
),
tmat
(
nbw
,
nbw
,
*
)
MATH_DATATYPE
(
kind
=
rck
)
::
a
(
lda
,
*
),
tmat
(
nbw
,
nbw
,
*
)
#else
complex
(
kind
=
COMPLEX_DATATYPE
)
::
a
(
lda
,
matrixCols
),
tmat
(
nbw
,
nbw
,
numBlocks
)
MATH_DATATYPE
(
kind
=
rck
)
::
a
(
lda
,
matrixCols
),
tmat
(
nbw
,
nbw
,
numBlocks
)
#endif
#endif /* COMPLEXCASE */
#if REALCASE == 1
#ifdef DOUBLE_PRECISION_REAL
real
(
kind
=
REAL_DATATYPE
),
parameter
::
ZERO
=
0.0_rk8
,
ONE
=
1.0_rk8
#else
real
(
kind
=
REAL_DATATYPE
),
parameter
::
ZERO
=
0.0_rk4
,
ONE
=
1.0_rk4
#endif
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
complex
(
kind
=
COMPLEX_DATATYPE
),
parameter
::
ZERO
=
(
0.0_rk8
,
0.0_rk8
),
ONE
=
(
1.0_rk8
,
0.0_rk8
)
#else
complex
(
kind
=
COMPLEX_DATATYPE
),
parameter
::
ZERO
=
(
0.0_rk4
,
0.0_rk4
),
ONE
=
(
1.0_rk4
,
0.0_rk4
)
real
(
kind
=
rk
)
::
eps
#endif
#endif /* COMPLEXCASE == 1 */
logical
,
intent
(
in
)
::
useGPU
integer
(
kind
=
ik
)
::
my_prow
,
my_pcol
,
np_rows
,
np_cols
,
mpierr
...
...
@@ -161,32 +138,19 @@
integer
(
kind
=
ik
)
::
istep
,
ncol
,
lch
,
lcx
,
nlc
integer
(
kind
=
ik
)
::
tile_size
,
l_rows_tile
,
l_cols_tile
real
(
kind
=
REAL_DATATYPE
)
::
vnorm2
#if REALCASE == 1
real
(
kind
=
REAL_DATATYPE
)
::
xf
,
aux1
(
nbw
),
aux2
(
nbw
),
vrl
,
tau
,
vav
(
nbw
,
nbw
)
#endif
#if COMPLEXCASE == 1
complex
(
kind
=
COMPLEX_DATATYPE
)
::
xf
,
aux1
(
nbw
),
aux2
(
nbw
),
vrl
,
tau
,
vav
(
nbw
,
nbw
)
#endif
real
(
kind
=
rk
)
::
vnorm2
MATH_DATATYPE
(
kind
=
rck
)
::
xf
,
aux1
(
nbw
),
aux2
(
nbw
),
vrl
,
tau
,
vav
(
nbw
,
nbw
)
#if COMPLEXCASE == 1
! complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCUDA(:,:), vmrCUDA(:,:), umcCUDA(:,:) ! note the different dimension in real case
complex
(
kind
=
COMPLEX_DATATYPE
),
allocatable
::
tmpCUDA
(:),
vmrCUDA
(:),
umcCUDA
(:)
complex
(
kind
=
COMPLEX_DATATYPE
),
allocatable
::
tmpCPU
(:,:),
vmrCPU
(:,:),
umcCPU
(:,:)
complex
(
kind
=
COMPLEX_DATATYPE
),
allocatable
::
vr
(:)
#endif
#if REALCASE == 1
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
tmpCUDA
(:),
vmrCUDA
(:),
umcCUDA
(:)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
tmpCPU
(:,:),
vmrCPU
(:,:),
umcCPU
(:,:)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
vr
(:)
#endif
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
tmpCUDA
(:),
vmrCUDA
(:),
umcCUDA
(:)
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
tmpCPU
(:,:),
vmrCPU
(:,:),
umcCPU
(:,:)
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
vr
(:)
#if REALCASE == 1
! needed for blocked QR decomposition
integer
(
kind
=
ik
)
::
PQRPARAM
(
11
),
work_size
real
(
kind
=
REAL_DATATYPE
)
::
dwork_size
(
1
)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
work_blocked
(:),
tauvector
(:),
blockheuristic
(:)
real
(
kind
=
rk
)
::
dwork_size
(
1
)
real
(
kind
=
rk
),
allocatable
::
work_blocked
(:),
tauvector
(:),
blockheuristic
(:)
#endif
! a_dev is passed from bandred_real to trans_ev_band
integer
(
kind
=
C_intptr_T
)
::
a_dev
,
vmr_dev
,
umc_dev
,
tmat_dev
,
vav_dev
...
...
src/elpa2/elpa2_compute_real_template.F90
View file @
fffcad08
...
...
@@ -103,17 +103,17 @@
use
elpa2_workload
use
precision
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
na
,
nb
,
nbCol
,
nb2
,
nb2Col
,
communicator
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
ab
(
2
*
nb
,
nbCol
)
! removed assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
ab2
(
2
*
nb2
,
nb2Col
)
! removed assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
out
)
::
d
(
na
),
e
(
na
)
! set only on PE 0
integer
(
kind
=
ik
),
intent
(
in
)
::
na
,
nb
,
nbCol
,
nb2
,
nb2Col
,
communicator
real
(
kind
=
rk
),
intent
(
inout
)
::
ab
(
2
*
nb
,
nbCol
)
! removed assumed size
real
(
kind
=
rk
),
intent
(
inout
)
::
ab2
(
2
*
nb2
,
nb2Col
)
! removed assumed size
real
(
kind
=
rk
),
intent
(
out
)
::
d
(
na
),
e
(
na
)
! set only on PE 0
real
(
kind
=
REAL_DATATYPE
)
::
hv
(
nb
,
nb2
),
w
(
nb
,
nb2
),
w_new
(
nb
,
nb2
),
tau
(
nb2
),
hv_new
(
nb
,
nb2
),
&
real
(
kind
=
rk
)
::
hv
(
nb
,
nb2
),
w
(
nb
,
nb2
),
w_new
(
nb
,
nb2
),
tau
(
nb2
),
hv_new
(
nb
,
nb2
),
&
tau_new
(
nb2
),
ab_s
(
1
+
nb
,
nb2
),
ab_r
(
1
+
nb
,
nb2
),
ab_s2
(
2
*
nb2
,
nb2
),
hv_s
(
nb
,
nb2
)
real
(
kind
=
REAL_DATATYPE
)
::
work
(
nb
*
nb2
),
work2
(
nb2
*
nb2
)
real
(
kind
=
rk
)
::
work
(
nb
*
nb2
),
work2
(
nb2
*
nb2
)
integer
(
kind
=
ik
)
::
lwork
,
info
integer
(
kind
=
ik
)
::
istep
,
i
,
n
,
dest
...
...
@@ -223,8 +223,8 @@
if
(
my_pe
==
0
)
then
n
=
MIN
(
na
-
na_s
-
nb2
+1
,
nb
)
! number of rows to be reduced
hv
(:,:)
=
CONST_0_0
tau
(:)
=
CONST_0_0
hv
(:,:)
=
0.0_rk
tau
(:)
=
0.0_rk
! The last step (istep=na-1) is only needed for sending the last HH vectors.
! We don't want the sign of the last element flipped (analogous to the other sweeps)
...
...
@@ -236,9 +236,9 @@
call
obj
%
timer
%
stop
(
"blas"
)
do
i
=
1
,
nb2
hv
(
i
,
i
)
=
CONST_1_0
hv
(
i
,
i
)
=
1.0_rk
hv
(
i
+1
:
n
,
i
)
=
ab
(
1
+
nb2
+1
:
1
+
nb2
+
n
-
i
,
na_s
-
n_off
+
i
-1
)
ab
(
1
+
nb2
+1
:
2
*
nb
,
na_s
-
n_off
+
i
-1
)
=
CONST_0_0
ab
(
1
+
nb2
+1
:
2
*
nb
,
na_s
-
n_off
+
i
-1
)
=
0.0_rk
enddo
endif
...
...
@@ -247,10 +247,10 @@
d
(
istep
)
=
ab
(
1
,
na_s
-
n_off
)
e
(
istep
)
=
ab
(
2
,
na_s
-
n_off
)
if
(
istep
==
na
)
then
e
(
na
)
=
CONST_0_0
e
(
na
)
=
0.0_rk
endif
else
ab_s2
=
CONST_0_0
ab_s2
=
0.0_rk
ab_s2
(:,:)
=
ab
(
1
:
nb2
+1
,
na_s
-
n_off
:
na_s
-
n_off
+
nb2
-1
)
if
(
block_limits2
(
dest
+1
)
<
istep
)
then
dest
=
dest
+1
...
...
@@ -285,7 +285,7 @@
do
i
=
1
,
nb2
tau
(
i
)
=
hv
(
i
,
i
)
hv
(
i
,
i
)
=
CONST_1_0
hv
(
i
,
i
)
=
1.0_rk
enddo
endif
endif
...
...
@@ -293,7 +293,7 @@
na_s
=
na_s
+
nb2
if
(
na_s
-
n_off
>
nb
)
then
ab
(:,
1
:
nblocks
*
nb
)
=
ab
(:,
nb
+1
:(
nblocks
+1
)
*
nb
)
ab
(:,
nblocks
*
nb
+1
:(
nblocks
+1
)
*
nb
)
=
CONST_0_0
ab
(:,
nblocks
*
nb
+1
:(
nblocks
+1
)
*
nb
)
=
0.0_rk
n_off
=
n_off
+
nb
endif
...
...
@@ -324,8 +324,8 @@
ab
(
1
:
nb
+1
,
ne
+
i
-1
)
=
ab_r
(:,
i
)
enddo
endif
hv_new
(:,:)
=
CONST_0_0
! Needed, last rows must be 0 for nr < nb
tau_new
(:)
=
CONST_0_0
hv_new
(:,:)
=
0.0_rk
! Needed, last rows must be 0 for nr < nb
tau_new
(:)
=
0.0_rk
if
(
nr
>
0
)
then
call
wy_right_
&
...
...
@@ -335,9 +335,9 @@
call
PRECISION_GEQRF
(
nr
,
nb2
,
ab
(
nb
+1
,
ns
),
2
*
nb
-1
,
tau_new
,
work
,
lwork
,
info
)
call
obj
%
timer
%
stop
(
"blas"
)
do
i
=
1
,
nb2
hv_new
(
i
,
i
)
=
CONST_1_0
hv_new
(
i
,
i
)
=
1.0_rk
hv_new
(
i
+1
:,
i
)
=
ab
(
nb
+2
:
2
*
nb
-
i
+1
,
ns
+
i
-1
)
ab
(
nb
+2
:,
ns
+
i
-1
)
=
CONST_0_0
ab
(
nb
+2
:,
ns
+
i
-1
)
=
0.0_rk
enddo
!send hh-Vector
...
...
@@ -458,16 +458,17 @@
use
elpa_abstract_impl
use
precision
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!length of householder-vectors
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!number of householder-vectors
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of Y and W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
lda
,
nb
)
!matrix containing nb householder-vectors of length b
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
tau
(
nb
)
!tau values
real
(
kind
=
REAL_DATATYPE
),
intent
(
out
)
::
W
(
lda
,
nb
)
!output matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
mem
(
nb
)
!memory for a temporary matrix of size nb
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
lda
,
nb
)
!matrix containing nb householder-vectors of length b
real
(
kind
=
rk
),
intent
(
in
)
::
tau
(
nb
)
!tau values
real
(
kind
=
rk
),
intent
(
out
)
::
W
(
lda
,
nb
)
!output matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
mem
(
nb
)
!memory for a temporary matrix of size nb
integer
(
kind
=
ik
)
::
i
integer
(
kind
=
ik
)
::
i
call
obj
%
timer
%
start
(
"wy_gen"
//
PRECISION_SUFFIX
)
...
...
@@ -475,8 +476,8 @@
do
i
=
2
,
nb
W
(
1
:
n
,
i
)
=
tau
(
i
)
*
Y
(
1
:
n
,
i
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMV
(
'T'
,
n
,
i
-1
,
CONST_1_0
,
Y
,
lda
,
W
(
1
,
i
),
1
,
CONST_0_0
,
mem
,
1
)
call
PRECISION_GEMV
(
'N'
,
n
,
i
-1
,
-
CONST_1_0
,
W
,
lda
,
mem
,
1
,
CONST_1_0
,
W
(
1
,
i
),
1
)
call
PRECISION_GEMV
(
'T'
,
n
,
i
-1
,
1.0_rk
,
Y
,
lda
,
W
(
1
,
i
),
1
,
0.0_rk
,
mem
,
1
)
call
PRECISION_GEMV
(
'N'
,
n
,
i
-1
,
-
1.0_rk
,
W
,
lda
,
mem
,
1
,
1.0_rk
,
W
(
1
,
i
),
1
)
call
obj
%
timer
%
stop
(
"blas"
)
enddo
call
obj
%
timer
%
stop
(
"wy_gen"
//
PRECISION_SUFFIX
)
...
...
@@ -489,21 +490,22 @@
use
precision
use
elpa_abstract_impl
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!width of the matrix A
integer
(
kind
=
ik
),
intent
(
in
)
::
m
!length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
call
obj
%
timer
%
start
(
"wy_left"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
n
,
m
,
CONST_1_0
,
W
,
lda2
,
A
,
lda
,
CONST_0_0
,
mem
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
m
,
n
,
nb
,
-
CONST_1_0
,
Y
,
lda2
,
mem
,
nb
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
n
,
m
,
1.0_rk
,
W
,
lda2
,
A
,
lda
,
0.0_rk
,
mem
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
m
,
n
,
nb
,
-
1.0_rk
,
Y
,
lda2
,
mem
,
nb
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_left"
//
PRECISION_SUFFIX
)
end
subroutine
...
...
@@ -515,22 +517,23 @@
use
precision
use
elpa_abstract_impl
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!height of the matrix A
integer
(
kind
=
ik
),
intent
(
in
)
::
m
!length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
call
obj
%
timer
%
start
(
"wy_right"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
m
,
CONST_1_0
,
A
,
lda
,
W
,
lda2
,
CONST_0_0
,
mem
,
n
)
call
PRECISION_GEMM
(
'N'
,
'T'
,
n
,
m
,
nb
,
-
CONST_1_0
,
mem
,
n
,
Y
,
lda2
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
m
,
1.0_rk
,
A
,
lda
,
W
,
lda2
,
0.0_rk
,
mem
,
n
)
call
PRECISION_GEMM
(
'N'
,
'T'
,
n
,
m
,
nb
,
-
1.0_rk
,
mem
,
n
,
Y
,
lda2
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_right"
//
PRECISION_SUFFIX
)
...
...
@@ -543,23 +546,24 @@
use
elpa_abstract_impl
use
precision
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!width/heigth of the matrix A; length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
n
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
n
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
REAL_DATATYPE
)
::
mem2
(
nb
,
nb
)
!memory for a temporary matrix of size nb x nb
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
n
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
n
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
)
::
mem2
(
nb
,
nb
)
!memory for a temporary matrix of size nb x nb
call
obj
%
timer
%
start
(
"wy_symm"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_SYMM
(
'L'
,
'L'
,
n
,
nb
,
CONST_1_0
,
A
,
lda
,
W
,
lda2
,
CONST_0_0
,
mem
,
n
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
nb
,
n
,
CONST_1_0
,
mem
,
n
,
W
,
lda2
,
CONST_0_0
,
mem2
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
nb
,
-
CONST_0_5
,
Y
,
lda2
,
mem2
,
nb
,
CONST_1_0
,
mem
,
n
)
call
PRECISION_SYR2K
(
'L'
,
'N'
,
n
,
nb
,
-
CONST_1_0
,
Y
,
lda2
,
mem
,
n
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_SYMM
(
'L'
,
'L'
,
n
,
nb
,
1.0_rk
,
A
,
lda
,
W
,
lda2
,
0.0_rk
,
mem
,
n
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
nb
,
n
,
1.0_rk
,
mem
,
n
,
W
,
lda2
,
0.0_rk
,
mem2
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
nb
,
-
0.5_rk
,
Y
,
lda2
,
mem2
,
nb
,
1.0_rk
,
mem
,
n
)
call
PRECISION_SYR2K
(
'L'
,
'N'
,
n
,
nb
,
-
1.0_rk
,
Y
,
lda2
,
mem
,
n
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_symm"
//
PRECISION_SUFFIX
)
...
...
test/Fortran/test.F90
View file @
fffcad08
...
...
@@ -501,15 +501,7 @@ program test
#if defined(TEST_EIGENVECTORS) || defined(TEST_QR_DECOMPOSITION)
#ifdef TEST_MATRIX_ANALYTIC
!
!#if defined(TEST_MATRIX_ANALYTIC)
status
=
check_correctness_analytic
(
na
,
nev
,
ev
,
z
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
,
check_all_evals
)
call
check_status
(
status
,
myid
)
if
(
.true.
)
then
! also check residuals
status
=
check_correctness_evp_numeric_residuals
(
na
,
nev
,
as
,
z
,
ev
,
sc_desc
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
)
call
check_status
(
status
,
myid
)
endif
#else
!#elif defined(TEST_MATRIX_FRANK)
! status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol)
...
...
utils/scaling_scripts/batch_run.py
0 → 100644
View file @
fffcad08
#!/usr/bin/env python
from
itertools
import
product
from
scaling
import
*
# ---- sweep configuration -----------------------------------------------------
# Directory and job-script template handed to variant() for every combination.
output_dir = "out"
template_file = "run_template_hydra.sh"

# Values swept over; shorten any list to run only a subset of the combinations.
elpa_method = ['elpa1', 'elpa2', 'scalapack_all', 'scalapack_part']
math_type = ['real', 'complex']
precision = ['single', 'double']
mat_size = [5000, 20000]
# Percentage of the matrix size used to derive the _NUM_EIGEN_ token below.
proc_eigen = [10, 50, 100]
block_size = [16]
# 1 node, then the powers of two 4, 8, 16, 32, 64.
num_nodes = [1] + [2**i for i in range(2, 7)]

# ---- job generation ----------------------------------------------------------
# Everything below only needs touching when the template tokens or the
# variant() helper (imported from scaling) change.
for combo in product(elpa_method, math_type, precision,
                     mat_size, proc_eigen, block_size, num_nodes):
    em, mt, pr, ms, pe, bs, nn = combo
    tokens = {
        '_BLOCK_SIZE_': bs,
        '_MAT_SIZE_': ms,
        '_NUM_EIGEN_': ms * pe // 100,
        '_NUM_NODES_': nn,
    }
    variant(output_dir, template_file, tokens, em, mt, pr)
utils/scaling_scripts/parse_elpa1
0 → 100755
View file @
fffcad08
#! /bin/bash
# Summarise ELPA 1 timing logs: for every *.txt file in the current directory
# print one row with the node count, the total time and the per-stage times.
# Files without an "e%eigenvectors()" timing line are skipped.

# Print field number $3 of every line in file $2 that matches grep pattern $1.
field() {
    grep -- "$1" "$2" | awk -v col="$3" '{print $col}'
}

echo nodes total tridiag solve trans_ev
for f in *.txt
do
    # An unmatched glob leaves the literal "*.txt" behind; ignore non-files.
    [[ -f "$f" ]] || continue
    #echo "processing $f... "
    S=$(field " node = " "$f" 5)
    TOTAL=$(field "e%eigenvectors()" "$f" 3)
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    S+=" $(field "|_ tridiag_" "$f" 3)"
    S+=" $(field "|_ solve " "$f" 3)"
    S+=" $(field "|_ trans_ev" "$f" 3)"
    # Deliberately unquoted: word splitting folds multi-line matches into one row.
    echo $S
done
utils/scaling_scripts/parse_elpa2
0 → 100755
View file @
fffcad08
#! /bin/bash
# Summarise ELPA 2 timing logs: for every *.txt file in the current directory
# print one row with the node count, the total time and the per-stage times.
# Files without an "e%eigenvectors()" timing line are skipped.

# Print field number $3 of every line in file $2 that matches grep pattern $1.
field() {
    grep -- "$1" "$2" | awk -v col="$3" '{print $col}'
}

echo nodes total bandred tridiag solve trans_ev_to_band trans_ev_to_full
for f in *.txt
do
    # An unmatched glob leaves the literal "*.txt" behind; ignore non-files.
    [[ -f "$f" ]] || continue
    #echo "processing $f... "
    S=$(field " node = " "$f" 5)
    TOTAL=$(field "e%eigenvectors()" "$f" 3)
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    S+=" $(field "|_ bandred " "$f" 3)"
    S+=" $(field "|_ tridiag " "$f" 3)"
    S+=" $(field "|_ solve " "$f" 3)"
    S+=" $(field "|_ trans_ev_to_band " "$f" 3)"
    S+=" $(field "|_ trans_ev_to_full " "$f" 3)"
    # Deliberately unquoted: word splitting folds multi-line matches into one row.
    echo $S
done
utils/scaling_scripts/parse_mkl
0 → 100755
View file @
fffcad08
#! /bin/bash
# Summarise MKL/ScaLAPACK timing logs: for every *.txt file in the current
# directory print one row with the node count and the total time.
# Files without an "e%eigenvectors()" timing line are skipped.

# Print field number $3 of every line in file $2 that matches grep pattern $1.
field() {
    grep -- "$1" "$2" | awk -v col="$3" '{print $col}'
}

echo nodes total
for f in *.txt
do
    # An unmatched glob leaves the literal "*.txt" behind; ignore non-files.
    [[ -f "$f" ]] || continue
    #echo "processing $f... "
    S=$(field " node = " "$f" 5)
    TOTAL=$(field "e%eigenvectors()" "$f" 3)
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    # Deliberately unquoted: word splitting folds multi-line matches into one row.
    echo $S
done
utils/scaling_scripts/plot.py
0 → 100755
View file @
fffcad08
#! /usr/bin/env python
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
os
print("PLOTING ...")

# Colour families, one row per family; details() indexes rows 0 and 1.
group_colors = [
    ['red', 'firebrick', 'indianred', 'tomato', 'maroon', 'salmon'],
    ['green', 'darkgreen', 'springgreen', 'darkseagreen', 'lawngreen', 'yellowgreen'],
    ['blue', 'darkblue', 'cornflowerblue', 'dodgerblue', 'midnightblue', 'lightskyblue'],
    ['magenta', 'darkviolet', 'mediumvioletred', 'orchid', 'deeppink', 'purple'],
    ['orange', 'gold', 'navajowhite', 'darkorange', 'goldenrod', 'sandybrown'],
    ['cyan', 'darkcyan', 'lightseagreen', 'turquoise', 'darkturquoise', 'mediumturquoise'],
]

# Marker characters combined with a line style when plotting sub-timings.
group_symbols = ['o', 's', '*', 'D', 'x', 'H']

# Column names of the per-stage timings read from the result tables.
elpa1_subtimes = ["tridiag", "solve", "trans_ev"]
elpa2_subtimes = ["bandred", "tridiag", "solve", "trans_ev_to_band", "trans_ev_to_full"]

# Multiplier that converts the 'nodes' column into the plotted core count.
cores_per_node = 20

# Result trees that are searched for tab.txt files.
base_paths = ["results", "results2"]

# Data set selection used to build result paths and the plot title.
num_type = "real"
prec = "double"
mat_size = 5000
def scalapack_name(num, pr, all_ev):
    """Build the ScaLAPACK eigensolver routine name used in plot labels.

    Parameters:
        num:    "real" selects the symmetric routines (p?syev*); any other
                value selects the Hermitian routines (p?heev*).
        pr:     "single" selects single precision; any other value selects
                double precision.
        all_ev: truthy appends "d", falsy appends "r".

    Returns:
        The routine name as a string, e.g. "pdsyevd" or "pcheevr".
    """
    # Use the argument, not the module-level num_type, so the caller decides.
    if num == "real":
        stem = "pssyev" if pr == "single" else "pdsyev"
    else:
        stem = "pcheev" if pr == "single" else "pzheev"
    return stem + ("d" if all_ev else "r")
def line(what, mat_size, proc_evec, method, label, color, style):
    """Plot one timing curve, taking the element-wise minimum over all result trees.

    For each directory in base_paths the table
    <base>/<num_type>/<prec>/<mat_size>/<mat_size*proc_evec//100>/<method>/tab.txt
    is read if it exists. Column `what` is combined across tables with
    np.minimum and plotted against cores_per_node * nodes.

    Parameters:
        what:      name of the table column to plot.
        mat_size:  matrix size; part of the result path.
        proc_evec: percentage used to derive the eigenvector-count path part.
        method:    directory name of the solver variant.
        label, color, style: passed through to plt.plot.

    If no table is found, an empty curve is plotted, so the legend entry
    still appears.
    """
    best_times = None
    node_counts = None

    for base_path in base_paths:
        path = "/".join([base_path, num_type, prec, str(mat_size),
                         str(mat_size * proc_evec // 100), method, "tab.txt"])
        #print(path)
        if not os.path.isfile(path):
            continue

        data = np.genfromtxt(path, names=True)
        nodes = data['nodes']
        data_line = data[what]

        # None marks "nothing loaded yet". Comparing a NumPy array with []
        # is not a reliable emptiness test once the first table is stored.
        if node_counts is None:
            node_counts = nodes
            best_times = data_line
        else:
            # All result trees must cover the same node counts.
            assert np.array_equal(nodes, node_counts)
            best_times = np.minimum(best_times, data_line)

    if node_counts is None:
        cores, best_times = [], []
    else:
        cores = cores_per_node * node_counts
    #print(cores, best_times)
    plt.plot(cores, best_times, style, label=label, color=color, linewidth=2)
def plot1():
    """Plot total run time for the ScaLAPACK and ELPA variants.

    Draws, in this order: the "pdsyevd" result set for 100% of the
    eigenvectors, then the "pdsyevr", "elpa1" and "elpa2" result sets for
    100%, 50% and 10% of the eigenvectors.
    """
    # The partial-spectrum curves must carry the "...r" routine name; the
    # "...d" name belongs only to the all-eigenvectors curve.
    label_all = "MKL 2017, " + scalapack_name(num_type, prec, True)
    label_part = "MKL 2017, " + scalapack_name(num_type, prec, False)

    # (percentage of eigenvectors, curve colour), in legend order.
    shares = [(100, "blue"), (50, "green"), (10, "red")]

    # NOTE(review): the result directory names "pdsyevd"/"pdsyevr" are fixed
    # and do not follow num_type/prec - confirm against the result layout.
    line("total", mat_size, 100, "pdsyevd", label_all, "black", "x-")
    for share, color in shares:
        line("total", mat_size, share, "pdsyevr",
             "%s, %d%% EVs" % (label_part, share), color, "x-")
    for share, color in shares:
        line("total", mat_size, share, "elpa1",
             "ELPA 1, %d%% EVs" % share, color, "*--")
    for share, color in shares:
        line("total", mat_size, share, "elpa2",
             "ELPA 2, %d%% EVs" % share, color, "o:")
def details(proc_ev):
    """Plot every ELPA 1 and ELPA 2 sub-timing for one eigenvector percentage.

    ELPA 1 curves use colour row 0 and every second marker symbol; ELPA 2
    curves use colour row 1 and consecutive marker symbols.
    """
    for idx, stage in enumerate(elpa1_subtimes):
        marker = group_symbols[2 * idx] + '-'
        line(stage, mat_size, proc_ev, "elpa1", "ELPA1 - " + stage,
             group_colors[0][idx], marker)

    for idx, stage in enumerate(elpa2_subtimes):
        marker = group_symbols[idx] + '-'
        line(stage, mat_size, proc_ev, "elpa2", "ELPA2 - " + stage,
             group_colors[1][idx], marker)
# ---- figure set-up and rendering ---------------------------------------------
fig = plt.figure(figsize=(15, 10))
ax = fig.add_subplot(111)
# Also show the y tick labels on the right-hand side.
ax.tick_params(labelright=True)

plot1()
#details(100)

#plt.title('Num CPUs ' + str(num_cpus) + ' and ' + str(eigenvectors_percent) + '% eigenvectors, ' + numtype)
#plt.title('Num CPUs ')
plt.title("Matrix " + str(mat_size // 1000) + "k, " + num_type + ", " + prec)
plt.grid()
plt.legend(loc=1)
plt.xlabel('Number of cores')
plt.ylabel('Execution time [s]')
plt.xscale('log')
plt.yscale('log')

# The on/off flag is passed positionally: its keyword was renamed from "b" to
# "visible", so the positional form works on both old and new Matplotlib.
ax.xaxis.grid(True, which='major', color='black', linestyle=':')
ax.yaxis.grid(True, which='major', color='black', linestyle='--')
ax.yaxis.grid(True, which='minor', color='black', linestyle=':')

# x ticks at 20, 40, 80, ... (20 * 2**i for i = 0..11).
ticks = [20 * 2**i for i in range(0, 12)]
ax.xaxis.set_ticks(ticks)
ax.xaxis.set_ticklabels(ticks)

# y range depends on the matrix size being plotted.
if mat_size < 10000:
    y_min = 0.1
    y_max = 50
else:
    y_min = 5
    y_max = 500

yticks_major = [1, 10, 100, 1000, y_min, y_max]
ax.yaxis.set_ticks(yticks_major)
ax.yaxis.set_ticklabels(yticks_major)
# yticks_minor = [2, 5, 20, 50, 200, 500]
# ax.yaxis.set_ticks(yticks_minor, minor=True)
# ax.yaxis.set_ticklabels(yticks_minor, minor=True)

plt.ylim([y_min, y_max])
plt.xlim([20, 41000])

plt.savefig('plot.pdf')
#if show:
plt.show()
#plt.close()