Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
11
Issues
11
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
fffcad08
Commit
fffcad08
authored
Sep 22, 2017
by
Andreas Marek
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master_pre_stage' of
https://gitlab.mpcdf.mpg.de/elpa/elpa
into master_pre_stage
parents
a490a3f4
ec8bc696
Changes
12
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
416 additions
and
102 deletions
+416
-102
src/elpa2/elpa2_bandred_template.F90
src/elpa2/elpa2_bandred_template.F90
+11
-47
src/elpa2/elpa2_compute_real_template.F90
src/elpa2/elpa2_compute_real_template.F90
+51
-47
test/Fortran/test.F90
test/Fortran/test.F90
+0
-8
utils/scaling_scripts/batch_run.py
utils/scaling_scripts/batch_run.py
+37
-0
utils/scaling_scripts/parse_elpa1
utils/scaling_scripts/parse_elpa1
+18
-0
utils/scaling_scripts/parse_elpa2
utils/scaling_scripts/parse_elpa2
+21
-0
utils/scaling_scripts/parse_mkl
utils/scaling_scripts/parse_mkl
+14
-0
utils/scaling_scripts/plot.py
utils/scaling_scripts/plot.py
+131
-0
utils/scaling_scripts/plt
utils/scaling_scripts/plt
+12
-0
utils/scaling_scripts/process.py
utils/scaling_scripts/process.py
+26
-0
utils/scaling_scripts/run_template_hydra.sh
utils/scaling_scripts/run_template_hydra.sh
+52
-0
utils/scaling_scripts/scaling.py
utils/scaling_scripts/scaling.py
+43
-0
No files found.
src/elpa2/elpa2_bandred_template.F90
View file @
fffcad08
...
...
@@ -111,42 +111,19 @@
use
precision
use
elpa_abstract_impl
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
)
::
na
,
lda
,
nblk
,
nbw
,
matrixCols
,
numBlocks
,
mpi_comm_rows
,
mpi_comm_cols
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
real
(
kind
=
REAL_DATATYPE
)
::
a
(
lda
,
*
),
tmat
(
nbw
,
nbw
,
*
)
#else
real
(
kind
=
REAL_DATATYPE
)
::
a
(
lda
,
matrixCols
),
tmat
(
nbw
,
nbw
,
numBlocks
)
#endif
#endif
#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
complex
(
kind
=
COMPLEX_DATATYPE
)
::
a
(
lda
,
*
),
tmat
(
nbw
,
nbw
,
*
)
MATH_DATATYPE
(
kind
=
rck
)
::
a
(
lda
,
*
),
tmat
(
nbw
,
nbw
,
*
)
#else
complex
(
kind
=
COMPLEX_DATATYPE
)
::
a
(
lda
,
matrixCols
),
tmat
(
nbw
,
nbw
,
numBlocks
)
MATH_DATATYPE
(
kind
=
rck
)
::
a
(
lda
,
matrixCols
),
tmat
(
nbw
,
nbw
,
numBlocks
)
#endif
#endif /* COMPLEXCASE */
#if REALCASE == 1
#ifdef DOUBLE_PRECISION_REAL
real
(
kind
=
REAL_DATATYPE
),
parameter
::
ZERO
=
0.0_rk8
,
ONE
=
1.0_rk8
#else
real
(
kind
=
REAL_DATATYPE
),
parameter
::
ZERO
=
0.0_rk4
,
ONE
=
1.0_rk4
#endif
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
complex
(
kind
=
COMPLEX_DATATYPE
),
parameter
::
ZERO
=
(
0.0_rk8
,
0.0_rk8
),
ONE
=
(
1.0_rk8
,
0.0_rk8
)
#else
complex
(
kind
=
COMPLEX_DATATYPE
),
parameter
::
ZERO
=
(
0.0_rk4
,
0.0_rk4
),
ONE
=
(
1.0_rk4
,
0.0_rk4
)
real
(
kind
=
rk
)
::
eps
#endif
#endif /* COMPLEXCASE == 1 */
logical
,
intent
(
in
)
::
useGPU
integer
(
kind
=
ik
)
::
my_prow
,
my_pcol
,
np_rows
,
np_cols
,
mpierr
...
...
@@ -161,32 +138,19 @@
integer
(
kind
=
ik
)
::
istep
,
ncol
,
lch
,
lcx
,
nlc
integer
(
kind
=
ik
)
::
tile_size
,
l_rows_tile
,
l_cols_tile
real
(
kind
=
REAL_DATATYPE
)
::
vnorm2
#if REALCASE == 1
real
(
kind
=
REAL_DATATYPE
)
::
xf
,
aux1
(
nbw
),
aux2
(
nbw
),
vrl
,
tau
,
vav
(
nbw
,
nbw
)
#endif
#if COMPLEXCASE == 1
complex
(
kind
=
COMPLEX_DATATYPE
)
::
xf
,
aux1
(
nbw
),
aux2
(
nbw
),
vrl
,
tau
,
vav
(
nbw
,
nbw
)
#endif
real
(
kind
=
rk
)
::
vnorm2
MATH_DATATYPE
(
kind
=
rck
)
::
xf
,
aux1
(
nbw
),
aux2
(
nbw
),
vrl
,
tau
,
vav
(
nbw
,
nbw
)
#if COMPLEXCASE == 1
! complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCUDA(:,:), vmrCUDA(:,:), umcCUDA(:,:) ! note the different dimension in real case
complex
(
kind
=
COMPLEX_DATATYPE
),
allocatable
::
tmpCUDA
(:),
vmrCUDA
(:),
umcCUDA
(:)
complex
(
kind
=
COMPLEX_DATATYPE
),
allocatable
::
tmpCPU
(:,:),
vmrCPU
(:,:),
umcCPU
(:,:)
complex
(
kind
=
COMPLEX_DATATYPE
),
allocatable
::
vr
(:)
#endif
#if REALCASE == 1
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
tmpCUDA
(:),
vmrCUDA
(:),
umcCUDA
(:)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
tmpCPU
(:,:),
vmrCPU
(:,:),
umcCPU
(:,:)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
vr
(:)
#endif
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
tmpCUDA
(:),
vmrCUDA
(:),
umcCUDA
(:)
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
tmpCPU
(:,:),
vmrCPU
(:,:),
umcCPU
(:,:)
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
vr
(:)
#if REALCASE == 1
! needed for blocked QR decomposition
integer
(
kind
=
ik
)
::
PQRPARAM
(
11
),
work_size
real
(
kind
=
REAL_DATATYPE
)
::
dwork_size
(
1
)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
work_blocked
(:),
tauvector
(:),
blockheuristic
(:)
real
(
kind
=
rk
)
::
dwork_size
(
1
)
real
(
kind
=
rk
),
allocatable
::
work_blocked
(:),
tauvector
(:),
blockheuristic
(:)
#endif
! a_dev is passed from bandred_real to trans_ev_band
integer
(
kind
=
C_intptr_T
)
::
a_dev
,
vmr_dev
,
umc_dev
,
tmat_dev
,
vav_dev
...
...
src/elpa2/elpa2_compute_real_template.F90
View file @
fffcad08
...
...
@@ -103,17 +103,17 @@
use
elpa2_workload
use
precision
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
na
,
nb
,
nbCol
,
nb2
,
nb2Col
,
communicator
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
ab
(
2
*
nb
,
nbCol
)
! removed assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
ab2
(
2
*
nb2
,
nb2Col
)
! removed assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
out
)
::
d
(
na
),
e
(
na
)
! set only on PE 0
integer
(
kind
=
ik
),
intent
(
in
)
::
na
,
nb
,
nbCol
,
nb2
,
nb2Col
,
communicator
real
(
kind
=
rk
),
intent
(
inout
)
::
ab
(
2
*
nb
,
nbCol
)
! removed assumed size
real
(
kind
=
rk
),
intent
(
inout
)
::
ab2
(
2
*
nb2
,
nb2Col
)
! removed assumed size
real
(
kind
=
rk
),
intent
(
out
)
::
d
(
na
),
e
(
na
)
! set only on PE 0
real
(
kind
=
REAL_DATATYPE
)
::
hv
(
nb
,
nb2
),
w
(
nb
,
nb2
),
w_new
(
nb
,
nb2
),
tau
(
nb2
),
hv_new
(
nb
,
nb2
),
&
real
(
kind
=
rk
)
::
hv
(
nb
,
nb2
),
w
(
nb
,
nb2
),
w_new
(
nb
,
nb2
),
tau
(
nb2
),
hv_new
(
nb
,
nb2
),
&
tau_new
(
nb2
),
ab_s
(
1
+
nb
,
nb2
),
ab_r
(
1
+
nb
,
nb2
),
ab_s2
(
2
*
nb2
,
nb2
),
hv_s
(
nb
,
nb2
)
real
(
kind
=
REAL_DATATYPE
)
::
work
(
nb
*
nb2
),
work2
(
nb2
*
nb2
)
real
(
kind
=
rk
)
::
work
(
nb
*
nb2
),
work2
(
nb2
*
nb2
)
integer
(
kind
=
ik
)
::
lwork
,
info
integer
(
kind
=
ik
)
::
istep
,
i
,
n
,
dest
...
...
@@ -223,8 +223,8 @@
if
(
my_pe
==
0
)
then
n
=
MIN
(
na
-
na_s
-
nb2
+1
,
nb
)
! number of rows to be reduced
hv
(:,:)
=
CONST_0_0
tau
(:)
=
CONST_0_0
hv
(:,:)
=
0.0_rk
tau
(:)
=
0.0_rk
! The last step (istep=na-1) is only needed for sending the last HH vectors.
! We don't want the sign of the last element flipped (analogous to the other sweeps)
...
...
@@ -236,9 +236,9 @@
call
obj
%
timer
%
stop
(
"blas"
)
do
i
=
1
,
nb2
hv
(
i
,
i
)
=
CONST_1_0
hv
(
i
,
i
)
=
1.0_rk
hv
(
i
+1
:
n
,
i
)
=
ab
(
1
+
nb2
+1
:
1
+
nb2
+
n
-
i
,
na_s
-
n_off
+
i
-1
)
ab
(
1
+
nb2
+1
:
2
*
nb
,
na_s
-
n_off
+
i
-1
)
=
CONST_0_0
ab
(
1
+
nb2
+1
:
2
*
nb
,
na_s
-
n_off
+
i
-1
)
=
0.0_rk
enddo
endif
...
...
@@ -247,10 +247,10 @@
d
(
istep
)
=
ab
(
1
,
na_s
-
n_off
)
e
(
istep
)
=
ab
(
2
,
na_s
-
n_off
)
if
(
istep
==
na
)
then
e
(
na
)
=
CONST_0_0
e
(
na
)
=
0.0_rk
endif
else
ab_s2
=
CONST_0_0
ab_s2
=
0.0_rk
ab_s2
(:,:)
=
ab
(
1
:
nb2
+1
,
na_s
-
n_off
:
na_s
-
n_off
+
nb2
-1
)
if
(
block_limits2
(
dest
+1
)
<
istep
)
then
dest
=
dest
+1
...
...
@@ -285,7 +285,7 @@
do
i
=
1
,
nb2
tau
(
i
)
=
hv
(
i
,
i
)
hv
(
i
,
i
)
=
CONST_1_0
hv
(
i
,
i
)
=
1.0_rk
enddo
endif
endif
...
...
@@ -293,7 +293,7 @@
na_s
=
na_s
+
nb2
if
(
na_s
-
n_off
>
nb
)
then
ab
(:,
1
:
nblocks
*
nb
)
=
ab
(:,
nb
+1
:(
nblocks
+1
)
*
nb
)
ab
(:,
nblocks
*
nb
+1
:(
nblocks
+1
)
*
nb
)
=
CONST_0_0
ab
(:,
nblocks
*
nb
+1
:(
nblocks
+1
)
*
nb
)
=
0.0_rk
n_off
=
n_off
+
nb
endif
...
...
@@ -324,8 +324,8 @@
ab
(
1
:
nb
+1
,
ne
+
i
-1
)
=
ab_r
(:,
i
)
enddo
endif
hv_new
(:,:)
=
CONST_0_0
! Needed, last rows must be 0 for nr < nb
tau_new
(:)
=
CONST_0_0
hv_new
(:,:)
=
0.0_rk
! Needed, last rows must be 0 for nr < nb
tau_new
(:)
=
0.0_rk
if
(
nr
>
0
)
then
call
wy_right_
&
...
...
@@ -335,9 +335,9 @@
call
PRECISION_GEQRF
(
nr
,
nb2
,
ab
(
nb
+1
,
ns
),
2
*
nb
-1
,
tau_new
,
work
,
lwork
,
info
)
call
obj
%
timer
%
stop
(
"blas"
)
do
i
=
1
,
nb2
hv_new
(
i
,
i
)
=
CONST_1_0
hv_new
(
i
,
i
)
=
1.0_rk
hv_new
(
i
+1
:,
i
)
=
ab
(
nb
+2
:
2
*
nb
-
i
+1
,
ns
+
i
-1
)
ab
(
nb
+2
:,
ns
+
i
-1
)
=
CONST_0_0
ab
(
nb
+2
:,
ns
+
i
-1
)
=
0.0_rk
enddo
!send hh-Vector
...
...
@@ -458,16 +458,17 @@
use
elpa_abstract_impl
use
precision
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!length of householder-vectors
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!number of householder-vectors
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of Y and W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
lda
,
nb
)
!matrix containing nb householder-vectors of length b
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
tau
(
nb
)
!tau values
real
(
kind
=
REAL_DATATYPE
),
intent
(
out
)
::
W
(
lda
,
nb
)
!output matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
mem
(
nb
)
!memory for a temporary matrix of size nb
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
lda
,
nb
)
!matrix containing nb householder-vectors of length b
real
(
kind
=
rk
),
intent
(
in
)
::
tau
(
nb
)
!tau values
real
(
kind
=
rk
),
intent
(
out
)
::
W
(
lda
,
nb
)
!output matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
mem
(
nb
)
!memory for a temporary matrix of size nb
integer
(
kind
=
ik
)
::
i
integer
(
kind
=
ik
)
::
i
call
obj
%
timer
%
start
(
"wy_gen"
//
PRECISION_SUFFIX
)
...
...
@@ -475,8 +476,8 @@
do
i
=
2
,
nb
W
(
1
:
n
,
i
)
=
tau
(
i
)
*
Y
(
1
:
n
,
i
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMV
(
'T'
,
n
,
i
-1
,
CONST_1_0
,
Y
,
lda
,
W
(
1
,
i
),
1
,
CONST_0_0
,
mem
,
1
)
call
PRECISION_GEMV
(
'N'
,
n
,
i
-1
,
-
CONST_1_0
,
W
,
lda
,
mem
,
1
,
CONST_1_0
,
W
(
1
,
i
),
1
)
call
PRECISION_GEMV
(
'T'
,
n
,
i
-1
,
1.0_rk
,
Y
,
lda
,
W
(
1
,
i
),
1
,
0.0_rk
,
mem
,
1
)
call
PRECISION_GEMV
(
'N'
,
n
,
i
-1
,
-
1.0_rk
,
W
,
lda
,
mem
,
1
,
1.0_rk
,
W
(
1
,
i
),
1
)
call
obj
%
timer
%
stop
(
"blas"
)
enddo
call
obj
%
timer
%
stop
(
"wy_gen"
//
PRECISION_SUFFIX
)
...
...
@@ -489,21 +490,22 @@
use
precision
use
elpa_abstract_impl
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!width of the matrix A
integer
(
kind
=
ik
),
intent
(
in
)
::
m
!length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
call
obj
%
timer
%
start
(
"wy_left"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
n
,
m
,
CONST_1_0
,
W
,
lda2
,
A
,
lda
,
CONST_0_0
,
mem
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
m
,
n
,
nb
,
-
CONST_1_0
,
Y
,
lda2
,
mem
,
nb
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
n
,
m
,
1.0_rk
,
W
,
lda2
,
A
,
lda
,
0.0_rk
,
mem
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
m
,
n
,
nb
,
-
1.0_rk
,
Y
,
lda2
,
mem
,
nb
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_left"
//
PRECISION_SUFFIX
)
end
subroutine
...
...
@@ -515,22 +517,23 @@
use
precision
use
elpa_abstract_impl
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!height of the matrix A
integer
(
kind
=
ik
),
intent
(
in
)
::
m
!length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
call
obj
%
timer
%
start
(
"wy_right"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
m
,
CONST_1_0
,
A
,
lda
,
W
,
lda2
,
CONST_0_0
,
mem
,
n
)
call
PRECISION_GEMM
(
'N'
,
'T'
,
n
,
m
,
nb
,
-
CONST_1_0
,
mem
,
n
,
Y
,
lda2
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
m
,
1.0_rk
,
A
,
lda
,
W
,
lda2
,
0.0_rk
,
mem
,
n
)
call
PRECISION_GEMM
(
'N'
,
'T'
,
n
,
m
,
nb
,
-
1.0_rk
,
mem
,
n
,
Y
,
lda2
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_right"
//
PRECISION_SUFFIX
)
...
...
@@ -543,23 +546,24 @@
use
elpa_abstract_impl
use
precision
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!width/heigth of the matrix A; length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
n
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
n
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
REAL_DATATYPE
)
::
mem2
(
nb
,
nb
)
!memory for a temporary matrix of size nb x nb
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
n
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
n
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
)
::
mem2
(
nb
,
nb
)
!memory for a temporary matrix of size nb x nb
call
obj
%
timer
%
start
(
"wy_symm"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_SYMM
(
'L'
,
'L'
,
n
,
nb
,
CONST_1_0
,
A
,
lda
,
W
,
lda2
,
CONST_0_0
,
mem
,
n
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
nb
,
n
,
CONST_1_0
,
mem
,
n
,
W
,
lda2
,
CONST_0_0
,
mem2
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
nb
,
-
CONST_0_5
,
Y
,
lda2
,
mem2
,
nb
,
CONST_1_0
,
mem
,
n
)
call
PRECISION_SYR2K
(
'L'
,
'N'
,
n
,
nb
,
-
CONST_1_0
,
Y
,
lda2
,
mem
,
n
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_SYMM
(
'L'
,
'L'
,
n
,
nb
,
1.0_rk
,
A
,
lda
,
W
,
lda2
,
0.0_rk
,
mem
,
n
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
nb
,
n
,
1.0_rk
,
mem
,
n
,
W
,
lda2
,
0.0_rk
,
mem2
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
nb
,
-
0.5_rk
,
Y
,
lda2
,
mem2
,
nb
,
1.0_rk
,
mem
,
n
)
call
PRECISION_SYR2K
(
'L'
,
'N'
,
n
,
nb
,
-
1.0_rk
,
Y
,
lda2
,
mem
,
n
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_symm"
//
PRECISION_SUFFIX
)
...
...
test/Fortran/test.F90
View file @
fffcad08
...
...
@@ -501,15 +501,7 @@ program test
#if defined(TEST_EIGENVECTORS) || defined(TEST_QR_DECOMPOSITION)
#ifdef TEST_MATRIX_ANALYTIC
!
!#if defined(TEST_MATRIX_ANALYTIC)
status
=
check_correctness_analytic
(
na
,
nev
,
ev
,
z
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
,
check_all_evals
)
call
check_status
(
status
,
myid
)
if
(
.true.
)
then
! also check residuals
status
=
check_correctness_evp_numeric_residuals
(
na
,
nev
,
as
,
z
,
ev
,
sc_desc
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
)
call
check_status
(
status
,
myid
)
endif
#else
!#elif defined(TEST_MATRIX_FRANK)
! status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol)
...
...
utils/scaling_scripts/batch_run.py
0 → 100644
View file @
fffcad08
#!/usr/bin/env python
from
itertools
import
product
from
scaling
import
*
output_dir
=
"out"
template_file
=
"run_template_hydra.sh"
#elpa_method = ['elpa1', 'elpa2']
elpa_method
=
[
'elpa1'
,
'elpa2'
,
'scalapack_all'
,
'scalapack_part'
]
#elpa_method = ['scalapack_part']
math_type
=
[
'real'
,
'complex'
]
precision
=
[
'single'
,
'double'
]
mat_size
=
[
5000
,
20000
]
proc_eigen
=
[
10
,
50
,
100
]
block_size
=
[
16
]
num_nodes
=
[
1
]
#num_nodes.extend([2**i for i in range(2,11)])
num_nodes
.
extend
([
2
**
i
for
i
in
range
(
2
,
7
)])
#num_nodes = [2048]
#===============================================================================================
#===============================================================================================
# the rest of the script should be changed only if something changed (etc. in elpa)
#===============================================================================================
#===============================================================================================
for
em
,
mt
,
pr
,
ms
,
pe
,
bs
,
nn
in
product
(
elpa_method
,
math_type
,
precision
,
mat_size
,
proc_eigen
,
block_size
,
num_nodes
):
tokens
=
{}
tokens
[
'_BLOCK_SIZE_'
]
=
bs
tokens
[
'_MAT_SIZE_'
]
=
ms
·
tokens
[
'_NUM_EIGEN_'
]
=
ms
*
pe
//
100
tokens
[
'_NUM_NODES_'
]
=
nn
variant
(
output_dir
,
template_file
,
tokens
,
em
,
mt
,
pr
)
utils/scaling_scripts/parse_elpa1
0 → 100755
View file @
fffcad08
#! /bin/bash
echo
nodes total tridiag solve trans_ev
for
f
in
*
.txt
do
#echo "processing $f... "
S
=
`
grep
" node = "
$f
|
awk
'{print $5}'
`
TOTAL
=
`
grep
"e%eigenvectors()"
$f
|
awk
'{print $3}'
`
if
[[
-z
"
$TOTAL
"
]]
;
then
continue
fi
S+
=
" "
$TOTAL
S+
=
" "
`
grep
"|_ tridiag_"
$f
|
awk
'{print $3}'
`
S+
=
" "
`
grep
"|_ solve "
$f
|
awk
'{print $3}'
`
S+
=
" "
`
grep
"|_ trans_ev"
$f
|
awk
'{print $3}'
`
echo
$S
done
utils/scaling_scripts/parse_elpa2
0 → 100755
View file @
fffcad08
#! /bin/bash
echo
nodes total bandred tridiag solve trans_ev_to_band trans_ev_to_full
for
f
in
*
.txt
do
#echo "processing $f... "
S
=
`
grep
" node = "
$f
|
awk
'{print $5}'
`
TOTAL
=
`
grep
"e%eigenvectors()"
$f
|
awk
'{print $3}'
`
if
[[
-z
"
$TOTAL
"
]]
;
then
continue
fi
S+
=
" "
$TOTAL
S+
=
" "
`
grep
"|_ bandred "
$f
|
awk
'{print $3}'
`
S+
=
" "
`
grep
"|_ tridiag "
$f
|
awk
'{print $3}'
`
S+
=
" "
`
grep
"|_ solve "
$f
|
awk
'{print $3}'
`
S+
=
" "
`
grep
"|_ trans_ev_to_band "
$f
|
awk
'{print $3}'
`
S+
=
" "
`
grep
"|_ trans_ev_to_full "
$f
|
awk
'{print $3}'
`
echo
$S
done
utils/scaling_scripts/parse_mkl
0 → 100755
View file @
fffcad08
#! /bin/bash
echo
nodes total
for
f
in
*
.txt
do
#echo "processing $f... "
S
=
`
grep
" node = "
$f
|
awk
'{print $5}'
`
TOTAL
=
`
grep
"e%eigenvectors()"
$f
|
awk
'{print $3}'
`
if
[[
-z
"
$TOTAL
"
]]
;
then
continue
fi
S+
=
" "
$TOTAL
echo
$S
done
utils/scaling_scripts/plot.py
0 → 100755
View file @
fffcad08
#! /usr/bin/env python
import
numpy
as
np
import
matplotlib.pyplot
as
plt
import
os
print
(
"PLOTING ..."
)
group_colors
=
[[
'red'
,
'firebrick'
,
'indianred'
,
'tomato'
,
'maroon'
,
'salmon'
],
[
'green'
,
'darkgreen'
,
'springgreen'
,
'darkseagreen'
,
'lawngreen'
,
'yellowgreen'
],
[
'blue'
,
'darkblue'
,
'cornflowerblue'
,
'dodgerblue'
,
'midnightblue'
,
'lightskyblue'
],
[
'magenta'
,
'darkviolet'
,
'mediumvioletred'
,
'orchid'
,
'deeppink'
,
'purple'
],
[
'orange'
,
'gold'
,
'navajowhite'
,
'darkorange'
,
'goldenrod'
,
'sandybrown'
],
[
'cyan'
,
'darkcyan'
,
'lightseagreen'
,
'turquoise'
,
'darkturquoise'
,
'mediumturquoise'
]]
group_symbols
=
[
'o'
,
's'
,
'*'
,
'D'
,
'x'
,
'H'
]
elpa1_subtimes
=
[
"tridiag"
,
"solve"
,
"trans_ev"
]
elpa2_subtimes
=
[
"bandred"
,
"tridiag"
,
"solve"
,
"trans_ev_to_band"
,
"trans_ev_to_full"
]
cores_per_node
=
20
base_paths
=
[
"results"
,
"results2"
]
num_type
=
"real"
prec
=
"double"
mat_size
=
5000
def
scalapack_name
(
num
,
pr
,
all_ev
):
if
(
num_type
==
"real"
):
if
(
pr
==
"single"
):
name
=
"pssyev"
else
:
name
=
"pdsyev"
else
:
if
(
pr
==
"single"
):
name
=
"pcheev"
else
:
name
=
"pzheev"
if
(
all_ev
):
name
+=
"d"
else
:
name
+=
"r"
return
name
def
line
(
what
,
mat_size
,
proc_evec
,
method
,
label
,
color
,
style
):
data_line_res
=
[]
nodes_res
=
[]
for
base_path
in
base_paths
:
path
=
"/"
.
join
([
base_path
,
num_type
,
prec
,
str
(
mat_size
),
str
(
mat_size
*
proc_evec
//
100
),
method
,
"tab.txt"
])
#print(path)
if
not
os
.
path
.
isfile
(
path
):
continue
data
=
np
.
genfromtxt
(
path
,
names
=
True
)
nodes
=
data
[
'nodes'
]
data_line
=
data
[
what
]
#print("data_line", data_line, "data_line_res", data_line_res)
if
(
nodes_res
==
[]):
assert
(
data_line_res
==
[])
nodes_res
=
nodes
data_line_res
=
data_line
else
:
assert
(
all
(
nodes
==
nodes_res
))
data_line_res
=
np
.
minimum
(
data_line_res
,
data_line
)
cores
=
cores_per_node
*
nodes_res
#print(cores, data_line_res)
plt
.
plot
(
cores
,
data_line_res
,
style
,
label
=
label
,
color
=
color
,
linewidth
=
2
)
def
plot1
():
line
(
"total"
,
mat_size
,
100
,
"pdsyevd"
,
"MKL 2017, "
+
scalapack_name
(
num_type
,
prec
,
True
),
"black"
,
"x-"
)
line
(
"total"
,
mat_size
,
100
,
"pdsyevr"
,
"MKL 2017, "
+
scalapack_name
(
num_type
,
prec
,
True
)
+
", 100% EVs"
,
"blue"
,
"x-"
)
line
(
"total"
,
mat_size
,
50
,
"pdsyevr"
,
"MKL 2017, "
+
scalapack_name
(
num_type
,
prec
,
True
)
+
", 50% EVs"
,
"green"
,
"x-"
)
line
(
"total"
,
mat_size
,
10
,
"pdsyevr"
,
"MKL 2017, "
+
scalapack_name
(
num_type
,
prec
,
True
)
+
", 10% EVs"
,
"red"
,
"x-"
)
line
(
"total"
,
mat_size
,
100
,
"elpa1"
,
"ELPA 1, 100% EVs"
,
"blue"
,
"*--"
)
line
(
"total"
,
mat_size
,
50
,
"elpa1"
,
"ELPA 1, 50% EVs"
,
"green"
,
"*--"
)
line
(
"total"
,
mat_size
,
10
,
"elpa1"
,
"ELPA 1, 10% EVs"
,
"red"
,
"*--"
)
line
(
"total"
,
mat_size
,
100
,
"elpa2"
,
"ELPA 2, 100% EVs"
,
"blue"
,
"o:"
)
line
(
"total"
,
mat_size
,
50
,
"elpa2"
,
"ELPA 2, 50% EVs"
,
"green"
,
"o:"
)
line
(
"total"
,
mat_size
,
10
,
"elpa2"
,
"ELPA 2, 10% EVs"
,
"red"
,
"o:"
)
def
details
(
proc_ev
):
for
i
in
range
(
len
(
elpa1_subtimes
)):
line
(
elpa1_subtimes
[
i
],
mat_size
,
proc_ev
,
"elpa1"
,
"ELPA1 - "
+
elpa1_subtimes
[
i
],
group_colors
[
0
][
i
],
group_symbols
[
2
*
i
]
+
'-'
)
for
i
in
range
(
len
(
elpa2_subtimes
)):
line
(
elpa2_subtimes
[
i
],
mat_size
,
proc_ev
,
"elpa2"
,
"ELPA2 - "
+
elpa2_subtimes
[
i
],
group_colors
[
1
][
i
],
group_symbols
[
i
]
+
'-'
)
fig
=
plt
.
figure
(
figsize
=
(
15
,
10
))
ax
=
fig
.
add_subplot
(
111
)
ax
.
tick_params
(
labelright
=
'on'
)
plot1
()
#details(100)
#plt.title('Num CPUs ' + str(num_cpus) + ' and ' + str(eigenvectors_percent) + '% eigenvectors, ' + numtype)
#plt.title('Num CPUs ')
plt
.
title
(
"Matrix "
+
str
(
mat_size
//
1000
)
+
"k, "
+
num_type
+
", "
+
prec
)
plt
.
grid
()
plt
.
legend
(
loc
=
1
)
plt
.
xlabel
(
'Number of cores'
)
plt
.
ylabel
(
'Execution time [s]'
)
plt
.
xscale
(
'log'
)
plt
.
yscale
(
'log'
)
ax
.
xaxis
.
grid
(
b
=
True
,
which
=
'major'
,
color
=
'black'
,
linestyle
=
':'
)
ax
.
yaxis
.
grid
(
b
=
True
,
which
=
'major'
,
color
=
'black'
,
linestyle
=
'--'
)
ax
.
yaxis
.
grid
(
b
=
True
,
which
=
'minor'
,
color
=
'black'
,
linestyle
=
':'
)
ticks
=
[
20
*
2
**
i
for
i
in
range
(
0
,
12
)]
ax
.
xaxis
.
set_ticks
(
ticks
)
ax
.
xaxis
.
set_ticklabels
(
ticks
)
if
(
mat_size
<
10000
):
y_min
=
0.1
y_max
=
50
else
:
y_min
=
5
y_max
=
500
yticks_major
=
[
1
,
10
,
100
,
1000
,
y_min
,
y_max
]
ax
.
yaxis
.
set_ticks
(
yticks_major
)
ax
.
yaxis
.
set_ticklabels
(
yticks_major
)
# yticks_minor = [2, 5, 20, 50, 200, 500]
# ax.yaxis.set_ticks(yticks_minor, minor=True)
# ax.yaxis.set_ticklabels(yticks_minor, minor=True)
plt
.
ylim
([
y_min
,
y_max
])
plt
.
xlim
([
20
,
41000
])
plt
.
savefig
(
'plot.pdf'
)
#if show:
plt
.
show
()
#plt.close()
utils/scaling_scripts/plt
0 → 100755
View file @
fffcad08
#!/bin/bash
column
=
${
1
:-
2
}
read
x
echo set
terminal dumb
echo set
logscale xy
echo
plot
\"
-
\"
u 1:
$column
with lines title
\"
"
`
echo
$x
|
awk
'{print $"'
"
$column
"
'"}'
`
"
\"
echo
"#"
$x
while
read
x
;
do
echo
$x
;
done
utils/scaling_scripts/process.py
0 → 100755
View file @
fffcad08