Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
10
Issues
10
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
8567bbe5
Commit
8567bbe5
authored
Sep 12, 2017
by
Pavel Kus
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
polished elpa2_compute_real_template
parent
37da2a2f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
51 additions
and
47 deletions
+51
-47
src/elpa2/elpa2_compute_real_template.F90
src/elpa2/elpa2_compute_real_template.F90
+51
-47
No files found.
src/elpa2/elpa2_compute_real_template.F90
View file @
8567bbe5
...
@@ -103,17 +103,17 @@
...
@@ -103,17 +103,17 @@
use
elpa2_workload
use
elpa2_workload
use
precision
use
precision
implicit
none
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
na
,
nb
,
nbCol
,
nb2
,
nb2Col
,
communicator
integer
(
kind
=
ik
),
intent
(
in
)
::
na
,
nb
,
nbCol
,
nb2
,
nb2Col
,
communicator
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
ab
(
2
*
nb
,
nbCol
)
! removed assumed size
real
(
kind
=
rk
),
intent
(
inout
)
::
ab
(
2
*
nb
,
nbCol
)
! removed assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
ab2
(
2
*
nb2
,
nb2Col
)
! removed assumed size
real
(
kind
=
rk
),
intent
(
inout
)
::
ab2
(
2
*
nb2
,
nb2Col
)
! removed assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
out
)
::
d
(
na
),
e
(
na
)
! set only on PE 0
real
(
kind
=
rk
),
intent
(
out
)
::
d
(
na
),
e
(
na
)
! set only on PE 0
real
(
kind
=
REAL_DATATYPE
)
::
hv
(
nb
,
nb2
),
w
(
nb
,
nb2
),
w_new
(
nb
,
nb2
),
tau
(
nb2
),
hv_new
(
nb
,
nb2
),
&
real
(
kind
=
rk
)
::
hv
(
nb
,
nb2
),
w
(
nb
,
nb2
),
w_new
(
nb
,
nb2
),
tau
(
nb2
),
hv_new
(
nb
,
nb2
),
&
tau_new
(
nb2
),
ab_s
(
1
+
nb
,
nb2
),
ab_r
(
1
+
nb
,
nb2
),
ab_s2
(
2
*
nb2
,
nb2
),
hv_s
(
nb
,
nb2
)
tau_new
(
nb2
),
ab_s
(
1
+
nb
,
nb2
),
ab_r
(
1
+
nb
,
nb2
),
ab_s2
(
2
*
nb2
,
nb2
),
hv_s
(
nb
,
nb2
)
real
(
kind
=
REAL_DATATYPE
)
::
work
(
nb
*
nb2
),
work2
(
nb2
*
nb2
)
real
(
kind
=
rk
)
::
work
(
nb
*
nb2
),
work2
(
nb2
*
nb2
)
integer
(
kind
=
ik
)
::
lwork
,
info
integer
(
kind
=
ik
)
::
lwork
,
info
integer
(
kind
=
ik
)
::
istep
,
i
,
n
,
dest
integer
(
kind
=
ik
)
::
istep
,
i
,
n
,
dest
...
@@ -223,8 +223,8 @@
...
@@ -223,8 +223,8 @@
if
(
my_pe
==
0
)
then
if
(
my_pe
==
0
)
then
n
=
MIN
(
na
-
na_s
-
nb2
+1
,
nb
)
! number of rows to be reduced
n
=
MIN
(
na
-
na_s
-
nb2
+1
,
nb
)
! number of rows to be reduced
hv
(:,:)
=
CONST_0_0
hv
(:,:)
=
0.0_rk
tau
(:)
=
CONST_0_0
tau
(:)
=
0.0_rk
! The last step (istep=na-1) is only needed for sending the last HH vectors.
! The last step (istep=na-1) is only needed for sending the last HH vectors.
! We don't want the sign of the last element flipped (analogous to the other sweeps)
! We don't want the sign of the last element flipped (analogous to the other sweeps)
...
@@ -236,9 +236,9 @@
...
@@ -236,9 +236,9 @@
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"blas"
)
do
i
=
1
,
nb2
do
i
=
1
,
nb2
hv
(
i
,
i
)
=
CONST_1_0
hv
(
i
,
i
)
=
1.0_rk
hv
(
i
+1
:
n
,
i
)
=
ab
(
1
+
nb2
+1
:
1
+
nb2
+
n
-
i
,
na_s
-
n_off
+
i
-1
)
hv
(
i
+1
:
n
,
i
)
=
ab
(
1
+
nb2
+1
:
1
+
nb2
+
n
-
i
,
na_s
-
n_off
+
i
-1
)
ab
(
1
+
nb2
+1
:
2
*
nb
,
na_s
-
n_off
+
i
-1
)
=
CONST_0_0
ab
(
1
+
nb2
+1
:
2
*
nb
,
na_s
-
n_off
+
i
-1
)
=
0.0_rk
enddo
enddo
endif
endif
...
@@ -247,10 +247,10 @@
...
@@ -247,10 +247,10 @@
d
(
istep
)
=
ab
(
1
,
na_s
-
n_off
)
d
(
istep
)
=
ab
(
1
,
na_s
-
n_off
)
e
(
istep
)
=
ab
(
2
,
na_s
-
n_off
)
e
(
istep
)
=
ab
(
2
,
na_s
-
n_off
)
if
(
istep
==
na
)
then
if
(
istep
==
na
)
then
e
(
na
)
=
CONST_0_0
e
(
na
)
=
0.0_rk
endif
endif
else
else
ab_s2
=
CONST_0_0
ab_s2
=
0.0_rk
ab_s2
(:,:)
=
ab
(
1
:
nb2
+1
,
na_s
-
n_off
:
na_s
-
n_off
+
nb2
-1
)
ab_s2
(:,:)
=
ab
(
1
:
nb2
+1
,
na_s
-
n_off
:
na_s
-
n_off
+
nb2
-1
)
if
(
block_limits2
(
dest
+1
)
<
istep
)
then
if
(
block_limits2
(
dest
+1
)
<
istep
)
then
dest
=
dest
+1
dest
=
dest
+1
...
@@ -285,7 +285,7 @@
...
@@ -285,7 +285,7 @@
do
i
=
1
,
nb2
do
i
=
1
,
nb2
tau
(
i
)
=
hv
(
i
,
i
)
tau
(
i
)
=
hv
(
i
,
i
)
hv
(
i
,
i
)
=
CONST_1_0
hv
(
i
,
i
)
=
1.0_rk
enddo
enddo
endif
endif
endif
endif
...
@@ -293,7 +293,7 @@
...
@@ -293,7 +293,7 @@
na_s
=
na_s
+
nb2
na_s
=
na_s
+
nb2
if
(
na_s
-
n_off
>
nb
)
then
if
(
na_s
-
n_off
>
nb
)
then
ab
(:,
1
:
nblocks
*
nb
)
=
ab
(:,
nb
+1
:(
nblocks
+1
)
*
nb
)
ab
(:,
1
:
nblocks
*
nb
)
=
ab
(:,
nb
+1
:(
nblocks
+1
)
*
nb
)
ab
(:,
nblocks
*
nb
+1
:(
nblocks
+1
)
*
nb
)
=
CONST_0_0
ab
(:,
nblocks
*
nb
+1
:(
nblocks
+1
)
*
nb
)
=
0.0_rk
n_off
=
n_off
+
nb
n_off
=
n_off
+
nb
endif
endif
...
@@ -324,8 +324,8 @@
...
@@ -324,8 +324,8 @@
ab
(
1
:
nb
+1
,
ne
+
i
-1
)
=
ab_r
(:,
i
)
ab
(
1
:
nb
+1
,
ne
+
i
-1
)
=
ab_r
(:,
i
)
enddo
enddo
endif
endif
hv_new
(:,:)
=
CONST_0_0
! Needed, last rows must be 0 for nr < nb
hv_new
(:,:)
=
0.0_rk
! Needed, last rows must be 0 for nr < nb
tau_new
(:)
=
CONST_0_0
tau_new
(:)
=
0.0_rk
if
(
nr
>
0
)
then
if
(
nr
>
0
)
then
call
wy_right_
&
call
wy_right_
&
...
@@ -335,9 +335,9 @@
...
@@ -335,9 +335,9 @@
call
PRECISION_GEQRF
(
nr
,
nb2
,
ab
(
nb
+1
,
ns
),
2
*
nb
-1
,
tau_new
,
work
,
lwork
,
info
)
call
PRECISION_GEQRF
(
nr
,
nb2
,
ab
(
nb
+1
,
ns
),
2
*
nb
-1
,
tau_new
,
work
,
lwork
,
info
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"blas"
)
do
i
=
1
,
nb2
do
i
=
1
,
nb2
hv_new
(
i
,
i
)
=
CONST_1_0
hv_new
(
i
,
i
)
=
1.0_rk
hv_new
(
i
+1
:,
i
)
=
ab
(
nb
+2
:
2
*
nb
-
i
+1
,
ns
+
i
-1
)
hv_new
(
i
+1
:,
i
)
=
ab
(
nb
+2
:
2
*
nb
-
i
+1
,
ns
+
i
-1
)
ab
(
nb
+2
:,
ns
+
i
-1
)
=
CONST_0_0
ab
(
nb
+2
:,
ns
+
i
-1
)
=
0.0_rk
enddo
enddo
!send hh-Vector
!send hh-Vector
...
@@ -458,16 +458,17 @@
...
@@ -458,16 +458,17 @@
use
elpa_abstract_impl
use
elpa_abstract_impl
use
precision
use
precision
implicit
none
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!length of householder-vectors
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!length of householder-vectors
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!number of householder-vectors
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!number of householder-vectors
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of Y and W
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of Y and W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
lda
,
nb
)
!matrix containing nb householder-vectors of length b
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
lda
,
nb
)
!matrix containing nb householder-vectors of length b
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
tau
(
nb
)
!tau values
real
(
kind
=
rk
),
intent
(
in
)
::
tau
(
nb
)
!tau values
real
(
kind
=
REAL_DATATYPE
),
intent
(
out
)
::
W
(
lda
,
nb
)
!output matrix W
real
(
kind
=
rk
),
intent
(
out
)
::
W
(
lda
,
nb
)
!output matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
mem
(
nb
)
!memory for a temporary matrix of size nb
real
(
kind
=
rk
),
intent
(
in
)
::
mem
(
nb
)
!memory for a temporary matrix of size nb
integer
(
kind
=
ik
)
::
i
integer
(
kind
=
ik
)
::
i
call
obj
%
timer
%
start
(
"wy_gen"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"wy_gen"
//
PRECISION_SUFFIX
)
...
@@ -475,8 +476,8 @@
...
@@ -475,8 +476,8 @@
do
i
=
2
,
nb
do
i
=
2
,
nb
W
(
1
:
n
,
i
)
=
tau
(
i
)
*
Y
(
1
:
n
,
i
)
W
(
1
:
n
,
i
)
=
tau
(
i
)
*
Y
(
1
:
n
,
i
)
call
obj
%
timer
%
start
(
"blas"
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMV
(
'T'
,
n
,
i
-1
,
CONST_1_0
,
Y
,
lda
,
W
(
1
,
i
),
1
,
CONST_0_0
,
mem
,
1
)
call
PRECISION_GEMV
(
'T'
,
n
,
i
-1
,
1.0_rk
,
Y
,
lda
,
W
(
1
,
i
),
1
,
0.0_rk
,
mem
,
1
)
call
PRECISION_GEMV
(
'N'
,
n
,
i
-1
,
-
CONST_1_0
,
W
,
lda
,
mem
,
1
,
CONST_1_0
,
W
(
1
,
i
),
1
)
call
PRECISION_GEMV
(
'N'
,
n
,
i
-1
,
-
1.0_rk
,
W
,
lda
,
mem
,
1
,
1.0_rk
,
W
(
1
,
i
),
1
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"blas"
)
enddo
enddo
call
obj
%
timer
%
stop
(
"wy_gen"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
stop
(
"wy_gen"
//
PRECISION_SUFFIX
)
...
@@ -489,21 +490,22 @@
...
@@ -489,21 +490,22 @@
use
precision
use
precision
use
elpa_abstract_impl
use
elpa_abstract_impl
implicit
none
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!width of the matrix A
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!width of the matrix A
integer
(
kind
=
ik
),
intent
(
in
)
::
m
!length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
m
!length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
call
obj
%
timer
%
start
(
"wy_left"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"wy_left"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
n
,
m
,
CONST_1_0
,
W
,
lda2
,
A
,
lda
,
CONST_0_0
,
mem
,
nb
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
n
,
m
,
1.0_rk
,
W
,
lda2
,
A
,
lda
,
0.0_rk
,
mem
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
m
,
n
,
nb
,
-
CONST_1_0
,
Y
,
lda2
,
mem
,
nb
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
m
,
n
,
nb
,
-
1.0_rk
,
Y
,
lda2
,
mem
,
nb
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_left"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
stop
(
"wy_left"
//
PRECISION_SUFFIX
)
end
subroutine
end
subroutine
...
@@ -515,22 +517,23 @@
...
@@ -515,22 +517,23 @@
use
precision
use
precision
use
elpa_abstract_impl
use
elpa_abstract_impl
implicit
none
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!height of the matrix A
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!height of the matrix A
integer
(
kind
=
ik
),
intent
(
in
)
::
m
!length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
m
!length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
m
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
m
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
),
intent
(
inout
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
call
obj
%
timer
%
start
(
"wy_right"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"wy_right"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
m
,
CONST_1_0
,
A
,
lda
,
W
,
lda2
,
CONST_0_0
,
mem
,
n
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
m
,
1.0_rk
,
A
,
lda
,
W
,
lda2
,
0.0_rk
,
mem
,
n
)
call
PRECISION_GEMM
(
'N'
,
'T'
,
n
,
m
,
nb
,
-
CONST_1_0
,
mem
,
n
,
Y
,
lda2
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_GEMM
(
'N'
,
'T'
,
n
,
m
,
nb
,
-
1.0_rk
,
mem
,
n
,
Y
,
lda2
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_right"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
stop
(
"wy_right"
//
PRECISION_SUFFIX
)
...
@@ -543,23 +546,24 @@
...
@@ -543,23 +546,24 @@
use
elpa_abstract_impl
use
elpa_abstract_impl
use
precision
use
precision
implicit
none
implicit
none
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!width/heigth of the matrix A; length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
n
!width/heigth of the matrix A; length of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
nb
!width of matrix W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda
!leading dimension of A
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
integer
(
kind
=
ik
),
intent
(
in
)
::
lda2
!leading dimension of W and Y
real
(
kind
=
REAL_DATATYPE
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
rk
),
intent
(
inout
)
::
A
(
lda
,
*
)
!matrix to be transformed ! remove assumed size
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
W
(
n
,
nb
)
!blocked transformation matrix W
real
(
kind
=
rk
),
intent
(
in
)
::
W
(
n
,
nb
)
!blocked transformation matrix W
real
(
kind
=
REAL_DATATYPE
),
intent
(
in
)
::
Y
(
n
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
rk
),
intent
(
in
)
::
Y
(
n
,
nb
)
!blocked transformation matrix Y
real
(
kind
=
REAL_DATATYPE
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
rk
)
::
mem
(
n
,
nb
)
!memory for a temporary matrix of size n x nb
real
(
kind
=
REAL_DATATYPE
)
::
mem2
(
nb
,
nb
)
!memory for a temporary matrix of size nb x nb
real
(
kind
=
rk
)
::
mem2
(
nb
,
nb
)
!memory for a temporary matrix of size nb x nb
call
obj
%
timer
%
start
(
"wy_symm"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"wy_symm"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"blas"
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_SYMM
(
'L'
,
'L'
,
n
,
nb
,
CONST_1_0
,
A
,
lda
,
W
,
lda2
,
CONST_0_0
,
mem
,
n
)
call
PRECISION_SYMM
(
'L'
,
'L'
,
n
,
nb
,
1.0_rk
,
A
,
lda
,
W
,
lda2
,
0.0_rk
,
mem
,
n
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
nb
,
n
,
CONST_1_0
,
mem
,
n
,
W
,
lda2
,
CONST_0_0
,
mem2
,
nb
)
call
PRECISION_GEMM
(
'T'
,
'N'
,
nb
,
nb
,
n
,
1.0_rk
,
mem
,
n
,
W
,
lda2
,
0.0_rk
,
mem2
,
nb
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
nb
,
-
CONST_0_5
,
Y
,
lda2
,
mem2
,
nb
,
CONST_1_0
,
mem
,
n
)
call
PRECISION_GEMM
(
'N'
,
'N'
,
n
,
nb
,
nb
,
-
0.5_rk
,
Y
,
lda2
,
mem2
,
nb
,
1.0_rk
,
mem
,
n
)
call
PRECISION_SYR2K
(
'L'
,
'N'
,
n
,
nb
,
-
CONST_1_0
,
Y
,
lda2
,
mem
,
n
,
CONST_1_0
,
A
,
lda
)
call
PRECISION_SYR2K
(
'L'
,
'N'
,
n
,
nb
,
-
1.0_rk
,
Y
,
lda2
,
mem
,
n
,
1.0_rk
,
A
,
lda
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"blas"
)
call
obj
%
timer
%
stop
(
"wy_symm"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
stop
(
"wy_symm"
//
PRECISION_SUFFIX
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment