Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
c45e001d
Commit
c45e001d
authored
Jan 23, 2013
by
Alexander Heinecke
Browse files
added blocking for complex band to full
parent
545881e6
Changes
1
Hide whitespace changes
Inline
Side-by-side
ELPA_2013.02.BETA/src/elpa2.f90
View file @
c45e001d
...
...
@@ -2208,9 +2208,9 @@ subroutine trans_ev_band_to_full_complex(na, nqc, nblk, nbw, a, lda, tmat, q, ld
integer
l_cols
,
l_rows
,
l_colh
,
n_cols
integer
istep
,
lc
,
ncol
,
nrow
,
nb
,
ns
complex
*
16
,
allocatable
::
tmp1
(:),
tmp2
(:),
hvb
(:),
hvm
(:,:)
complex
*
16
,
allocatable
::
tmp1
(:),
tmp2
(:),
hvb
(:),
hvm
(:,:)
,
tmat_complete
(:,:),
t_tmp
(:,:),
t_tmp2
(:,:)
integer
pcol
,
prow
,
i
integer
pcol
,
prow
,
i
,
cwy_blocking
,
t_blocking
,
t_cols
,
t_rows
pcol
(
i
)
=
MOD
((
i
-1
)/
nblk
,
np_cols
)
!Processor col for global col number
prow
(
i
)
=
MOD
((
i
-1
)/
nblk
,
np_rows
)
!Processor row for global row number
...
...
@@ -2226,20 +2226,26 @@ subroutine trans_ev_band_to_full_complex(na, nqc, nblk, nbw, a, lda, tmat, q, ld
max_local_rows
=
max_blocks_row
*
nblk
max_local_cols
=
max_blocks_col
*
nblk
t_blocking
=
2
! number of matrices T (tmat) which are aggregated into a new (larger) T matrix (tmat_complete) and applied at once
cwy_blocking
=
t_blocking
*
nbw
allocate
(
tmp1
(
max_local_cols
*
nbw
))
allocate
(
tmp2
(
max_local_cols
*
nbw
))
allocate
(
hvb
(
max_local_rows
*
nbw
))
allocate
(
hvm
(
max_local_rows
,
nbw
))
allocate
(
tmp1
(
max_local_cols
*
cwy_blocking
))
allocate
(
tmp2
(
max_local_cols
*
cwy_blocking
))
allocate
(
hvb
(
max_local_rows
*
cwy_blocking
))
allocate
(
hvm
(
max_local_rows
,
cwy_blocking
))
allocate
(
tmat_complete
(
cwy_blocking
,
cwy_blocking
))
allocate
(
t_tmp
(
cwy_blocking
,
nbw
))
allocate
(
t_tmp2
(
cwy_blocking
,
nbw
))
hvm
=
0
! Must be set to 0 !!!
hvb
=
0
! Safety only
l_cols
=
local_index
(
nqc
,
my_pcol
,
np_cols
,
nblk
,
-1
)
! Local columns of q
do
istep
=
1
,(
na
-1
)/
nbw
do
istep
=
1
,(
(
na
-1
)/
nbw
-1
)/
t_blocking
+
1
n_cols
=
MIN
(
na
,
(
istep
+1
)
*
nbw
)
-
istep
*
nbw
! Number of columns in current step
n_cols
=
MIN
(
na
,
istep
*
cwy_blocking
+
nbw
)
-
(
istep
-1
)
*
cwy_blocking
-
nbw
! Number of columns in current step
! Broadcast all Householder vectors for current step compressed in hvb
...
...
@@ -2247,7 +2253,7 @@ subroutine trans_ev_band_to_full_complex(na, nqc, nblk, nbw, a, lda, tmat, q, ld
ns
=
0
do
lc
=
1
,
n_cols
ncol
=
istep
*
nbw
+
lc
! absolute column number of householder vector
ncol
=
(
istep
-1
)
*
cwy_blocking
+
nbw
+
lc
! absolute column number of householder vector
nrow
=
ncol
-
nbw
! absolute number of pivot row
l_rows
=
local_index
(
nrow
-1
,
my_prow
,
np_rows
,
nblk
,
-1
)
! row length for bcast
...
...
@@ -2267,7 +2273,7 @@ subroutine trans_ev_band_to_full_complex(na, nqc, nblk, nbw, a, lda, tmat, q, ld
nb
=
0
do
lc
=
1
,
n_cols
nrow
=
(
istep
-1
)
*
nbw
+
lc
! absolute number of pivot row
nrow
=
(
istep
-1
)
*
cwy_blocking
+
lc
! absolute number of pivot row
l_rows
=
local_index
(
nrow
-1
,
my_prow
,
np_rows
,
nblk
,
-1
)
! row length for bcast
hvm
(
1
:
l_rows
,
lc
)
=
hvb
(
nb
+1
:
nb
+
l_rows
)
...
...
@@ -2276,9 +2282,25 @@ subroutine trans_ev_band_to_full_complex(na, nqc, nblk, nbw, a, lda, tmat, q, ld
nb
=
nb
+
l_rows
enddo
l_rows
=
local_index
(
MIN
(
na
,(
istep
+1
)
*
nbw
),
my_prow
,
np_rows
,
nblk
,
-1
)
l_rows
=
local_index
(
MIN
(
na
,(
istep
+1
)
*
cwy_blocking
),
my_prow
,
np_rows
,
nblk
,
-1
)
! Q = Q - V * T**T * V**T * Q
! compute tmat2 out of tmat(:,:,)
tmat_complete
(:,:)
=
CZERO
do
i
=
1
,
t_blocking
t_cols
=
MIN
(
nbw
,
n_cols
-
(
i
-1
)
*
nbw
)
if
(
t_cols
<=
0
)
exit
t_rows
=
(
i
-
1
)
*
nbw
tmat_complete
(
t_rows
+1
:
t_rows
+
t_cols
,
t_rows
+1
:
t_rows
+
t_cols
)
=
tmat
(
1
:
t_cols
,
1
:
t_cols
,(
istep
-1
)
*
t_blocking
+
i
)
if
(
i
>
1
)
then
call
zgemm
(
'C'
,
'N'
,
t_rows
,
t_cols
,
l_rows
,
CONE
,
hvm
(
1
,
1
),
max_local_rows
,
hvm
(
1
,(
i
-1
)
*
nbw
+1
),
max_local_rows
,
CZERO
,
t_tmp
,
cwy_blocking
)
call
mpi_allreduce
(
t_tmp
,
t_tmp2
,
cwy_blocking
*
nbw
,
MPI_DOUBLE_COMPLEX
,
MPI_SUM
,
mpi_comm_rows
,
mpierr
)
call
ztrmm
(
'L'
,
'U'
,
'N'
,
'N'
,
t_rows
,
t_cols
,
CONE
,
tmat_complete
,
cwy_blocking
,
t_tmp2
,
cwy_blocking
)
call
ztrmm
(
'R'
,
'U'
,
'N'
,
'N'
,
t_rows
,
t_cols
,
-
CONE
,
tmat_complete
(
t_rows
+1
,
t_rows
+1
),
cwy_blocking
,
t_tmp2
,
cwy_blocking
)
tmat_complete
(
1
:
t_rows
,
t_rows
+1
:
t_rows
+
t_cols
)
=
t_tmp2
(
1
:
t_rows
,
1
:
t_cols
)
endif
enddo
if
(
l_rows
>
0
)
then
call
zgemm
(
'C'
,
'N'
,
n_cols
,
l_cols
,
l_rows
,
CONE
,
hvm
,
ubound
(
hvm
,
1
),
&
...
...
@@ -2288,14 +2310,14 @@ subroutine trans_ev_band_to_full_complex(na, nqc, nblk, nbw, a, lda, tmat, q, ld
endif
call
mpi_allreduce
(
tmp1
,
tmp2
,
n_cols
*
l_cols
,
MPI_DOUBLE_COMPLEX
,
MPI_SUM
,
mpi_comm_rows
,
mpierr
)
if
(
l_rows
>
0
)
then
call
ztrmm
(
'L'
,
'U'
,
'C'
,
'N'
,
n_cols
,
l_cols
,
CONE
,
tmat
(
1
,
1
,
istep
),
ubound
(
tmat
,
1
)
,
tmp2
,
n_cols
)
call
ztrmm
(
'L'
,
'U'
,
'C'
,
'N'
,
n_cols
,
l_cols
,
CONE
,
tmat
_complete
,
cwy_blocking
,
tmp2
,
n_cols
)
call
zgemm
(
'N'
,
'N'
,
l_rows
,
l_cols
,
n_cols
,
-
CONE
,
hvm
,
ubound
(
hvm
,
1
),
&
tmp2
,
n_cols
,
CONE
,
q
,
ldq
)
endif
enddo
deallocate
(
tmp1
,
tmp2
,
hvb
,
hvm
)
deallocate
(
tmp1
,
tmp2
,
hvb
,
hvm
,
tmat_complete
,
t_tmp
,
t_tmp2
)
end
subroutine
trans_ev_band_to_full_complex
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment