Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
2626ebcc
Commit
2626ebcc
authored
Dec 01, 2011
by
Thomas Auckenthaler
Browse files
Band-band reduction (band_band_real) added to elpa2. divide_band is now defined as public.
parent
d7a57612
Changes
1
Hide whitespace changes
Inline
Side-by-side
ELPA_development_version/src/elpa2.f90
View file @
2626ebcc
...
...
@@ -29,6 +29,9 @@ module ELPA2
public
::
tridiag_band_complex
public
::
trans_ev_tridi_to_band_complex
public
::
trans_ev_band_to_full_complex
public
::
band_band_real
public
::
divide_band
!-------------------------------------------------------------------------------
...
...
@@ -3409,4 +3412,333 @@ subroutine divide_band(nblocks_total, n_pes, block_limits)
end
subroutine
!-------------------------------------------------------------------------------
subroutine band_band_real(na, nb, nb2, ab, ab2, d, e, mpi_comm)

!-------------------------------------------------------------------------------
! band_band_real:
! Reduces a real symmetric banded matrix to a real symmetric matrix with smaller
! bandwidth. Householder transformations are not stored.
! Matrix size na and original bandwidth nb have to be a multiple of the target
! bandwidth nb2. (Hint: expand your matrix with zero entries, if this
! requirement doesn't hold)
!
!  na       Order of matrix
!
!  nb       Semi bandwidth of original matrix
!
!  nb2      Semi bandwidth of target matrix
!
!  ab       Input matrix with bandwidth nb. The leading dimension of the banded
!           matrix has to be 2*nb. The parallel data layout has to be accordant
!           to divide_band(), i.e. the matrix columns block_limits(n)*nb+1 to
!           min(na, block_limits(n+1)*nb) are located on rank n.
!
!  ab2      Output matrix with bandwidth nb2. The leading dimension of the
!           banded matrix is 2*nb2. The parallel data layout is accordant to
!           divide_band(), i.e. the matrix columns block_limits(n)*nb2+1 to
!           min(na, block_limits(n+1)*nb2) are located on rank n.
!
!  d(na)    Diagonal of tridiagonal matrix, set only on PE 0, set only if
!           nb2 = 1 (output)   [original comment said "ab2 = 1" — presumably a
!           typo: the code below fills d/e inside "if (nb2==1)"]
!
!  e(na)    Subdiagonal of tridiagonal matrix, set only on PE 0, set only if
!           nb2 = 1 (output)
!
!  mpi_comm MPI-Communicator for the total processor set
!-------------------------------------------------------------------------------

   implicit none

   integer, intent(in) :: na, nb, nb2, mpi_comm
   real*8, intent(inout) :: ab(2*nb,*)
   real*8, intent(inout) :: ab2(2*nb2,*)
   real*8, intent(out) :: d(na), e(na) ! set only on PE 0

!----------------
   ! hv/tau:       Householder vectors/factors applied in the current block
   ! hv_new/tau_new: the vectors generated while chasing, used in the next block
   ! ab_s/ab_r:    send/receive buffers for band columns (tag 1)
   ! ab_s2:        send buffer for finished result columns of ab2 (tag 3)
   ! hv_s:         send buffer for Householder vectors (tag 2); tau is packed
   !               on its diagonal
   real*8 hv(nb,nb2), w(nb,nb2), w_new(nb,nb2), tau(nb2), hv_new(nb,nb2), &
          tau_new(nb2), ab_s(1+nb,nb2), ab_r(1+nb,nb2), ab_s2(2*nb2,nb2), hv_s(nb,nb2)

   real*8 work(nb*nb2), work2(nb2*nb2) ! workspaces for dgeqrf and the wy_* helpers
   integer lwork, info
   integer istep, i, n, dest
   integer n_off, na_s                 ! n_off: global column offset of the local part of ab
   integer my_pe, n_pes, mpierr
   integer nblocks_total, nblocks
   integer nblocks_total2, nblocks2
   integer ireq_ab, ireq_hv            ! request handles of outstanding isends
   integer, allocatable :: block_limits(:), block_limits2(:), ireq_ab2(:)

!----------------
   integer j, nc, nr, ns, ne, iblk     ! NOTE(review): j appears to be unused

   call mpi_comm_rank(mpi_comm, my_pe, mpierr)
   call mpi_comm_size(mpi_comm, n_pes, mpierr)

   ! Total number of blocks in the band:
   nblocks_total = (na-1)/nb + 1
   nblocks_total2 = (na-1)/nb2 + 1

   ! Set work distribution (block_limits for the input layout, block_limits2
   ! for the output layout of ab2)
   allocate(block_limits(0:n_pes))
   call divide_band(nblocks_total, n_pes, block_limits)

   allocate(block_limits2(0:n_pes))
   call divide_band(nblocks_total2, n_pes, block_limits2)

   ! nblocks: the number of blocks for my task
   nblocks = block_limits(my_pe+1) - block_limits(my_pe)
   nblocks2 = block_limits2(my_pe+1) - block_limits2(my_pe)

   ! Pre-post receives for the finished result columns of ab2 (tag 3);
   ! PE 0 sends them one nb2-block at a time as the reduction proceeds.
   allocate(ireq_ab2(1:nblocks2))
   ireq_ab2 = MPI_REQUEST_NULL
   if (nb2>1) then
      do i = 0, nblocks2-1
         call mpi_irecv(ab2(1,i*nb2+1), 2*nb2*nb2, mpi_real8, 0, 3, mpi_comm, ireq_ab2(i+1), mpierr)
      enddo
   endif

   ! n_off: Offset of ab within band
   n_off = block_limits(my_pe)*nb
   lwork = nb*nb2
   dest = 0

   ireq_ab = MPI_REQUEST_NULL
   ireq_hv = MPI_REQUEST_NULL

   ! ---------------------------------------------------------------------------
   ! Start of calculations

   na_s = block_limits(my_pe)*nb + 1   ! first global column owned by this PE

   if (my_pe>0 .and. na_s<=na) then
      ! send first nb2 columns to previous PE
      ! Only the PE owning the diagonal does that (sending 1 element of the subdiagonal block also)
      do i = 1, nb2
         ab_s(1:nb+1,i) = ab(1:nb+1,na_s-n_off+i-1)
      enddo
      call mpi_isend(ab_s, (nb+1)*nb2, mpi_real8, my_pe-1, 1, mpi_comm, ireq_ab, mpierr)
   endif

   ! Main reduction loop: one sweep per nb2 columns of the matrix.
   do istep = 1, na/nb2

      if (my_pe==0) then
         ! PE 0 generates the Householder vectors that start each sweep.
         n = MIN(na-na_s-nb2+1,nb) ! number of rows to be reduced
         hv(:,:) = 0
         tau(:) = 0

         ! The last step (istep=na-1) is only needed for sending the last HH vectors.
         ! We don't want the sign of the last element flipped (analogous to the other sweeps)
         if (istep < na/nb2) then

            ! Transform first block column of remaining matrix via a QR factorization
            call dgeqrf(n, nb2, ab(1+nb2,na_s-n_off), 2*nb-1, tau, work, lwork, info);

            ! Extract the Householder vectors from the factored block column
            ! and zero out the annihilated entries of ab.
            do i = 1, nb2
               hv(i,i) = 1.0
               hv(i+1:n,i) = ab(1+nb2+1:1+nb2+n-i,na_s-n_off+i-1)
               ab(1+nb2+1:2*nb,na_s-n_off+i-1) = 0
            enddo

         endif

         if (nb2==1) then
            ! Target bandwidth 1: store the tridiagonal matrix directly.
            d(istep) = ab(1,na_s-n_off)
            e(istep) = ab(2,na_s-n_off)
            if (istep==na) then
               e(na) = 0
            endif
         else
            ! Ship the finished nb2 columns (tag 3) to the rank that owns them
            ! in the ab2 layout (matches the pre-posted mpi_irecv above).
            ab_s2 = 0
            ab_s2(:,:) = ab(1:nb2+1,na_s-n_off:na_s-n_off+nb2-1)
            if (block_limits2(dest+1)<istep) then
               dest = dest+1
            endif
            call mpi_send(ab_s2, 2*nb2*nb2, mpi_real8, dest, 3, mpi_comm, mpierr)
         endif

      else
         if (na>na_s+nb2-1) then
            ! Receive Householder vectors from previous task, from PE owning subdiagonal
            call mpi_recv(hv, nb*nb2, mpi_real8, my_pe-1, 2, mpi_comm, MPI_STATUS_IGNORE, mpierr)
            ! tau values travel packed on the diagonal of hv; unpack them and
            ! restore the unit diagonal (see the matching pack at mpi_isend below).
            do i = 1, nb2
               tau(i) = hv(i,i)
               hv(i,i) = 1.
            enddo
         endif
      endif

      ! Advance the window by nb2 columns; when it leaves the first block,
      ! shift the local part of ab left by one block and clear the tail.
      na_s = na_s+nb2
      if (na_s-n_off > nb) then
         ab(:,1:nblocks*nb) = ab(:,nb+1:(nblocks+1)*nb)
         ab(:,nblocks*nb+1:(nblocks+1)*nb) = 0
         n_off = n_off + nb
      endif

      ! Chase the fill-in through all local blocks.
      do iblk = 1, nblocks
         ns = na_s + (iblk-1)*nb - n_off ! first column in block
         ne = ns+nb-nb2                  ! last column in block

         if (ns+n_off>na) exit

         nc = MIN(na-ns-n_off+1,nb)    ! number of columns in diagonal block
         nr = MIN(na-nb-ns-n_off+1,nb) ! rows in subdiagonal block (may be < 0!!!)
         ! Note that nr>=0 implies that diagonal block is full (nc==nb)!

         ! Build the W factor of the WY representation for the current vectors.
         call wy_gen(nc,nb2,w,hv,tau,work,nb)

         if (iblk==nblocks .and. nc==nb) then
            ! request last nb2 columns from the successor PE (tag 1)
            call mpi_recv(ab_r, (nb+1)*nb2, mpi_real8, my_pe+1, 1, mpi_comm, MPI_STATUS_IGNORE, mpierr)
            do i = 1, nb2
               ab(1:nb+1,ne+i-1) = ab_r(:,i)
            enddo
         endif

         hv_new(:,:) = 0 ! Needed, last rows must be 0 for nr < nb
         tau_new(:) = 0

         if (nr>0) then

            ! Transform the subdiagonal block from the right ...
            call wy_right(nr,nb,nb2,ab(nb+1,ns),2*nb-1,w,hv,work,nb)

            ! ... and QR-factor it to generate the next Householder vectors.
            call dgeqrf(nr, nb2, ab(nb+1,ns), 2*nb-1, tau_new, work, lwork, info);

            do i = 1, nb2
               hv_new(i,i) = 1.0
               hv_new(i+1:,i) = ab(nb+2:2*nb-i+1,ns+i-1)
               ab(nb+2:,ns+i-1) = 0
            enddo

            ! send hh-vector to the successor PE (tag 2); tau_new is packed
            ! on the diagonal of hv_s (unpacked at the mpi_recv above).
            if (iblk==nblocks) then
               call mpi_wait(ireq_hv, MPI_STATUS_IGNORE, mpierr)
               hv_s = hv_new
               do i = 1, nb2
                  hv_s(i,i) = tau_new(i)
               enddo
               call mpi_isend(hv_s, nb*nb2, mpi_real8, my_pe+1, 2, mpi_comm, ireq_hv, mpierr)
            endif

         endif

         ! Two-sided update of the symmetric diagonal block.
         call wy_symm(nc,nb2,ab(1,ns),2*nb-1,w,hv,work,work2,nb)

         if (my_pe>0 .and. iblk==1) then
            ! send first nb2 columns to previous PE (tag 1); wait for the
            ! previous isend on the same buffer before reusing it
            call mpi_wait(ireq_ab, MPI_STATUS_IGNORE, mpierr)
            do i = 1, nb2
               ab_s(1:nb+1,i) = ab(1:nb+1,ns+i-1)
            enddo
            call mpi_isend(ab_s, (nb+1)*nb2, mpi_real8, my_pe-1, 1, mpi_comm, ireq_ab, mpierr)
         endif

         if (nr>0) then
            ! Apply the newly generated block of Householder vectors from the
            ! left to the remaining columns of the current block.
            call wy_gen(nr,nb2,w_new,hv_new,tau_new,work,nb)
            call wy_left(nb-nb2,nr,nb2,ab(nb+1-nb2,ns+nb2),2*nb-1,w_new,hv_new,work,nb)
         endif

         ! Use new HH vector for the next block
         hv(:,:) = hv_new(:,:)
         tau = tau_new

      enddo

   enddo

   ! Finish the last outstanding requests
   call mpi_wait(ireq_ab, MPI_STATUS_IGNORE, mpierr)
   call mpi_wait(ireq_hv, MPI_STATUS_IGNORE, mpierr)
   call mpi_waitall(nblocks2, ireq_ab2, MPI_STATUSES_IGNORE, mpierr)

   call mpi_barrier(mpi_comm, mpierr)

   deallocate(block_limits)
   deallocate(block_limits2)
   deallocate(ireq_ab2)

end subroutine
! --------------------------------------------------------------------------------------------------
subroutine wy_gen(n, nb, W, Y, tau, mem, lda)
   ! Build the W factor of the compact WY representation Q = I - W*Y^T for a
   ! set of nb Householder vectors (the columns of Y) with scalar factors tau,
   ! as produced e.g. by dgeqrf.
   !
   !  n    length of the Householder vectors (rows of W/Y actually used)
   !  nb   number of Householder vectors
   !  lda  leading dimension of Y and W
   !  Y    (in)  matrix containing nb Householder vectors of length n
   !  tau  (in)  Householder scalar factors
   !  W    (out) resulting W matrix
   !  mem  workspace vector of length nb (overwritten)

   implicit none

   integer, intent(in)   :: n
   integer, intent(in)   :: nb
   integer, intent(in)   :: lda
   real*8, intent(in)    :: Y(lda,nb)
   real*8, intent(in)    :: tau(nb)
   real*8, intent(out)   :: W(lda,nb)
   ! BUGFIX: mem was declared intent(in) although the first DGEMV below writes
   ! into it (beta = 0), which is non-conforming; changed to intent(inout).
   real*8, intent(inout) :: mem(nb)

   integer :: i

   W(1:n,1) = tau(1)*Y(1:n,1)
   do i = 2, nb
      ! Start from tau(i)*Y(:,i) ...
      W(1:n,i) = tau(i)*Y(1:n,i)
      ! ... mem(1:i-1) = Y(:,1:i-1)^T * W(:,i) ...
      call DGEMV('T', n, i-1, 1.d0, Y, lda, W(1,i), 1, 0.d0, mem, 1)
      ! ... then W(:,i) = W(:,i) - W(:,1:i-1) * mem.
      call DGEMV('N', n, i-1, -1.d0, W, lda, mem, 1, 1.d0, W(1,i), 1)
   enddo

end subroutine wy_gen
! --------------------------------------------------------------------------------------------------
subroutine wy_left(n, m, nb, A, lda, W, Y, mem, lda2)
   ! Apply a blocked WY Householder transformation from the left:
   !    A := (I - Y*W^T) * A
   ! computed as  mem = W^T*A  followed by  A = A - Y*mem.
   !
   !  n    width of the matrix A (number of columns transformed)
   !  m    vector length (rows of W and Y actually used)
   !  nb   width of the matrices W and Y (number of Householder vectors)
   !  lda  leading dimension of A
   !  lda2 leading dimension of W and Y
   !  A    (inout) matrix to be transformed
   !  W, Y (in)    blocked transformation matrices
   !  mem  workspace for the nb x n intermediate product (overwritten)

   implicit none

   integer, intent(in)   :: n
   integer, intent(in)   :: m
   integer, intent(in)   :: nb
   integer, intent(in)   :: lda
   integer, intent(in)   :: lda2
   real*8, intent(inout) :: A(lda,*)
   real*8, intent(in)    :: W(m,nb)
   real*8, intent(in)    :: Y(m,nb)
   ! Declared shape corrected from (n,nb) to (nb,n): the DGEMM calls below use
   ! mem as an nb x n matrix with leading dimension nb (same total size, so
   ! callers are unaffected).
   real*8, intent(inout) :: mem(nb,n)

   ! mem = W^T * A   (nb x n)
   call DGEMM('T', 'N', nb, n, m, 1.d0, W, lda2, A, lda, 0.d0, mem, nb)
   ! A = A - Y * mem
   call DGEMM('N', 'N', m, n, nb, -1.d0, Y, lda2, mem, nb, 1.d0, A, lda)

end subroutine wy_left
! --------------------------------------------------------------------------------------------------
subroutine wy_right(n, m, nb, A, lda, W, Y, mem, lda2)
   ! Apply a blocked WY Householder transformation from the right:
   !    A := A * (I - W*Y^T)
   ! computed as  mem = A*W  followed by  A = A - mem*Y^T.
   !
   !  n    height of the matrix A (number of rows transformed)
   !  m    vector length (rows of W and Y actually used; columns of A touched)
   !  nb   width of the matrices W and Y (number of Householder vectors)
   !  lda  leading dimension of A
   !  lda2 leading dimension of W and Y
   !  A    (inout) matrix to be transformed
   !  W, Y (in)    blocked transformation matrices
   !  mem  workspace for the n x nb intermediate product (overwritten)

   implicit none

   integer, intent(in)   :: n
   integer, intent(in)   :: m
   integer, intent(in)   :: nb
   integer, intent(in)   :: lda
   integer, intent(in)   :: lda2
   real*8, intent(inout) :: A(lda,*)
   real*8, intent(in)    :: W(m,nb)
   real*8, intent(in)    :: Y(m,nb)
   real*8, intent(inout) :: mem(n,nb)

   ! mem = A * W   (n x nb)
   call DGEMM('N', 'N', n, nb, m, 1.d0, A, lda, W, lda2, 0.d0, mem, n)
   ! A = A - mem * Y^T
   call DGEMM('N', 'T', n, m, nb, -1.d0, mem, n, Y, lda2, 1.d0, A, lda)

end subroutine wy_right
! --------------------------------------------------------------------------------------------------
subroutine wy_symm(n, nb, A, lda, W, Y, mem, mem2, lda2)
   ! Two-sided blocked WY update of a symmetric matrix (lower triangle stored),
   ! evaluated via a symmetric rank-2k update:
   !    mem  = A*W                       (DSYMM, symmetric A)
   !    mem2 = mem^T*W  (= W^T*A*W)
   !    mem  = mem - 0.5*Y*mem2
   !    A    = A - Y*mem^T - mem*Y^T     (DSYR2K, lower triangle)
   ! NOTE(review): this is the standard formulation of A := Q^T*A*Q for
   ! Q = I - W*Y^T — verify the transposition convention against the callers.
   !
   !  n    width/height of the matrix A; vector length of W and Y
   !  nb   width of the matrices W and Y (number of Householder vectors)
   !  lda  leading dimension of A
   !  lda2 leading dimension of W and Y
   !  A    (inout) symmetric matrix to be transformed (lower triangle referenced)
   !  W, Y (in)    blocked transformation matrices
   !  mem  workspace, n x nb (overwritten)
   !  mem2 workspace, nb x nb (overwritten)

   implicit none

   integer, intent(in)   :: n
   integer, intent(in)   :: nb
   integer, intent(in)   :: lda
   integer, intent(in)   :: lda2
   real*8, intent(inout) :: A(lda,*)
   real*8, intent(in)    :: W(n,nb)
   real*8, intent(in)    :: Y(n,nb)
   real*8, intent(inout) :: mem(n,nb)
   real*8, intent(inout) :: mem2(nb,nb)

   ! mem = A * W (using only the lower triangle of A)
   call DSYMM('L', 'L', n, nb, 1.d0, A, lda, W, lda2, 0.d0, mem, n)
   ! mem2 = mem^T * W = W^T * A * W
   call DGEMM('T', 'N', nb, nb, n, 1.d0, mem, n, W, lda2, 0.d0, mem2, nb)
   ! mem = mem - 0.5 * Y * mem2
   call DGEMM('N', 'N', n, nb, nb, -0.5d0, Y, lda2, mem2, nb, 1.d0, mem, n)
   ! A = A - Y*mem^T - mem*Y^T (lower triangle only)
   call DSYR2K('L', 'N', n, nb, -1.d0, Y, lda2, mem, n, 1.d0, A, lda)

end subroutine wy_symm
! --------------------------------------------------------------------------------------------------
end
module
ELPA2
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment