Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
9655c781
Commit
9655c781
authored
May 30, 2012
by
Thomas Auckenthaler
Browse files
"use mpi" replaced by "include 'mpif.h'"
dropped old rank-2 QR-decomposition (blockedQR.f90)
parent
db110dfc
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
ELPA_development_version/src/blockedQR.f90
deleted
100644 → 0
View file @
db110dfc
This diff is collapsed.
Click to expand it.
ELPA_development_version/src/elpa2.f90
View file @
9655c781
...
...
@@ -10,6 +10,7 @@ module ELPA2
! Version 1.1.2, 2011-02-21
USE
ELPA1
use
elpa_pdgeqrf
implicit
none
...
...
ELPA_development_version/src/elpa_qr/elpa_pdgeqrf.f90
View file @
9655c781
This diff is collapsed.
Click to expand it.
ELPA_development_version/src/elpa_qr/elpa_pdlarfb.f90
View file @
9655c781
module
elpa_pdlarfb
use
elpa1
use
tum_utils
implicit
none
PRIVATE
public
::
tum_pdlarfb_1dcomm
public
::
tum_pdlarft_pdlarfb_1dcomm
public
::
tum_pdlarft_set_merge_1dcomm
public
::
tum_pdlarft_tree_merge_1dcomm
public
::
tum_pdlarfl_1dcomm
public
::
tum_pdlarfl2_tmatrix_1dcomm
public
::
tum_tmerge_pdlarfb_1dcomm
include
'mpif.h'
contains
subroutine
tum_pdlarfb_1dcomm
(
m
,
mb
,
n
,
k
,
a
,
lda
,
v
,
ldv
,
tau
,
t
,
ldt
,
baseidx
,
idx
,
rev
,
mpicomm
,
work
,
lwork
)
use
mpi
use
tum_utils
implicit
none
...
...
@@ -59,11 +80,11 @@ subroutine tum_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpi
call
mpi_allreduce
(
work
(
1
,
1
),
work
(
1
,
n
+1
),
k
*
n
,
mpi_real8
,
mpi_sum
,
mpicomm
,
mpierr
)
call
tum_pdlarfb_kernel_local
(
localsize
,
n
,
k
,
a
(
offset
,
1
),
lda
,
v
(
baseoffset
,
1
),
ldv
,
t
,
ldt
,
work
(
1
,
n
+1
),
k
,
rev
)
end
subroutine
end
subroutine
tum_pdlarfb_1dcomm
! generalized pdlarfl2 version
! TODO: include T merge here (seperate by "old" and "new" index)
subroutine
tum_pdlarft_pdlarfb_1dcomm
(
m
,
mb
,
n
,
oldk
,
k
,
v
,
ldv
,
tau
,
t
,
ldt
,
a
,
lda
,
baseidx
,
idx
,
rev
,
mpicomm
,
work
,
lwork
)
use
mpi
use
tum_utils
implicit
none
...
...
@@ -140,9 +161,9 @@ subroutine tum_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseid
! A = A - Y * V'
call
dgemm
(
"Notrans"
,
"Notrans"
,
localsize
,
n
,
k
,
-1.0d0
,
v
(
baseoffset
,
1
),
ldv
,
work
(
1
,
recvoffset
+
k
),
k
,
1.0d0
,
a
(
offset
,
1
),
lda
)
end
subroutine
end
subroutine
tum_pdlarft_pdlarfb_1dcomm
subroutine
tum_pdlarft_set_merge_1dcomm
(
m
,
mb
,
n
,
blocksize
,
v
,
ldv
,
tau
,
t
,
ldt
,
baseidx
,
idx
,
rev
,
mpicomm
,
work
,
lwork
)
use
mpi
use
tum_utils
implicit
none
...
...
@@ -187,9 +208,9 @@ subroutine tum_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,tau,t,ldt,baseidx
if
(
offset
.eq.
0
)
offset
=
blocksize
call
tum_tmerge_set_kernel
(
n
,
blocksize
,
t
,
ldt
,
work
(
1
,
n
+1
+
offset
),
n
,
1
)
end
subroutine
end
subroutine
tum_pdlarft_set_merge_1dcomm
subroutine
tum_pdlarft_tree_merge_1dcomm
(
m
,
mb
,
n
,
blocksize
,
treeorder
,
v
,
ldv
,
tau
,
t
,
ldt
,
baseidx
,
idx
,
rev
,
mpicomm
,
work
,
lwork
)
use
mpi
use
tum_utils
implicit
none
...
...
@@ -236,14 +257,14 @@ subroutine tum_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,tau,t,
if
(
offset
.eq.
0
)
offset
=
blocksize
call
tum_tmerge_tree_kernel
(
n
,
blocksize
,
treeorder
,
t
,
ldt
,
work
(
1
,
n
+1
+
offset
),
n
,
1
)
end
subroutine
end
subroutine
tum_pdlarft_tree_merge_1dcomm
! apply householder vector to the left
! - assume unitary matrix
! - assume right positions for v
subroutine
tum_pdlarfl_1dcomm
(
v
,
incv
,
baseidx
,
a
,
lda
,
tau
,
work
,
lwork
,
m
,
n
,
idx
,
mb
,
rev
,
mpicomm
)
use
ELPA1
use
tum_utils
use
mpi
implicit
none
...
...
@@ -290,11 +311,8 @@ subroutine tum_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev
v_local_offset
=
v_local_offset
*
incv
if
(
local_size
>
0
)
then
!call dgemv("Trans",local_size,n,1.0d0,a(local_offset,1),lda,v(v_local_offset),incv,0.0d0,work(1),1)
do
icol
=
1
,
n
!work(icol) = ddot(local_size, a(local_offset,icol), 1, &
! v(v_local_offset), 1)
work
(
icol
)
=
dot_product
(
v
(
v_local_offset
:
v_local_offset
+
local_size
-1
),
a
(
local_offset
:
local_offset
+
local_size
-1
,
icol
))
end
do
...
...
@@ -305,7 +323,6 @@ subroutine tum_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev
call
mpi_allreduce
(
work
,
work
(
sendsize
+1
),
sendsize
,
mpi_real8
,
mpi_sum
,
mpicomm
,
mpierr
)
if
(
local_size
>
0
)
then
!call dger(local_size,n,-tau,v(v_local_offset),incv,work(sendsize+1),1,a(local_offset,1),lda)
do
icol
=
1
,
n
a
(
local_offset
:
local_offset
+
local_size
-1
,
icol
)
=
a
(
local_offset
:
local_offset
+
local_size
-1
,
icol
)
&
...
...
@@ -313,15 +330,11 @@ subroutine tum_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev
enddo
end
if
end
subroutine
tum_pdlarfl_1dcomm
!print *,'ref hl', work(sendsize+1:sendsize+recvsize)
end
subroutine
! test reverse version
subroutine
tum_pdlarfl2_tmatrix_1dcomm
(
v
,
ldv
,
baseidx
,
a
,
lda
,
t
,
ldt
,
work
,
lwork
,
m
,
n
,
idx
,
mb
,
rev
,
mpicomm
)
use
ELPA1
use
tum_utils
use
mpi
implicit
none
...
...
@@ -393,10 +406,6 @@ subroutine tum_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,
! update second vector
call
daxpy
(
n
,
t
(
1
,
2
),
work
(
sendsize
+
dgemv1_offset
),
1
,
work
(
sendsize
+
dgemv2_offset
),
1
)
! reference implementation
!call dger(local_size1,n,-1.0d0,v(v1_local_offset,v1col),1,work(sendsize+dgemv1_offset),1,a(local_offset1,1),lda)
!call dger(local_size2,n,-1.0d0,v(v2_local_offset,v2col),1,work(sendsize+dgemv2_offset),1,a(local_offset2,1),lda)
call
local_size_offset_1d
(
m
,
mb
,
baseidx
,
idx
-2
,
rev
,
mpirank
,
mpiprocs
,
&
local_size_dger
,
v_local_offset_dger
,
local_offset_dger
)
...
...
@@ -432,11 +441,11 @@ subroutine tum_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,
end
do
end
do
end
subroutine
end
subroutine
tum_pdlarfl2_tmatrix_1dcomm
! generalized pdlarfl2 version
! TODO: include T merge here (seperate by "old" and "new" index)
subroutine
tum_tmerge_pdlarfb_1dcomm
(
m
,
mb
,
n
,
oldk
,
k
,
v
,
ldv
,
tau
,
t
,
ldt
,
a
,
lda
,
baseidx
,
idx
,
rev
,
updatemode
,
mpicomm
,
work
,
lwork
)
use
mpi
use
tum_utils
implicit
none
...
...
@@ -473,8 +482,8 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
mergelda
=
k
mergesize
=
mergelda
*
oldk
tgenlda
=
0
! TODO
tgensize
=
0
! TODO
tgenlda
=
0
tgensize
=
0
sendsize
=
updatesize
+
mergesize
+
tgensize
...
...
@@ -491,8 +500,6 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
call
local_size_offset_1d
(
m
,
mb
,
baseidx
,
baseidx
,
rev
,
mpirank
,
mpiprocs
,
&
localsize
,
baseoffset
,
offset
)
!print '(a,6i)','indices: ',baseidx,idx,localsize,baseoffset,offset
sendoffset
=
1
if
(
oldk
.gt.
0
)
then
...
...
@@ -516,20 +523,9 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
call
dgemm
(
"Trans"
,
"Notrans"
,
k
,
n
,
localsize
,
1.0d0
,
v
(
baseoffset
,
1
),
ldv
,
a
(
offset
,
1
),
lda
,
0.0d0
,
work
(
sendoffset
+
updateoffset
),
updatelda
)
end
if
!print *,'v content'
!print '(5f)',v(baseoffset,1:5)
!print '(5f)',v(baseoffset+1,1:5)
!print '(5f)',v(baseoffset+2,1:5)
!print '(5f)',v(baseoffset+3,1:5)
!print '(5f)',v(baseoffset+4,1:5)
! calculate parts needed for T merge
call
dgemm
(
"Trans"
,
"Notrans"
,
k
,
oldk
,
localsize
,
1.0d0
,
v
(
baseoffset
,
1
),
ldv
,
v
(
baseoffset
,
k
+1
),
ldv
,
0.0d0
,
work
(
sendoffset
+
mergeoffset
),
mergelda
)
! calculate inner product of householdervectors
! TODO: future TGEN parameter
!call dsyrk("Upper","Trans",k,localsize,1.0d0,v(baseoffset,oldk+1),ldv,0.0d0,work(oldk+1,1),ldw)
else
! cleanup buffer
work
(
sendoffset
:
sendoffset
+
sendsize
-1
)
=
0.0d0
...
...
@@ -548,9 +544,6 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
! Z' = (Y1)' * A
call
dgemm
(
"Trans"
,
"Notrans"
,
k
,
n
,
localsize
,
1.0d0
,
v
(
baseoffset
,
1
),
ldv
,
a
(
offset
,
1
),
lda
,
0.0d0
,
work
(
sendoffset
+
updateoffset
),
updatelda
)
! calculate inner product of householdervectors
! TODO: future TGEN parameter
!call dsyrk("Upper","Trans",k,localsize,1.0d0,v(baseoffset,oldk+1),ldv,0.0d0,work(oldk+1,1),ldw)
else
! cleanup buffer
work
(
sendoffset
:
sendoffset
+
sendsize
-1
)
=
0.0d0
...
...
@@ -589,4 +582,6 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
end
if
end
if
end
subroutine
end
subroutine
tum_tmerge_pdlarfb_1dcomm
end
module
elpa_pdlarfb
ELPA_development_version/src/elpa_qr/tum_utils.f90
View file @
9655c781
...
...
@@ -10,10 +10,6 @@ module tum_utils
public
::
reverse_matrix_1dcomm
public
::
reverse_matrix_2dcomm_ref
public
::
tsqr_groups_size
public
::
tsqr_groups_initialize
public
::
tsqr_groups_finalize
contains
! rev parameter is critical, even in rev only mode!
...
...
@@ -58,7 +54,7 @@ subroutine local_size_offset_1d(n,nb,baseidx,idx,rev,rank,nprocs, &
baseoffset
=
offset
-
baseoffset
+
1
end
if
end
subroutine
end
subroutine
local_size_offset_1d
subroutine
reverse_vector_local
(
n
,
x
,
incx
,
work
,
lwork
)
...
...
@@ -86,7 +82,7 @@ subroutine reverse_vector_local(n,x,incx,work,lwork)
x
(
destoffset
)
=
temp
end
do
end
subroutine
end
subroutine
reverse_vector_local
subroutine
reverse_matrix_local
(
trans
,
m
,
n
,
a
,
lda
,
work
,
lwork
)
implicit
none
...
...
@@ -126,7 +122,7 @@ subroutine reverse_matrix_local(trans,m,n,a,lda,work,lwork)
end
do
end
if
end
subroutine
end
subroutine
reverse_matrix_local
subroutine
reverse_matrix_2dcomm_ref
(
m
,
n
,
mb
,
nb
,
a
,
lda
,
work
,
lwork
,
mpicomm_cols
,
mpicomm_rows
)
implicit
none
...
...
@@ -164,7 +160,7 @@ subroutine reverse_matrix_2dcomm_ref(m,n,mb,nb,a,lda,work,lwork,mpicomm_cols,mpi
call
reverse_matrix_1dcomm
(
0
,
m
,
lcols
,
mb
,
a
,
lda
,
work
,
lwork
,
mpicomm_cols
)
call
reverse_matrix_1dcomm
(
1
,
lrows
,
n
,
nb
,
a
,
lda
,
work
,
lwork
,
mpicomm_rows
)
end
subroutine
end
subroutine
reverse_matrix_2dcomm_ref
! b: if trans = 'N': b is size of block distribution between rows
! b: if trans = 'T': b is size of block distribution between columns
...
...
@@ -350,95 +346,6 @@ subroutine reverse_matrix_1dcomm(trans,m,n,b,a,lda,work,lwork,mpicomm)
a
(
1
:
lrows
,
icol
)
=
&
work
(
newmatrix_offset
+
(
icol
-1
)
*
lrows
:
newmatrix_offset
+
icol
*
lrows
-1
)
end
do
end
subroutine
integer
function
tsqr_groups_size
(
comm
,
treeorder
)
use
mpi
implicit
none
! input
integer
comm
,
treeorder
! local scalars
integer
mpiprocs
,
mpierr
integer
nr_groups
,
depth
,
treeprocs
call
MPI_Comm_size
(
comm
,
mpiprocs
,
mpierr
)
! integer logarithm with base treeorder
depth
=
1
treeprocs
=
treeorder
do
while
(
treeprocs
.lt.
mpiprocs
)
treeprocs
=
treeprocs
*
treeorder
depth
=
depth
+
1
end
do
tsqr_groups_size
=
nr_groups
end
function
subroutine
tsqr_groups_initialize
(
comm
,
treeorder
,
groups
)
use
mpi
implicit
none
! input
integer
comm
,
treeorder
! output
integer
,
allocatable
::
groups
(:)
! local scalars
integer
nr_groups
,
igroup
,
mpierr
,
mpirank
integer
split_color
,
split_key
integer
prev_treeorder
,
temp_treeorder
nr_groups
=
tsqr_groups_size
(
comm
,
treeorder
)
allocate
(
groups
(
nr_groups
))
groups
(
1
)
=
comm
call
MPI_Comm_rank
(
comm
,
mpirank
,
mpierr
)
prev_treeorder
=
1
temp_treeorder
=
treeorder
do
igroup
=
2
,
nr_groups
if
(
mod
(
mpirank
,
prev_treeorder
)
.eq.
0
)
then
split_color
=
mpirank
/
temp_treeorder
split_key
=
mod
(
mpirank
/
prev_treeorder
,
treeorder
)
else
split_color
=
MPI_UNDEFINED
split_key
=
0
! ignored due to MPI_UNDEFINED color
end
if
call
MPI_Comm_split
(
comm
,
split_color
,
split_key
,
groups
(
igroup
),
mpierr
)
prev_treeorder
=
temp_treeorder
temp_treeorder
=
temp_treeorder
*
treeorder
end
do
end
subroutine
subroutine
tsqr_groups_finalize
(
groups
,
treeorder
)
use
mpi
implicit
none
! input
integer
,
allocatable
::
groups
(:)
integer
treeorder
! local scalars
integer
nr_groups
,
igroup
,
mpierr
nr_groups
=
tsqr_groups_size
(
groups
(
1
),
treeorder
)
do
igroup
=
2
,
nr_groups
call
MPI_Comm_free
(
groups
(
igroup
),
mpierr
)
end
do
deallocate
(
groups
)
end
subroutine
end
subroutine
reverse_matrix_1dcomm
end
module
ELPA_development_version/test/Makefile
View file @
9655c781
...
...
@@ -54,11 +54,11 @@ read_real_gen: read_real_gen.o elpa1.o
test_complex_gen
:
test_complex_gen.o read_test_parameters.o elpa1.o
$(F90)
-o
$@
test_complex_gen.o read_test_parameters.o elpa1.o
$(LIBS)
test_real2
:
test_real2.o elpa1.o elpa2.o read_test_parameters.o elpa2_kernels.o
blockedQR.o
elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(F90)
-o
$@
test_real2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o
blockedQR.o
elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(LIBS)
test_real2
:
test_real2.o elpa1.o elpa2.o read_test_parameters.o elpa2_kernels.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(F90)
-o
$@
test_real2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(LIBS)
test_complex2
:
test_complex2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o
blockedQR.o
elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(F90)
-o
$@
test_complex2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o
blockedQR.o
elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(LIBS)
test_complex2
:
test_complex2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(F90)
-o
$@
test_complex2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(LIBS)
test_real.o
:
test_real.f90 elpa1.o
$(F90)
-c
$<
...
...
@@ -90,19 +90,16 @@ read_test_parameters.o: read_test_parameters.f90
elpa1.o
:
../src/elpa1.f90
$(F90)
-c
$<
blockedQR.o
:
../src/blockedQR.f90
$(F90)
-c
$<
tum_utils.o
:
../src/elpa_qr/tum_utils.f90
$(F90)
-c
$<
elpa_qrkernels.o
:
../src/elpa_qr/elpa_qrkernels.f90
$(F90)
-c
$<
elpa_pdlarfb.o
:
../src/elpa_qr/elpa_pdlarfb.f90 tum_utils.o
elpa_pdlarfb.o
:
../src/elpa_qr/elpa_pdlarfb.f90 tum_utils.o
elpa_qrkernels.o
$(F90)
-c
$<
elpa_pdgeqrf.o
:
../src/elpa_qr/elpa_pdgeqrf.f90 elpa1.o tum_utils.o
elpa_pdgeqrf.o
:
../src/elpa_qr/elpa_pdgeqrf.f90 elpa1.o tum_utils.o
elpa_pdlarfb.o elpa_qrkernels.o
$(F90)
-c
$<
elpa2.o
:
../src/elpa2.f90 elpa1.o elpa_pdgeqrf.o
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment