Commit 8a1f765d authored by Thomas Auckenthaler

bugfix in the blocked QR decomposition

parent 0f93fec7
......@@ -79,12 +79,12 @@ subroutine tum_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpi
! data exchange
call mpi_allreduce(work(1,1),work(1,n+1),k*n,mpi_real8,mpi_sum,mpicomm,mpierr)
call tum_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t,ldt,work(1,n+1),k,rev)
call tum_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t,ldt,work(1,n+1),k)
end subroutine tum_pdlarfb_1dcomm
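
For orientation, the update performed above can be sketched as follows (the notation is mine, not from the source: Z is the k-by-n block produced by the mpi_allreduce, V_p and A_p are the local slices of v and a on rank p; the local kernel's own comment describes the operation as A = A - Y*T*Z):

\[
Z \;=\; \sum_{p} V_p^{T} A_p ,
\qquad
A_p \;\leftarrow\; A_p \;-\; V_p \, T \, Z .
\]
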
! generalized pdlarfl2 version
! TODO: include T merge here (separate by "old" and "new" index)
subroutine tum_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx,idx,rev,mpicomm,work,lwork)
subroutine tum_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx,rev,mpicomm,work,lwork)
use tum_utils
implicit none
......@@ -94,7 +94,7 @@ subroutine tum_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseid
double precision v(ldv,*),tau(*),t(ldt,*),work(k,*),a(lda,*)
! input variables (global)
integer m,mb,n,k,oldk,baseidx,idx,rev,mpicomm
integer m,mb,n,k,oldk,baseidx,rev,mpicomm
! output variables (global)
......@@ -163,17 +163,17 @@ subroutine tum_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseid
end subroutine tum_pdlarft_pdlarfb_1dcomm
subroutine tum_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,tau,t,ldt,baseidx,idx,rev,mpicomm,work,lwork)
subroutine tum_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,t,ldt,baseidx,rev,mpicomm,work,lwork)
use tum_utils
implicit none
! input variables (local)
integer ldv,ldt,lwork
double precision v(ldv,*),tau(*),t(ldt,*),work(n,*)
double precision v(ldv,*),t(ldt,*),work(n,*)
! input variables (global)
integer m,mb,n,blocksize,baseidx,idx,rev,mpicomm
integer m,mb,n,blocksize,baseidx,rev,mpicomm
! output variables (global)
......@@ -182,7 +182,6 @@ subroutine tum_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,tau,t,ldt,baseidx
! local scalars
integer localsize,offset,baseoffset
integer mpirank,mpiprocs,mpierr
integer icol
if (lwork .eq. -1) then
work(1,1) = DBLE(2*n*n)
......@@ -206,21 +205,21 @@ subroutine tum_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,tau,t,ldt,baseidx
! skip Y4'*Y4 part
offset = mod(n,blocksize)
if (offset .eq. 0) offset=blocksize
call tum_tmerge_set_kernel(n,blocksize,t,ldt,work(1,n+1+offset),n,1)
call tum_tmerge_set_kernel(n,blocksize,t,ldt,work(1,n+1+offset),n)
end subroutine tum_pdlarft_set_merge_1dcomm
subroutine tum_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,tau,t,ldt,baseidx,idx,rev,mpicomm,work,lwork)
subroutine tum_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,baseidx,rev,mpicomm,work,lwork)
use tum_utils
implicit none
! input variables (local)
integer ldv,ldt,lwork
double precision v(ldv,*),tau(*),t(ldt,*),work(n,*)
double precision v(ldv,*),t(ldt,*),work(n,*)
! input variables (global)
integer m,mb,n,blocksize,treeorder,baseidx,idx,rev,mpicomm
integer m,mb,n,blocksize,treeorder,baseidx,rev,mpicomm
! output variables (global)
......@@ -229,7 +228,6 @@ subroutine tum_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,tau,t,
! local scalars
integer localsize,offset,baseoffset
integer mpirank,mpiprocs,mpierr
integer icol
if (lwork .eq. -1) then
work(1,1) = DBLE(2*n*n)
......@@ -255,7 +253,7 @@ subroutine tum_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,tau,t,
! skip Y4'*Y4 part
offset = mod(n,blocksize)
if (offset .eq. 0) offset=blocksize
call tum_tmerge_tree_kernel(n,blocksize,treeorder,t,ldt,work(1,n+1+offset),n,1)
call tum_tmerge_tree_kernel(n,blocksize,treeorder,t,ldt,work(1,n+1+offset),n)
end subroutine tum_pdlarft_tree_merge_1dcomm
......@@ -282,7 +280,7 @@ subroutine tum_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev
integer mpierr,mpirank,mpiprocs
integer sendsize,recvsize,icol
integer local_size,local_offset
integer v_offset,v_local_offset
integer v_local_offset
! external functions
double precision ddot
......@@ -445,17 +443,17 @@ end subroutine tum_pdlarfl2_tmatrix_1dcomm
! generalized pdlarfl2 version
! TODO: include T merge here (separate by "old" and "new" index)
subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx,idx,rev,updatemode,mpicomm,work,lwork)
subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,updatemode,mpicomm,work,lwork)
use tum_utils
implicit none
! input variables (local)
integer ldv,ldt,lda,lwork
double precision v(ldv,*),tau(*),t(ldt,*),work(*),a(lda,*)
double precision v(ldv,*),t(ldt,*),work(*),a(lda,*)
! input variables (global)
integer m,mb,n,k,oldk,baseidx,idx,rev,updatemode,mpicomm
integer m,mb,n,k,oldk,baseidx,rev,updatemode,mpicomm
! output variables (global)
......@@ -464,7 +462,6 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
! local scalars
integer localsize,offset,baseoffset
integer mpirank,mpiprocs,mpierr
integer icol,irow,ldw
integer sendoffset,recvoffset,sendsize
integer updateoffset,updatelda,updatesize
......@@ -563,22 +560,22 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
tgenoffset = recvoffset+tgenoffset
if (oldk .gt. 0) then
call tum_pdlarft_merge_kernel_local(oldk,k,t,ldt,work(mergeoffset),mergelda,rev)
call tum_pdlarft_merge_kernel_local(oldk,k,t,ldt,work(mergeoffset),mergelda)
if (localsize .gt. 0) then
if (updatemode .eq. ichar('I')) then
! update matrix (pdlarfb) with complete T
call tum_pdlarfb_kernel_local(localsize,n,k+oldk,a(offset,1),lda,v(baseoffset,1),ldv,t(1,1),ldt,work(updateoffset),updatelda,rev)
call tum_pdlarfb_kernel_local(localsize,n,k+oldk,a(offset,1),lda,v(baseoffset,1),ldv,t(1,1),ldt,work(updateoffset),updatelda)
else
! update matrix (pdlarfb) with small T (same as update with no old T TODO)
call tum_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t(1,1),ldt,work(updateoffset),updatelda,rev)
call tum_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t(1,1),ldt,work(updateoffset),updatelda)
end if
end if
else
if (localsize .gt. 0) then
! update matrix (pdlarfb) with small T
call tum_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t(1,1),ldt,work(updateoffset),updatelda,rev)
call tum_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t(1,1),ldt,work(updateoffset),updatelda)
end if
end if
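
Summarizing the two branches above (a sketch; the subscripts are mine): with updatemode = 'I' the full set of k+oldk reflectors and the merged factor are applied,

\[
A \;\leftarrow\; A \;-\; V_{1:k+oldk}\; T_{\mathrm{merged}}\; Z_{\mathrm{merged}} ,
\]

otherwise only the k new reflectors and the small k-by-k factor are used, deferring the contribution of the old reflectors as noted in the TODO comment:

\[
A \;\leftarrow\; A \;-\; V_{1:k}\; T_{k\times k}\; Z_{k} .
\]
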
......
......@@ -2,7 +2,7 @@
! calculates A = A - Y*T*Z (rev=1)
! T upper triangle matrix
! assuming zero entries in the upper k x k block of the matrix
subroutine tum_pdlarfb_kernel_local(m,n,k,a,lda,v,ldv,t,ldt,z,ldz,rev)
subroutine tum_pdlarfb_kernel_local(m,n,k,a,lda,v,ldv,t,ldt,z,ldz)
implicit none
! input variables (local)
......@@ -10,7 +10,7 @@ subroutine tum_pdlarfb_kernel_local(m,n,k,a,lda,v,ldv,t,ldt,z,ldz,rev)
double precision a(lda,*),v(ldv,*),t(ldt,*),z(ldz,*)
! input variables (global)
integer m,n,k,rev
integer m,n,k
! local variables
double precision t11
......@@ -136,15 +136,15 @@ subroutine tum_pdlarfb_kernel_local(m,n,k,a,lda,v,ldv,t,ldt,z,ldz,rev)
end if
end subroutine
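
A minimal stand-alone sketch of the same update with plain BLAS/LAPACK calls (pdlarfb_sketch and the scratch array w are hypothetical names introduced here; the real kernel additionally exploits the zero structure of the upper k x k block mentioned in the comment above, which this sketch ignores):

      subroutine pdlarfb_sketch(m,n,k,a,lda,v,ldv,t,ldt,z,ldz,w)
      implicit none
      integer m,n,k,lda,ldv,ldt,ldz
      double precision a(lda,*),v(ldv,*),t(ldt,*),z(ldz,*),w(k,*)
      ! copy the k x n inner-product block so z stays untouched
      call dlacpy('A',k,n,z,ldz,w,k)
      ! w <- T*w with the k x k upper triangular factor T
      call dtrmm('L','U','N','N',k,n,1.0d0,t,ldt,w,k)
      ! a <- a - v*w, i.e. A = A - Y*T*Z
      call dgemm('N','N',m,n,k,-1.0d0,v,ldv,w,k,1.0d0,a,lda)
      end subroutine
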
subroutine tum_pdlarft_merge_kernel_local(oldk,k,t,ldt,yty,ldy,rev)
subroutine tum_pdlarft_merge_kernel_local(oldk,k,t,ldt,yty,ldy)
implicit none
! input variables (local)
integer ldt,lda,ldy
integer ldt,ldy
double precision t(ldt,*),yty(ldy,*)
! input variables (global)
integer k,oldk,rev
integer k,oldk
! output variables (global)
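
If tum_pdlarft_merge_kernel_local follows the standard compact-WY merge (an assumption; the diff does not show the body), then for an old factor T_1 of size oldk, a new factor T_2 of size k, and the cross products Y_1^T Y_2 supplied in yty, the merged factor would be

\[
T \;=\;
\begin{pmatrix}
T_1 & -\,T_1 \,\bigl(Y_1^{T} Y_2\bigr)\, T_2 \\
0   & T_2
\end{pmatrix},
\]

which is also why only the off-diagonal blocks Y_i^T Y_j ever need to be exchanged (compare the "skip Y4'*Y4 part" comments above).
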
......@@ -476,20 +476,20 @@ end subroutine
! 0 Y2'*Y3 Y2'*Y4 ...
! 0 0 Y3'*Y4 ...
! 0 0 0 ...
subroutine tum_tmerge_set_kernel(k,blocksize,t,ldt,yty,ldy,rev)
subroutine tum_tmerge_set_kernel(k,blocksize,t,ldt,yty,ldy)
implicit none
! input variables (local)
integer ldt,lda,ldy
integer ldt,ldy
double precision t(ldt,*),yty(ldy,*)
! input variables (global)
integer k,blocksize,treeorder,rev
integer k,blocksize
! output variables (global)
! local scalars
integer temp_blocksize,nr_blocks,current_block
integer nr_blocks,current_block
integer remainder,oldk
integer yty_column,toffset
......@@ -504,19 +504,19 @@ subroutine tum_tmerge_set_kernel(k,blocksize,t,ldt,yty,ldy,rev)
yty_column = 1
if (remainder .gt. 0) then
call tum_pdlarft_merge_kernel_local(blocksize,remainder,t(toffset,toffset),ldt,yty(1,yty_column),ldy,rev)
call tum_pdlarft_merge_kernel_local(blocksize,remainder,t(toffset,toffset),ldt,yty(1,yty_column),ldy)
current_block = 1
oldk = remainder+blocksize
yty_column = yty_column + blocksize
else
call tum_pdlarft_merge_kernel_local(blocksize,blocksize,t(toffset,toffset),ldt,yty(1,yty_column),ldy,rev)
call tum_pdlarft_merge_kernel_local(blocksize,blocksize,t(toffset,toffset),ldt,yty(1,yty_column),ldy)
current_block = 2
oldk = 2*blocksize
yty_column = yty_column + blocksize
end if
do while (current_block .lt. nr_blocks)
call tum_pdlarft_merge_kernel_local(blocksize,oldk,t(toffset,toffset),ldt,yty(toffset,yty_column),ldy,rev)
call tum_pdlarft_merge_kernel_local(blocksize,oldk,t(toffset,toffset),ldt,yty(toffset,yty_column),ldy)
current_block = current_block + 1
oldk = oldk + blocksize
......@@ -530,15 +530,15 @@ end subroutine
! 0 0 Y3'*Y4 ...
! 0 0 0 ...
subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy,rev)
subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy)
implicit none
! input variables (local)
integer ldt,lda,ldy
integer ldt,ldy
double precision t(ldt,*),yty(ldy,*)
! input variables (global)
integer k,blocksize,treeorder,rev
integer k,blocksize,treeorder
! output variables (global)
......@@ -552,7 +552,7 @@ subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy,rev)
if (treeorder .eq. 0) return ! no merging
if (treeorder .eq. 1) then
call tum_tmerge_set_kernel(k,blocksize,t,ldt,yty,ldy,rev)
call tum_tmerge_set_kernel(k,blocksize,t,ldt,yty,ldy)
return
end if
......@@ -560,7 +560,7 @@ subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy,rev)
max_treeorder = min(nr_blocks,treeorder)
if (max_treeorder .eq. 1) then
call tum_tmerge_set_kernel(k,blocksize,t,ldt,yty,ldy,rev)
call tum_tmerge_set_kernel(k,blocksize,t,ldt,yty,ldy)
return
end if
......@@ -620,7 +620,7 @@ subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy,rev)
if (yty_remainder .gt. 0) then
yty_column = yty_remainder
!print *,'final merging with remainder',temp_blocksize,k,remaining_size,yty_column
call tum_tmerge_set_kernel(k,temp_blocksize,t,ldt,yty(1,yty_column),ldy,1)
call tum_tmerge_set_kernel(k,temp_blocksize,t,ldt,yty(1,yty_column),ldy)
else
!print *,'no remainder - no merging needed',temp_blocksize,k,remaining_size
endif
......@@ -640,13 +640,13 @@ subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy,rev)
yty_column = yty_column_start
!print *,'set merging', toffset, yty_column,remainder
call tum_tmerge_set_kernel(remainder,temp_blocksize,t(toffset,toffset),ldt,yty(toffset,yty_column),ldy,1)
call tum_tmerge_set_kernel(remainder,temp_blocksize,t(toffset,toffset),ldt,yty(toffset,yty_column),ldy)
if (total_remainder .gt. 0) then
! merge with existing global remainder part
!print *,'single+set merging',yty_remainder,total_remainder,remainder
call tum_pdlarft_merge_kernel_local(remainder,total_remainder,t(1,1),ldt,yty(1,yty_remainder),ldy,1)
call tum_pdlarft_merge_kernel_local(remainder,total_remainder,t(1,1),ldt,yty(1,yty_remainder),ldy)
yty_remainder = yty_remainder + remainder
toffset_start = toffset_start + remainder
......@@ -668,7 +668,7 @@ subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy,rev)
! merge with existing global remainder part
!print *,'single merging',yty_remainder,total_remainder,remainder
call tum_pdlarft_merge_kernel_local(remainder,total_remainder,t(1,1),ldt,yty(1,yty_remainder),ldy,1)
call tum_pdlarft_merge_kernel_local(remainder,total_remainder,t(1,1),ldt,yty(1,yty_remainder),ldy)
yty_remainder = yty_remainder + remainder
toffset_start = toffset_start + remainder
......@@ -697,7 +697,7 @@ subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy,rev)
!print *,'recursive merging', toffset, yty_column,setsize
call tum_tmerge_set_kernel(setsize,temp_blocksize,t(toffset,toffset),ldt,yty(toffset,yty_column),ldy,1)
call tum_tmerge_set_kernel(setsize,temp_blocksize,t(toffset,toffset),ldt,yty(toffset,yty_column),ldy)
current_set = current_set + 1
end do
......@@ -708,11 +708,11 @@ subroutine tum_tmerge_tree_kernel(k,blocksize,treeorder,t,ldt,yty,ldy,rev)
end do
end subroutine
! yty should not contain the inner products vi'*vi
subroutine tum_dlarft_kernel(n,tau,yty,ldy,t,ldt,rev)
subroutine tum_dlarft_kernel(n,tau,yty,ldy,t,ldt)
implicit none
! input variables
integer n,ldy,ldt,rev
integer n,ldy,ldt
double precision tau(*),yty(ldy,*)
! output variables
......
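
For reference, one common way to build T from tau and the off-diagonal products in yty, consistent with the comment that the diagonal terms vi'*vi are not needed (a sketch of the usual forward columnwise recurrence, not necessarily the exact loop in tum_dlarft_kernel):

\[
T_{ii} = \tau_i ,
\qquad
T_{1:i-1,\,i} \;=\; -\,\tau_i \; T_{1:i-1,\,1:i-1}\; \bigl(Y^{T}Y\bigr)_{1:i-1,\,i} ,
\qquad i = 1,\dots,n .
\]
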
......@@ -92,8 +92,7 @@ subroutine reverse_matrix_local(trans,m,n,a,lda,work,lwork)
double precision a(lda,*),work(*)
! local scalars
double precision temp,dworksize(1)
integer srcoffset,destoffset,ientry
double precision dworksize(1)
integer incx
integer dimsize
integer i
......@@ -177,7 +176,6 @@ subroutine reverse_matrix_1dcomm(trans,m,n,b,a,lda,work,lwork,mpicomm)
! local scalars
integer mpirank,mpiprocs,mpierr,mpistatus(MPI_STATUS_SIZE)
integer nr_blocks,dest_process,src_process,step
integer src_block,dest_block
integer lsize,baseoffset,offset
integer current_index,destblk,srcblk,icol,next_index
integer sendcount,recvcount
......
......@@ -36,6 +36,7 @@ program test_real2
integer myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
integer i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
integer ib, ir, im, jb, jr, jm, ii, jj, j !for frank matrices
integer, external :: numroc
......@@ -46,11 +47,12 @@ program test_real2
!-------------------------------------------------------------------------------
! Parse command line arguments, if given
INTEGER*4 :: iargc
character*16 arg1
character*16 arg2
na = 12059
nev = 3401
na = 1000
nev = 1000
if (iargc() == 2) then
call getarg(1, arg1)
......@@ -148,21 +150,49 @@ program test_real2
! We want different random numbers on every process
! (otherwise A might become rank deficient):
iseed(:) = myid
call RANDOM_SEED(put=iseed)
call RANDOM_NUMBER(z)
a(:,:) = z(:,:)
if (myid==0) then
print '(a)','| Random matrix block has been set up. (only processor 0 confirms this step)'
end if
call pdtran(na, na, 1.d0, z, 1, 1, sc_desc, 1.d0, a, 1, 1, sc_desc) ! A = A + Z**T
if (myid==0) then
print '(a)','| Random matrix has been symmetrized.'
if ( .TRUE. ) then
iseed(:) = myid
call RANDOM_SEED(put=iseed)
call RANDOM_NUMBER(z)
a(:,:) = z(:,:)
if (myid==0) then
print '(a)','| Random matrix block has been set up. (only processor 0 confirms this step)'
end if
call pdtran(na, na, 1.d0, z, 1, 1, sc_desc, 1.d0, a, 1, 1, sc_desc) ! A = A + Z**T
if (myid==0) then
print '(a)','| Random matrix has been symmetrized.'
end if
!generation of frank matrices
else
z(:,:) = 0D0
do i=1,na; ib = (i-1)/nblk+1
if ( MOD(ib-1, np_cols) == my_pcol ) then
do j=1,na; jb = (j-1)/nblk+1
if ( MOD(jb-1, np_rows) == my_prow ) then
ir = (ib-1)/np_cols+1; im = i-(ib-1)*nblk; ii = im+(ir-1)*nblk
jr = (jb-1)/np_rows+1; jm = j-(jb-1)*nblk; jj = jm+(jr-1)*nblk
! each case returns the same eigenpairs; however,
! the first case is numerically unstable in the Householder transformation
!
z(jj,ii) = (na+1-MAX(na+1-i, na+1-j))*1D0
!
! z(jj,ii) = MIN(i, j)*1D0
end if
end do
end if
end do
a(:,:) = z(:,:)
end if
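
Both branches of the Frank-matrix generator above fill the same entries, since

\[
na + 1 - \max(na+1-i,\; na+1-j) \;=\; na + 1 - \bigl(na+1-\min(i,j)\bigr) \;=\; \min(i,j),
\]

which matches the comment that the two cases return the same eigenpairs and differ only in their numerical behaviour inside the Householder transformation.
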
! Save original matrix A for later accuracy checks
......