Commit c8f57d2c authored by Andreas Marek's avatar Andreas Marek

Timing information for elpa_pdgeqrf.F90

parent ee4c1f75
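The pattern this commit applies throughout elpa_pdgeqrf.F90 is conditional timing instrumentation: each instrumented routine brackets its body with timer%start/timer%stop calls that are only compiled in when HAVE_DETAILED_TIMINGS is defined. A minimal sketch of that pattern (hypothetical routine name; it assumes the timings module provides the timer object, as the hunks below do):

subroutine some_kernel()
#ifdef HAVE_DETAILED_TIMINGS
  use timings                        ! provides the global "timer" object
#endif
  implicit none
#ifdef HAVE_DETAILED_TIMINGS
  call timer%start("some_kernel")    ! open a named timing region
#endif
  ! ... actual work ...
#ifdef HAVE_DETAILED_TIMINGS
  call timer%stop("some_kernel")     ! close the region on every exit path
#endif
end subroutine some_kernel

Every exit path must stop the timer, which is why the lwork .eq. -1 workspace-query branch below gets its own timer%stop before the early return.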
@@ -196,14 +196,14 @@ module ELPA2_compute
       if (useQR) then
         if (which_qr_decomposition == 1) then
-          call qr_pqrparam_init(pqrparam, nblk,'M',0, nblk,'M',0, nblk,'M',1,'s')
+          call qr_pqrparam_init(pqrparam(1:11), nblk,'M',0, nblk,'M',0, nblk,'M',1,'s')
           allocate(tauvector(na))
           allocate(blockheuristic(nblk))
           l_rows = local_index(na, my_prow, np_rows, nblk, -1)
           allocate(vmr(max(l_rows,1),na))
           call qr_pdgeqrf_2dcomm(a, lda, vmr, max(l_rows,1), tauvector(1), tmat(1,1,1), nbw, dwork_size(1), -1, na, &
-                                 nbw, nblk, nblk, na, na, 1, 0, PQRPARAM, mpi_comm_rows, mpi_comm_cols, blockheuristic)
+                                 nbw, nblk, nblk, na, na, 1, 0, PQRPARAM(1:11), mpi_comm_rows, mpi_comm_cols, blockheuristic)
           work_size = dwork_size(1)
           allocate(work_blocked(work_size))
@@ -239,7 +239,7 @@ module ELPA2_compute
                                  tmat(1,1,istep), nbw, work_blocked, &
                                  work_size, na, n_cols, nblk, nblk, &
                                  istep*nbw+n_cols-nbw, istep*nbw+n_cols, 1,&
-                                 0, PQRPARAM, mpi_comm_rows, mpi_comm_cols,&
+                                 0, PQRPARAM(1:11), mpi_comm_rows, mpi_comm_cols,&
                                  blockheuristic)
         endif
       else
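The caller above now passes PQRPARAM(1:11) as an explicit array section instead of the bare array name, matching the explicit-shape dummy argument introduced in elpa_pdgeqrf.F90 below. A minimal sketch of the difference (hypothetical subroutine names, not part of the commit): with an assumed-size dummy the callee has no extent information, so out-of-bounds accesses cannot be caught, whereas an explicit-shape dummy of extent 11 can be bounds-checked:

subroutine takes_assumed_size(p)
  integer :: p(*)     ! extent unknown to the compiler; p(12) would go unnoticed
  print *, p(1)
end subroutine takes_assumed_size

subroutine takes_explicit_shape(p)
  integer :: p(1:11)  ! extent fixed at 11; compilers can check accesses against it
  print *, p(11)
end subroutine takes_explicit_shape

The DESPERATELY_WANT_ASSUMED_SIZE_QR guard in the hunks below keeps the old assumed-size declarations selectable at compile time.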
@@ -40,118 +40,138 @@
! the original distribution, the GNU Lesser General Public License.
!
!
+#include "config-f90.h"
 module elpa_pdgeqrf
   use elpa1_compute
   use elpa_pdlarfb
   use qr_utils_mod
   implicit none
   PRIVATE
   public :: qr_pdgeqrf_2dcomm
   public :: qr_pqrparam_init
   public :: qr_pdlarfg2_1dcomm_check
   include 'mpif.h'
 contains
   subroutine qr_pdgeqrf_2dcomm(a,lda,v,ldv,tau,t,ldt,work,lwork,m,n,mb,nb,rowidx,colidx,rev,trans,PQRPARAM, &
                                mpicomm_rows,mpicomm_cols,blockheuristic)
     use precision
     use ELPA1
     use qr_utils_mod
+#ifdef HAVE_DETAILED_TIMINGS
+    use timings
+#endif
     implicit none
     ! parameter setup
     INTEGER(kind=ik), parameter :: gmode_ = 1, rank_ = 2, eps_ = 3
     ! input variables (local)
     integer(kind=ik)            :: lda,lwork,ldv,ldt
     real(kind=rk)               :: a(lda,*),v(ldv,*),tau(*),work(*),t(ldt,*)
     ! input variables (global)
     integer(kind=ik)            :: m,n,mb,nb,rowidx,colidx,rev,trans,mpicomm_cols,mpicomm_rows
+#ifdef DESPERATELY_WANT_ASSUMED_SIZE_QR
     integer(kind=ik)            :: PQRPARAM(*)
+#else
+    integer(kind=ik)            :: PQRPARAM(1:11)
+#endif
     ! output variables (global)
     real(kind=rk)               :: blockheuristic(*)
     ! input variables derived from PQRPARAM
     integer(kind=ik)            :: updatemode,tmerge,size2d
     ! local scalars
     integer(kind=ik)            :: mpierr,mpirank_cols,broadcast_size,mpirank_rows
     integer(kind=ik)            :: mpirank_cols_qr,mpiprocs_cols
     integer(kind=ik)            :: lcols_temp,lcols,icol,lastcol
     integer(kind=ik)            :: baseoffset,offset,idx,voffset
     integer(kind=ik)            :: update_voffset,update_tauoffset
     integer(kind=ik)            :: update_lcols
     integer(kind=ik)            :: work_offset
     real(kind=rk)               :: dbroadcast_size(1),dtmat_bcast_size(1)
     real(kind=rk)               :: pdgeqrf_size(1),pdlarft_size(1),pdlarfb_size(1),tmerge_pdlarfb_size(1)
     integer(kind=ik)            :: temptau_offset,temptau_size,broadcast_offset,tmat_bcast_size
     integer(kind=ik)            :: remaining_cols
     integer(kind=ik)            :: total_cols
     integer(kind=ik)            :: incremental_update_size ! needed for incremental update mode
+#ifdef HAVE_DETAILED_TIMINGS
+    call timer%start("qr_pdgeqrf_2dcomm")
+#endif
     size2d     = PQRPARAM(1)
     updatemode = PQRPARAM(2)
     tmerge     = PQRPARAM(3)
     ! copy value before we are going to filter it
     total_cols = n
     call mpi_comm_rank(mpicomm_cols,mpirank_cols,mpierr)
     call mpi_comm_rank(mpicomm_rows,mpirank_rows,mpierr)
     call mpi_comm_size(mpicomm_cols,mpiprocs_cols,mpierr)
+#ifdef DESPERATELY_WANT_ASSUMED_SIZE_QR
     call qr_pdgeqrf_1dcomm(a,lda,v,ldv,tau,t,ldt,pdgeqrf_size(1),-1,m,total_cols,mb,rowidx,rowidx,rev,trans, &
                            PQRPARAM(4),mpicomm_rows,blockheuristic)
+#else
+    call qr_pdgeqrf_1dcomm(a,lda,v,ldv,tau,t,ldt,pdgeqrf_size(1),-1,m,total_cols,mb,rowidx,rowidx,rev,trans, &
+                           PQRPARAM(4:11),mpicomm_rows,blockheuristic)
+#endif
     call qr_pdgeqrf_pack_unpack(v,ldv,dbroadcast_size(1),-1,m,total_cols,mb,rowidx,rowidx,rev,0,mpicomm_rows)
     call qr_pdgeqrf_pack_unpack_tmatrix(tau,t,ldt,dtmat_bcast_size(1),-1,total_cols,0)
     pdlarft_size(1) = 0.0d0
     call qr_pdlarfb_1dcomm(m,mb,total_cols,total_cols,a,lda,v,ldv,tau,t,ldt,rowidx,rowidx,rev,mpicomm_rows, &
                            pdlarfb_size(1),-1)
     call qr_tmerge_pdlarfb_1dcomm(m,mb,total_cols,total_cols,total_cols,v,ldv,t,ldt,a,lda,rowidx,rev,updatemode, &
                                   mpicomm_rows,tmerge_pdlarfb_size(1),-1)
     temptau_offset   = 1
     temptau_size     = total_cols
     broadcast_offset = temptau_offset + temptau_size
     broadcast_size   = dbroadcast_size(1) + dtmat_bcast_size(1)
     work_offset      = broadcast_offset + broadcast_size
     if (lwork .eq. -1) then
       work(1) = (DBLE(temptau_size) + DBLE(broadcast_size) + max(pdgeqrf_size(1),pdlarft_size(1),pdlarfb_size(1), &
                  tmerge_pdlarfb_size(1)))
+#ifdef HAVE_DETAILED_TIMINGS
+      call timer%stop("qr_pdgeqrf_2dcomm")
+#endif
       return
     end if
     lastcol = colidx-total_cols+1
     voffset = total_cols
     incremental_update_size = 0
     ! clear v buffer: just ensure that there is no junk in the upper triangle
     ! part, otherwise pdlarfb gets some problems
     ! pdlarfl(2) do not have these problems as they are working more on a vector
     ! basis
     v(1:ldv,1:total_cols) = 0.0d0
     icol = colidx
     remaining_cols = total_cols
     !print *,'start decomposition',m,rowidx,colidx
     do while (remaining_cols .gt. 0)
       ! determine rank of process column with next qr block
       mpirank_cols_qr = MOD((icol-1)/nb,mpiprocs_cols)
@@ -178,70 +198,77 @@ subroutine qr_pdgeqrf_2dcomm(a,lda,v,ldv,tau,t,ldt,work,lwork,m,n,mb,nb,rowidx,c
       idx = rowidx - colidx + icol
       if (mpirank_cols .eq. mpirank_cols_qr) then
         ! qr decomposition part
         tau(offset:offset+lcols-1) = 0.0d0
+#ifdef DESPERATELY_WANT_ASSUMED_SIZE_QR
         call qr_pdgeqrf_1dcomm(a(1,offset),lda,v(1,voffset),ldv,tau(offset),t(voffset,voffset),ldt, &
                                work(work_offset),lwork,m,lcols,mb,rowidx,idx,rev,trans,PQRPARAM(4), &
                                mpicomm_rows,blockheuristic)
+#else
+        call qr_pdgeqrf_1dcomm(a(1,offset),lda,v(1,voffset),ldv,tau(offset),t(voffset,voffset),ldt, &
+                               work(work_offset),lwork,m,lcols,mb,rowidx,idx,rev,trans,PQRPARAM(4:11), &
+                               mpicomm_rows,blockheuristic)
+#endif
         ! pack broadcast buffer (v + tau)
         call qr_pdgeqrf_pack_unpack(v(1,voffset),ldv,work(broadcast_offset),lwork,m,lcols,mb,rowidx,&
                                     idx,rev,0,mpicomm_rows)
         ! determine broadcast size
         call qr_pdgeqrf_pack_unpack(v(1,voffset),ldv,dbroadcast_size(1),-1,m,lcols,mb,rowidx,idx,rev,&
                                     0,mpicomm_rows)
         broadcast_size = dbroadcast_size(1)
         !if (mpirank_rows .eq. 0) then
         ! pack tmatrix into broadcast buffer and calculate new size
         call qr_pdgeqrf_pack_unpack_tmatrix(tau(offset),t(voffset,voffset),ldt, &
                                             work(broadcast_offset+broadcast_size),lwork,lcols,0)
         call qr_pdgeqrf_pack_unpack_tmatrix(tau(offset),t(voffset,voffset),ldt,dtmat_bcast_size(1),-1,lcols,0)
         broadcast_size = broadcast_size + dtmat_bcast_size(1)
         !end if
         ! initiate broadcast (send part)
         call MPI_Bcast(work(broadcast_offset),broadcast_size,mpi_real8, &
                        mpirank_cols_qr,mpicomm_cols,mpierr)
         ! copy tau parts into temporary tau buffer
         work(temptau_offset+voffset-1:temptau_offset+(voffset-1)+lcols-1) = tau(offset:offset+lcols-1)
         !print *,'generated tau:', tau(offset)
       else
         ! vector exchange part
         ! determine broadcast size
         call qr_pdgeqrf_pack_unpack(v(1,voffset),ldv,dbroadcast_size(1),-1,m,lcols,mb,rowidx,idx,rev,1,mpicomm_rows)
         broadcast_size = dbroadcast_size(1)
         call qr_pdgeqrf_pack_unpack_tmatrix(work(temptau_offset+voffset-1),t(voffset,voffset),ldt, &
                                             dtmat_bcast_size(1),-1,lcols,0)
         tmat_bcast_size = dtmat_bcast_size(1)
         !print *,'broadcast_size (nonqr)',broadcast_size
         broadcast_size = dbroadcast_size(1) + dtmat_bcast_size(1)
         ! initiate broadcast (recv part)
         call MPI_Bcast(work(broadcast_offset),broadcast_size,mpi_real8, &
                        mpirank_cols_qr,mpicomm_cols,mpierr)
         ! last n*n elements in buffer are (still empty) T matrix elements
         ! fetch from first process in each column
         ! unpack broadcast buffer (v + tau)
         call qr_pdgeqrf_pack_unpack(v(1,voffset),ldv,work(broadcast_offset),lwork,m,lcols,mb,rowidx,idx,rev,1,mpicomm_rows)
         ! now send t matrix to other processes in our process column
         broadcast_size  = dbroadcast_size(1)
         tmat_bcast_size = dtmat_bcast_size(1)
         ! t matrix should now be available on all processes => unpack
         call qr_pdgeqrf_pack_unpack_tmatrix(work(temptau_offset+voffset-1),t(voffset,voffset),ldt, &
                                             work(broadcast_offset+broadcast_size),lwork,lcols,1)
       end if
       remaining_cols = remaining_cols - lcols
@@ -261,222 +288,269 @@ subroutine qr_pdgeqrf_2dcomm(a,lda,v,ldv,tau,t,ldt,work,lwork,m,n,mb,nb,rowidx,c
       if (lcols .gt. 0) then
         !print *,'updating trailing matrix'
         if (updatemode .eq. ichar('I')) then
           print *,'pdgeqrf_2dcomm: incremental update not yet implemented! rev=1'
         else if (updatemode .eq. ichar('F')) then
           ! full update no merging
           call qr_pdlarfb_1dcomm(m,mb,lcols,update_lcols,a(1,offset),lda,v(1,update_voffset),ldv, &
                                  work(temptau_offset+update_voffset-1), &
                                  t(update_voffset,update_voffset),ldt, &
                                  rowidx,idx,1,mpicomm_rows,work(work_offset),lwork)
         else
           ! full update + merging default
           call qr_tmerge_pdlarfb_1dcomm(m,mb,lcols,n-(update_voffset+update_lcols-1),update_lcols, &
                                         v(1,update_voffset),ldv, &
                                         t(update_voffset,update_voffset),ldt, &
                                         a(1,offset),lda,rowidx,1,updatemode,mpicomm_rows, &
                                         work(work_offset),lwork)
         end if
       else
         if (updatemode .eq. ichar('I')) then
           print *,'sole merging of (incremental) T matrix', mpirank_cols, &
                   n-(update_voffset+incremental_update_size-1)
           call qr_tmerge_pdlarfb_1dcomm(m,mb,0,n-(update_voffset+incremental_update_size-1), &
                                         incremental_update_size,v(1,update_voffset),ldv, &
                                         t(update_voffset,update_voffset),ldt, &
                                         a,lda,rowidx,1,updatemode,mpicomm_rows,work(work_offset),lwork)
           ! reset for upcoming incremental updates
           incremental_update_size = 0
         else if (updatemode .eq. ichar('M')) then
           ! final merge
           call qr_tmerge_pdlarfb_1dcomm(m,mb,0,n-(update_voffset+update_lcols-1),update_lcols, &
                                         v(1,update_voffset),ldv, &
                                         t(update_voffset,update_voffset),ldt, &
                                         a,lda,rowidx,1,updatemode,mpicomm_rows,work(work_offset),lwork)
         else
           ! full updatemode - nothing to update
         end if
         ! reset for upcoming incremental updates
         incremental_update_size = 0
       end if
     end do
     if ((tmerge .gt. 0) .and. (updatemode .eq. ichar('F'))) then
       ! finally merge all small T parts
       call qr_pdlarft_tree_merge_1dcomm(m,mb,n,size2d,tmerge,v,ldv,t,ldt,rowidx,rev,mpicomm_rows,work,lwork)
     end if
     !print *,'stop decomposition',rowidx,colidx
+#ifdef HAVE_DETAILED_TIMINGS
+    call timer%stop("qr_pdgeqrf_2dcomm")
+#endif
   end subroutine qr_pdgeqrf_2dcomm

   subroutine qr_pdgeqrf_1dcomm(a,lda,v,ldv,tau,t,ldt,work,lwork,m,n,mb,baseidx,rowidx,rev,trans,PQRPARAM,mpicomm,blockheuristic)
     use precision
     use ELPA1
+#ifdef HAVE_DETAILED_TIMINGS
+    use timings
+#endif
     implicit none
     ! parameter setup
     INTEGER(kind=ik), parameter :: gmode_ = 1,rank_ = 2,eps_ = 3
     ! input variables (local)
     integer(kind=ik)            :: lda,lwork,ldv,ldt
     real(kind=rk)               :: a(lda,*),v(ldv,*),tau(*),t(ldt,*),work(*)
     ! input variables (global)
     integer(kind=ik)            :: m,n,mb,baseidx,rowidx,rev,trans,mpicomm
+#ifdef DESPERATELY_WANT_ASSUMED_SIZE_QR
     integer(kind=ik)            :: PQRPARAM(*)
+#else
+    integer(kind=ik)            :: PQRPARAM(:)
+#endif
     ! derived input variables
     ! derived further input variables from QR_PQRPARAM
     integer(kind=ik)            :: size1d,updatemode,tmerge
     ! output variables (global)
     real(kind=rk)               :: blockheuristic(*)
     ! local scalars
     integer(kind=ik)            :: nr_blocks,remainder,current_block,aoffset,idx,updatesize
     real(kind=rk)               :: pdgeqr2_size(1),pdlarfb_size(1),tmerge_tree_size(1)
+#ifdef HAVE_DETAILED_TIMINGS
+    call timer%start("qr_pdgeqrf_1dcomm")
+#endif
     size1d     = max(min(PQRPARAM(1),n),1)
     updatemode = PQRPARAM(2)
     tmerge     = PQRPARAM(3)
     if (lwork .eq. -1) then
+#ifdef DESPERATELY_WANT_ASSUMED_SIZE_QR
       call qr_pdgeqr2_1dcomm(a,lda,v,ldv,tau,t,ldt,pdgeqr2_size,-1, &