Commit 9710bf08 authored by Andreas Marek

Split file elpa1.F90 into elpa1.F90 and elpa1_compute.F90

For automatic generation of documentation, the file elpa1.F90
has been split into two files, in order to provide a lean,
easy-to-understand user interface:

elpa1.F90
the visible user functions, which provide the library calls.
The usage is the same as before.

elpa1_compute.F90
all internal routines, which are used by ELPA1 and ELPA2 but are
never called by the user. These routines are now "hidden" in the
module elpa1_compute, which both ELPA1 and ELPA2 use.

The procedures in elpa1_compute.F90 are identical to the ones in
elpa1.F90 before this split commit. The only changes -- albeit many
of them -- are in indentation.
parent 8f82627a
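Schematically, the split amounts to moving the internal procedures into their own module and pulling that module into the user-facing one. The following is an illustrative sketch only; the routine names compute_helper and solve_something are placeholders, not the actual ELPA procedures:

module elpa1_compute
  ! internal routines shared by ELPA1 and ELPA2; never called directly by the user
  implicit none
contains
  subroutine compute_helper(n, x)
    ! placeholder for one of the moved internal routines
    integer, intent(in)             :: n
    double precision, intent(inout) :: x(n)
    x(1:n) = 2.0d0*x(1:n)
  end subroutine compute_helper
end module elpa1_compute

module elpa1
  ! lean user interface: only the visible library calls stay here
  use elpa1_compute
  implicit none
contains
  subroutine solve_something(n, x)
    ! placeholder for a public library call; its usage stays the same as before
    integer, intent(in)             :: n
    double precision, intent(inout) :: x(n)
    call compute_helper(n, x)
  end subroutine solve_something
end module elpa1

ELPA2 and the QR modules then simply switch their use statements from elpa1 to elpa1_compute, as the hunks below show.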
@@ -10,6 +10,7 @@ lib_LTLIBRARIES = libelpa@SUFFIX@.la
  libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION) -lstdc++
  libelpa@SUFFIX@_la_SOURCES = src/elpa_utilities.F90 \
+                              src/elpa1_compute.F90 \
                               src/elpa1.F90 \
                               src/elpa2_utilities.F90 \
                               src/elpa2.F90 \
...
This diff is collapsed.
This diff is collapsed.
@@ -66,7 +66,8 @@ module ELPA2
! Version 1.1.2, 2011-02-21
  use elpa_utilities
- USE ELPA1
+ USE elpa1_compute
+ use elpa1, only : elpa_print_times, time_evp_back, time_evp_fwd, time_evp_solve
  use elpa2_utilities
  use elpa_pdgeqrf
...
@@ -41,7 +41,7 @@
!
module elpa_pdgeqrf
- use elpa1
+ use elpa1_compute
  use elpa_pdlarfb
  use qr_utils_mod
...
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!   Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
!   Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaftrn,
!   Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!   and
! - IBM Deutschland GmbH
!
!
@@ -19,8 +19,8 @@
! http://elpa.rzg.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
@@ -41,9 +41,9 @@
!
module elpa_pdlarfb
- use elpa1
+ use elpa1_compute
  use qr_utils_mod
  implicit none
  PRIVATE
@@ -55,24 +55,24 @@ module elpa_pdlarfb
public :: qr_pdlarfl_1dcomm
public :: qr_pdlarfl2_tmatrix_1dcomm
public :: qr_tmerge_pdlarfb_1dcomm
include 'mpif.h'
contains
subroutine qr_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpicomm,work,lwork)
use qr_utils_mod
implicit none
! input variables (local)
integer lda,ldv,ldt,lwork
double precision a(lda,*),v(ldv,*),tau(*),t(ldt,*),work(k,*)
! input variables (global)
integer m,mb,n,k,baseidx,idx,rev,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
@@ -99,7 +99,7 @@ subroutine qr_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpic
work(1,1) = DBLE(2*k*n)
return
end if
!print *,'updating trailing matrix with k=',k
call MPI_Comm_rank(mpicomm,mpirank,mpierr)
@@ -119,9 +119,9 @@ subroutine qr_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpic
! data exchange
call mpi_allreduce(work(1,1),work(1,n+1),k*n,mpi_real8,mpi_sum,mpicomm,mpierr)
call qr_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t,ldt,work(1,n+1),k)
end subroutine qr_pdlarfb_1dcomm
! generalized pdlarfl2 version
! TODO: include T merge here (seperate by "old" and "new" index)
@@ -136,7 +136,7 @@ subroutine qr_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
! input variables (global)
integer m,mb,n,k,oldk,baseidx,rev,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
@@ -167,7 +167,7 @@ subroutine qr_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
! calculate inner product of householdervectors
call dsyrk("Upper","Trans",k,localsize,1.0d0,v(baseoffset,1),ldv,0.0d0,work(1,1),k)
! calculate matrix matrix product of householder vectors and target matrix
! Z' = Y' * A
call dgemm("Trans","Notrans",k,n,localsize,1.0d0,v(baseoffset,1),ldv,a(offset,1),lda,0.0d0,work(1,k+1),k)
@@ -194,7 +194,7 @@ subroutine qr_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
end do
! TODO: elmroth and gustavson
! update matrix (pdlarfb)
! Z' = T * Z'
call dtrmm("Left","Upper","Notrans","Nonunit",k,n,1.0d0,t,ldt,work(1,recvoffset+k),k)
@@ -208,14 +208,14 @@ subroutine qr_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,t,ldt,baseidx,rev,
use qr_utils_mod
implicit none
! input variables (local)
integer ldv,ldt,lwork
double precision v(ldv,*),t(ldt,*),work(n,*)
! input variables (global)
integer m,mb,n,blocksize,baseidx,rev,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
@@ -228,7 +228,7 @@ subroutine qr_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,t,ldt,baseidx,rev,
work(1,1) = DBLE(2*n*n)
return
end if
call MPI_Comm_rank(mpicomm,mpirank,mpierr)
call MPI_Comm_size(mpicomm,mpiprocs,mpierr)
@@ -240,7 +240,7 @@ subroutine qr_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,t,ldt,baseidx,rev,
else
work(1:n,1:n) = 0.0d0
end if
call mpi_allreduce(work(1,1),work(1,n+1),n*n,mpi_real8,mpi_sum,mpicomm,mpierr)
! skip Y4'*Y4 part
@@ -254,14 +254,14 @@ subroutine qr_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,b
use qr_utils_mod
implicit none
! input variables (local)
integer ldv,ldt,lwork
double precision v(ldv,*),t(ldt,*),work(n,*)
! input variables (global)
integer m,mb,n,blocksize,treeorder,baseidx,rev,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
@@ -276,7 +276,7 @@ subroutine qr_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,b
end if
if (n .le. blocksize) return ! nothing to do
call MPI_Comm_rank(mpicomm,mpirank,mpierr)
call MPI_Comm_size(mpicomm,mpiprocs,mpierr)
@@ -288,7 +288,7 @@ subroutine qr_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,b
else
work(1:n,1:n) = 0.0d0
end if
call mpi_allreduce(work(1,1),work(1,n+1),n*n,mpi_real8,mpi_sum,mpicomm,mpierr)
! skip Y4'*Y4 part
@@ -298,7 +298,7 @@ subroutine qr_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,b
end subroutine qr_pdlarft_tree_merge_1dcomm
! apply householder vector to the left
! - assume unitary matrix
! - assume right positions for v
subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,mpicomm)
@@ -306,7 +306,7 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
use qr_utils_mod
implicit none
! input variables (local)
integer incv,lda,lwork,baseidx
double precision v(*),a(lda,*),work(*)
@@ -314,9 +314,9 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
! input variables (global)
integer m,n,mb,rev,idx,mpicomm
double precision tau
! output variables (global)
! local scalars
integer mpierr,mpirank,mpiprocs
integer sendsize,recvsize,icol
@@ -326,7 +326,7 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
! external functions
double precision ddot
external dgemv,dger,ddot
call MPI_Comm_rank(mpicomm, mpirank, mpierr)
call MPI_Comm_size(mpicomm, mpiprocs, mpierr)
@@ -337,20 +337,20 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
work(1) = DBLE(sendsize + recvsize)
return
end if
if (n .le. 0) return
if (idx .le. 1) return
call local_size_offset_1d(m,mb,baseidx,idx,rev,mpirank,mpiprocs, &
local_size,v_local_offset,local_offset)
!print *,'hl ref',local_size,n
v_local_offset = v_local_offset * incv
if (local_size > 0) then
do icol=1,n
work(icol) = dot_product(v(v_local_offset:v_local_offset+local_size-1),a(local_offset:local_offset+local_size-1,icol))
@@ -358,7 +358,7 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
else
work(1:n) = 0.0d0
end if
call mpi_allreduce(work, work(sendsize+1), sendsize, mpi_real8, mpi_sum, mpicomm, mpierr)
if (local_size > 0) then
@@ -377,16 +377,16 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
use qr_utils_mod
implicit none
! input variables (local)
integer ldv,lda,lwork,baseidx,ldt
double precision v(ldv,*),a(lda,*),work(*),t(ldt,*)
! input variables (global)
integer m,n,mb,rev,idx,mpicomm
! output variables (global)
! local scalars
integer mpierr,mpirank,mpiprocs,mpirank_top1,mpirank_top2
integer dgemv1_offset,dgemv2_offset
@@ -405,7 +405,7 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
call MPI_Comm_rank(mpicomm, mpirank, mpierr)
call MPI_Comm_size(mpicomm, mpiprocs, mpierr)
sendsize = 2*n
recvsize = sendsize
@@ -413,7 +413,7 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
work(1) = sendsize + recvsize
return
end if
dgemv1_offset = 1
dgemv2_offset = dgemv1_offset + n
@@ -443,7 +443,7 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
work(dgemv2_offset),1)
call mpi_allreduce(work, work(sendsize+1), sendsize, mpi_real8, mpi_sum, mpicomm, mpierr)
! update second vector
call daxpy(n,t(1,2),work(sendsize+dgemv1_offset),1,work(sendsize+dgemv2_offset),1)
@@ -470,14 +470,14 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
end if
if (mpirank_top2 .eq. mpirank) then
a(local_offset2,icol) = a(local_offset2,icol) &
- v(v2_local_offset,v1col)*work(sendsize+dgemv1_offset+icol-1)*hvdot &
- work(sendsize+dgemv2_offset+icol-1)
end if
do irow=1,local_size_dger
a(local_offset_dger+irow-1,icol) = a(local_offset_dger+irow-1,icol) &
- work(sendsize+dgemv1_offset+icol-1)*v(v_local_offset_dger+irow-1,v1col)*hvdot &
- work(sendsize+dgemv2_offset+icol-1)*v(v_local_offset_dger+irow-1,v2col)
end do
end do
@@ -497,7 +497,7 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
! input variables (global)
integer m,mb,n,k,oldk,baseidx,rev,updatemode,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
@@ -534,7 +534,7 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
call MPI_Comm_rank(mpicomm,mpirank,mpierr)
call MPI_Comm_size(mpicomm,mpiprocs,mpierr)
! use baseidx as idx here, otherwise the upper triangle part will be lost
! during the calculation, especially in the reversed case
call local_size_offset_1d(m,mb,baseidx,baseidx,rev,mpirank,mpiprocs, &
@@ -546,14 +546,14 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
updateoffset = 0
mergeoffset = updateoffset + updatesize
tgenoffset = mergeoffset + mergesize
sendsize = updatesize + mergesize + tgensize
!print *,'sendsize',sendsize,updatesize,mergesize,tgensize
!print *,'merging nr of rotations', oldk+k
if (localsize .gt. 0) then
! calculate matrix matrix product of householder vectors and target matrix
if (updatemode .eq. ichar('I')) then
! Z' = (Y1,Y2)' * A
@@ -577,13 +577,13 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
! do not calculate parts for T merge as there is nothing to merge
updateoffset = 0
tgenoffset = updateoffset + updatesize
sendsize = updatesize + tgensize
if (localsize .gt. 0) then
! calculate matrix matrix product of householder vectors and target matrix
! Z' = (Y1)' * A
call dgemm("Trans","Notrans",k,n,localsize,1.0d0,v(baseoffset,1),ldv,a(offset,1),lda,0.0d0, &
work(sendoffset+updateoffset),updatelda)
@@ -601,7 +601,7 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,