Commit 9710bf08 authored by Andreas Marek

Split file elpa1.F90 into elpa1.F90 and elpa1_compute.F90

For the automatic generation of documentation, the file elpa1.F90
has been split into two files, in order to provide a lean,
easy-to-understand user interface:

elpa1.F90
the visible user functions, which provide the library calls.
The usage is the same as before.

elpa1_compute.F90
all internal routines, which are used by ELPA1 and ELPA2 but
are never called directly by the user. These routines are now "hidden"
in the module elpa1_compute, which both ELPA1 and ELPA2 use.

The procedures in elpa1_compute.F90 are identical to the ones in
elpa1.F90 before this split. The only -- though numerous --
changes are in indentation.
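
A minimal sketch of the resulting module layout (the routine names below
are illustrative placeholders, not the actual ELPA symbols):

! Sketch only: intended module layout after the split.
! Routine names are placeholders for illustration.
module elpa1_compute
  ! internal routines, shared by ELPA1 and ELPA2, not part of the user API
  implicit none
contains
  subroutine internal_step(n, x)
    integer, intent(in)             :: n
    double precision, intent(inout) :: x(n)
    x(1:n) = 0.0d0   ! placeholder body
  end subroutine internal_step
end module elpa1_compute

module elpa1
  ! user-visible interface; the calling convention is unchanged
  use elpa1_compute
  implicit none
contains
  subroutine solve_example(n, x)
    integer, intent(in)             :: n
    double precision, intent(inout) :: x(n)
    call internal_step(n, x)   ! work is delegated to elpa1_compute
  end subroutine solve_example
end module elpa1

ELPA2 itself now uses elpa1_compute for the internal routines and pulls
only the timing variables (elpa_print_times, time_evp_back, time_evp_fwd,
time_evp_solve) from elpa1, as the diff below shows.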
parent 8f82627a
......@@ -10,6 +10,7 @@ lib_LTLIBRARIES = libelpa@SUFFIX@.la
libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION) -lstdc++
libelpa@SUFFIX@_la_SOURCES = src/elpa_utilities.F90 \
src/elpa1_compute.F90 \
src/elpa1.F90 \
src/elpa2_utilities.F90 \
src/elpa2.F90 \
......
This diff is collapsed.
This diff is collapsed.
......@@ -66,7 +66,8 @@ module ELPA2
! Version 1.1.2, 2011-02-21
use elpa_utilities
USE ELPA1
USE elpa1_compute
use elpa1, only : elpa_print_times, time_evp_back, time_evp_fwd, time_evp_solve
use elpa2_utilities
use elpa_pdgeqrf
......
......@@ -41,7 +41,7 @@
!
module elpa_pdgeqrf
use elpa1
use elpa1_compute
use elpa_pdlarfb
use qr_utils_mod
......
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
......@@ -19,8 +19,8 @@
! http://elpa.rzg.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
......@@ -41,9 +41,9 @@
!
module elpa_pdlarfb
use elpa1
use elpa1_compute
use qr_utils_mod
implicit none
PRIVATE
......@@ -55,24 +55,24 @@ module elpa_pdlarfb
public :: qr_pdlarfl_1dcomm
public :: qr_pdlarfl2_tmatrix_1dcomm
public :: qr_tmerge_pdlarfb_1dcomm
include 'mpif.h'
contains
subroutine qr_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpicomm,work,lwork)
use qr_utils_mod
implicit none
! input variables (local)
integer lda,ldv,ldt,lwork
double precision a(lda,*),v(ldv,*),tau(*),t(ldt,*),work(k,*)
! input variables (global)
integer m,mb,n,k,baseidx,idx,rev,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
......@@ -99,7 +99,7 @@ subroutine qr_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpic
work(1,1) = DBLE(2*k*n)
return
end if
!print *,'updating trailing matrix with k=',k
call MPI_Comm_rank(mpicomm,mpirank,mpierr)
......@@ -119,9 +119,9 @@ subroutine qr_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpic
! data exchange
call mpi_allreduce(work(1,1),work(1,n+1),k*n,mpi_real8,mpi_sum,mpicomm,mpierr)
call qr_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t,ldt,work(1,n+1),k)
end subroutine qr_pdlarfb_1dcomm
! generalized pdlarfl2 version
! TODO: include T merge here (separate by "old" and "new" index)
......@@ -136,7 +136,7 @@ subroutine qr_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
! input variables (global)
integer m,mb,n,k,oldk,baseidx,rev,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
......@@ -167,7 +167,7 @@ subroutine qr_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
! calculate inner product of householdervectors
call dsyrk("Upper","Trans",k,localsize,1.0d0,v(baseoffset,1),ldv,0.0d0,work(1,1),k)
! calculate matrix matrix product of householder vectors and target matrix
! Z' = Y' * A
call dgemm("Trans","Notrans",k,n,localsize,1.0d0,v(baseoffset,1),ldv,a(offset,1),lda,0.0d0,work(1,k+1),k)
......@@ -194,7 +194,7 @@ subroutine qr_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
end do
! TODO: elmroth and gustavson
! update matrix (pdlarfb)
! Z' = T * Z'
call dtrmm("Left","Upper","Notrans","Nonunit",k,n,1.0d0,t,ldt,work(1,recvoffset+k),k)
......@@ -208,14 +208,14 @@ subroutine qr_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,t,ldt,baseidx,rev,
use qr_utils_mod
implicit none
! input variables (local)
integer ldv,ldt,lwork
double precision v(ldv,*),t(ldt,*),work(n,*)
! input variables (global)
integer m,mb,n,blocksize,baseidx,rev,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
......@@ -228,7 +228,7 @@ subroutine qr_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,t,ldt,baseidx,rev,
work(1,1) = DBLE(2*n*n)
return
end if
call MPI_Comm_rank(mpicomm,mpirank,mpierr)
call MPI_Comm_size(mpicomm,mpiprocs,mpierr)
......@@ -240,7 +240,7 @@ subroutine qr_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,t,ldt,baseidx,rev,
else
work(1:n,1:n) = 0.0d0
end if
call mpi_allreduce(work(1,1),work(1,n+1),n*n,mpi_real8,mpi_sum,mpicomm,mpierr)
! skip Y4'*Y4 part
......@@ -254,14 +254,14 @@ subroutine qr_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,b
use qr_utils_mod
implicit none
! input variables (local)
integer ldv,ldt,lwork
double precision v(ldv,*),t(ldt,*),work(n,*)
! input variables (global)
integer m,mb,n,blocksize,treeorder,baseidx,rev,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
......@@ -276,7 +276,7 @@ subroutine qr_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,b
end if
if (n .le. blocksize) return ! nothing to do
call MPI_Comm_rank(mpicomm,mpirank,mpierr)
call MPI_Comm_size(mpicomm,mpiprocs,mpierr)
......@@ -288,7 +288,7 @@ subroutine qr_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,b
else
work(1:n,1:n) = 0.0d0
end if
call mpi_allreduce(work(1,1),work(1,n+1),n*n,mpi_real8,mpi_sum,mpicomm,mpierr)
! skip Y4'*Y4 part
......@@ -298,7 +298,7 @@ subroutine qr_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,t,ldt,b
end subroutine qr_pdlarft_tree_merge_1dcomm
! apply householder vector to the left
! - assume unitary matrix
! - assume right positions for v
subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,mpicomm)
......@@ -306,7 +306,7 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
use qr_utils_mod
implicit none
! input variables (local)
integer incv,lda,lwork,baseidx
double precision v(*),a(lda,*),work(*)
......@@ -314,9 +314,9 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
! input variables (global)
integer m,n,mb,rev,idx,mpicomm
double precision tau
! output variables (global)
! local scalars
integer mpierr,mpirank,mpiprocs
integer sendsize,recvsize,icol
......@@ -326,7 +326,7 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
! external functions
double precision ddot
external dgemv,dger,ddot
call MPI_Comm_rank(mpicomm, mpirank, mpierr)
call MPI_Comm_size(mpicomm, mpiprocs, mpierr)
......@@ -337,20 +337,20 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
work(1) = DBLE(sendsize + recvsize)
return
end if
if (n .le. 0) return
if (idx .le. 1) return
call local_size_offset_1d(m,mb,baseidx,idx,rev,mpirank,mpiprocs, &
local_size,v_local_offset,local_offset)
!print *,'hl ref',local_size,n
v_local_offset = v_local_offset * incv
if (local_size > 0) then
do icol=1,n
work(icol) = dot_product(v(v_local_offset:v_local_offset+local_size-1),a(local_offset:local_offset+local_size-1,icol))
......@@ -358,7 +358,7 @@ subroutine qr_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,
else
work(1:n) = 0.0d0
end if
call mpi_allreduce(work, work(sendsize+1), sendsize, mpi_real8, mpi_sum, mpicomm, mpierr)
if (local_size > 0) then
......@@ -377,16 +377,16 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
use qr_utils_mod
implicit none
! input variables (local)
integer ldv,lda,lwork,baseidx,ldt
double precision v(ldv,*),a(lda,*),work(*),t(ldt,*)
! input variables (global)
integer m,n,mb,rev,idx,mpicomm
! output variables (global)
! local scalars
integer mpierr,mpirank,mpiprocs,mpirank_top1,mpirank_top2
integer dgemv1_offset,dgemv2_offset
......@@ -405,7 +405,7 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
call MPI_Comm_rank(mpicomm, mpirank, mpierr)
call MPI_Comm_size(mpicomm, mpiprocs, mpierr)
sendsize = 2*n
recvsize = sendsize
......@@ -413,7 +413,7 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
work(1) = sendsize + recvsize
return
end if
dgemv1_offset = 1
dgemv2_offset = dgemv1_offset + n
......@@ -443,7 +443,7 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
work(dgemv2_offset),1)
call mpi_allreduce(work, work(sendsize+1), sendsize, mpi_real8, mpi_sum, mpicomm, mpierr)
! update second vector
call daxpy(n,t(1,2),work(sendsize+dgemv1_offset),1,work(sendsize+dgemv2_offset),1)
......@@ -470,14 +470,14 @@ subroutine qr_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,i
end if
if (mpirank_top2 .eq. mpirank) then
a(local_offset2,icol) = a(local_offset2,icol) &
- v(v2_local_offset,v1col)*work(sendsize+dgemv1_offset+icol-1)*hvdot &
- work(sendsize+dgemv2_offset+icol-1)
end if
do irow=1,local_size_dger
a(local_offset_dger+irow-1,icol) = a(local_offset_dger+irow-1,icol) &
- work(sendsize+dgemv1_offset+icol-1)*v(v_local_offset_dger+irow-1,v1col)*hvdot &
- work(sendsize+dgemv2_offset+icol-1)*v(v_local_offset_dger+irow-1,v2col)
end do
end do
......@@ -497,7 +497,7 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
! input variables (global)
integer m,mb,n,k,oldk,baseidx,rev,updatemode,mpicomm
! output variables (global)
! derived input variables from QR_PQRPARAM
......@@ -534,7 +534,7 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
call MPI_Comm_rank(mpicomm,mpirank,mpierr)
call MPI_Comm_size(mpicomm,mpiprocs,mpierr)
! use baseidx as idx here, otherwise the upper triangle part will be lost
! during the calculation, especially in the reversed case
call local_size_offset_1d(m,mb,baseidx,baseidx,rev,mpirank,mpiprocs, &
......@@ -546,14 +546,14 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
updateoffset = 0
mergeoffset = updateoffset + updatesize
tgenoffset = mergeoffset + mergesize
sendsize = updatesize + mergesize + tgensize
!print *,'sendsize',sendsize,updatesize,mergesize,tgensize
!print *,'merging nr of rotations', oldk+k
if (localsize .gt. 0) then
! calculate matrix matrix product of householder vectors and target matrix
if (updatemode .eq. ichar('I')) then
! Z' = (Y1,Y2)' * A
......@@ -577,13 +577,13 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
! do not calculate parts for T merge as there is nothing to merge
updateoffset = 0
tgenoffset = updateoffset + updatesize
sendsize = updatesize + tgensize
if (localsize .gt. 0) then
! calculate matrix matrix product of householder vectors and target matrix
! Z' = (Y1)' * A
call dgemm("Trans","Notrans",k,n,localsize,1.0d0,v(baseoffset,1),ldv,a(offset,1),lda,0.0d0, &
work(sendoffset+updateoffset),updatelda)
......@@ -601,7 +601,7 @@ subroutine qr_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,t,ldt,a,lda,baseidx,rev,
! exchange data
call mpi_allreduce(work(sendoffset),work(recvoffset),sendsize,mpi_real8,mpi_sum,mpicomm,mpierr)
updateoffset = recvoffset+updateoffset
mergeoffset = recvoffset+mergeoffset
tgenoffset = recvoffset+tgenoffset
......
......@@ -58,7 +58,7 @@ contains
! involved in the qr decomposition
subroutine local_size_offset_1d(n,nb,baseidx,idx,rev,rank,nprocs, &
lsize,baseoffset,offset)
use ELPA1
use ELPA1_compute
implicit none
......