Commit 9655c781 authored by Thomas Auckenthaler's avatar Thomas Auckenthaler
Browse files

"use mpi" replaced by "include 'mpif.h'"

dropped old rank-2 QR-decomposition (blockedQR.f90)
parent db110dfc
This diff is collapsed.
......@@ -10,6 +10,7 @@ module ELPA2
! Version 1.1.2, 2011-02-21
USE ELPA1
use elpa_pdgeqrf
implicit none
......
module elpa_pdlarfb
use elpa1
use tum_utils
implicit none
PRIVATE
public :: tum_pdlarfb_1dcomm
public :: tum_pdlarft_pdlarfb_1dcomm
public :: tum_pdlarft_set_merge_1dcomm
public :: tum_pdlarft_tree_merge_1dcomm
public :: tum_pdlarfl_1dcomm
public :: tum_pdlarfl2_tmatrix_1dcomm
public :: tum_tmerge_pdlarfb_1dcomm
include 'mpif.h'
contains
subroutine tum_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpicomm,work,lwork)
use mpi
use tum_utils
implicit none
......@@ -59,11 +80,11 @@ subroutine tum_pdlarfb_1dcomm(m,mb,n,k,a,lda,v,ldv,tau,t,ldt,baseidx,idx,rev,mpi
call mpi_allreduce(work(1,1),work(1,n+1),k*n,mpi_real8,mpi_sum,mpicomm,mpierr)
call tum_pdlarfb_kernel_local(localsize,n,k,a(offset,1),lda,v(baseoffset,1),ldv,t,ldt,work(1,n+1),k,rev)
end subroutine
end subroutine tum_pdlarfb_1dcomm
! generalized pdlarfl2 version
! TODO: include T merge here (separate by "old" and "new" index)
subroutine tum_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx,idx,rev,mpicomm,work,lwork)
use mpi
use tum_utils
implicit none
......@@ -140,9 +161,9 @@ subroutine tum_pdlarft_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseid
! A = A - Y * V'
call dgemm("Notrans","Notrans",localsize,n,k,-1.0d0,v(baseoffset,1),ldv,work(1,recvoffset+k),k,1.0d0,a(offset,1),lda)
end subroutine
end subroutine tum_pdlarft_pdlarfb_1dcomm
subroutine tum_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,tau,t,ldt,baseidx,idx,rev,mpicomm,work,lwork)
use mpi
use tum_utils
implicit none
......@@ -187,9 +208,9 @@ subroutine tum_pdlarft_set_merge_1dcomm(m,mb,n,blocksize,v,ldv,tau,t,ldt,baseidx
if (offset .eq. 0) offset=blocksize
call tum_tmerge_set_kernel(n,blocksize,t,ldt,work(1,n+1+offset),n,1)
end subroutine
end subroutine tum_pdlarft_set_merge_1dcomm
subroutine tum_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,tau,t,ldt,baseidx,idx,rev,mpicomm,work,lwork)
use mpi
use tum_utils
implicit none
......@@ -236,14 +257,14 @@ subroutine tum_pdlarft_tree_merge_1dcomm(m,mb,n,blocksize,treeorder,v,ldv,tau,t,
if (offset .eq. 0) offset=blocksize
call tum_tmerge_tree_kernel(n,blocksize,treeorder,t,ldt,work(1,n+1+offset),n,1)
end subroutine
end subroutine tum_pdlarft_tree_merge_1dcomm
! apply householder vector to the left
! - assume unitary matrix
! - assume right positions for v
subroutine tum_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev,mpicomm)
use ELPA1
use tum_utils
use mpi
implicit none
......@@ -290,11 +311,8 @@ subroutine tum_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev
v_local_offset = v_local_offset * incv
if (local_size > 0) then
!call dgemv("Trans",local_size,n,1.0d0,a(local_offset,1),lda,v(v_local_offset),incv,0.0d0,work(1),1)
do icol=1,n
!work(icol) = ddot(local_size, a(local_offset,icol), 1, &
! v(v_local_offset), 1)
work(icol) = dot_product(v(v_local_offset:v_local_offset+local_size-1),a(local_offset:local_offset+local_size-1,icol))
end do
......@@ -305,7 +323,6 @@ subroutine tum_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev
call mpi_allreduce(work, work(sendsize+1), sendsize, mpi_real8, mpi_sum, mpicomm, mpierr)
if (local_size > 0) then
!call dger(local_size,n,-tau,v(v_local_offset),incv,work(sendsize+1),1,a(local_offset,1),lda)
do icol=1,n
a(local_offset:local_offset+local_size-1,icol) = a(local_offset:local_offset+local_size-1,icol) &
......@@ -313,15 +330,11 @@ subroutine tum_pdlarfl_1dcomm(v,incv,baseidx,a,lda,tau,work,lwork,m,n,idx,mb,rev
enddo
end if
end subroutine tum_pdlarfl_1dcomm
!print *,'ref hl', work(sendsize+1:sendsize+recvsize)
end subroutine
! test reverse version
subroutine tum_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,idx,mb,rev,mpicomm)
use ELPA1
use tum_utils
use mpi
implicit none
......@@ -393,10 +406,6 @@ subroutine tum_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,
! update second vector
call daxpy(n,t(1,2),work(sendsize+dgemv1_offset),1,work(sendsize+dgemv2_offset),1)
! reference implementation
!call dger(local_size1,n,-1.0d0,v(v1_local_offset,v1col),1,work(sendsize+dgemv1_offset),1,a(local_offset1,1),lda)
!call dger(local_size2,n,-1.0d0,v(v2_local_offset,v2col),1,work(sendsize+dgemv2_offset),1,a(local_offset2,1),lda)
call local_size_offset_1d(m,mb,baseidx,idx-2,rev,mpirank,mpiprocs, &
local_size_dger,v_local_offset_dger,local_offset_dger)
......@@ -432,11 +441,11 @@ subroutine tum_pdlarfl2_tmatrix_1dcomm(v,ldv,baseidx,a,lda,t,ldt,work,lwork,m,n,
end do
end do
end subroutine
end subroutine tum_pdlarfl2_tmatrix_1dcomm
! generalized pdlarfl2 version
! TODO: include T merge here (separate by "old" and "new" index)
subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx,idx,rev,updatemode,mpicomm,work,lwork)
use mpi
use tum_utils
implicit none
......@@ -473,8 +482,8 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
mergelda = k
mergesize = mergelda*oldk
tgenlda = 0 ! TODO
tgensize = 0 ! TODO
tgenlda = 0
tgensize = 0
sendsize = updatesize + mergesize + tgensize
......@@ -491,8 +500,6 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
call local_size_offset_1d(m,mb,baseidx,baseidx,rev,mpirank,mpiprocs, &
localsize,baseoffset,offset)
!print '(a,6i)','indices: ',baseidx,idx,localsize,baseoffset,offset
sendoffset = 1
if (oldk .gt. 0) then
......@@ -516,20 +523,9 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
call dgemm("Trans","Notrans",k,n,localsize,1.0d0,v(baseoffset,1),ldv,a(offset,1),lda,0.0d0,work(sendoffset+updateoffset),updatelda)
end if
!print *,'v content'
!print '(5f)',v(baseoffset,1:5)
!print '(5f)',v(baseoffset+1,1:5)
!print '(5f)',v(baseoffset+2,1:5)
!print '(5f)',v(baseoffset+3,1:5)
!print '(5f)',v(baseoffset+4,1:5)
! calculate parts needed for T merge
call dgemm("Trans","Notrans",k,oldk,localsize,1.0d0,v(baseoffset,1),ldv,v(baseoffset,k+1),ldv,0.0d0,work(sendoffset+mergeoffset),mergelda)
! calculate inner product of householdervectors
! TODO: future TGEN parameter
!call dsyrk("Upper","Trans",k,localsize,1.0d0,v(baseoffset,oldk+1),ldv,0.0d0,work(oldk+1,1),ldw)
else
! cleanup buffer
work(sendoffset:sendoffset+sendsize-1) = 0.0d0
......@@ -548,9 +544,6 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
! Z' = (Y1)' * A
call dgemm("Trans","Notrans",k,n,localsize,1.0d0,v(baseoffset,1),ldv,a(offset,1),lda,0.0d0,work(sendoffset+updateoffset),updatelda)
! calculate inner product of householdervectors
! TODO: future TGEN parameter
!call dsyrk("Upper","Trans",k,localsize,1.0d0,v(baseoffset,oldk+1),ldv,0.0d0,work(oldk+1,1),ldw)
else
! cleanup buffer
work(sendoffset:sendoffset+sendsize-1) = 0.0d0
......@@ -589,4 +582,6 @@ subroutine tum_tmerge_pdlarfb_1dcomm(m,mb,n,oldk,k,v,ldv,tau,t,ldt,a,lda,baseidx
end if
end if
end subroutine
end subroutine tum_tmerge_pdlarfb_1dcomm
end module elpa_pdlarfb
......@@ -10,10 +10,6 @@ module tum_utils
public :: reverse_matrix_1dcomm
public :: reverse_matrix_2dcomm_ref
public :: tsqr_groups_size
public :: tsqr_groups_initialize
public :: tsqr_groups_finalize
contains
! rev parameter is critical, even in rev only mode!
......@@ -58,7 +54,7 @@ subroutine local_size_offset_1d(n,nb,baseidx,idx,rev,rank,nprocs, &
baseoffset = offset - baseoffset + 1
end if
end subroutine
end subroutine local_size_offset_1d
subroutine reverse_vector_local(n,x,incx,work,lwork)
......@@ -86,7 +82,7 @@ subroutine reverse_vector_local(n,x,incx,work,lwork)
x(destoffset) = temp
end do
end subroutine
end subroutine reverse_vector_local
subroutine reverse_matrix_local(trans,m,n,a,lda,work,lwork)
implicit none
......@@ -126,7 +122,7 @@ subroutine reverse_matrix_local(trans,m,n,a,lda,work,lwork)
end do
end if
end subroutine
end subroutine reverse_matrix_local
subroutine reverse_matrix_2dcomm_ref(m,n,mb,nb,a,lda,work,lwork,mpicomm_cols,mpicomm_rows)
implicit none
......@@ -164,7 +160,7 @@ subroutine reverse_matrix_2dcomm_ref(m,n,mb,nb,a,lda,work,lwork,mpicomm_cols,mpi
call reverse_matrix_1dcomm(0,m,lcols,mb,a,lda,work,lwork,mpicomm_cols)
call reverse_matrix_1dcomm(1,lrows,n,nb,a,lda,work,lwork,mpicomm_rows)
end subroutine
end subroutine reverse_matrix_2dcomm_ref
! b: if trans = 'N': b is size of block distribution between rows
! b: if trans = 'T': b is size of block distribution between columns
......@@ -350,95 +346,6 @@ subroutine reverse_matrix_1dcomm(trans,m,n,b,a,lda,work,lwork,mpicomm)
a(1:lrows,icol) = &
work(newmatrix_offset+(icol-1)*lrows:newmatrix_offset+icol*lrows-1)
end do
end subroutine
!> Return the number of communicator groups needed for a TSQR tree
!> reduction over communicator `comm` with branching factor `treeorder`.
!>
!> The result is the integer logarithm (rounded up, minimum 1) of the
!> communicator size with base `treeorder`, i.e. the number of tree
!> levels; group 1 is the full communicator, the remaining groups are
!> created per level by tsqr_groups_initialize.
!>
!> Fix: the original assigned the (never-set) local `nr_groups` to the
!> function result, returning an uninitialized value; the computed
!> `depth` is the intended group count.
integer function tsqr_groups_size(comm,treeorder)
    use mpi
    implicit none

    ! input
    integer comm,treeorder

    ! local scalars
    integer mpiprocs,mpierr
    integer nr_groups,depth,treeprocs

    call MPI_Comm_size(comm,mpiprocs,mpierr)

    ! integer logarithm with base treeorder:
    ! smallest depth such that treeorder**depth >= mpiprocs
    depth=1
    treeprocs=treeorder
    do while(treeprocs .lt. mpiprocs)
        treeprocs = treeprocs * treeorder
        depth = depth + 1
    end do

    ! one group per tree level (level 1 is the full communicator)
    nr_groups = depth
    tsqr_groups_size = nr_groups
end function tsqr_groups_size
!> Build the hierarchy of MPI communicators for the TSQR tree reduction.
!>
!> groups(1) aliases the input communicator; each further entry is
!> produced by one MPI_Comm_split per tree level. Ranks that do not
!> participate at a given level pass MPI_UNDEFINED as the color (MPI
!> then hands them back MPI_COMM_NULL for that level).
subroutine tsqr_groups_initialize(comm,treeorder,groups)
    use mpi
    implicit none

    ! input
    integer comm,treeorder

    ! output
    integer, allocatable :: groups(:)

    ! local scalars
    integer total_groups,ilevel,mpierr,mpirank
    integer color,key
    integer level_stride,next_stride

    total_groups = tsqr_groups_size(comm,treeorder)
    allocate(groups(total_groups))

    ! level 1 is the full communicator itself
    groups(1) = comm

    call MPI_Comm_rank(comm,mpirank,mpierr)

    ! stride between ranks that are still active at the current level,
    ! and the stride of the level above it
    level_stride = 1
    next_stride = treeorder

    do ilevel=2,total_groups
        if (mod(mpirank,level_stride) .eq. 0) then
            ! this rank participates at this tree level
            color = mpirank / next_stride
            key = mod(mpirank / level_stride,treeorder)
        else
            color = MPI_UNDEFINED
            key = 0 ! ignored due to MPI_UNDEFINED color
        end if

        ! collective over comm: every rank must take part in the split
        call MPI_Comm_split(comm,color,key,groups(ilevel),mpierr)

        level_stride = next_stride
        next_stride = next_stride * treeorder
    end do
end subroutine tsqr_groups_initialize
!> Release the communicator hierarchy built by tsqr_groups_initialize.
!>
!> Frees every derived communicator and deallocates the array.
!> groups(1) is the caller-owned input communicator and is deliberately
!> NOT freed here.
subroutine tsqr_groups_finalize(groups,treeorder)
    use mpi
    implicit none

    ! input
    integer, allocatable :: groups(:)
    integer treeorder

    ! local scalars
    integer total_groups,ilevel,mpierr

    total_groups = tsqr_groups_size(groups(1),treeorder)

    ! skip entry 1: it aliases the caller's communicator
    do ilevel=2,total_groups
        call MPI_Comm_free(groups(ilevel),mpierr)
    end do

    deallocate(groups)
end subroutine tsqr_groups_finalize
end subroutine reverse_matrix_1dcomm
end module
......@@ -54,11 +54,11 @@ read_real_gen: read_real_gen.o elpa1.o
test_complex_gen: test_complex_gen.o read_test_parameters.o elpa1.o
$(F90) -o $@ test_complex_gen.o read_test_parameters.o elpa1.o $(LIBS)
test_real2: test_real2.o elpa1.o elpa2.o read_test_parameters.o elpa2_kernels.o blockedQR.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(F90) -o $@ test_real2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o blockedQR.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o $(LIBS)
test_real2: test_real2.o elpa1.o elpa2.o read_test_parameters.o elpa2_kernels.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(F90) -o $@ test_real2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o $(LIBS)
test_complex2: test_complex2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o blockedQR.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(F90) -o $@ test_complex2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o blockedQR.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o $(LIBS)
test_complex2: test_complex2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o
$(F90) -o $@ test_complex2.o read_test_parameters.o elpa1.o elpa2.o elpa2_kernels.o elpa_pdgeqrf.o elpa_pdlarfb.o elpa_qrkernels.o tum_utils.o $(LIBS)
test_real.o: test_real.f90 elpa1.o
$(F90) -c $<
......@@ -90,19 +90,16 @@ read_test_parameters.o: read_test_parameters.f90
elpa1.o: ../src/elpa1.f90
$(F90) -c $<
blockedQR.o: ../src/blockedQR.f90
$(F90) -c $<
tum_utils.o: ../src/elpa_qr/tum_utils.f90
$(F90) -c $<
elpa_qrkernels.o: ../src/elpa_qr/elpa_qrkernels.f90
$(F90) -c $<
elpa_pdlarfb.o: ../src/elpa_qr/elpa_pdlarfb.f90 tum_utils.o
elpa_pdlarfb.o: ../src/elpa_qr/elpa_pdlarfb.f90 tum_utils.o elpa_qrkernels.o
$(F90) -c $<
elpa_pdgeqrf.o: ../src/elpa_qr/elpa_pdgeqrf.f90 elpa1.o tum_utils.o
elpa_pdgeqrf.o: ../src/elpa_qr/elpa_pdgeqrf.f90 elpa1.o tum_utils.o elpa_pdlarfb.o elpa_qrkernels.o
$(F90) -c $<
elpa2.o: ../src/elpa2.f90 elpa1.o elpa_pdgeqrf.o
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment