Commit ab963a3e authored by Werner Jürgens

RJ: Added two programs to read (and process) matrices from files.

There are two new programs which read matrices from a file, solve the
real (generalized) eigenvalue problem, print the eigenvalues and check
the result (all using elpa1 only).

The author of these programs (plus documentation and Makefile) is
Rainer Johanni (RJ). The files are checked in by Werner Jürgens.
parent 4a7e92be
@@ -141,3 +141,31 @@ other piece of code.
test_real2 Real eigenvalue problem, 2 stage solver
test_complex2 Complex eigenvalue problem, 2 stage solver
- There are two programs which read matrices from a file, solve the
  eigenvalue problem, print the eigenvalues and check the correctness
  of the result (all using elpa1 only):
     read_real      for the real eigenvalue problem
     read_real_gen  for the real generalized eigenvalue problem
                    A*x - B*x*lambda = 0
  read_real has to be called with 1 command line argument (the file
  containing the matrix); read_real_gen has to be called with 2
  command line arguments (the files containing matrices A and B).
  The file format for matrices is as follows (all data is plain ASCII;
  a small writer sketch is given after this format description):
  - 1st line: the matrix size
  - then the upper half of the matrix in column-major
    (i.e. Fortran) order, one number per line:
a(1,1)
a(1,2)
a(2,2)
...
a(1,i)
...
a(i,i)
...
a(1,n)
...
a(n,n)
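
  For illustration, the following is a minimal sketch of a serial Fortran
  program that writes a symmetric matrix in this format; the program name,
  file name and matrix values are made up for the example and are not part
  of the ELPA sources.

  program write_matrix_example
     ! Hypothetical helper: writes a small symmetric matrix in the ASCII
     ! format expected by read_real (matrix size, then the upper triangle
     ! column by column, one number per line).
     implicit none
     integer, parameter :: n = 4
     real*8  :: a(n,n)
     integer :: i, j

     ! Fill a symmetric example matrix (values are arbitrary)
     do j = 1, n
        do i = 1, n
           a(i,j) = 1.d0/dble(i+j-1)
        enddo
     enddo

     open(10, file='matrix.dat', action='WRITE', status='REPLACE')
     write(10,*) n                  ! 1st line: matrix size
     do j = 1, n                    ! upper triangle, column-major
        do i = 1, j
           write(10,*) a(i,j)
        enddo
     enddo
     close(10)
  end program write_matrix_example

  The resulting file could then be passed to read_real, e.g. via mpirun
  (invocation details depend on the local MPI installation):
  mpirun -np 4 ./read_real matrix.dat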
@@ -34,17 +34,23 @@ LIBS = -L/opt/intel/Compiler/11.0/069/mkl/lib/em64t -lmkl_lapack -lmkl -lguide -
#
# ------------------------------------------------------------------------------
all: test_real test_complex test_real_gen test_complex_gen test_real2 test_complex2
all: test_real read_real test_complex test_real_gen read_real_gen test_complex_gen test_real2 test_complex2
test_real: test_real.o elpa1.o
$(F90) -o $@ test_real.o elpa1.o $(LIBS)
read_real: read_real.o elpa1.o
$(F90) -o $@ read_real.o elpa1.o $(LIBS)
test_complex: test_complex.o elpa1.o
$(F90) -o $@ test_complex.o elpa1.o $(LIBS)
test_real_gen: test_real_gen.o elpa1.o
$(F90) -o $@ test_real_gen.o elpa1.o $(LIBS)
read_real_gen: read_real_gen.o elpa1.o
$(F90) -o $@ read_real_gen.o elpa1.o $(LIBS)
test_complex_gen: test_complex_gen.o elpa1.o
$(F90) -o $@ test_complex_gen.o elpa1.o $(LIBS)
@@ -57,12 +63,18 @@ test_complex2: test_complex2.o elpa1.o elpa2.o elpa2_kernels.o
test_real.o: test_real.f90 elpa1.o
$(F90) -c $<
read_real.o: read_real.f90 elpa1.o
$(F90) -c $<
test_complex.o: test_complex.f90 elpa1.o
$(F90) -c $<
test_real_gen.o: test_real_gen.f90 elpa1.o
$(F90) -c $<
read_real_gen.o: read_real_gen.f90 elpa1.o
$(F90) -c $<
test_complex_gen.o: test_complex_gen.f90 elpa1.o
$(F90) -c $<
@@ -82,4 +94,4 @@ elpa2_kernels.o: ../src/elpa2_kernels.f90
$(F90OPT) -c ../src/elpa2_kernels.f90
clean:
rm -f *.o *.mod test_real test_complex test_real_gen test_complex_gen test_real2 test_complex2
rm -f *.o *.mod test_real test_complex test_real_gen test_complex_gen test_real2 test_complex2 read_real read_real_gen
program read_real
!-------------------------------------------------------------------------------
! Standard eigenvalue problem - REAL version
!
! This program demonstrates the use of the ELPA module
! together with standard scalapack routines
!-------------------------------------------------------------------------------
use ELPA1
implicit none
include 'mpif.h'
!-------------------------------------------------------------------------------
! Please set system size parameters below!
! nblk: Blocking factor in block cyclic distribution
!-------------------------------------------------------------------------------
integer, parameter :: nblk = 16
!-------------------------------------------------------------------------------
! Local Variables
integer na, nev
integer np_rows, np_cols, na_rows, na_cols
integer myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
integer i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol, lenarg
integer, external :: numroc
real*8 err, errmax
real*8, allocatable :: a(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:), ev(:)
character*256 filename
!-------------------------------------------------------------------------------
! MPI Initialization
call mpi_init(mpierr)
call mpi_comm_rank(mpi_comm_world,myid,mpierr)
call mpi_comm_size(mpi_comm_world,nprocs,mpierr)
!-------------------------------------------------------------------------------
! Get the name of the input file containing the matrix and open input file
! Please note:
! get_command_argument is a FORTRAN 2003 intrinsic which may not be implemented
! for every Fortran compiler!!!
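! (Note added for illustration: if get_command_argument is not available,
!  the non-standard but widely supported extensions iargc()/getarg() could
!  serve as a fallback; they are not used here.)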
if(myid==0) then
call get_command_argument(1,filename,lenarg,info)
if(info/=0) then
print *,'Usage: read_real matrix_file'
call mpi_abort(mpi_comm_world,0,mpierr)
endif
open(10,file=filename,action='READ',status='OLD',iostat=info)
if(info/=0) then
print *,'Error: Unable to open ',trim(filename)
call mpi_abort(mpi_comm_world,0,mpierr)
endif
endif
call mpi_barrier(mpi_comm_world, mpierr) ! Just for safety
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
! divisors of nprocs with a number next to the square root of nprocs
! and decrement it until a divisor is found.
do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
if(mod(nprocs,np_cols) == 0 ) exit
enddo
! at the end of the above loop, nprocs is always divisible by np_cols
np_rows = nprocs/np_cols
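! Illustrative examples (assumed process counts): nprocs=12 gives
! np_cols=3, np_rows=4; for a prime count such as nprocs=7 the loop
! finds no divisor >= 2, so np_cols=1 and np_rows=7 (a 7x1 grid).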
if(myid==0) then
print *
print '(a)','Standard eigenvalue problem - REAL version'
print *
print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
print *
endif
!-------------------------------------------------------------------------------
! Set up BLACS context and MPI communicators
!
! The BLACS context is only necessary for using Scalapack.
!
! For ELPA, the MPI communicators along rows/cols are sufficient,
! and the grid setup may be done in an arbitrary way as long as it is
! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
! process has a unique (my_prow,my_pcol) pair).
my_blacs_ctxt = mpi_comm_world
call BLACS_Gridinit( my_blacs_ctxt, 'C', np_rows, np_cols )
call BLACS_Gridinfo( my_blacs_ctxt, nprow, npcol, my_prow, my_pcol )
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes, these are set in get_elpa_row_col_comms.
call get_elpa_row_col_comms(mpi_comm_world, my_prow, my_pcol, &
mpi_comm_rows, mpi_comm_cols)
! Read matrix size
if(myid==0) read(10,*) na
call mpi_bcast(na, 1, mpi_integer, 0, mpi_comm_world, mpierr)
! Quick check for plausibility
if(na<=0 .or. na>10000000) then
if(myid==0) print *,'Illegal value for matrix size: ',na
call mpi_finalize(mpierr)
stop
endif
if(myid==0) print *,'Matrix size: ',na
! Determine the necessary size of the distributed matrices,
! we use the Scalapack tools routine NUMROC for that.
na_rows = numroc(na, nblk, my_prow, 0, np_rows)
na_cols = numroc(na, nblk, my_pcol, 0, np_cols)
! Set up a scalapack descriptor for the checks below.
! For ELPA the following restrictions hold:
! - block sizes in both directions must be identical (args 4+5)
! - first row and column of the distributed matrix must be on row/col 0/0 (args 6+7)
call descinit( sc_desc, na, na, nblk, nblk, 0, 0, my_blacs_ctxt, na_rows, info )
!-------------------------------------------------------------------------------
! Allocate matrices
allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols))
allocate(ev(na))
!-------------------------------------------------------------------------------
! Read matrix
call read_matrix(10, na, a, ubound(a,1), nblk, my_prow, my_pcol, np_rows, np_cols)
if(myid==0) close(10)
nev = na ! all eigenvalues
! Save original matrix A for later accuracy checks
as = a
!-------------------------------------------------------------------------------
! Calculate eigenvalues/eigenvectors
call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
call solve_evp_real(na, nev, a, na_rows, ev, z, na_rows, nblk, &
mpi_comm_rows, mpi_comm_cols)
if(myid == 0) print *,'Time tridiag_real :',time_evp_fwd
if(myid == 0) print *,'Time solve_tridi :',time_evp_solve
if(myid == 0) print *,'Time trans_ev_real:',time_evp_back
if(myid == 0) then
do i=1,nev
print '(i6,g25.15)',i,ev(i)
enddo
endif
!-------------------------------------------------------------------------------
! Test correctness of result (using plain scalapack routines)
deallocate(a)
allocate(tmp1(na_rows,na_cols))
! 1. Residual (maximum of || A*Zi - Zi*EVi ||)
! tmp1 = A * Z
call pdgemm('N','N',na,nev,na,1.d0,as,1,1,sc_desc, &
z,1,1,sc_desc,0.d0,tmp1,1,1,sc_desc)
deallocate(as)
allocate(tmp2(na_rows,na_cols))
! tmp2 = Zi*EVi
tmp2(:,:) = z(:,:)
do i=1,nev
call pdscal(na,ev(i),tmp2,1,i,sc_desc,1)
enddo
! tmp1 = A*Zi - Zi*EVi
tmp1(:,:) = tmp1(:,:) - tmp2(:,:)
! Get maximum norm of columns of tmp1
errmax = 0
do i=1,nev
err = 0
call pdnrm2(na,err,tmp1,1,i,sc_desc,1)
errmax = max(errmax, err)
enddo
! Get maximum error norm over all processors
err = errmax
call mpi_allreduce(err,errmax,1,MPI_REAL8,MPI_MAX,MPI_COMM_WORLD,mpierr)
if(myid==0) print *
if(myid==0) print *,'Error Residual :',errmax
! 2. Eigenvector orthogonality
! tmp1 = Z**T * Z
tmp1 = 0
call pdgemm('T','N',nev,nev,na,1.d0,z,1,1,sc_desc, &
z,1,1,sc_desc,0.d0,tmp1,1,1,sc_desc)
! Initialize tmp2 to unit matrix
tmp2 = 0
call pdlaset('A',nev,nev,0.d0,1.d0,tmp2,1,1,sc_desc)
! tmp1 = Z**T * Z - Unit Matrix
tmp1(:,:) = tmp1(:,:) - tmp2(:,:)
! Get maximum error (max abs value in tmp1)
err = maxval(abs(tmp1))
call mpi_allreduce(err,errmax,1,MPI_REAL8,MPI_MAX,MPI_COMM_WORLD,mpierr)
if(myid==0) print *,'Error Orthogonality:',errmax
deallocate(z)
deallocate(tmp1)
deallocate(tmp2)
deallocate(ev)
call mpi_finalize(mpierr)
end
!-------------------------------------------------------------------------------
subroutine read_matrix(iunit, na, a, lda, nblk, my_prow, my_pcol, np_rows, np_cols)
implicit none
include 'mpif.h'
integer, intent(in) :: iunit, na, lda, nblk, my_prow, my_pcol, np_rows, np_cols
real*8, intent(out) :: a(lda, *)
integer i, j, lr, lc, myid, mpierr
integer, allocatable :: l_row(:), l_col(:)
real*8, allocatable :: col(:)
! allocate and set index arrays
allocate(l_row(na))
allocate(l_col(na))
! Mapping of global rows/cols to local
l_row(:) = 0
l_col(:) = 0
lr = 0 ! local row counter
lc = 0 ! local column counter
do i = 1, na
if( MOD((i-1)/nblk,np_rows) == my_prow) then
! row i is on local processor
lr = lr+1
l_row(i) = lr
endif
if( MOD((i-1)/nblk,np_cols) == my_pcol) then
! column i is on local processor
lc = lc+1
l_col(i) = lc
endif
enddo
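! Illustrative example (assumed values): with nblk=2 and np_rows=2,
! global rows 1,2,5,6,... live on process row 0 with local indices
! 1,2,3,4,..., while global rows 3,4,7,8,... live on process row 1;
! the column mapping is analogous with np_cols.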
call mpi_comm_rank(mpi_comm_world,myid,mpierr)
allocate(col(na))
do i=1,na
if(myid==0) read(iunit,*) col(1:i)
call mpi_bcast(col,i,MPI_REAL8,0,MPI_COMM_WORLD,mpierr)
if(l_col(i) > 0) then
do j=1,i
if(l_row(j)>0) a(l_row(j),l_col(i)) = col(j)
enddo
endif
if(l_row(i) > 0) then
do j=1,i-1
if(l_col(j)>0) a(l_row(i),l_col(j)) = col(j)
enddo
endif
enddo
deallocate(l_row, l_col, col)
end subroutine read_matrix
program read_real_gen
!-------------------------------------------------------------------------------
! Generalized eigenvalue problem - REAL version
!
! This program demonstrates the use of the ELPA module
! together with standard scalapack routines
!-------------------------------------------------------------------------------
use ELPA1
implicit none
include 'mpif.h'
!-------------------------------------------------------------------------------
! Please set system size parameters below!
! nblk: Blocking factor in block cyclic distribution
!-------------------------------------------------------------------------------
integer, parameter :: nblk = 16
!-------------------------------------------------------------------------------
! Local Variables
integer na, nev
integer np_rows, np_cols, na_rows, na_cols
integer myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
integer i, n_row, n_col, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol, lenarg
integer, external :: numroc, indxl2g
real*8 err, errmax
real*8, allocatable :: a(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:), ev(:)
real*8, allocatable :: b(:,:), bs(:,:)
character*256 filename
real*8 ttt0, ttt1
!-------------------------------------------------------------------------------
! MPI Initialization
call mpi_init(mpierr)
call mpi_comm_rank(mpi_comm_world,myid,mpierr)
call mpi_comm_size(mpi_comm_world,nprocs,mpierr)
!-------------------------------------------------------------------------------
! Get the name of the input files and open input files
! Please note:
! get_command_argument is a FORTRAN 2003 intrinsic which may not be implemented
! for every Fortran compiler!!!
if(myid==0) then
call get_command_argument(1,filename,lenarg,info)
if(info/=0) then
print *,'Usage: read_real_gen matrix_file_1 matrix_file_2'
call mpi_abort(mpi_comm_world,0,mpierr)
endif
open(10,file=filename,action='READ',status='OLD',iostat=info)
if(info/=0) then
print *,'Error: Unable to open ',trim(filename)
call mpi_abort(mpi_comm_world,0,mpierr)
endif
call get_command_argument(2,filename,lenarg,info)
if(info/=0) then
print *,'Usage: read_real_gen matrix_file_1 matrix_file_2'
call mpi_abort(mpi_comm_world,0,mpierr)
endif
open(20,file=filename,action='READ',status='OLD',iostat=info)
if(info/=0) then
print *,'Error: Unable to open ',trim(filename)
call mpi_abort(mpi_comm_world,0,mpierr)
endif
endif
call mpi_barrier(mpi_comm_world, mpierr) ! Just for safety
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
! divisors of nprocs with a number next to the square root of nprocs
! and decrement it until a divisor is found.
do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
if(mod(nprocs,np_cols) == 0 ) exit
enddo
! at the end of the above loop, nprocs is always divisible by np_cols
np_rows = nprocs/np_cols
if(myid==0) then
print *
print '(a)','Generalized eigenvalue problem - REAL version'
print *
print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
print *
endif
!-------------------------------------------------------------------------------
! Set up BLACS context and MPI communicators
!
! The BLACS context is only necessary for using Scalapack.
!
! For ELPA, the MPI communicators along rows/cols are sufficient,
! and the grid setup may be done in an arbitrary way as long as it is
! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
! process has a unique (my_prow,my_pcol) pair).
my_blacs_ctxt = mpi_comm_world
call BLACS_Gridinit( my_blacs_ctxt, 'C', np_rows, np_cols )
call BLACS_Gridinfo( my_blacs_ctxt, nprow, npcol, my_prow, my_pcol )
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes, these are set in get_elpa_row_col_comms.
call get_elpa_row_col_comms(mpi_comm_world, my_prow, my_pcol, &
mpi_comm_rows, mpi_comm_cols)
! Read matrix size
if(myid==0) then
read(10,*) na
read(20,*) info
if(na/=info) then
print *,'Error: Matrix sizes in input files differ: ',na, info
call mpi_abort(mpi_comm_world,0,mpierr)
endif
endif
call mpi_bcast(na, 1, mpi_integer, 0, mpi_comm_world, mpierr)
! Quick check for plausibility
if(na<=0 .or. na>10000000) then
if(myid==0) print *,'Illegal value for matrix size: ',na
call mpi_finalize(mpierr)
stop
endif
if(myid==0) print *,'Matrix size: ',na
! Determine the necessary size of the distributed matrices,
! we use the Scalapack tools routine NUMROC for that.
na_rows = numroc(na, nblk, my_prow, 0, np_rows)
na_cols = numroc(na, nblk, my_pcol, 0, np_cols)
! Set up a scalapack descriptor for the checks below.
! For ELPA the following restrictions hold:
! - block sizes in both directions must be identical (args 4+5)
! - first row and column of the distributed matrix must be on row/col 0/0 (args 6+7)
call descinit( sc_desc, na, na, nblk, nblk, 0, 0, my_blacs_ctxt, na_rows, info )
!-------------------------------------------------------------------------------
! Allocate matrices and set up test matrices for the eigenvalue problem
allocate(a (na_rows,na_cols))
allocate(z (na_rows,na_cols))
allocate(as(na_rows,na_cols))
allocate(b (na_rows,na_cols))
allocate(bs(na_rows,na_cols))
allocate(tmp1(na_rows,na_cols))
allocate(tmp2(na_rows,na_cols))
allocate(ev(na))
!-------------------------------------------------------------------------------
! Read matrices
call read_matrix(10, na, a, ubound(a,1), nblk, my_prow, my_pcol, np_rows, np_cols)
call read_matrix(20, na, b, ubound(b,1), nblk, my_prow, my_pcol, np_rows, np_cols)
if(myid==0) close(10)
if(myid==0) close(20)
nev = na ! all eigenvalues
! Save original matrices A and B for later accuracy checks
as = a
bs = b
!-------------------------------------------------------------------------------
! Solve generalized problem
!
! 1. Calculate Cholesky factorization of Matrix B = U**T * U
! and invert triangular matrix U
!
! Please note: cholesky_real/invert_trm_real are not trimmed for speed.
! The only reason having them is that the Scalapack counterpart
! PDPOTRF very often fails on higher processor numbers for unknown reasons!
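! Derivation (added for reference): with the Cholesky factorization
! B = U**T * U, the generalized problem A*x = lambda*B*x turns, after
! substituting y = U*x, into the standard problem
!    (U**-T * A * U**-1) * y = lambda * y
! whose eigenvalues are those of the original problem; the original
! eigenvectors are recovered as x = U**-1 * y.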
call cholesky_real(na, b, na_rows, nblk, mpi_comm_rows, mpi_comm_cols)
call invert_trm_real(na, b, na_rows, nblk, mpi_comm_rows, mpi_comm_cols)
ttt0 = MPI_Wtime()
! 2. Calculate U**-T * A * U**-1
! 2a. tmp1 = U**-T * A
call mult_at_b_real('U', 'L', na, na, b, na_rows, a, na_rows, &
nblk, mpi_comm_rows, mpi_comm_cols, tmp1, na_rows)
! 2b. tmp2 = tmp1**T
call pdtran(na,na,1.d0,tmp1,1,1,sc_desc,0.d0,tmp2,1,1,sc_desc)
! 2c. A = U**-T * tmp2 ( = U**-T * Aorig * U**-1 )
call mult_at_b_real('U', 'U', na, na, b, na_rows, tmp2, na_rows, &
nblk, mpi_comm_rows, mpi_comm_cols, a, na_rows)
ttt1 = MPI_Wtime()
if(myid == 0) print *,'Time U**-T*A*U**-1:',ttt1-ttt0
! A is only set in the upper half, solve_evp_real needs a full matrix
! Set lower half from upper half
call pdtran(na,na,1.d0,a,1,1,sc_desc,0.d0,tmp1,1,1,sc_desc)
do i=1,na_cols
! Get global column corresponding to i and number of local rows up to
! and including the diagonal, these are unchanged in A
n_col = indxl2g(i, nblk, my_pcol, 0, np_cols)
n_row = numroc (n_col, nblk, my_prow, 0, np_rows)
a(n_row+1:na_rows,i) = tmp1(n_row+1:na_rows,i)
enddo
! 3. Calculate eigenvalues/eigenvectors of U**-T * A * U**-1
! Eigenvectors go to tmp1
call solve_evp_real(na, nev, a, na_rows, ev, tmp1, na_rows, nblk, &
mpi_comm_rows, mpi_comm_cols)
if(myid == 0) print *,'Time tridiag_real :',time_evp_fwd