Commit 2e8df093 authored by Pavel Kus's avatar Pavel Kus

compute template split, tridiag file changed to more closely resemble the real case

Conflicts:
	src/elpa1_compute_complex_template.X90
parent 7c19d880
@@ -886,6 +886,9 @@ EXTRA_DIST = \
src/elpa1_trans_ev_real_template.X90 \
src/elpa1_tridiag_real_template.X90 \
src/elpa1_compute_complex_template.X90 \
src/elpa1_tools_complex_template.X90 \
src/elpa1_trans_ev_complex_template.X90 \
src/elpa1_tridiag_complex_template.X90 \
src/elpa2_compute_real_template.X90 \
src/elpa2_compute_complex_template.X90 \
src/precision_macros.h \
@@ -54,839 +54,8 @@
#include "precision_macros_complex.h"
subroutine tridiag_complex_PRECISION(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, d, e, tau)
!-------------------------------------------------------------------------------
! tridiag_complex: Reduces a distributed Hermitian matrix to tridiagonal form
! (like Scalapack Routine PZHETRD)
!
! Parameters
!
! na Order of matrix
!
! a(lda,matrixCols) Distributed matrix which should be reduced.
! Distribution is like in Scalapack.
! Unlike PZHETRD, a(:,:) must be set completely (upper and lower half)
! a(:,:) is overwritten on exit with the Householder vectors
!
! lda Leading dimension of a
! matrixCols local columns of matrix a
!
! nblk blocksize of cyclic distribution, must be the same in both directions!
!
! mpi_comm_rows
! mpi_comm_cols
! MPI communicators for rows/columns
!
! d(na) Diagonal elements (returned), identical on all processors
!
! e(na) Off-Diagonal elements (returned), identical on all processors
!
! tau(na) Factors for the Householder vectors (returned), needed for back transformation
!
!-------------------------------------------------------------------------------
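! A hypothetical call sketch for orientation (the surrounding setup is
! illustrative, not part of this file), assuming the row/column
! communicators have already been split from the global communicator:
!
!   call tridiag_complex_PRECISION(na, a, lda, nblk, matrixCols, &
!                                  mpi_comm_rows, mpi_comm_cols, d, e, tau)
!
! On exit, a contains the Householder vectors, d and e the (real) diagonal
! and off-diagonal elements, and tau the Householder factors.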
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
integer(kind=ik) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
complex(kind=COMPLEX_DATATYPE) :: tau(na)
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: a(lda,*)
#else
complex(kind=COMPLEX_DATATYPE) :: a(lda,matrixCols)
#endif
real(kind=REAL_DATATYPE) :: d(na), e(na)
integer(kind=ik), parameter :: max_stored_rows = 32
#ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=ck8), parameter :: CZERO = (0.0_rk8,0.0_rk8), CONE = (1.0_rk8,0.0_rk8)
#else
complex(kind=ck4), parameter :: CZERO = (0.0_rk4,0.0_rk4), CONE = (1.0_rk4,0.0_rk4)
#endif
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: totalblocks, max_blocks_row, max_blocks_col, max_local_rows, max_local_cols
integer(kind=ik) :: l_cols, l_rows, nstor
integer(kind=ik) :: istep, i, j, lcs, lce, lrs, lre
integer(kind=ik) :: tile_size, l_rows_tile, l_cols_tile
#ifdef WITH_OPENMP
integer(kind=ik) :: my_thread, n_threads, max_threads, n_iter
integer(kind=ik) :: omp_get_thread_num, omp_get_num_threads, omp_get_max_threads
#endif
real(kind=REAL_DATATYPE) :: vnorm2
complex(kind=COMPLEX_DATATYPE) :: vav, xc, aux(2*max_stored_rows), aux1(2), aux2(2), vrl, xf
complex(kind=COMPLEX_DATATYPE), allocatable :: tmp(:), vr(:), vc(:), ur(:), uc(:), vur(:,:), uvc(:,:)
#ifdef WITH_OPENMP
complex(kind=COMPLEX_DATATYPE), allocatable :: ur_p(:,:), uc_p(:,:)
#endif
real(kind=REAL_DATATYPE), allocatable :: tmpr(:)
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("tridiag_complex" // PRECISION_SUFFIX)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
! Matrix is split into tiles; work is done only for tiles on the diagonal or above
tile_size = nblk*least_common_multiple(np_rows,np_cols) ! minimum global tile size
tile_size = ((128*max(np_rows,np_cols)-1)/tile_size+1)*tile_size ! make local tiles at least 128 wide
l_rows_tile = tile_size/np_rows ! local rows of a tile
l_cols_tile = tile_size/np_cols ! local cols of a tile
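! Worked example (illustrative values only): nblk=16, np_rows=4, np_cols=2
! gives least_common_multiple(4,2)=4 and an initial tile_size of 64;
! rounding up to cover 128*max(4,2)=512 yields tile_size=512, hence
! l_rows_tile=128 and l_cols_tile=256.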
totalblocks = (na-1)/nblk + 1
max_blocks_row = (totalblocks-1)/np_rows + 1
max_blocks_col = (totalblocks-1)/np_cols + 1
max_local_rows = max_blocks_row*nblk
max_local_cols = max_blocks_col*nblk
allocate(tmp(MAX(max_local_rows,max_local_cols)), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating tmp "//errorMessage
stop
endif
allocate(vr(max_local_rows+1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating vr "//errorMessage
stop
endif
allocate(ur(max_local_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating ur "//errorMessage
stop
endif
allocate(vc(max_local_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating vc "//errorMessage
stop
endif
allocate(uc(max_local_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating uc "//errorMessage
stop
endif
#ifdef WITH_OPENMP
max_threads = omp_get_max_threads()
allocate(ur_p(max_local_rows,0:max_threads-1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating ur_p "//errorMessage
stop
endif
allocate(uc_p(max_local_cols,0:max_threads-1), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating uc_p "//errorMessage
stop
endif
#endif
tmp = 0
vr = 0
ur = 0
vc = 0
uc = 0
allocate(vur(max_local_rows,2*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating vur "//errorMessage
stop
endif
allocate(uvc(max_local_cols,2*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating uvc "//errorMessage
stop
endif
d(:) = 0
e(:) = 0
tau(:) = 0
nstor = 0
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a
if (my_prow==prow(na, nblk, np_rows) .and. my_pcol==pcol(na, nblk, np_cols)) d(na) = a(l_rows,l_cols)
do istep=na,3,-1
! Calculate the number of local rows and columns of the remaining matrix
! on this processor
l_rows = local_index(istep-1, my_prow, np_rows, nblk, -1)
l_cols = local_index(istep-1, my_pcol, np_cols, nblk, -1)
! Calculate vector for Householder transformation on all procs
! owning column istep
if (my_pcol==pcol(istep, nblk, np_cols)) then
! Get vector to be transformed; distribute last element and norm of
! remaining elements to all procs in current column
vr(1:l_rows) = a(1:l_rows,l_cols+1)
if (nstor>0 .and. l_rows>0) then
aux(1:2*nstor) = conjg(uvc(l_cols+1,1:2*nstor))
call PRECISION_GEMV('N', l_rows, 2*nstor, CONE, vur, ubound(vur,dim=1), &
aux, 1, CONE, vr, 1)
endif
if (my_prow==prow(istep-1, nblk, np_rows)) then
aux1(1) = dot_product(vr(1:l_rows-1),vr(1:l_rows-1))
aux1(2) = vr(l_rows)
else
aux1(1) = dot_product(vr(1:l_rows),vr(1:l_rows))
aux1(2) = 0.
endif
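! aux1 packs the local squared norm and the local subdiagonal element into
! one buffer, so the single allreduce below delivers both vnorm2 and vrl.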
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(aux1, aux2, 2, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
vnorm2 = aux2(1)
vrl = aux2(2)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! aux2 = aux1
vnorm2 = aux1(1)
vrl = aux1(2)
#endif /* WITH_MPI */
! Householder transformation
call hh_transform_complex_PRECISION(vrl, vnorm2, xf, tau(istep))
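! hh_transform_complex_PRECISION determines tau and the scaling factor xf
! such that I - tau * v * v**H annihilates all elements of the column
! except the subdiagonal one; vrl returns the new (real) off-diagonal
! element, stored in e(istep-1) below.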
! Scale vr and store Householder vector for back transformation
vr(1:l_rows) = vr(1:l_rows) * xf
if (my_prow==prow(istep-1, nblk, np_rows)) then
vr(l_rows) = 1.
e(istep-1) = vrl
endif
a(1:l_rows,l_cols+1) = vr(1:l_rows) ! store Householder vector for back transformation
endif
! Broadcast the Householder vector (and tau) along columns
if (my_pcol==pcol(istep, nblk, np_cols)) vr(l_rows+1) = tau(istep)
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call MPI_Bcast(vr, l_rows+1, MPI_COMPLEX_PRECISION, pcol(istep, nblk, np_cols), mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
tau(istep) = vr(l_rows+1)
! Transpose Householder vector vr -> vc
! call elpa_transpose_vectors (vr, 2*ubound(vr,dim=1), mpi_comm_rows, &
! vc, 2*ubound(vc,dim=1), mpi_comm_cols, &
! 1, 2*(istep-1), 1, 2*nblk)
call elpa_transpose_vectors_complex_PRECISION (vr, ubound(vr,dim=1), mpi_comm_rows, &
vc, ubound(vc,dim=1), mpi_comm_cols, &
1, (istep-1), 1, nblk)
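! elpa_transpose_vectors redistributes the row-distributed vr across the
! column communicator, so every process also holds the corresponding
! column-distributed copy vc of the Householder vector.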
! Calculate u = (A + VU**H + UV**H)*v
! For cache efficiency, we use only the upper half of the matrix tiles for this,
! thus the result is partly in uc(:) and partly in ur(:)
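! Since A is Hermitian, only the upper triangle of each tile is read:
! GEMV with 'C' (conjugate transpose) accumulates into uc(:), and for
! off-diagonal tiles (i/=j) a second GEMV with 'N' accounts for the
! implicit lower triangle by accumulating into ur(:).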
uc(1:l_cols) = 0
ur(1:l_rows) = 0
if (l_rows>0 .and. l_cols>0) then
#ifdef WITH_OPENMP
call timer%start("OpenMP parallel" // PRECISION_SUFFIX)
!$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,lcs,lce,j,lrs,lre)
my_thread = omp_get_thread_num()
n_threads = omp_get_num_threads()
n_iter = 0
uc_p(1:l_cols,my_thread) = 0.
ur_p(1:l_rows,my_thread) = 0.
#endif
do i=0,(istep-2)/tile_size
lcs = i*l_cols_tile+1
lce = min(l_cols,(i+1)*l_cols_tile)
if (lce<lcs) cycle
do j=0,i
lrs = j*l_rows_tile+1
lre = min(l_rows,(j+1)*l_rows_tile)
if (lre<lrs) cycle
#ifdef WITH_OPENMP
if (mod(n_iter,n_threads) == my_thread) then
call PRECISION_GEMV('C', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vr(lrs), 1, CONE, uc_p(lcs,my_thread), 1)
if (i/=j) then
call PRECISION_GEMV('N', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vc(lcs), 1, CONE, ur_p(lrs,my_thread), 1)
endif
endif
n_iter = n_iter+1
#else /* WITH_OPENMP */
call PRECISION_GEMV('C', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vr(lrs), 1, CONE, uc(lcs), 1)
if (i/=j) then
call PRECISION_GEMV('N', lre-lrs+1, lce-lcs+1, CONE, a(lrs,lcs), lda, vc(lcs), 1, CONE, ur(lrs), 1)
endif
#endif /* WITH_OPENMP */
enddo
enddo
#ifdef WITH_OPENMP
!$OMP END PARALLEL
call timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
do i=0,max_threads-1
uc(1:l_cols) = uc(1:l_cols) + uc_p(1:l_cols,i)
ur(1:l_rows) = ur(1:l_rows) + ur_p(1:l_rows,i)
enddo
#endif
if (nstor>0) then
call PRECISION_GEMV('C', l_rows, 2*nstor, CONE, vur, ubound(vur,dim=1), vr, 1, CZERO, aux, 1)
call PRECISION_GEMV('N', l_cols, 2*nstor, CONE, uvc, ubound(uvc,dim=1), aux, 1, CONE, uc, 1)
endif
endif
! Sum up all ur(:) parts along rows and add them to the uc(:) parts
! on the processors containing the diagonal
! This is only necessary if ur has been calculated, i.e. if the
! global tile size is smaller than the global remaining matrix
if (tile_size < istep-1) then
call elpa_reduce_add_vectors_complex_PRECISION (ur, ubound(ur,dim=1), mpi_comm_rows, &
uc, ubound(uc,dim=1), mpi_comm_cols, &
(istep-1), 1, nblk)
endif
! Sum up all the uc(:) parts, transpose uc -> ur
if (l_cols>0) then
tmp(1:l_cols) = uc(1:l_cols)
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(tmp, uc, l_cols, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
uc = tmp
#endif /* WITH_MPI */
endif
! call elpa_transpose_vectors (uc, 2*ubound(uc,dim=1), mpi_comm_cols, &
! ur, 2*ubound(ur,dim=1), mpi_comm_rows, &
! 1, 2*(istep-1), 1, 2*nblk)
call elpa_transpose_vectors_complex_PRECISION (uc, ubound(uc,dim=1), mpi_comm_cols, &
ur, ubound(ur,dim=1), mpi_comm_rows, &
1, (istep-1), 1, nblk)
! Calculate v**H * u (identical to v**H * (A + VU**H + UV**H) * v)
xc = 0
if (l_cols>0) xc = dot_product(vc(1:l_cols),uc(1:l_cols))
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(xc, vav, 1, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
vav = xc
#endif /* WITH_MPI */
! Store u and v in the matrices U and V;
! these matrices are kept combined in the single arrays vur and uvc here
do j=1,l_rows
vur(j,2*nstor+1) = conjg(tau(istep))*vr(j)
vur(j,2*nstor+2) = 0.5*conjg(tau(istep))*vav*vr(j) - ur(j)
enddo
do j=1,l_cols
uvc(j,2*nstor+1) = 0.5*conjg(tau(istep))*vav*vc(j) - uc(j)
uvc(j,2*nstor+2) = conjg(tau(istep))*vc(j)
enddo
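! Bookkeeping sketch (derived from the assignments above): with
! w = 0.5*conjg(tau)*vav*v - u, the new column pairs are
!   vur(:,2*nstor+1) = conjg(tau)*v    uvc(:,2*nstor+1) = w
!   vur(:,2*nstor+2) = w               uvc(:,2*nstor+2) = conjg(tau)*v
! so all delayed rank-2 updates collapse into one GEMM, A := A + vur * uvc**H.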
nstor = nstor+1
! If the limit of max_stored_rows is reached, calculate A + VU**H + UV**H
if (nstor==max_stored_rows .or. istep==3) then
do i=0,(istep-2)/tile_size
lcs = i*l_cols_tile+1
lce = min(l_cols,(i+1)*l_cols_tile)
lrs = 1
lre = min(l_rows,(i+1)*l_rows_tile)
if (lce<lcs .or. lre<lrs) cycle
call PRECISION_GEMM('N', 'C', lre-lrs+1, lce-lcs+1, 2*nstor, CONE, &
vur(lrs,1), ubound(vur,dim=1), uvc(lcs,1), ubound(uvc,dim=1), &
CONE, a(lrs,lcs), lda)
enddo
nstor = 0
endif
if (my_prow==prow(istep-1, nblk, np_rows) .and. my_pcol==pcol(istep-1, nblk, np_cols)) then
if (nstor>0) a(l_rows,l_cols) = a(l_rows,l_cols) &
+ dot_product(vur(l_rows,1:2*nstor),uvc(l_cols,1:2*nstor))
d(istep-1) = a(l_rows,l_cols)
endif
enddo ! istep
! Store e(1) and d(1)
if (my_pcol==pcol(2, nblk, np_cols)) then
if (my_prow==prow(1, nblk, np_rows)) then
! We use the last l_cols value from the loop above
vrl = a(1,l_cols)
call hh_transform_complex_PRECISION(vrl, CONST_REAL_0_0, xf, tau(2))
e(1) = vrl
a(1,l_cols) = 1. ! for consistency only
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_bcast(tau(2), 1, MPI_COMPLEX_PRECISION, prow(1, nblk, np_rows), mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_bcast(tau(2), 1, MPI_COMPLEX_PRECISION, pcol(2, nblk, np_cols), mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
if (my_prow==prow(1, nblk, np_rows) .and. my_pcol==pcol(1, nblk, np_cols)) d(1) = a(1,1)
deallocate(tmp, vr, ur, vc, uc, vur, uvc, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when deallocating tmp "//errorMessage
stop
endif
! distribute the arrays d and e to all processors
allocate(tmpr(na), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when allocating tmpr "//errorMessage
stop
endif
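! Each entry of d and e was set on exactly one process (the owner of the
! corresponding diagonal element) and initialized to zero elsewhere, so a
! sum-allreduce first over the row communicator and then over the column
! communicator leaves the complete vectors on every rank.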
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
tmpr = d
call mpi_allreduce(tmpr, d, na, MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
tmpr = d
call mpi_allreduce(tmpr, d, na, MPI_REAL_PRECISION, MPI_SUM, mpi_comm_cols, mpierr)
tmpr = e
call mpi_allreduce(tmpr, e, na, MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
tmpr = e
call mpi_allreduce(tmpr, e, na, MPI_REAL_PRECISION, MPI_SUM, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
deallocate(tmpr, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"tridiag_complex: error when deallocating tmpr "//errorMessage
stop
endif
call timer%stop("tridiag_complex" // PRECISION_SUFFIX)
end subroutine tridiag_complex_PRECISION
subroutine trans_ev_complex_PRECISION(na, nqc, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols)
!-------------------------------------------------------------------------------
! trans_ev_complex: Transforms the eigenvectors of a tridiagonal matrix back
! to the eigenvectors of the original matrix
! (like Scalapack Routine PZUNMTR)
!
! Parameters
!
! na Order of matrix a, number of rows of matrix q
!
! nqc Number of columns of matrix q
!
! a(lda,matrixCols) Matrix containing the Householder vectors (i.e. matrix a after tridiag_complex)
! Distribution is like in Scalapack.
!
! lda Leading dimension of a
!
! tau(na) Factors of the Householder vectors
!
! q On input: Eigenvectors of tridiagonal matrix
! On output: Transformed eigenvectors
! Distribution is like in Scalapack.
!
! ldq Leading dimension of q
!
! nblk blocksize of cyclic distribution, must be the same in both directions!
!
! mpi_comm_rows
! mpi_comm_cols
! MPI communicators for rows/columns
!
!-------------------------------------------------------------------------------
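! A hypothetical call sketch for orientation (illustrative, not part of
! this file), applied after tridiag_complex and the tridiagonal solver:
!
!   call trans_ev_complex_PRECISION(na, nqc, a, lda, tau, q, ldq, nblk, &
!                                   matrixCols, mpi_comm_rows, mpi_comm_cols)
!
! On exit, q holds the eigenvectors of the original matrix.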
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
integer(kind=ik) :: na, nqc, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
complex(kind=COMPLEX_DATATYPE) :: tau(na)
#ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: a(lda,*), q(ldq,*)
#else
complex(kind=COMPLEX_DATATYPE) :: a(lda,matrixCols), q(ldq,matrixCols)
#endif
integer(kind=ik) :: max_stored_rows
#ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=ck8), parameter :: CZERO = (0.0_rk8,0.0_rk8), CONE = (1.0_rk8,0.0_rk8)
#else
complex(kind=ck4), parameter :: CZERO = (0.0_rk4,0.0_rk4), CONE = (1.0_rk4,0.0_rk4)
#endif
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: totalblocks, max_blocks_row, max_blocks_col, max_local_rows, max_local_cols
integer(kind=ik) :: l_cols, l_rows, l_colh, nstor
integer(kind=ik) :: istep, i, n, nc, ic, ics, ice, nb, cur_pcol
complex(kind=COMPLEX_DATATYPE), allocatable :: tmp1(:), tmp2(:), hvb(:), hvm(:,:)
complex(kind=COMPLEX_DATATYPE), allocatable :: tmat(:,:), h1(:), h2(:)
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("trans_ev_complex" // PRECISION_SUFFIX)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
totalblocks = (na-1)/nblk + 1
max_blocks_row = (totalblocks-1)/np_rows + 1
max_blocks_col = ((nqc-1)/nblk)/np_cols + 1 ! Columns of q!
max_local_rows = max_blocks_row*nblk
max_local_cols = max_blocks_col*nblk
max_stored_rows = (63/nblk+1)*nblk
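! Example (illustrative): for nblk=16 this is (63/16+1)*16 = 64, i.e.
! max_stored_rows becomes the smallest multiple of nblk that is >= 64.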
allocate(tmat(max_stored_rows,max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating tmat "//errorMessage
stop
endif
allocate(h1(max_stored_rows*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating h1 "//errorMessage
stop
endif
allocate(h2(max_stored_rows*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating h2 "//errorMessage
stop
endif
allocate(tmp1(max_local_cols*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating tmp1 "//errorMessage
stop
endif
allocate(tmp2(max_local_cols*max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating tmp2 "//errorMessage
stop
endif
allocate(hvb(max_local_rows*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating hvb "//errorMessage
stop
endif
allocate(hvm(max_local_rows,max_stored_rows), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"trans_ev_complex: error when allocating hvm "//errorMessage
stop
endif