Commit fffcad08 authored by Andreas Marek

Merge branch 'master_pre_stage' of https://gitlab.mpcdf.mpg.de/elpa/elpa into master_pre_stage

parents a490a3f4 ec8bc696
...@@ -111,42 +111,19 @@ ...@@ -111,42 +111,19 @@
use precision use precision
use elpa_abstract_impl use elpa_abstract_impl
implicit none implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: na, lda, nblk, nbw, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols integer(kind=ik) :: na, lda, nblk, nbw, matrixCols, numBlocks, mpi_comm_rows, mpi_comm_cols
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
real(kind=REAL_DATATYPE) :: a(lda,*), tmat(nbw,nbw,*)
#else
real(kind=REAL_DATATYPE) :: a(lda,matrixCols), tmat(nbw,nbw,numBlocks)
#endif
#endif
#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: a(lda,*), tmat(nbw,nbw,*) MATH_DATATYPE(kind=rck) :: a(lda,*), tmat(nbw,nbw,*)
#else #else
complex(kind=COMPLEX_DATATYPE) :: a(lda,matrixCols), tmat(nbw,nbw,numBlocks) MATH_DATATYPE(kind=rck) :: a(lda,matrixCols), tmat(nbw,nbw,numBlocks)
#endif #endif
#endif /* COMPLEXCASE */
#if REALCASE == 1 #if REALCASE == 1
#ifdef DOUBLE_PRECISION_REAL real(kind=rk) :: eps
real(kind=REAL_DATATYPE), parameter :: ZERO = 0.0_rk8, ONE = 1.0_rk8
#else
real(kind=REAL_DATATYPE), parameter :: ZERO = 0.0_rk4, ONE = 1.0_rk4
#endif
#endif
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
complex(kind=COMPLEX_DATATYPE), parameter :: ZERO = (0.0_rk8, 0.0_rk8), ONE = (1.0_rk8, 0.0_rk8)
#else
complex(kind=COMPLEX_DATATYPE), parameter :: ZERO = (0.0_rk4, 0.0_rk4), ONE = (1.0_rk4, 0.0_rk4)
#endif #endif
#endif /* COMPLEXCASE == 1 */
logical, intent(in) :: useGPU logical, intent(in) :: useGPU
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
...@@ -161,32 +138,19 @@ ...@@ -161,32 +138,19 @@
integer(kind=ik) :: istep, ncol, lch, lcx, nlc integer(kind=ik) :: istep, ncol, lch, lcx, nlc
integer(kind=ik) :: tile_size, l_rows_tile, l_cols_tile integer(kind=ik) :: tile_size, l_rows_tile, l_cols_tile
real(kind=REAL_DATATYPE) :: vnorm2 real(kind=rk) :: vnorm2
#if REALCASE == 1 MATH_DATATYPE(kind=rck) :: xf, aux1(nbw), aux2(nbw), vrl, tau, vav(nbw,nbw)
real(kind=REAL_DATATYPE) :: xf, aux1(nbw), aux2(nbw), vrl, tau, vav(nbw,nbw)
#endif
#if COMPLEXCASE == 1
complex(kind=COMPLEX_DATATYPE) :: xf, aux1(nbw), aux2(nbw), vrl, tau, vav(nbw,nbw)
#endif
#if COMPLEXCASE == 1
! complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCUDA(:,:), vmrCUDA(:,:), umcCUDA(:,:) ! note the different dimension in real case ! complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCUDA(:,:), vmrCUDA(:,:), umcCUDA(:,:) ! note the different dimension in real case
complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCUDA(:), vmrCUDA(:), umcCUDA(:) MATH_DATATYPE(kind=rck), allocatable :: tmpCUDA(:), vmrCUDA(:), umcCUDA(:)
complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCPU(:,:), vmrCPU(:,:), umcCPU(:,:) MATH_DATATYPE(kind=rck), allocatable :: tmpCPU(:,:), vmrCPU(:,:), umcCPU(:,:)
complex(kind=COMPLEX_DATATYPE), allocatable :: vr(:) MATH_DATATYPE(kind=rck), allocatable :: vr(:)
#endif
#if REALCASE == 1
real(kind=REAL_DATATYPE), allocatable :: tmpCUDA(:), vmrCUDA(:), umcCUDA(:)
real(kind=REAL_DATATYPE), allocatable :: tmpCPU(:,:), vmrCPU(:,:), umcCPU(:,:)
real(kind=REAL_DATATYPE), allocatable :: vr(:)
#endif
#if REALCASE == 1 #if REALCASE == 1
! needed for blocked QR decomposition ! needed for blocked QR decomposition
integer(kind=ik) :: PQRPARAM(11), work_size integer(kind=ik) :: PQRPARAM(11), work_size
real(kind=REAL_DATATYPE) :: dwork_size(1) real(kind=rk) :: dwork_size(1)
real(kind=REAL_DATATYPE), allocatable :: work_blocked(:), tauvector(:), blockheuristic(:) real(kind=rk), allocatable :: work_blocked(:), tauvector(:), blockheuristic(:)
#endif #endif
! a_dev is passed from bandred_real to trans_ev_band ! a_dev is passed from bandred_real to trans_ev_band
integer(kind=C_intptr_T) :: a_dev, vmr_dev, umc_dev, tmat_dev, vav_dev integer(kind=C_intptr_T) :: a_dev, vmr_dev, umc_dev, tmat_dev, vav_dev
......
...@@ -103,17 +103,17 @@ ...@@ -103,17 +103,17 @@
use elpa2_workload use elpa2_workload
use precision use precision
implicit none implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: na, nb, nbCol, nb2, nb2Col, communicator integer(kind=ik), intent(in) :: na, nb, nbCol, nb2, nb2Col, communicator
real(kind=REAL_DATATYPE), intent(inout) :: ab(2*nb,nbCol) ! removed assumed size real(kind=rk), intent(inout) :: ab(2*nb,nbCol) ! removed assumed size
real(kind=REAL_DATATYPE), intent(inout) :: ab2(2*nb2,nb2Col) ! removed assumed size real(kind=rk), intent(inout) :: ab2(2*nb2,nb2Col) ! removed assumed size
real(kind=REAL_DATATYPE), intent(out) :: d(na), e(na) ! set only on PE 0 real(kind=rk), intent(out) :: d(na), e(na) ! set only on PE 0
real(kind=REAL_DATATYPE) :: hv(nb,nb2), w(nb,nb2), w_new(nb,nb2), tau(nb2), hv_new(nb,nb2), & real(kind=rk) :: hv(nb,nb2), w(nb,nb2), w_new(nb,nb2), tau(nb2), hv_new(nb,nb2), &
tau_new(nb2), ab_s(1+nb,nb2), ab_r(1+nb,nb2), ab_s2(2*nb2,nb2), hv_s(nb,nb2) tau_new(nb2), ab_s(1+nb,nb2), ab_r(1+nb,nb2), ab_s2(2*nb2,nb2), hv_s(nb,nb2)
real(kind=REAL_DATATYPE) :: work(nb*nb2), work2(nb2*nb2) real(kind=rk) :: work(nb*nb2), work2(nb2*nb2)
integer(kind=ik) :: lwork, info integer(kind=ik) :: lwork, info
integer(kind=ik) :: istep, i, n, dest integer(kind=ik) :: istep, i, n, dest
...@@ -223,8 +223,8 @@ ...@@ -223,8 +223,8 @@
if (my_pe==0) then if (my_pe==0) then
n = MIN(na-na_s-nb2+1,nb) ! number of rows to be reduced n = MIN(na-na_s-nb2+1,nb) ! number of rows to be reduced
hv(:,:) = CONST_0_0 hv(:,:) = 0.0_rk
tau(:) = CONST_0_0 tau(:) = 0.0_rk
! The last step (istep=na-1) is only needed for sending the last HH vectors. ! The last step (istep=na-1) is only needed for sending the last HH vectors.
! We don't want the sign of the last element flipped (analogous to the other sweeps) ! We don't want the sign of the last element flipped (analogous to the other sweeps)
...@@ -236,9 +236,9 @@ ...@@ -236,9 +236,9 @@
call obj%timer%stop("blas") call obj%timer%stop("blas")
do i=1,nb2 do i=1,nb2
hv(i,i) = CONST_1_0 hv(i,i) = 1.0_rk
hv(i+1:n,i) = ab(1+nb2+1:1+nb2+n-i,na_s-n_off+i-1) hv(i+1:n,i) = ab(1+nb2+1:1+nb2+n-i,na_s-n_off+i-1)
ab(1+nb2+1:2*nb,na_s-n_off+i-1) = CONST_0_0 ab(1+nb2+1:2*nb,na_s-n_off+i-1) = 0.0_rk
enddo enddo
endif endif
...@@ -247,10 +247,10 @@ ...@@ -247,10 +247,10 @@
d(istep) = ab(1,na_s-n_off) d(istep) = ab(1,na_s-n_off)
e(istep) = ab(2,na_s-n_off) e(istep) = ab(2,na_s-n_off)
if (istep == na) then if (istep == na) then
e(na) = CONST_0_0 e(na) = 0.0_rk
endif endif
else else
ab_s2 = CONST_0_0 ab_s2 = 0.0_rk
ab_s2(:,:) = ab(1:nb2+1,na_s-n_off:na_s-n_off+nb2-1) ab_s2(:,:) = ab(1:nb2+1,na_s-n_off:na_s-n_off+nb2-1)
if (block_limits2(dest+1)<istep) then if (block_limits2(dest+1)<istep) then
dest = dest+1 dest = dest+1
...@@ -285,7 +285,7 @@ ...@@ -285,7 +285,7 @@
do i=1,nb2 do i=1,nb2
tau(i) = hv(i,i) tau(i) = hv(i,i)
hv(i,i) = CONST_1_0 hv(i,i) = 1.0_rk
enddo enddo
endif endif
endif endif
...@@ -293,7 +293,7 @@ ...@@ -293,7 +293,7 @@
na_s = na_s+nb2 na_s = na_s+nb2
if (na_s-n_off > nb) then if (na_s-n_off > nb) then
ab(:,1:nblocks*nb) = ab(:,nb+1:(nblocks+1)*nb) ab(:,1:nblocks*nb) = ab(:,nb+1:(nblocks+1)*nb)
ab(:,nblocks*nb+1:(nblocks+1)*nb) = CONST_0_0 ab(:,nblocks*nb+1:(nblocks+1)*nb) = 0.0_rk
n_off = n_off + nb n_off = n_off + nb
endif endif
...@@ -324,8 +324,8 @@ ...@@ -324,8 +324,8 @@
ab(1:nb+1,ne+i-1) = ab_r(:,i) ab(1:nb+1,ne+i-1) = ab_r(:,i)
enddo enddo
endif endif
hv_new(:,:) = CONST_0_0 ! Needed, last rows must be 0 for nr < nb hv_new(:,:) = 0.0_rk ! Needed, last rows must be 0 for nr < nb
tau_new(:) = CONST_0_0 tau_new(:) = 0.0_rk
if (nr>0) then if (nr>0) then
call wy_right_& call wy_right_&
...@@ -335,9 +335,9 @@ ...@@ -335,9 +335,9 @@
call PRECISION_GEQRF(nr, nb2, ab(nb+1,ns), 2*nb-1, tau_new, work, lwork, info) call PRECISION_GEQRF(nr, nb2, ab(nb+1,ns), 2*nb-1, tau_new, work, lwork, info)
call obj%timer%stop("blas") call obj%timer%stop("blas")
do i=1,nb2 do i=1,nb2
hv_new(i,i) = CONST_1_0 hv_new(i,i) = 1.0_rk
hv_new(i+1:,i) = ab(nb+2:2*nb-i+1,ns+i-1) hv_new(i+1:,i) = ab(nb+2:2*nb-i+1,ns+i-1)
ab(nb+2:,ns+i-1) = CONST_0_0 ab(nb+2:,ns+i-1) = 0.0_rk
enddo enddo
!send hh-Vector !send hh-Vector
...@@ -458,16 +458,17 @@ ...@@ -458,16 +458,17 @@
use elpa_abstract_impl use elpa_abstract_impl
use precision use precision
implicit none implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: n !length of householder-vectors integer(kind=ik), intent(in) :: n !length of householder-vectors
integer(kind=ik), intent(in) :: nb !number of householder-vectors integer(kind=ik), intent(in) :: nb !number of householder-vectors
integer(kind=ik), intent(in) :: lda !leading dimension of Y and W integer(kind=ik), intent(in) :: lda !leading dimension of Y and W
real(kind=REAL_DATATYPE), intent(in) :: Y(lda,nb) !matrix containing nb householder-vectors of length b real(kind=rk), intent(in) :: Y(lda,nb) !matrix containing nb householder-vectors of length b
real(kind=REAL_DATATYPE), intent(in) :: tau(nb) !tau values real(kind=rk), intent(in) :: tau(nb) !tau values
real(kind=REAL_DATATYPE), intent(out) :: W(lda,nb) !output matrix W real(kind=rk), intent(out) :: W(lda,nb) !output matrix W
real(kind=REAL_DATATYPE), intent(in) :: mem(nb) !memory for a temporary matrix of size nb real(kind=rk), intent(in) :: mem(nb) !memory for a temporary matrix of size nb
integer(kind=ik) :: i integer(kind=ik) :: i
call obj%timer%start("wy_gen" // PRECISION_SUFFIX) call obj%timer%start("wy_gen" // PRECISION_SUFFIX)
...@@ -475,8 +476,8 @@ ...@@ -475,8 +476,8 @@
do i=2,nb do i=2,nb
W(1:n,i) = tau(i)*Y(1:n,i) W(1:n,i) = tau(i)*Y(1:n,i)
call obj%timer%start("blas") call obj%timer%start("blas")
call PRECISION_GEMV('T', n, i-1, CONST_1_0, Y, lda, W(1,i), 1, CONST_0_0, mem,1) call PRECISION_GEMV('T', n, i-1, 1.0_rk, Y, lda, W(1,i), 1, 0.0_rk, mem,1)
call PRECISION_GEMV('N', n, i-1, -CONST_1_0, W, lda, mem, 1, CONST_1_0, W(1,i),1) call PRECISION_GEMV('N', n, i-1, -1.0_rk, W, lda, mem, 1, 1.0_rk, W(1,i),1)
call obj%timer%stop("blas") call obj%timer%stop("blas")
enddo enddo
call obj%timer%stop("wy_gen" // PRECISION_SUFFIX) call obj%timer%stop("wy_gen" // PRECISION_SUFFIX)
...@@ -489,21 +490,22 @@ ...@@ -489,21 +490,22 @@
use precision use precision
use elpa_abstract_impl use elpa_abstract_impl
implicit none implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: n !width of the matrix A integer(kind=ik), intent(in) :: n !width of the matrix A
integer(kind=ik), intent(in) :: m !length of matrix W and Y integer(kind=ik), intent(in) :: m !length of matrix W and Y
integer(kind=ik), intent(in) :: nb !width of matrix W and Y integer(kind=ik), intent(in) :: nb !width of matrix W and Y
integer(kind=ik), intent(in) :: lda !leading dimension of A integer(kind=ik), intent(in) :: lda !leading dimension of A
integer(kind=ik), intent(in) :: lda2 !leading dimension of W and Y integer(kind=ik), intent(in) :: lda2 !leading dimension of W and Y
real(kind=REAL_DATATYPE), intent(inout) :: A(lda,*) !matrix to be transformed ! remove assumed size real(kind=rk), intent(inout) :: A(lda,*) !matrix to be transformed ! remove assumed size
real(kind=REAL_DATATYPE), intent(in) :: W(m,nb) !blocked transformation matrix W real(kind=rk), intent(in) :: W(m,nb) !blocked transformation matrix W
real(kind=REAL_DATATYPE), intent(in) :: Y(m,nb) !blocked transformation matrix Y real(kind=rk), intent(in) :: Y(m,nb) !blocked transformation matrix Y
real(kind=REAL_DATATYPE), intent(inout) :: mem(n,nb) !memory for a temporary matrix of size n x nb real(kind=rk), intent(inout) :: mem(n,nb) !memory for a temporary matrix of size n x nb
call obj%timer%start("wy_left" // PRECISION_SUFFIX) call obj%timer%start("wy_left" // PRECISION_SUFFIX)
call obj%timer%start("blas") call obj%timer%start("blas")
call PRECISION_GEMM('T', 'N', nb, n, m, CONST_1_0, W, lda2, A, lda, CONST_0_0, mem, nb) call PRECISION_GEMM('T', 'N', nb, n, m, 1.0_rk, W, lda2, A, lda, 0.0_rk, mem, nb)
call PRECISION_GEMM('N', 'N', m, n, nb, -CONST_1_0, Y, lda2, mem, nb, CONST_1_0, A, lda) call PRECISION_GEMM('N', 'N', m, n, nb, -1.0_rk, Y, lda2, mem, nb, 1.0_rk, A, lda)
call obj%timer%stop("blas") call obj%timer%stop("blas")
call obj%timer%stop("wy_left" // PRECISION_SUFFIX) call obj%timer%stop("wy_left" // PRECISION_SUFFIX)
end subroutine end subroutine
...@@ -515,22 +517,23 @@ ...@@ -515,22 +517,23 @@
use precision use precision
use elpa_abstract_impl use elpa_abstract_impl
implicit none implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: n !height of the matrix A integer(kind=ik), intent(in) :: n !height of the matrix A
integer(kind=ik), intent(in) :: m !length of matrix W and Y integer(kind=ik), intent(in) :: m !length of matrix W and Y
integer(kind=ik), intent(in) :: nb !width of matrix W and Y integer(kind=ik), intent(in) :: nb !width of matrix W and Y
integer(kind=ik), intent(in) :: lda !leading dimension of A integer(kind=ik), intent(in) :: lda !leading dimension of A
integer(kind=ik), intent(in) :: lda2 !leading dimension of W and Y integer(kind=ik), intent(in) :: lda2 !leading dimension of W and Y
real(kind=REAL_DATATYPE), intent(inout) :: A(lda,*) !matrix to be transformed ! remove assumed size real(kind=rk), intent(inout) :: A(lda,*) !matrix to be transformed ! remove assumed size
real(kind=REAL_DATATYPE), intent(in) :: W(m,nb) !blocked transformation matrix W real(kind=rk), intent(in) :: W(m,nb) !blocked transformation matrix W
real(kind=REAL_DATATYPE), intent(in) :: Y(m,nb) !blocked transformation matrix Y real(kind=rk), intent(in) :: Y(m,nb) !blocked transformation matrix Y
real(kind=REAL_DATATYPE), intent(inout) :: mem(n,nb) !memory for a temporary matrix of size n x nb real(kind=rk), intent(inout) :: mem(n,nb) !memory for a temporary matrix of size n x nb
call obj%timer%start("wy_right" // PRECISION_SUFFIX) call obj%timer%start("wy_right" // PRECISION_SUFFIX)
call obj%timer%start("blas") call obj%timer%start("blas")
call PRECISION_GEMM('N', 'N', n, nb, m, CONST_1_0, A, lda, W, lda2, CONST_0_0, mem, n) call PRECISION_GEMM('N', 'N', n, nb, m, 1.0_rk, A, lda, W, lda2, 0.0_rk, mem, n)
call PRECISION_GEMM('N', 'T', n, m, nb, -CONST_1_0, mem, n, Y, lda2, CONST_1_0, A, lda) call PRECISION_GEMM('N', 'T', n, m, nb, -1.0_rk, mem, n, Y, lda2, 1.0_rk, A, lda)
call obj%timer%stop("blas") call obj%timer%stop("blas")
call obj%timer%stop("wy_right" // PRECISION_SUFFIX) call obj%timer%stop("wy_right" // PRECISION_SUFFIX)
...@@ -543,23 +546,24 @@ ...@@ -543,23 +546,24 @@
use elpa_abstract_impl use elpa_abstract_impl
use precision use precision
implicit none implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: n !width/heigth of the matrix A; length of matrix W and Y integer(kind=ik), intent(in) :: n !width/heigth of the matrix A; length of matrix W and Y
integer(kind=ik), intent(in) :: nb !width of matrix W and Y integer(kind=ik), intent(in) :: nb !width of matrix W and Y
integer(kind=ik), intent(in) :: lda !leading dimension of A integer(kind=ik), intent(in) :: lda !leading dimension of A
integer(kind=ik), intent(in) :: lda2 !leading dimension of W and Y integer(kind=ik), intent(in) :: lda2 !leading dimension of W and Y
real(kind=REAL_DATATYPE), intent(inout) :: A(lda,*) !matrix to be transformed ! remove assumed size real(kind=rk), intent(inout) :: A(lda,*) !matrix to be transformed ! remove assumed size
real(kind=REAL_DATATYPE), intent(in) :: W(n,nb) !blocked transformation matrix W real(kind=rk), intent(in) :: W(n,nb) !blocked transformation matrix W
real(kind=REAL_DATATYPE), intent(in) :: Y(n,nb) !blocked transformation matrix Y real(kind=rk), intent(in) :: Y(n,nb) !blocked transformation matrix Y
real(kind=REAL_DATATYPE) :: mem(n,nb) !memory for a temporary matrix of size n x nb real(kind=rk) :: mem(n,nb) !memory for a temporary matrix of size n x nb
real(kind=REAL_DATATYPE) :: mem2(nb,nb) !memory for a temporary matrix of size nb x nb real(kind=rk) :: mem2(nb,nb) !memory for a temporary matrix of size nb x nb
call obj%timer%start("wy_symm" // PRECISION_SUFFIX) call obj%timer%start("wy_symm" // PRECISION_SUFFIX)
call obj%timer%start("blas") call obj%timer%start("blas")
call PRECISION_SYMM('L', 'L', n, nb, CONST_1_0, A, lda, W, lda2, CONST_0_0, mem, n) call PRECISION_SYMM('L', 'L', n, nb, 1.0_rk, A, lda, W, lda2, 0.0_rk, mem, n)
call PRECISION_GEMM('T', 'N', nb, nb, n, CONST_1_0, mem, n, W, lda2, CONST_0_0, mem2, nb) call PRECISION_GEMM('T', 'N', nb, nb, n, 1.0_rk, mem, n, W, lda2, 0.0_rk, mem2, nb)
call PRECISION_GEMM('N', 'N', n, nb, nb, -CONST_0_5, Y, lda2, mem2, nb, CONST_1_0, mem, n) call PRECISION_GEMM('N', 'N', n, nb, nb, -0.5_rk, Y, lda2, mem2, nb, 1.0_rk, mem, n)
call PRECISION_SYR2K('L', 'N', n, nb, -CONST_1_0, Y, lda2, mem, n, CONST_1_0, A, lda) call PRECISION_SYR2K('L', 'N', n, nb, -1.0_rk, Y, lda2, mem, n, 1.0_rk, A, lda)
call obj%timer%stop("blas") call obj%timer%stop("blas")
call obj%timer%stop("wy_symm" // PRECISION_SUFFIX) call obj%timer%stop("wy_symm" // PRECISION_SUFFIX)
......
...@@ -501,15 +501,7 @@ program test ...@@ -501,15 +501,7 @@ program test
#if defined(TEST_EIGENVECTORS) || defined(TEST_QR_DECOMPOSITION) #if defined(TEST_EIGENVECTORS) || defined(TEST_QR_DECOMPOSITION)
#ifdef TEST_MATRIX_ANALYTIC #ifdef TEST_MATRIX_ANALYTIC
!
!#if defined(TEST_MATRIX_ANALYTIC)
status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, check_all_evals) status = check_correctness_analytic(na, nev, ev, z, nblk, myid, np_rows, np_cols, my_prow, my_pcol, check_all_evals)
call check_status(status, myid)
if (.true.) then
! also check residuals
status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol)
call check_status(status, myid)
endif
#else #else
!#elif defined(TEST_MATRIX_FRANK) !#elif defined(TEST_MATRIX_FRANK)
! status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol) ! status = check_correctness_evp_numeric_residuals(na, nev, as, z, ev, sc_desc, nblk, myid, np_rows,np_cols, my_prow, my_pcol)
......
#!/usr/bin/env python
# Enumerate every combination of the parameter lists below and call variant()
# once per combination, passing the template tokens built for it.
# NOTE(review): variant() is not defined here; it presumably comes from the
# star import of the project-local `scaling` module -- confirm there what it
# does with output_dir / template_file / tokens.
from itertools import product
from scaling import *

output_dir = "out"
template_file = "run_template_hydra.sh"

#elpa_method = ['elpa1', 'elpa2']
elpa_method = ['elpa1', 'elpa2', 'scalapack_all', 'scalapack_part']
#elpa_method = ['scalapack_part']
math_type = ['real', 'complex']
precision = ['single', 'double']
mat_size = [5000, 20000]
# percentage of mat_size used for the _NUM_EIGEN_ token (see loop below)
proc_eigen = [10,50,100]
block_size = [16]
# 1 node, followed by the powers of two 4, 8, 16, 32, 64
num_nodes = [1]
#num_nodes.extend([2**i for i in range(2,11)])
num_nodes.extend([2**i for i in range(2,7)])
#num_nodes = [2048]

#===============================================================================================
#===============================================================================================
# the rest of the script should be changed only if something changed (e.g. in elpa)
#===============================================================================================
#===============================================================================================

for em, mt, pr, ms, pe, bs, nn in product(elpa_method, math_type, precision, mat_size, proc_eigen, block_size, num_nodes):
    tokens = {}
    tokens['_BLOCK_SIZE_'] = bs
    tokens['_MAT_SIZE_'] = ms
    # pe percent of the matrix size, rounded down to an integer
    tokens['_NUM_EIGEN_'] = ms * pe // 100
    tokens['_NUM_NODES_'] = nn
    variant(output_dir, template_file, tokens, em, mt, pr)
#! /bin/bash
# For every *.txt file in the current directory print one row made of:
#   column 5 of the lines containing " node = ", then column 3 of the lines
#   matching "e%eigenvectors()", "|_ tridiag_", "|_ solve " and "|_ trans_ev".
# Files with no "e%eigenvectors()" value are skipped.
echo nodes total tridiag solve trans_ev

# Print column 3 of every line of file $2 that matches grep pattern $1.
third_column() {
    grep -- "$1" "$2" | awk '{print $3}'
}

for f in *.txt
do
    # If no *.txt file exists the glob is left as the literal string "*.txt";
    # skip it instead of letting grep complain about a missing file.
    [[ -e "$f" ]] || continue
    #echo "processing $f... "
    S=$(grep -- " node = " "$f" | awk '{print $5}')
    TOTAL=$(third_column "e%eigenvectors()" "$f")
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    S+=" $(third_column "|_ tridiag_" "$f")"
    S+=" $(third_column "|_ solve " "$f")"
    S+=" $(third_column "|_ trans_ev" "$f")"
    # Deliberately unquoted: word splitting joins multi-line matches into a
    # single space-separated row, exactly as before.
    echo $S
done
#! /bin/bash
# For every *.txt file in the current directory print one row made of:
#   column 5 of the lines containing " node = ", then column 3 of the lines
#   matching "e%eigenvectors()", "|_ bandred ", "|_ tridiag ", "|_ solve ",
#   "|_ trans_ev_to_band " and "|_ trans_ev_to_full ".
# Files with no "e%eigenvectors()" value are skipped.
echo nodes total bandred tridiag solve trans_ev_to_band trans_ev_to_full

# Print column 3 of every line of file $2 that matches grep pattern $1.
third_column() {
    grep -- "$1" "$2" | awk '{print $3}'
}

for f in *.txt
do
    # If no *.txt file exists the glob is left as the literal string "*.txt";
    # skip it instead of letting grep complain about a missing file.
    [[ -e "$f" ]] || continue
    #echo "processing $f... "
    S=$(grep -- " node = " "$f" | awk '{print $5}')
    TOTAL=$(third_column "e%eigenvectors()" "$f")
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    S+=" $(third_column "|_ bandred " "$f")"
    S+=" $(third_column "|_ tridiag " "$f")"
    S+=" $(third_column "|_ solve " "$f")"
    S+=" $(third_column "|_ trans_ev_to_band " "$f")"
    S+=" $(third_column "|_ trans_ev_to_full " "$f")"
    # Deliberately unquoted: word splitting joins multi-line matches into a
    # single space-separated row, exactly as before.
    echo $S
done
#! /bin/bash
# For every *.txt file in the current directory print one row made of
# column 5 of the lines containing " node = " followed by column 3 of the
# lines matching "e%eigenvectors()". Files without the latter are skipped.
echo nodes total

for f in *.txt
do
    # If no *.txt file exists the glob is left as the literal string "*.txt";
    # skip it instead of letting grep complain about a missing file.
    [[ -e "$f" ]] || continue
    #echo "processing $f... "
    S=$(grep -- " node = " "$f" | awk '{print $5}')
    TOTAL=$(grep -- "e%eigenvectors()" "$f" | awk '{print $3}')
    if [[ -z "$TOTAL" ]]; then
        continue
    fi
    S+=" $TOTAL"
    # Deliberately unquoted: word splitting joins multi-line matches into a
    # single space-separated row, exactly as before.
    echo $S
done
#! /usr/bin/env python
import numpy as np