Commit 6d5aa9d8 authored by Andreas Marek's avatar Andreas Marek

Pass elpa object to subroutines for the timer

parent d9b97460
......@@ -417,6 +417,7 @@ noinst_PROGRAMS = \
test_real_double_2stage \
test_complex_double_1stage \
test_complex_double_2stage \
double_instance@SUFFIX@ \
real_2stage_banded@SUFFIX@ \
complex_2stage_banded@SUFFIX@ \
real_2stage@SUFFIX@ \
......@@ -637,6 +638,12 @@ test_single_complex_2stage_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $
-DTEST_SOLVER_2STAGE \
-DTEST_GPU=0
endif
double_instance@SUFFIX@_SOURCES = test/Fortran/elpa2/double_instance.F90
double_instance@SUFFIX@_LDADD = $(build_lib)
double_instance@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
EXTRA_double_instance@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
real_2stage_banded@SUFFIX@_SOURCES = test/Fortran/elpa2/real_2stage_banded.F90
real_2stage_banded@SUFFIX@_LDADD = $(build_lib)
real_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
......@@ -1036,6 +1043,7 @@ check_SCRIPTS = \
test_real_double_2stage.sh \
test_complex_double_1stage.sh \
test_complex_double_2stage.sh \
double_instance@SUFFIX@.sh \
real_2stage_banded@SUFFIX@.sh \
complex_2stage_banded@SUFFIX@.sh \
real_2stage@SUFFIX@.sh \
......@@ -1186,6 +1194,7 @@ CLEANFILES = \
single_complex* \
real* \
complex* \
double_instance* \
*.i
clean-local:
......
......@@ -54,9 +54,6 @@
!> \brief Fortran module which contains the source of ELPA 1stage
module elpa1_compute
use elpa_utilities
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use elpa_mpi
implicit none
......
......@@ -62,11 +62,6 @@ function elpa_solve_evp_&
use precision
use cuda_functions
use mod_check_for_gpu
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use iso_c_binding
use elpa_api
use elpa_mpi
......@@ -74,7 +69,7 @@ function elpa_solve_evp_&
use elpa1_utilities, only : gpu_usage_via_environment_variable
implicit none
class(elpa_t), intent(in) :: obj
class(elpa_t) :: obj
real(kind=REAL_DATATYPE), intent(out) :: ev(obj%na)
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
......@@ -115,7 +110,7 @@ function elpa_solve_evp_&
integer(kind=ik) :: na, nev, lda, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, mpi_comm_all
call timer%start("elpa_solve_evp_&
call obj%timer%start("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
......@@ -143,7 +138,7 @@ function elpa_solve_evp_&
! summary_timings = .false.
! endif
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
call mpi_comm_size(mpi_comm_all,n_pes,mpierr)
......@@ -156,7 +151,7 @@ function elpa_solve_evp_&
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
success = .true.
wantDebug = obj%get("debug") == 1
......@@ -229,7 +224,7 @@ function elpa_solve_evp_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU)
& (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU)
!ttt1 = MPI_Wtime()
!if(my_prow==0 .and. my_pcol==0 .and. summary_timings) write(error_unit,*) 'Time tridiag_real :',ttt1-ttt0
!time_evp_fwd = ttt1-ttt0
......@@ -237,7 +232,7 @@ function elpa_solve_evp_&
!ttt0 = MPI_Wtime()
call solve_tridi_&
&PRECISION&
& (na, nev, ev, e, &
& (obj, na, nev, ev, e, &
#if REALCASE == 1
q, ldq, &
#endif
......@@ -254,7 +249,7 @@ function elpa_solve_evp_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
& (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
!ttt1 = MPI_Wtime()
!if(my_prow==0 .and. my_pcol==0 .and. summary_timings) write(error_unit,*) 'Time trans_ev_real:',ttt1-ttt0
!time_evp_back = ttt1-ttt0
......@@ -279,7 +274,7 @@ function elpa_solve_evp_&
stop 1
endif
call timer%stop("elpa_solve_evp_&
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
......
......@@ -58,10 +58,12 @@
subroutine v_add_s_&
&PRECISION&
&(v,n,s)
&(obj, v,n,s)
use precision
use elpa_api
implicit none
integer(kind=ik) :: n
class(elpa_t) :: obj
integer(kind=ik) :: n
real(kind=REAL_DATATYPE) :: v(n),s
v(:) = v(:) + s
......@@ -70,12 +72,14 @@
subroutine distribute_global_column_&
&PRECISION&
&(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision
use elpa_api
implicit none
class(elpa_t) :: obj
real(kind=REAL_DATATYPE) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
......@@ -100,7 +104,7 @@
subroutine solve_secular_equation_&
&PRECISION&
&(n, i, d, z, delta, rho, dlam)
&(obj, n, i, d, z, delta, rho, dlam)
!-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix:
......@@ -150,26 +154,23 @@
! The computed lambda_I, the I-th updated eigenvalue.
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use elpa_api
implicit none
integer(kind=ik) :: n, i
real(kind=REAL_DATATYPE) :: d(n), z(n), delta(n), rho, dlam
class(elpa_t) :: obj
integer(kind=ik) :: n, i
real(kind=REAL_DATATYPE) :: d(n), z(n), delta(n), rho, dlam
integer(kind=ik) :: iter
real(kind=REAL_DATATYPE) :: a, b, x, y, dshift
integer(kind=ik) :: iter
real(kind=REAL_DATATYPE) :: a, b, x, y, dshift
! In order to obtain sufficient numerical accuracy we have to shift the problem
! either by d(i) or d(i+1), whichever is closer to the solution
! Upper and lower bound of the shifted solution interval are a and b
call timer%start("solve_secular_equation" // PRECISION_SUFFIX)
call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX)
if (i==n) then
! Special case: Last eigenvalue
......@@ -233,7 +234,7 @@
dlam = x + dshift
delta(:) = delta(:) - x
call timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
call obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
end subroutine solve_secular_equation_&
&PRECISION
......@@ -247,7 +248,7 @@
subroutine hh_transform_complex_&
#endif
&PRECISION &
(alpha, xnorm_sq, xf, tau)
(obj, alpha, xnorm_sq, xf, tau)
#if REALCASE == 1
! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
#endif
......@@ -258,12 +259,9 @@
! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
! since this would be expensive for the parallel implementation.
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use elpa_api
implicit none
class(elpa_t) :: obj
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(inout) :: alpha
#endif
......@@ -281,7 +279,7 @@
real(kind=REAL_DATATYPE) :: BETA
call timer%start("hh_transform_&
call obj%timer%start("hh_transform_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX )
......@@ -345,7 +343,7 @@
ALPHA = BETA
endif
call timer%stop("hh_transform_&
call obj%timer%stop("hh_transform_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX )
......
......@@ -92,17 +92,13 @@
&MATH_DATATYPE&
&_&
&PRECISION &
(na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
(obj, na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
use cuda_functions
use iso_c_binding
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use elpa_api
implicit none
class(elpa_t) :: obj
integer(kind=ik), intent(in) :: na, nqc, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(in) :: tau(na)
......@@ -167,18 +163,18 @@
&_&
&MATH_DATATYPE
call timer%start("trans_ev_&
call obj%timer%start("trans_ev_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX &
)
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
totalblocks = (na-1)/nblk + 1
max_blocks_row = (totalblocks-1)/np_rows + 1
......@@ -290,7 +286,7 @@
enddo
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
if (nb>0) &
call MPI_Bcast(hvb, nb, &
#if REALCASE == 1
......@@ -301,7 +297,7 @@
&MPI_COMPLEX_PRECISION&
#endif
, cur_pcol, mpi_comm_cols, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
nb = 0
......@@ -322,7 +318,7 @@
! This can be done in different ways, we use dsyrk or zherk
tmat = 0
call timer%start("blas")
call obj%timer%start("blas")
if (l_rows>0) &
#if REALCASE == 1
call PRECISION_SYRK('U', 'T', &
......@@ -331,14 +327,14 @@
call PRECISION_HERK('U', 'C', &
#endif
nstor, l_rows, ONE, hvm, ubound(hvm,dim=1), ZERO, tmat, max_stored_rows)
call timer%stop("blas")
call obj%timer%stop("blas")
nc = 0
do n = 1, nstor-1
h1(nc+1:nc+n) = tmat(1:n,n+1)
nc = nc+n
enddo
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
if (nc>0) call mpi_allreduce( h1, h2, nc, &
#if REALCASE == 1
&MPI_REAL_PRECISION&
......@@ -347,7 +343,7 @@
&MPI_COMPLEX_PRECISION&
#endif
&, MPI_SUM, mpi_comm_rows, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#else /* WITH_MPI */
if (nc > 0) h2 = h1
......@@ -358,7 +354,7 @@
nc = 0
tmat(1,1) = tau(ice-nstor+1)
do n = 1, nstor-1
call timer%start("blas")
call obj%timer%start("blas")
#if REALCASE == 1
call PRECISION_TRMV('L', 'T', 'N', &
#endif
......@@ -366,7 +362,7 @@
call PRECISION_TRMV('L', 'C', 'N', &
#endif
n, tmat, max_stored_rows, h2(nc+1), 1)
call timer%stop("blas")
call obj%timer%stop("blas")
tmat(n+1,1:n) = &
#if REALCASE == 1
......@@ -401,7 +397,7 @@
if (l_rows>0) then
if (useGPU) then
call timer%start("cublas")
call obj%timer%start("cublas")
#if REALCASE == 1
call cublas_PRECISION_GEMM('T', 'N', &
#endif
......@@ -410,11 +406,11 @@
#endif
nstor, l_cols, l_rows, ONE, hvm_dev, hvm_ubnd, &
q_dev, ldq, ZERO, tmp_dev, nstor)
call timer%stop("cublas")
call obj%timer%stop("cublas")
else ! useGPU
call timer%start("blas")
call obj%timer%start("blas")
#if REALCASE == 1
call PRECISION_GEMM('T', 'N', &
#endif
......@@ -423,7 +419,7 @@
#endif
nstor, l_cols, l_rows, ONE, hvm, ubound(hvm,dim=1), &
q_mat, ldq, ZERO, tmp1, nstor)
call timer%stop("blas")
call obj%timer%stop("blas")
endif ! useGPU
else !l_rows>0
......@@ -444,7 +440,7 @@
max_local_cols * max_stored_rows * size_of_datatype, cudaMemcpyDeviceToHost)
check_memcpy_cuda("trans_ev", successCUDA)
endif
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_allreduce(tmp1, tmp2, nstor*l_cols, &
#if REALCASE == 1
&MPI_REAL_PRECISION&
......@@ -453,7 +449,7 @@
&MPI_COMPLEX_PRECISION&
#endif
&, MPI_SUM, mpi_comm_rows, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
! copy back tmp2 - after reduction...
if (useGPU) then
successCUDA = cuda_memcpy(tmp_dev, loc(tmp2(1)), &
......@@ -468,7 +464,7 @@
if (l_rows>0) then
if (useGPU) then
call timer%start("cublas")
call obj%timer%start("cublas")
call cublas_PRECISION_TRMM('L', 'L', 'N', 'N', &
nstor, l_cols, ONE, tmat_dev, max_stored_rows, &
tmp_dev, nstor)
......@@ -476,25 +472,25 @@
call cublas_PRECISION_GEMM('N', 'N' ,l_rows ,l_cols ,nstor, &
-ONE, hvm_dev, hvm_ubnd, tmp_dev, nstor, &
ONE, q_dev, ldq)
call timer%stop("cublas")
call obj%timer%stop("cublas")
else !useGPU
#ifdef WITH_MPI
! tmp2 = tmat * tmp2
call timer%start("blas")
call obj%timer%start("blas")
call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
ONE, tmat, max_stored_rows, tmp2, nstor)
!q_mat = q_mat - hvm*tmp2
call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, &
-ONE, hvm, ubound(hvm,dim=1), tmp2, nstor, ONE, q_mat, ldq)
call timer%stop("blas")
call obj%timer%stop("blas")
#else /* WITH_MPI */
call timer%start("blas")
call obj%timer%start("blas")
call PRECISION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
ONE, tmat, max_stored_rows, tmp1, nstor)
call PRECISION_GEMM('N', 'N', l_rows, l_cols, nstor, &
-ONE, hvm, ubound(hvm,dim=1), tmp1, nstor, ONE, q_mat, ldq)
call timer%stop("blas")
call obj%timer%stop("blas")
#endif /* WITH_MPI */
endif ! useGPU
endif ! l_rows>0
......@@ -539,7 +535,7 @@
endif
call timer%stop("trans_ev_&
call obj%timer%stop("trans_ev_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX&
......
This diff is collapsed.
......@@ -73,18 +73,11 @@ module elpa1_utilities
contains
function gpu_usage_via_environment_variable() result(useGPU)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
logical :: useGPU
CHARACTER(len=255) :: ELPA_USE_GPU_ENVIRONMENT
call timer%start("gpu_usage_via_environment_variable")
useGPU = .false.
#if defined(HAVE_ENVIRONMENT_CHECKING)
call get_environment_variable("ELPA_USE_GPU",ELPA_USE_GPU_ENVIRONMENT)
......@@ -93,8 +86,6 @@ module elpa1_utilities
useGPU = .true.
endif
call timer%stop("gpu_usage_via_environment_variable")
end function gpu_usage_via_environment_variable
!-------------------------------------------------------------------------------
......
......@@ -46,16 +46,11 @@
use elpa1_compute
use elpa_utilities
use elpa_mpi
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use elpa_api
implicit none
class(elpa_t), intent(in) :: obj
class(elpa_t) :: obj
integer(kind=ik) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
......@@ -88,7 +83,7 @@
integer(kind=ik) :: istat, debug
character(200) :: errorMessage
call timer%start("elpa_cholesky_&
call obj%timer%start("elpa_cholesky_&
&MATH_DATATYPE&
&_&
&PRECISION&
......@@ -108,12 +103,12 @@
wantDebug = .false.
endif
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
success = .true.
! Matrix is split into tiles; work is done only for tiles on the diagonal or above
......@@ -180,10 +175,10 @@
! of the remaining block
if (my_prow==prow(n, nblk, np_rows) .and. my_pcol==pcol(n, nblk, np_cols)) then
call timer%start("blas")
call obj%timer%start("blas")
call PRECISION_POTRF('U', na-n+1, a(l_row1,l_col1), lda, info)
call timer%stop("blas")
call obj%timer%stop("blas")
if (info/=0) then
if (wantDebug) write(error_unit,*) "elpa_cholesky_&
......@@ -211,10 +206,10 @@
! The process owning the upper left remaining block does the
! Cholesky-Factorization of this block
call timer%start("blas")
call obj%timer%start("blas")
call PRECISION_POTRF('U', nblk, a(l_row1,l_col1), lda, info)
call timer%stop("blas")
call obj%timer%stop("blas")
if (info/=0) then
if (wantDebug) write(error_unit,*) "elpa_cholesky_&
......@@ -238,7 +233,7 @@
enddo
endif
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call MPI_Bcast(tmp1, nblk*(nblk+1)/2, &
#if REALCASE == 1
......@@ -249,7 +244,7 @@
#endif
pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
nc = 0
......@@ -258,7 +253,7 @@
nc = nc+i
enddo
call timer%start("blas")
call obj%timer%start("blas")
if (l_cols-l_colx+1>0) &
#if REALCASE == 1
call PRECISION_TRSM('L', 'U', 'T', 'N', nblk, l_cols-l_colx+1, CONST_1_0, tmp2, ubound(tmp2,dim=1), &
......@@ -270,7 +265,7 @@
tmp2, ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
#endif
call timer%stop("blas")
call obj%timer%stop("blas")
endif
do i=1,nblk
......@@ -284,7 +279,7 @@
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
if (l_cols-l_colx+1>0) &
call MPI_Bcast(tmatc(l_colx,i), l_cols-l_colx+1, &
#if REALCASE == 1
......@@ -295,7 +290,7 @@
#endif
prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
enddo
! this has to be checked since it was changed substantially when doing type safe
......@@ -303,7 +298,7 @@
&MATH_DATATYPE&
&_&
&PRECISION &
(tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
(obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
tmatr, ubound(tmatr,dim=1), mpi_comm_rows, &
n, na, nblk, nblk)
......@@ -313,7 +308,7 @@
lrs = l_rowx
lre = min(l_rows,(i+1)*l_rows_tile)
if (lce<lcs .or. lre<lrs) cycle
call timer%start("blas")
call obj%timer%start("blas")
#if REALCASE == 1
call PRECISION_GEMM('N', 'T', lre-lrs+1, lce-lcs+1, nblk, -CONST_1_0, &
......@@ -326,7 +321,7 @@
tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
CONST_COMPLEX_PAIR_1_0, a(lrs,lcs), lda)
#endif
call timer%stop("blas")
call obj%timer%stop("blas")
enddo
......@@ -350,7 +345,7 @@
a(l_row1:l_rows,l_col1) = 0
endif
enddo
call timer%stop("elpa_cholesky_&
call obj%timer%stop("elpa_cholesky_&
&MATH_DATATYPE&
&_&
&PRECISION&
......
......@@ -57,13 +57,8 @@
use elpa_utilities
use elpa_mpi
use elpa_api
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
implicit none
class(elpa_t), intent(in) :: obj
class(elpa_t) :: obj
integer(kind=ik) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#if REALCASE == 1
......@@ -96,7 +91,7 @@
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("elpa_invert_trm_&
call obj%timer%start("elpa_invert_trm_&
&MATH_DATATYPE&
&_&
&PRECISION&
......@@ -115,12 +110,12 @@
else
wantDebug = .true.
endif
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
success = .true.
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
......@@ -181,11 +176,11 @@
if (my_prow==prow(n, nblk, np_rows)) then
if (my_pcol==pcol(n, nblk, np_cols)) then
call timer%start("blas")
call obj%timer%start("blas")
call PRECISION_TRTRI('U', 'N', nb, a(l_row1,l_col1), lda, info)
call timer%stop("blas")
call obj%timer%stop("blas")
if (info/=0) then
if (wantDebug) write(error_unit,*) "elpa_invert_trm_&
......@@ -199,7 +194,7 @@
#endif
success = .false.
call timer%stop("elpa_invert_trm_&
call obj%timer%stop("elpa_invert_trm_&
&MATH_DATATYPE&
&_&
&PRECISION&
......@@ -214,7 +209,7 @@
enddo
endif
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call MPI_Bcast(tmp1, nb*(nb+1)/2, &
#if REALCASE == 1
MPI_REAL_PRECISION, &
......@@ -223,7 +218,7 @@
MPI_COMPLEX_PRECISION, &
#endif
pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
nc = 0
do i=1,nb
......@@ -231,7 +226,7 @@
nc = nc+i
enddo
call timer%start("blas")
call obj%timer%start("blas")
if (l_cols-l_colx+1>0) &
call PRECISION_TRMM ('L', 'U', 'N', 'N', nb, l_cols-l_colx+1, &
#if REALCASE == 1
......@@ -241,7 +236,7 @@
CONST_COMPLEX_PAIR_1_0, &
#endif
tmp2, ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
call timer%stop("blas")
call obj%timer%stop("blas")
if (l_colx<=l_cols) tmat2(1:nb,l_colx:l_cols) = a(l_row1:l_row1+nb-1,l_colx:l_cols)
if (my_pcol==pcol(n, nblk, np_cols)) tmat2(1:nb,l_col1:l_col1+nb-1) = tmp2(1:nb,1:nb) ! tmp2 has the lower left triangle 0
......@@ -255,7 +250,7 @@
do i=1,nb
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call MPI_Bcast(tmat1(1,i), l_row1-1, &
#if REALCASE == 1
MPI_REAL_PRECISION, &
......@@ -265,12 +260,12 @@
#endif
pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
enddo
endif
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
if (l_cols-l_col1+1>0) &
call MPI_Bcast(tmat2(1,l_col1), (l_cols-l_col1+1)*nblk, &
#if REALCASE == 1
......@@ -281,10 +276,10 @@
#endif
prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
call timer%start("blas")
call obj%timer%start("blas")
if (l_row1>1 .and. l_cols-l_col1+1>0) &
call PRECISION_GEMM('N', 'N', l_row1-1, l_cols-l_col1+1, nb, &
#if REALCASE == 1
......@@ -302,7 +297,7 @@
#endif
a(1,l_col1), lda)
call timer%stop("blas")
call obj%timer%stop("blas")
enddo
......@@ -314,7 +309,7 @@
stop 1
endif