Commit 6d5aa9d8 authored by Andreas Marek

Pass elpa object to subroutines for the timer

parent d9b97460
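
The change applied throughout this diff: the compile-time "use timings" / "use timings_dummy" selection is dropped, each affected subroutine takes the ELPA object as an additional first argument, and all timing calls go through that object's timer. A minimal sketch of the pattern, assuming ELPA's internal precision and elpa_api modules; the subroutine name is hypothetical and not part of this commit:

    ! Before: the timer came from a module chosen at compile time
    !   #ifdef HAVE_DETAILED_TIMINGS
    !     use timings
    !   #else
    !     use timings_dummy
    !   #endif
    !   call timer%start("mpi_communication")
    !
    ! After: the caller passes the elpa object and its timer member is used
    subroutine example_step(obj, n)            ! "example_step" is hypothetical
      use precision                            ! provides integer kind ik
      use elpa_api                             ! provides the elpa_t type
      implicit none
      class(elpa_t)                :: obj
      integer(kind=ik), intent(in) :: n

      call obj%timer%start("example_step")
      ! ... work on n elements ...
      call obj%timer%stop("example_step")
    end subroutine example_step
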
......@@ -417,6 +417,7 @@ noinst_PROGRAMS = \
test_real_double_2stage \
test_complex_double_1stage \
test_complex_double_2stage \
double_instance@SUFFIX@ \
real_2stage_banded@SUFFIX@ \
complex_2stage_banded@SUFFIX@ \
real_2stage@SUFFIX@ \
......@@ -637,6 +638,12 @@ test_single_complex_2stage_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $
-DTEST_SOLVER_2STAGE \
-DTEST_GPU=0
endif
double_instance@SUFFIX@_SOURCES = test/Fortran/elpa2/double_instance.F90
double_instance@SUFFIX@_LDADD = $(build_lib)
double_instance@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
EXTRA_double_instance@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
real_2stage_banded@SUFFIX@_SOURCES = test/Fortran/elpa2/real_2stage_banded.F90
real_2stage_banded@SUFFIX@_LDADD = $(build_lib)
real_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
......@@ -1036,6 +1043,7 @@ check_SCRIPTS = \
test_real_double_2stage.sh \
test_complex_double_1stage.sh \
test_complex_double_2stage.sh \
double_instance@SUFFIX@.sh \
real_2stage_banded@SUFFIX@.sh \
complex_2stage_banded@SUFFIX@.sh \
real_2stage@SUFFIX@.sh \
......@@ -1186,6 +1194,7 @@ CLEANFILES = \
single_complex* \
real* \
complex* \
double_instance* \
*.i
clean-local:
......
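
The Makefile.am hunks above also wire a new double_instance test program into noinst_PROGRAMS, its per-target SOURCES/LDADD/FCFLAGS block, check_SCRIPTS and CLEANFILES. The test source itself (test/Fortran/elpa2/double_instance.F90) is not part of this excerpt; as a rough, hypothetical sketch of what driving two independent ELPA instances could look like, assuming the public elpa module with elpa_init, elpa_allocate, elpa_deallocate and elpa_uninit:

    program double_instance_sketch
      use elpa                                   ! assumed public ELPA module
      implicit none
      class(elpa_t), pointer :: e1, e2

      ! Each instance carries its own configuration, and with this commit
      ! also its own timer, since the object is passed down the call tree.
      if (elpa_init(20170403) /= ELPA_OK) stop 1 ! assumed API version constant

      e1 => elpa_allocate()
      e2 => elpa_allocate()

      ! ... set na, nev, local matrix sizes and MPI communicators on each
      !     instance, call e1%setup() / e2%setup(), then solve with each ...

      call elpa_deallocate(e1)
      call elpa_deallocate(e2)
      call elpa_uninit()
    end program double_instance_sketch
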
......@@ -54,9 +54,6 @@
!> \brief Fortran module which contains the source of ELPA 1stage
module elpa1_compute
use elpa_utilities
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use elpa_mpi
implicit none
......
......@@ -56,18 +56,14 @@
subroutine solve_tridi_&
&PRECISION_AND_SUFFIX &
( na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
( obj, na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
mpi_comm_cols, wantDebug, success )
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use elpa_api
implicit none
integer(kind=ik), intent(in) :: na, nev, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
class(elpa_t) :: obj
integer(kind=ik), intent(in) :: na, nev, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
real(kind=REAL_DATATYPE), intent(inout) :: d(na), e(na)
#ifdef USE_ASSUMED_SIZE
real(kind=REAL_DATATYPE), intent(inout) :: q(ldq,*)
......@@ -85,14 +81,14 @@ subroutine solve_tridi_&
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("solve_tridi" // PRECISION_SUFFIX)
call obj%timer%start("solve_tridi" // PRECISION_SUFFIX)
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
success = .true.
......@@ -120,7 +116,7 @@ subroutine solve_tridi_&
! Scalapack supports it but delivers no results for these columns,
! which is rather annoying
if (nc==0) then
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
if (wantDebug) write(error_unit,*) 'ELPA1_solve_tridi: ERROR: Problem contains processor column with zero width'
success = .false.
return
......@@ -147,10 +143,10 @@ subroutine solve_tridi_&
endif
call solve_tridi_col_&
&PRECISION_AND_SUFFIX &
(l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, &
(obj, l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, &
matrixCols, mpi_comm_rows, wantDebug, success)
if (.not.(success)) then
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
return
endif
! If there is only 1 processor column, we are done
......@@ -162,7 +158,7 @@ subroutine solve_tridi_&
stop 1
endif
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
return
endif
......@@ -223,9 +219,9 @@ subroutine solve_tridi_&
! Recursively merge sub problems
call merge_recursive_&
&PRECISION &
(0, np_cols, wantDebug, success)
(obj, 0, np_cols, wantDebug, success)
if (.not.(success)) then
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
return
endif
......@@ -235,24 +231,21 @@ subroutine solve_tridi_&
stop 1
endif
call timer%stop("solve_tridi" // PRECISION_SUFFIX)
call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
return
contains
recursive subroutine merge_recursive_&
&PRECISION &
(np_off, nprocs, wantDebug, success)
(obj, np_off, nprocs, wantDebug, success)
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use elpa_api
implicit none
! noff is always a multiple of nblk_ev
! nlen-noff is always > nblk_ev
class(elpa_t) :: obj
integer(kind=ik) :: np_off, nprocs
integer(kind=ik) :: np1, np2, noff, nlen, nmid, n
#ifdef WITH_MPI
......@@ -276,11 +269,11 @@ subroutine solve_tridi_&
if (np1 > 1) call merge_recursive_&
&PRECISION &
(np_off, np1, wantDebug, success)
(obj, np_off, np1, wantDebug, success)
if (.not.(success)) return
if (np2 > 1) call merge_recursive_&
&PRECISION &
(np_off+np1, np2, wantDebug, success)
(obj, np_off+np1, np2, wantDebug, success)
if (.not.(success)) return
noff = limits(np_off)
......@@ -288,20 +281,20 @@ subroutine solve_tridi_&
nlen = limits(np_off+nprocs) - noff
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
if (my_pcol==np_off) then
do n=np_off+np1,np_off+nprocs-1
call mpi_send(d(noff+1), nmid, MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
enddo
endif
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs) then
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_recv(d(noff+1), nmid, MPI_REAL_PRECISION, np_off, 1, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#else /* WITH_MPI */
! d(noff+1:noff+1+nmid-1) = d(noff+1:noff+1+nmid-1)
#endif /* WITH_MPI */
......@@ -310,18 +303,18 @@ subroutine solve_tridi_&
if (my_pcol==np_off+np1) then
do n=np_off,np_off+np1-1
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_send(d(noff+nmid+1), nlen-nmid, MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
enddo
endif
if (my_pcol>=np_off .and. my_pcol<np_off+np1) then
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_recv(d(noff+nmid+1), nlen-nmid, MPI_REAL_PRECISION, np_off+np1, 1,mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#else /* WITH_MPI */
! d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) = d(noff+nmid+1:noff+nmid+1+nlen-nmid-1)
#endif /* WITH_MPI */
......@@ -332,7 +325,7 @@ subroutine solve_tridi_&
! p_col_bc is set so that only nev eigenvalues are calculated
call merge_systems_&
&PRECISION &
(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
(obj, nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col, p_col, &
l_col_bc, p_col_bc, np_off, nprocs, wantDebug, success )
if (.not.(success)) return
......@@ -340,7 +333,7 @@ subroutine solve_tridi_&
! Not last merge, leave dense column distribution
call merge_systems_&
&PRECISION &
(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
(obj, nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col(noff+1), p_col(noff+1), &
l_col(noff+1), p_col(noff+1), np_off, nprocs, wantDebug, success )
if (.not.(success)) return
......@@ -353,25 +346,22 @@ subroutine solve_tridi_&
subroutine solve_tridi_col_&
&PRECISION_AND_SUFFIX &
( na, nev, nqoff, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, wantDebug, success )
( obj, na, nev, nqoff, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, wantDebug, success )
! Solves the symmetric, tridiagonal eigenvalue problem on one processor column
! with the divide and conquer method.
! Works best if the number of processor rows is a power of 2!
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use elpa_api
implicit none
class(elpa_t) :: obj
integer(kind=ik) :: na, nev, nqoff, ldq, nblk, matrixCols, mpi_comm_rows
real(kind=REAL_DATATYPE) :: d(na), e(na)
real(kind=REAL_DATATYPE) :: d(na), e(na)
#ifdef USE_ASSUMED_SIZE
real(kind=REAL_DATATYPE) :: q(ldq,*)
real(kind=REAL_DATATYPE) :: q(ldq,*)
#else
real(kind=REAL_DATATYPE) :: q(ldq,matrixCols)
real(kind=REAL_DATATYPE) :: q(ldq,matrixCols)
#endif
integer(kind=ik), parameter :: min_submatrix_size = 16 ! Minimum size of the submatrices to be used
......@@ -387,11 +377,11 @@ subroutine solve_tridi_&
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("solve_tridi_col" // PRECISION_SUFFIX)
call timer%start("mpi_communication")
call obj%timer%start("solve_tridi_col" // PRECISION_SUFFIX)
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
success = .true.
! Calculate the number of subdivisions needed.
......@@ -451,7 +441,7 @@ subroutine solve_tridi_&
call solve_tridi_single_problem_&
&PRECISION_AND_SUFFIX &
(nlen,d(noff+1),e(noff+1), &
(obj, nlen,d(noff+1),e(noff+1), &
q(nqoff+noff+1,noff+1),ubound(q,dim=1), wantDebug, success)
if (.not.(success)) return
......@@ -482,7 +472,7 @@ subroutine solve_tridi_&
nlen = limits(my_prow+1)-noff ! Size of subproblem
call solve_tridi_single_problem_&
&PRECISION_AND_SUFFIX &
(nlen,d(noff+1),e(noff+1),qmat1, &
(obj, nlen,d(noff+1),e(noff+1),qmat1, &
ubound(qmat1,dim=1), wantDebug, success)
if (.not.(success)) return
......@@ -495,11 +485,11 @@ subroutine solve_tridi_&
noff = limits(np)
nlen = limits(np+1)-noff
#ifdef WITH_MPI
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call MPI_Bcast(d(noff+1), nlen, MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
qmat2 = qmat1
call MPI_Bcast(qmat2, max_size*max_size, MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
#else /* WITH_MPI */
! qmat2 = qmat1 ! is this correct
#endif /* WITH_MPI */
......@@ -508,11 +498,11 @@ subroutine solve_tridi_&
#ifdef WITH_MPI
call distribute_global_column_&
&PRECISION &
(qmat2(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
(obj, qmat2(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
#else /* WITH_MPI */
call distribute_global_column_&
&PRECISION &
(qmat1(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
(obj, qmat1(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
#endif /* WITH_MPI */
enddo
......@@ -557,7 +547,7 @@ subroutine solve_tridi_&
endif
call merge_systems_&
&PRECISION &
(nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, nqoff+noff, nblk, &
(obj, nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, nqoff+noff, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_self, l_col(noff+1), p_col_i(noff+1), &
l_col(noff+1), p_col_o(noff+1), 0, 1, wantDebug, success)
if (.not.(success)) return
......@@ -574,26 +564,22 @@ subroutine solve_tridi_&
stop 1
endif
call timer%stop("solve_tridi_col" // PRECISION_SUFFIX)
call obj%timer%stop("solve_tridi_col" // PRECISION_SUFFIX)
end subroutine solve_tridi_col_&
&PRECISION_AND_SUFFIX
recursive subroutine solve_tridi_single_problem_&
&PRECISION_AND_SUFFIX &
(nlen, d, e, q, ldq, wantDebug, success)
(obj, nlen, d, e, q, ldq, wantDebug, success)
! Solves the symmetric, tridiagonal eigenvalue problem on a single processor.
! Takes precautions if DSTEDC fails or if the eigenvalues are not ordered correctly.
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use elpa_api
implicit none
integer(kind=ik) :: nlen, ldq
class(elpa_t) :: obj
integer(kind=ik) :: nlen, ldq
real(kind=REAL_DATATYPE) :: d(nlen), e(nlen), q(ldq,nlen)
real(kind=REAL_DATATYPE), allocatable :: work(:), qtmp(:), ds(:), es(:)
......@@ -607,7 +593,7 @@ subroutine solve_tridi_&
integer(kind=ik) :: istat
character(200) :: errorMessage
call timer%start("solve_tridi_single" // PRECISION_SUFFIX)
call obj%timer%start("solve_tridi_single" // PRECISION_SUFFIX)
success = .true.
allocate(ds(nlen), es(nlen), stat=istat, errmsg=errorMessage)
......@@ -630,9 +616,9 @@ subroutine solve_tridi_&
print *,"solve_tridi_single: error when allocating work "//errorMessage
stop 1
endif
call timer%start("blas")
call obj%timer%start("blas")
call PRECISION_STEDC('I', nlen, d, e, q, ldq, work, lwork, iwork, liwork, info)
call timer%stop("blas")
call obj%timer%stop("blas")
if (info /= 0) then
......@@ -642,9 +628,9 @@ subroutine solve_tridi_&
d(:) = ds(:)
e(:) = es(:)
call timer%start("blas")
call obj%timer%start("blas")
call PRECISION_STEQR('I', nlen, d, e, q, ldq, work, info)
call timer%stop("blas")
call obj%timer%stop("blas")
! If DSTEQR fails also, we don't know what to do further ...
......@@ -705,7 +691,7 @@ subroutine solve_tridi_&
endif
enddo
call timer%stop("solve_tridi_single" // PRECISION_SUFFIX)
call obj%timer%stop("solve_tridi_single" // PRECISION_SUFFIX)
end subroutine solve_tridi_single_problem_&
&PRECISION_AND_SUFFIX
......
......@@ -62,11 +62,6 @@ function elpa_solve_evp_&
use precision
use cuda_functions
use mod_check_for_gpu
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use iso_c_binding
use elpa_api
use elpa_mpi
......@@ -74,7 +69,7 @@ function elpa_solve_evp_&
use elpa1_utilities, only : gpu_usage_via_environment_variable
implicit none
class(elpa_t), intent(in) :: obj
class(elpa_t) :: obj
real(kind=REAL_DATATYPE), intent(out) :: ev(obj%na)
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
......@@ -115,7 +110,7 @@ function elpa_solve_evp_&
integer(kind=ik) :: na, nev, lda, ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, mpi_comm_all
call timer%start("elpa_solve_evp_&
call obj%timer%start("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
......@@ -143,7 +138,7 @@ function elpa_solve_evp_&
! summary_timings = .false.
! endif
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
call mpi_comm_size(mpi_comm_all,n_pes,mpierr)
......@@ -156,7 +151,7 @@ function elpa_solve_evp_&
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif
call timer%stop("mpi_communication")
call obj%timer%stop("mpi_communication")
success = .true.
wantDebug = obj%get("debug") == 1
......@@ -229,7 +224,7 @@ function elpa_solve_evp_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU)
& (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU)
!ttt1 = MPI_Wtime()
!if(my_prow==0 .and. my_pcol==0 .and. summary_timings) write(error_unit,*) 'Time tridiag_real :',ttt1-ttt0
!time_evp_fwd = ttt1-ttt0
......@@ -237,7 +232,7 @@ function elpa_solve_evp_&
!ttt0 = MPI_Wtime()
call solve_tridi_&
&PRECISION&
& (na, nev, ev, e, &
& (obj, na, nev, ev, e, &
#if REALCASE == 1
q, ldq, &
#endif
......@@ -254,7 +249,7 @@ function elpa_solve_evp_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
& (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
!ttt1 = MPI_Wtime()
!if(my_prow==0 .and. my_pcol==0 .and. summary_timings) write(error_unit,*) 'Time trans_ev_real:',ttt1-ttt0
!time_evp_back = ttt1-ttt0
......@@ -279,7 +274,7 @@ function elpa_solve_evp_&
stop 1
endif
call timer%stop("elpa_solve_evp_&
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
......
......@@ -58,10 +58,12 @@
subroutine v_add_s_&
&PRECISION&
&(v,n,s)
&(obj, v,n,s)
use precision
use elpa_api
implicit none
integer(kind=ik) :: n
class(elpa_t) :: obj
integer(kind=ik) :: n
real(kind=REAL_DATATYPE) :: v(n),s
v(:) = v(:) + s
......@@ -70,12 +72,14 @@
subroutine distribute_global_column_&
&PRECISION&
&(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision
use elpa_api
implicit none
class(elpa_t) :: obj
real(kind=REAL_DATATYPE) :: g_col(nlen), l_col(*) ! change this to proper 2d 1d matching ! remove assumed size
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
......@@ -100,7 +104,7 @@
subroutine solve_secular_equation_&
&PRECISION&
&(n, i, d, z, delta, rho, dlam)
&(obj, n, i, d, z, delta, rho, dlam)
!-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix:
......@@ -150,26 +154,23 @@
! The computed lambda_I, the I-th updated eigenvalue.
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use elpa_api
implicit none
integer(kind=ik) :: n, i
real(kind=REAL_DATATYPE) :: d(n), z(n), delta(n), rho, dlam
class(elpa_t) :: obj
integer(kind=ik) :: n, i
real(kind=REAL_DATATYPE) :: d(n), z(n), delta(n), rho, dlam
integer(kind=ik) :: iter
real(kind=REAL_DATATYPE) :: a, b, x, y, dshift
integer(kind=ik) :: iter
real(kind=REAL_DATATYPE) :: a, b, x, y, dshift
! In order to obtain sufficient numerical accuracy we have to shift the problem
! either by d(i) or d(i+1), whichever is closer to the solution
! Upper and lower bound of the shifted solution interval are a and b
call timer%start("solve_secular_equation" // PRECISION_SUFFIX)
call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX)
if (i==n) then
! Special case: Last eigenvalue
......@@ -233,7 +234,7 @@
dlam = x + dshift
delta(:) = delta(:) - x
call timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
call obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
end subroutine solve_secular_equation_&
&PRECISION
......@@ -247,7 +248,7 @@
subroutine hh_transform_complex_&
#endif
&PRECISION &
(alpha, xnorm_sq, xf, tau)
(obj, alpha, xnorm_sq, xf, tau)
#if REALCASE == 1
! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
#endif
......@@ -258,12 +259,9 @@
! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
! since this would be expensive for the parallel implementation.
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use elpa_api
implicit none
class(elpa_t) :: obj
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(inout) :: alpha
#endif
......@@ -281,7 +279,7 @@
real(kind=REAL_DATATYPE) :: BETA
call timer%start("hh_transform_&
call obj%timer%start("hh_transform_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX )
......@@ -345,7 +343,7 @@
ALPHA = BETA
endif
call timer%stop("hh_transform_&
call obj%timer%stop("hh_transform_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX )
......
......@@ -92,17 +92,13 @@
&MATH_DATATYPE&
&_&
&PRECISION &
(na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
(obj, na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
use cuda_functions
use iso_c_binding
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use elpa_api
implicit none
class(elpa_t) :: obj
integer(kind=ik), intent(in) :: na, nqc, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#if REALCASE == 1
real(kind=REAL_DATATYPE), intent(in) :: tau(na)
......@@ -167,18 +163,18 @@
&_&
&MATH_DATATYPE
call timer%start("trans_ev_&
call obj%timer%start("trans_ev_&
&MATH_DATATYPE&
&" // &
&PRECISION_SUFFIX &
)
call timer%start("mpi_communication")
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)