elpa / elpa · Commits
Commit 6d5aa9d8
authored May 21, 2017 by Andreas Marek

Pass elpa object to subroutines for the timer

parent d9b97460
Changes 36
Pipelines 1
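The same change is applied throughout the files below: each affected routine receives the ELPA handle as a new first dummy argument and times itself through that object's timer, instead of importing a module-level timer via "use timings" / "use timings_dummy". A minimal sketch of the convention (illustrative only: the routine name example_step and its argument n are hypothetical, while class(elpa_t), use elpa_api and the obj%timer%start / obj%timer%stop calls mirror the hunks below):

! Sketch of the calling convention introduced by this commit (hypothetical routine).
! Before: "use timings" (or "use timings_dummy") and "call timer%start(...)".
! After:  the elpa object is passed in and its timer component is used instead.
subroutine example_step(obj, n)
  use precision    ! integer kind ik, as in the routines changed below
  use elpa_api     ! provides the elpa_t type whose timer component is used here
  implicit none
  class(elpa_t)    :: obj
  integer(kind=ik) :: n

  call obj%timer%start("example_step")
  ! ... work on a problem of size n would be timed here ...
  call obj%timer%stop("example_step")
end subroutine example_step

A caller that already holds the object simply forwards it, e.g. call example_step(obj, na).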
Makefile.am
@@ -417,6 +417,7 @@ noinst_PROGRAMS = \
   test_real_double_2stage \
   test_complex_double_1stage \
   test_complex_double_2stage \
+  double_instance@SUFFIX@ \
   real_2stage_banded@SUFFIX@ \
   complex_2stage_banded@SUFFIX@ \
   real_2stage@SUFFIX@ \
@@ -637,6 +638,12 @@ test_single_complex_2stage_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $
   -DTEST_SOLVER_2STAGE \
   -DTEST_GPU=0
 endif
+double_instance@SUFFIX@_SOURCES = test/Fortran/elpa2/double_instance.F90
+double_instance@SUFFIX@_LDADD = $(build_lib)
+double_instance@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
+EXTRA_double_instance@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
 real_2stage_banded@SUFFIX@_SOURCES = test/Fortran/elpa2/real_2stage_banded.F90
 real_2stage_banded@SUFFIX@_LDADD = $(build_lib)
 real_2stage_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) $(FC_MODOUT)private_modules $(FC_MODINC)private_modules
@@ -1036,6 +1043,7 @@ check_SCRIPTS = \
   test_real_double_2stage.sh \
   test_complex_double_1stage.sh \
   test_complex_double_2stage.sh \
+  double_instance@SUFFIX@.sh \
   real_2stage_banded@SUFFIX@.sh \
   complex_2stage_banded@SUFFIX@.sh \
   real_2stage@SUFFIX@.sh \
@@ -1186,6 +1194,7 @@ CLEANFILES = \
   single_complex* \
   real* \
   complex* \
+  double_instance* \
   *.i
 clean-local:
src/elpa1/elpa1_compute_private.F90
@@ -54,9 +54,6 @@
 !> \brief Fortran module which contains the source of ELPA 1stage
 module elpa1_compute
   use elpa_utilities
-#ifdef HAVE_DETAILED_TIMINGS
-  use timings
-#endif
   use elpa_mpi
   implicit none
src/elpa1/elpa1_merge_systems_real_template.X90
This diff is collapsed.
src/elpa1/elpa1_solve_tridi_real_template.X90
@@ -56,18 +56,14 @@
 subroutine solve_tridi_&
 &PRECISION_AND_SUFFIX &
-  ( na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
+  ( obj, na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
    mpi_comm_cols, wantDebug, success )
-#ifdef HAVE_DETAILED_TIMINGS
-  use timings
-#else
-  use timings_dummy
-#endif
   use precision
+  use elpa_api
   implicit none
+  class(elpa_t) :: obj
   integer(kind=ik), intent(in) :: na, nev, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
   real(kind=REAL_DATATYPE), intent(inout) :: d(na), e(na)
 #ifdef USE_ASSUMED_SIZE
   real(kind=REAL_DATATYPE), intent(inout) :: q(ldq,*)
@@ -85,14 +81,14 @@ subroutine solve_tridi_&
   integer(kind=ik) :: istat
   character(200) :: errorMessage
-  call timer%start("solve_tridi" // PRECISION_SUFFIX)
+  call obj%timer%start("solve_tridi" // PRECISION_SUFFIX)
-  call timer%start("mpi_communication")
+  call obj%timer%start("mpi_communication")
   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
-  call timer%stop("mpi_communication")
+  call obj%timer%stop("mpi_communication")
   success = .true.
@@ -120,7 +116,7 @@ subroutine solve_tridi_&
   ! Scalapack supports it but delivers no results for these columns,
   ! which is rather annoying
   if (nc==0) then
-    call timer%stop("solve_tridi" // PRECISION_SUFFIX)
+    call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
     if (wantDebug) write(error_unit,*) 'ELPA1_solve_tridi: ERROR: Problem contains processor column with zero width'
     success = .false.
     return
@@ -147,10 +143,10 @@ subroutine solve_tridi_&
   endif
   call solve_tridi_col_&
   &PRECISION_AND_SUFFIX &
-    (l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, &
+    (obj, l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, &
     matrixCols, mpi_comm_rows, wantDebug, success)
   if (.not.(success)) then
-    call timer%stop("solve_tridi" // PRECISION_SUFFIX)
+    call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
     return
   endif
   ! If there is only 1 processor column, we are done
@@ -162,7 +158,7 @@ subroutine solve_tridi_&
     stop 1
   endif
-  call timer%stop("solve_tridi" // PRECISION_SUFFIX)
+  call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
   return
 endif
@@ -223,9 +219,9 @@ subroutine solve_tridi_&
   ! Recursively merge sub problems
   call merge_recursive_&
   &PRECISION &
-    (0, np_cols, wantDebug, success)
+    (obj, 0, np_cols, wantDebug, success)
   if (.not.(success)) then
-    call timer%stop("solve_tridi" // PRECISION_SUFFIX)
+    call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
     return
   endif
@@ -235,24 +231,21 @@ subroutine solve_tridi_&
     stop 1
   endif
-  call timer%stop("solve_tridi" // PRECISION_SUFFIX)
+  call obj%timer%stop("solve_tridi" // PRECISION_SUFFIX)
   return
 contains
   recursive subroutine merge_recursive_&
   &PRECISION &
-    (np_off, nprocs, wantDebug, success)
+    (obj, np_off, nprocs, wantDebug, success)
     use precision
-#ifdef HAVE_DETAILED_TIMINGS
-    use timings
-#else
-    use timings_dummy
-#endif
+    use elpa_api
     implicit none
     ! noff is always a multiple of nblk_ev
     ! nlen-noff is always > nblk_ev
+    class(elpa_t) :: obj
     integer(kind=ik) :: np_off, nprocs
     integer(kind=ik) :: np1, np2, noff, nlen, nmid, n
 #ifdef WITH_MPI
@@ -276,11 +269,11 @@ subroutine solve_tridi_&
     if (np1 > 1) call merge_recursive_&
     &PRECISION &
-      (np_off, np1, wantDebug, success)
+      (obj, np_off, np1, wantDebug, success)
     if (.not.(success)) return
     if (np2 > 1) call merge_recursive_&
     &PRECISION &
-      (np_off+np1, np2, wantDebug, success)
+      (obj, np_off+np1, np2, wantDebug, success)
     if (.not.(success)) return
     noff = limits(np_off)
@@ -288,20 +281,20 @@ subroutine solve_tridi_&
     nlen = limits(np_off+nprocs) - noff
 #ifdef WITH_MPI
-    call timer%start("mpi_communication")
+    call obj%timer%start("mpi_communication")
     if (my_pcol==np_off) then
       do n=np_off+np1,np_off+nprocs-1
         call mpi_send(d(noff+1), nmid, MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
       enddo
     endif
-    call timer%stop("mpi_communication")
+    call obj%timer%stop("mpi_communication")
 #endif /* WITH_MPI */
     if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs) then
 #ifdef WITH_MPI
-      call timer%start("mpi_communication")
+      call obj%timer%start("mpi_communication")
       call mpi_recv(d(noff+1), nmid, MPI_REAL_PRECISION, np_off, 1, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
-      call timer%stop("mpi_communication")
+      call obj%timer%stop("mpi_communication")
 #else /* WITH_MPI */
 ! d(noff+1:noff+1+nmid-1) = d(noff+1:noff+1+nmid-1)
 #endif /* WITH_MPI */
@@ -310,18 +303,18 @@ subroutine solve_tridi_&
     if (my_pcol==np_off+np1) then
       do n=np_off,np_off+np1-1
 #ifdef WITH_MPI
-        call timer%start("mpi_communication")
+        call obj%timer%start("mpi_communication")
         call mpi_send(d(noff+nmid+1), nlen-nmid, MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
-        call timer%stop("mpi_communication")
+        call obj%timer%stop("mpi_communication")
 #endif /* WITH_MPI */
       enddo
     endif
     if (my_pcol>=np_off .and. my_pcol<np_off+np1) then
 #ifdef WITH_MPI
-      call timer%start("mpi_communication")
+      call obj%timer%start("mpi_communication")
       call mpi_recv(d(noff+nmid+1), nlen-nmid, MPI_REAL_PRECISION, np_off+np1, 1,mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
-      call timer%stop("mpi_communication")
+      call obj%timer%stop("mpi_communication")
 #else /* WITH_MPI */
 ! d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) = d(noff+nmid+1:noff+nmid+1+nlen-nmid-1)
 #endif /* WITH_MPI */
@@ -332,7 +325,7 @@ subroutine solve_tridi_&
     ! p_col_bc is set so that only nev eigenvalues are calculated
     call merge_systems_&
     &PRECISION &
-      (nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
+      (obj, nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
       nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col, p_col, &
       l_col_bc, p_col_bc, np_off, nprocs, wantDebug, success )
     if (.not.(success)) return
@@ -340,7 +333,7 @@ subroutine solve_tridi_&
     ! Not last merge, leave dense column distribution
     call merge_systems_&
     &PRECISION &
-      (nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
+      (obj, nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, noff, &
       nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, l_col(noff+1), p_col(noff+1), &
       l_col(noff+1), p_col(noff+1), np_off, nprocs, wantDebug, success )
     if (.not.(success)) return
@@ -353,25 +346,22 @@ subroutine solve_tridi_&
 subroutine solve_tridi_col_&
 &PRECISION_AND_SUFFIX &
-  ( na, nev, nqoff, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, wantDebug, success )
+  ( obj, na, nev, nqoff, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, wantDebug, success )
   ! Solves the symmetric, tridiagonal eigenvalue problem on one processor column
   ! with the divide and conquer method.
   ! Works best if the number of processor rows is a power of 2!
-#ifdef HAVE_DETAILED_TIMINGS
-  use timings
-#else
-  use timings_dummy
-#endif
   use precision
+  use elpa_api
   implicit none
+  class(elpa_t) :: obj
   integer(kind=ik) :: na, nev, nqoff, ldq, nblk, matrixCols, mpi_comm_rows
   real(kind=REAL_DATATYPE) :: d(na), e(na)
 #ifdef USE_ASSUMED_SIZE
   real(kind=REAL_DATATYPE) :: q(ldq,*)
 #else
   real(kind=REAL_DATATYPE) :: q(ldq,matrixCols)
 #endif
   integer(kind=ik), parameter :: min_submatrix_size = 16 ! Minimum size of the submatrices to be used
@@ -387,11 +377,11 @@ subroutine solve_tridi_&
   integer(kind=ik) :: istat
   character(200) :: errorMessage
-  call timer%start("solve_tridi_col" // PRECISION_SUFFIX)
-  call timer%start("mpi_communication")
+  call obj%timer%start("solve_tridi_col" // PRECISION_SUFFIX)
+  call obj%timer%start("mpi_communication")
   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
-  call timer%stop("mpi_communication")
+  call obj%timer%stop("mpi_communication")
   success = .true.
   ! Calculate the number of subdivisions needed.
@@ -451,7 +441,7 @@ subroutine solve_tridi_&
     call solve_tridi_single_problem_&
     &PRECISION_AND_SUFFIX &
-      (nlen,d(noff+1),e(noff+1), &
+      (obj, nlen,d(noff+1),e(noff+1), &
       q(nqoff+noff+1,noff+1),ubound(q,dim=1), wantDebug, success)
     if (.not.(success)) return
@@ -482,7 +472,7 @@ subroutine solve_tridi_&
     nlen = limits(my_prow+1)-noff ! Size of subproblem
     call solve_tridi_single_problem_&
     &PRECISION_AND_SUFFIX &
-      (nlen,d(noff+1),e(noff+1),qmat1, &
+      (obj, nlen,d(noff+1),e(noff+1),qmat1, &
       ubound(qmat1,dim=1), wantDebug, success)
     if (.not.(success)) return
@@ -495,11 +485,11 @@ subroutine solve_tridi_&
     noff = limits(np)
     nlen = limits(np+1)-noff
 #ifdef WITH_MPI
-    call timer%start("mpi_communication")
+    call obj%timer%start("mpi_communication")
     call MPI_Bcast(d(noff+1), nlen, MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
     qmat2 = qmat1
     call MPI_Bcast(qmat2, max_size*max_size, MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
-    call timer%stop("mpi_communication")
+    call obj%timer%stop("mpi_communication")
 #else /* WITH_MPI */
 ! qmat2 = qmat1 ! is this correct
 #endif /* WITH_MPI */
@@ -508,11 +498,11 @@ subroutine solve_tridi_&
 #ifdef WITH_MPI
     call distribute_global_column_&
     &PRECISION &
-      (qmat2(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
+      (obj, qmat2(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
 #else /* WITH_MPI */
     call distribute_global_column_&
     &PRECISION &
-      (qmat1(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
+      (obj, qmat1(1,i), q(1,noff+i), nqoff+noff, nlen, my_prow, np_rows, nblk)
 #endif /* WITH_MPI */
   enddo
@@ -557,7 +547,7 @@ subroutine solve_tridi_&
     endif
     call merge_systems_&
     &PRECISION &
-      (nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, nqoff+noff, nblk, &
+      (obj, nlen, nmid, d(noff+1), e(noff+nmid), q, ldq, nqoff+noff, nblk, &
       matrixCols, mpi_comm_rows, mpi_comm_self, l_col(noff+1), p_col_i(noff+1), &
       l_col(noff+1), p_col_o(noff+1), 0, 1, wantDebug, success)
     if (.not.(success)) return
@@ -574,26 +564,22 @@ subroutine solve_tridi_&
     stop 1
   endif
-  call timer%stop("solve_tridi_col" // PRECISION_SUFFIX)
+  call obj%timer%stop("solve_tridi_col" // PRECISION_SUFFIX)
 end subroutine solve_tridi_col_&
 &PRECISION_AND_SUFFIX
 recursive subroutine solve_tridi_single_problem_&
 &PRECISION_AND_SUFFIX &
-  (nlen, d, e, q, ldq, wantDebug, success)
+  (obj, nlen, d, e, q, ldq, wantDebug, success)
   ! Solves the symmetric, tridiagonal eigenvalue problem on a single processor.
   ! Takes precautions if DSTEDC fails or if the eigenvalues are not ordered correctly.
-#ifdef HAVE_DETAILED_TIMINGS
-  use timings
-#else
-  use timings_dummy
-#endif
   use precision
+  use elpa_api
   implicit none
+  class(elpa_t) :: obj
   integer(kind=ik) :: nlen, ldq
   real(kind=REAL_DATATYPE) :: d(nlen), e(nlen), q(ldq,nlen)
   real(kind=REAL_DATATYPE), allocatable :: work(:), qtmp(:), ds(:), es(:)
@@ -607,7 +593,7 @@ subroutine solve_tridi_&
   integer(kind=ik) :: istat
   character(200) :: errorMessage
-  call timer%start("solve_tridi_single" // PRECISION_SUFFIX)
+  call obj%timer%start("solve_tridi_single" // PRECISION_SUFFIX)
   success = .true.
   allocate(ds(nlen), es(nlen), stat=istat, errmsg=errorMessage)
@@ -630,9 +616,9 @@ subroutine solve_tridi_&
     print *,"solve_tridi_single: error when allocating work "//errorMessage
     stop 1
   endif
-  call timer%start("blas")
+  call obj%timer%start("blas")
   call PRECISION_STEDC('I', nlen, d, e, q, ldq, work, lwork, iwork, liwork, info)
-  call timer%stop("blas")
+  call obj%timer%stop("blas")
   if (info /= 0) then
@@ -642,9 +628,9 @@ subroutine solve_tridi_&
     d(:) = ds(:)
     e(:) = es(:)
-    call timer%start("blas")
+    call obj%timer%start("blas")
     call PRECISION_STEQR('I', nlen, d, e, q, ldq, work, info)
-    call timer%stop("blas")
+    call obj%timer%stop("blas")
     ! If DSTEQR fails also, we don't know what to do further ...
@@ -705,7 +691,7 @@ subroutine solve_tridi_&
     endif
   enddo
-  call timer%stop("solve_tridi_single" // PRECISION_SUFFIX)
+  call obj%timer%stop("solve_tridi_single" // PRECISION_SUFFIX)
 end subroutine solve_tridi_single_problem_&
 &PRECISION_AND_SUFFIX
src/elpa1/elpa1_template.X90
@@ -62,11 +62,6 @@ function elpa_solve_evp_&
   use precision
   use cuda_functions
   use mod_check_for_gpu
-#ifdef HAVE_DETAILED_TIMINGS
-  use timings
-#else
-  use timings_dummy
-#endif
   use iso_c_binding
   use elpa_api
   use elpa_mpi
@@ -74,7 +69,7 @@ function elpa_solve_evp_&
   use elpa1_utilities, only : gpu_usage_via_environment_variable
   implicit none
-  class(elpa_t), intent(in) :: obj
+  class(elpa_t) :: obj
   real(kind=REAL_DATATYPE), intent(out) :: ev(obj%na)
 #if REALCASE == 1
 #ifdef USE_ASSUMED_SIZE
@@ -115,7 +110,7 @@ function elpa_solve_evp_&
   integer(kind=ik) :: na, nev, lda, ldq, nblk, matrixCols, &
                       mpi_comm_rows, mpi_comm_cols, mpi_comm_all
-  call timer%start("elpa_solve_evp_&
+  call obj%timer%start("elpa_solve_evp_&
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
@@ -143,7 +138,7 @@ function elpa_solve_evp_&
   ! summary_timings = .false.
   ! endif
-  call timer%start("mpi_communication")
+  call obj%timer%start("mpi_communication")
   call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
   call mpi_comm_size(mpi_comm_all,n_pes,mpierr)
@@ -156,7 +151,7 @@ function elpa_solve_evp_&
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
 #endif
-  call timer%stop("mpi_communication")
+  call obj%timer%stop("mpi_communication")
   success = .true.
   wantDebug = obj%get("debug") == 1
@@ -229,7 +224,7 @@ function elpa_solve_evp_&
   &MATH_DATATYPE&
   &_&
   &PRECISION&
-  & (na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU)
+  & (obj, na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau, do_useGPU)
   !ttt1 = MPI_Wtime()
   !if(my_prow==0 .and. my_pcol==0 .and. summary_timings) write(error_unit,*) 'Time tridiag_real :',ttt1-ttt0
   !time_evp_fwd = ttt1-ttt0
@@ -237,7 +232,7 @@ function elpa_solve_evp_&
   !ttt0 = MPI_Wtime()
   call solve_tridi_&
   &PRECISION&
-  & (na, nev, ev, e, &
+  & (obj, na, nev, ev, e, &
 #if REALCASE == 1
     q, ldq, &
 #endif
@@ -254,7 +249,7 @@ function elpa_solve_evp_&
   &MATH_DATATYPE&
   &_&
   &PRECISION&
-  & (na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
+  & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
   !ttt1 = MPI_Wtime()
   !if(my_prow==0 .and. my_pcol==0 .and. summary_timings) write(error_unit,*) 'Time trans_ev_real:',ttt1-ttt0
   !time_evp_back = ttt1-ttt0
@@ -279,7 +274,7 @@ function elpa_solve_evp_&
     stop 1
   endif
-  call timer%stop("elpa_solve_evp_&
+  call obj%timer%stop("elpa_solve_evp_&
   &MATH_DATATYPE&
   &_1stage_&
   &PRECISION&
src/elpa1/elpa1_tools_template.X90
@@ -58,10 +58,12 @@
 subroutine v_add_s_&
 &PRECISION&
-&(v,n,s)
+&(obj, v,n,s)
   use precision
+  use elpa_api
   implicit none
+  class(elpa_t) :: obj
   integer(kind=ik) :: n
   real(kind=REAL_DATATYPE) :: v(n),s
   v(:) = v(:) + s
@@ -70,12 +72,14 @@
 subroutine distribute_global_column_&
 &PRECISION&
-&(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
+&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
   use precision
+  use elpa_api
   implicit none
+  class(elpa_t) :: obj
   real(kind=REAL_DATATYPE) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
   integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
   integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
@@ -100,7 +104,7 @@
 subroutine solve_secular_equation_&
 &PRECISION&
-&(n, i, d, z, delta, rho, dlam)
+&(obj, n, i, d, z, delta, rho, dlam)
   !-------------------------------------------------------------------------------
   ! This routine solves the secular equation of a symmetric rank 1 modified
   ! diagonal matrix:
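For orientation only (the remainder of this header comment is truncated in the extract above, so what follows is the standard textbook form and not a quotation of the ELPA source): for a rank-one modification diag(d) + rho * z * z^T of a diagonal matrix, each updated eigenvalue lambda is a root of the secular equation

1 + \rho \sum_{j=1}^{n} \frac{z_j^2}{d_j - \lambda} = 0

and a routine of this kind computes the root belonging to the i-th eigenvalue.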
@@ -150,26 +154,23 @@
   ! The computed lambda_I, the I-th updated eigenvalue.
   !-------------------------------------------------------------------------------
-#ifdef HAVE_DETAILED_TIMINGS
-  use timings
-#else
-  use timings_dummy
-#endif
   use precision
+  use elpa_api
   implicit none
+  class(elpa_t) :: obj
   integer(kind=ik) :: n, i
   real(kind=REAL_DATATYPE) :: d(n), z(n), delta(n), rho, dlam
   integer(kind=ik) :: iter
   real(kind=REAL_DATATYPE) :: a, b, x, y, dshift
   ! In order to obtain sufficient numerical accuracy we have to shift the problem
   ! either by d(i) or d(i+1), whichever is closer to the solution
   ! Upper and lower bound of the shifted solution interval are a and b
-  call timer%start("solve_secular_equation" // PRECISION_SUFFIX)
+  call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX)
   if (i==n) then
     ! Special case: Last eigenvalue
@@ -233,7 +234,7 @@
   dlam = x + dshift
   delta(:) = delta(:) - x
-  call timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
+  call obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
 end subroutine solve_secular_equation_&
 &PRECISION
@@ -247,7 +248,7 @@
 subroutine hh_transform_complex_&
 #endif
 &PRECISION &
-  (alpha, xnorm_sq, xf, tau)
+  (obj, alpha, xnorm_sq, xf, tau)
 #if REALCASE == 1
   ! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
 #endif
@@ -258,12 +259,9 @@
   ! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
   ! since this would be expensive for the parallel implementation.
   use precision
-#ifdef HAVE_DETAILED_TIMINGS
-  use timings
-#else
-  use timings_dummy
-#endif
+  use elpa_api
   implicit none
+  class(elpa_t) :: obj
 #if REALCASE == 1
   real(kind=REAL_DATATYPE), intent(inout) :: alpha
 #endif
@@ -281,7 +279,7 @@
   real(kind=REAL_DATATYPE) :: BETA
-  call timer%start("hh_transform_&
+  call obj%timer%start("hh_transform_&
   &MATH_DATATYPE&
   &" // &
   &PRECISION_SUFFIX )
@@ -345,7 +343,7 @@
     ALPHA = BETA
   endif
-  call timer%stop("hh_transform_&
+  call obj%timer%stop("hh_transform_&
   &MATH_DATATYPE&
   &" // &
   &PRECISION_SUFFIX )
src/elpa1/elpa1_trans_ev_template.X90
@@ -92,17 +92,13 @@
 &MATH_DATATYPE&
 &_&
 &PRECISION &
-  (na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
+  (obj, na, nqc, a_mat, lda, tau, q_mat, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, useGPU)
   use cuda_functions
   use iso_c_binding
-#ifdef HAVE_DETAILED_TIMINGS
-  use timings
-#else
-  use timings_dummy
-#endif
   use precision
   use elpa_api
   implicit none
+  class(elpa_t) :: obj
   integer(kind=ik), intent(in) :: na, nqc, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
 #if REALCASE == 1
   real(kind=REAL_DATATYPE), intent(in) :: tau(na)
@@ -167,18 +163,18 @@
 &_&
 &MATH_DATATYPE
-  call timer%start("trans_ev_&
+  call obj%timer%start("trans_ev_&
   &MATH_DATATYPE&
   &" // &
   &PRECISION_SUFFIX &
   )
-  call timer%start("mpi_communication")
+  call obj%timer%start("mpi_communication")
   call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
-  call timer%stop("mpi_communication")
+  call obj%timer%stop("mpi_communication")
   totalblocks = (na-1)/nblk + 1
   max_blocks_row = (totalblocks-1)/np_rows + 1
@@ -290,7 +286,7 @@