Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
f7c5807e
Commit
f7c5807e
authored
Oct 18, 2016
by
Pavel Kus
Browse files
detailed timing fixes, dummy timer introduced (to avoid ifdefs)
Conflicts: src/elpa1_solve_tridi_real_template.X90
parent
6dac0853
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
f7c5807e
...
...
@@ -75,6 +75,9 @@ if HAVE_DETAILED_TIMINGS
src/ftimings/time.c
\
src/ftimings/virtual_memory.c
\
src/ftimings/papi.c
else
libelpa@SUFFIX@
_private_la_SOURCES
+=
\
src/timer_dummy.F90
endif
if
WITH_GPU_VERSION
...
...
configure.ac
View file @
f7c5807e
...
...
@@ -187,7 +187,7 @@ AC_MSG_CHECKING(whether ELPA should be build with more detailed timing support)
AC_ARG_ENABLE([timings],
AS_HELP_STRING([--enable-timing],
[more detailed timing, default no.]),
[],
[
enable_timings=yes
],
[enable_timings=no])
AC_MSG_RESULT([${enable_timings}])
...
...
src/elpa1_merge_systems_real_template.X90
View file @
f7c5807e
...
...
@@ -117,7 +117,7 @@
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%start("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
success = .true.
#ifdef HAVE_DETAILED_TIMINGS
...
...
@@ -135,7 +135,7 @@
if (my_pcol<npc_0 .or. my_pcol>=npc_0+npc_n) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%stop("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
return
endif
...
...
@@ -155,14 +155,14 @@
call M_check_monotony_PRECISSION(nm,d,'Input1',wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%stop("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
return
endif
call M_check_monotony_PRECISSION(na-nm,d(nm+1),'Input2',wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%stop("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
return
endif
...
...
@@ -249,7 +249,7 @@
call M_resort_ev_PRECISSION(idx, na)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%stop("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
return
...
...
@@ -363,14 +363,14 @@
call M_check_monotony_PRECISSION(na1,d1,'Sorted1', wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%stop("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
return
endif
call M_check_monotony_PRECISSION(na2,d2,'Sorted2', wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%stop("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
return
endif
...
...
@@ -424,7 +424,7 @@
#ifdef WITH_OPENMP
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
!$OMP PARALLEL PRIVATE(i,my_thread,delta,s,info,j)
...
...
@@ -472,7 +472,7 @@
!$OMP END PARALLEL
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
do i = 0, max_threads-1
...
...
@@ -493,7 +493,7 @@
#ifdef WITH_OPENMP
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
!$OMP PARALLEL DO PRIVATE(i) SHARED(na1, my_proc, n_procs, &
...
...
@@ -516,7 +516,7 @@
!$OMP END PARALLEL DO
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
#endif
...
...
@@ -537,7 +537,7 @@
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%stop("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
return
endif
...
...
@@ -770,7 +770,7 @@
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems"
+
M_PRECISSION_SUFFIX)
call timer%stop("merge_systems"
//
M_PRECISSION_SUFFIX)
#endif
...
...
src/elpa1_solve_tridi_real_template.X90
View file @
f7c5807e
...
...
@@ -57,6 +57,8 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
...
...
@@ -79,19 +81,15 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_tridi" + M_PRECISSION_SUFFIX)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_tridi" // M_PRECISSION_SUFFIX)
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
success = .true.
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
...
...
@@ -118,9 +116,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
! Scalapack supports it but delivers no results for these columns,
! which is rather annoying
if (nc==0) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_tridi" + M_PRECISSION_SUFFIX)
#endif
call timer%stop("solve_tridi" // M_PRECISSION_SUFFIX)
if (wantDebug) write(error_unit,*) 'ELPA1_solve_tridi: ERROR: Problem contains processor column with zero width'
success = .false.
return
...
...
@@ -148,9 +144,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
call M_solve_tridi_col_PRECISSION(l_cols, nev1, nc, d(nc+1), e(nc+1), q, ldq, nblk, &
matrixCols, mpi_comm_rows, wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_tridi" + M_PRECISSION_SUFFIX)
#endif
call timer%stop("solve_tridi" // M_PRECISSION_SUFFIX)
return
endif
! If there is only 1 processor column, we are done
...
...
@@ -162,9 +156,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_tridi" + M_PRECISSION_SUFFIX)
#endif
call timer%stop("solve_tridi" // M_PRECISSION_SUFFIX)
return
endif
...
...
@@ -225,9 +217,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
! Recursively merge sub problems
call M_merge_recursive_PRECISSION(0, np_cols, wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_tridi" + M_PRECISSION_SUFFIX)
#endif
call timer%stop("solve_tridi" // M_PRECISSION_SUFFIX)
return
endif
...
...
@@ -237,9 +227,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_tridi" + M_PRECISSION_SUFFIX)
#endif
call timer%stop("solve_tridi" // M_PRECISSION_SUFFIX)
return
contains
...
...
@@ -364,6 +352,8 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
! Works best if the number of processor rows is a power of 2!
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
...
...
@@ -389,9 +379,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_tridi_col" + M_PRECISSION_SUFFIX)
#endif
call timer%start("solve_tridi_col" // M_PRECISSION_SUFFIX)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
...
...
@@ -576,9 +564,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_tridi_col" + M_PRECISSION_SUFFIX)
#endif
call timer%stop("solve_tridi_col" // M_PRECISSION_SUFFIX)
end subroutine M_solve_tridi_col_PRECISSION
...
...
@@ -588,6 +574,8 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
! Takes precautions if DSTEDC fails or if the eigenvalues are not ordered correctly.
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
...
...
@@ -606,9 +594,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_tridi_single" + M_PRECISSION_SUFFIX)
#endif
call timer%start("solve_tridi_single" // M_PRECISSION_SUFFIX)
success = .true.
allocate(ds(nlen), es(nlen), stat=istat, errmsg=errorMessage)
...
...
@@ -702,9 +688,7 @@ subroutine M_solve_tridi_PRECISSION( na, nev, d, e, q, ldq, nblk, matrixCols, mp
endif
enddo
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_tridi_single" + M_PRECISSION_SUFFIX)
#endif
call timer%stop("solve_tridi_single" // M_PRECISSION_SUFFIX)
end subroutine M_solve_tridi_single_problem_PRECISSION
src/elpa1_tools_real_template.X90
View file @
f7c5807e
...
...
@@ -156,7 +156,7 @@
! Upper and lower bound of the shifted solution interval are a and b
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_secular_equation"
+
M_PRECISSION_SUFFIX)
call timer%start("solve_secular_equation"
//
M_PRECISSION_SUFFIX)
#endif
if (i==n) then
...
...
@@ -222,7 +222,7 @@
dlam = x + dshift
delta(:) = delta(:) - x
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_secular_equation"
+
M_PRECISSION_SUFFIX)
call timer%stop("solve_secular_equation"
//
M_PRECISSION_SUFFIX)
#endif
end subroutine M_solve_secular_equation_PRECISSION
...
...
src/elpa1_trans_ev_real_template.X90
View file @
f7c5807e
...
...
@@ -111,7 +111,7 @@
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("trans_ev_real"
+
M_PRECISSION_SUFFIX)
call timer%start("trans_ev_real"
//
M_PRECISSION_SUFFIX)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
...
...
@@ -249,7 +249,9 @@
tmat = 0
if (l_rows>0) &
call M_PRECISSION_SYRK('U', 'T', nstor, l_rows, M_CONST_1_0, hvm, ubound(hvm,dim=1), M_CONST_0_0, tmat, max_stored_rows)
call M_PRECISSION_SYRK('U', 'T', nstor, l_rows, &
M_CONST_1_0, hvm, ubound(hvm,dim=1), &
M_CONST_0_0, tmat, max_stored_rows)
nc = 0
do n=1,nstor-1
...
...
@@ -272,7 +274,9 @@
nc = 0
tmat(1,1) = tau(ice-nstor+1)
do n=1,nstor-1
call M_PRECISSION_TRMV('L', 'T', 'N', n, tmat, max_stored_rows, h2(nc+1), 1)
call M_PRECISSION_TRMV('L', 'T', 'N', n, &
tmat, max_stored_rows, &
h2(nc+1), 1)
tmat(n+1,1:n) = -h2(nc+1:nc+n)*tau(ice-nstor+n+1)
tmat(n+1,n+1) = tau(ice-nstor+n+1)
nc = nc+n
...
...
@@ -288,8 +292,10 @@
! Q = Q - V * T * V**T * Q
if (l_rows>0) then
call M_PRECISSION_GEMM('T', 'N', nstor, l_cols, l_rows, M_CONST_1_0, hvm, ubound(hvm,dim=1), &
q, ldq, M_CONST_0_0, tmp1, nstor)
call M_PRECISSION_GEMM('T', 'N', nstor, l_cols, l_rows, &
M_CONST_1_0, hvm, ubound(hvm,dim=1), &
q, ldq, &
M_CONST_0_0, tmp1, nstor)
else
! if (useGPU) then
...
...
@@ -316,13 +322,21 @@
if (l_rows>0) then
#ifdef WITH_MPI
call M_PRECISSION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, M_CONST_1_0, tmat, max_stored_rows, tmp2, nstor)
call M_PRECISSION_GEMM('N', 'N', l_rows, l_cols, nstor, -M_CONST_1_0, hvm, ubound(hvm,dim=1), &
tmp2, nstor, M_CONST_1_0, q, ldq)
call M_PRECISSION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
M_CONST_1_0, tmat, max_stored_rows, &
tmp2, nstor)
call M_PRECISSION_GEMM('N', 'N', l_rows, l_cols, nstor, &
-M_CONST_1_0, hvm, ubound(hvm,dim=1), &
tmp2, nstor, &
M_CONST_1_0, q, ldq)
#else
call M_PRECISSION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, M_CONST_1_0, tmat, max_stored_rows, tmp1, nstor)
call M_PRECISSION_GEMM('N', 'N', l_rows, l_cols, nstor, -M_CONST_1_0, hvm, ubound(hvm,dim=1), &
tmp1, nstor, M_CONST_1_0, q, ldq)
call M_PRECISSION_TRMM('L', 'L', 'N', 'N', nstor, l_cols, &
M_CONST_1_0, tmat, max_stored_rows, &
tmp1, nstor)
call M_PRECISSION_GEMM('N', 'N', l_rows, l_cols, nstor, &
-M_CONST_1_0, hvm, ubound(hvm,dim=1), &
tmp1, nstor, &
M_CONST_1_0, q, ldq)
#endif
endif
nstor = 0
...
...
@@ -342,7 +356,7 @@
! endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("trans_ev_real"
+
M_PRECISSION_SUFFIX)
call timer%stop("trans_ev_real"
//
M_PRECISSION_SUFFIX)
#endif
end subroutine M_trans_ev_real_PRECISSION
src/elpa1_tridiag_real_template.X90
View file @
f7c5807e
...
...
@@ -86,6 +86,8 @@
use iso_c_binding
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
...
...
@@ -155,9 +157,7 @@
integer(kind=ik) :: istat
character(200) :: errorMessage
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("tridiag_real" + M_PRECISSION_SUFFIX)
#endif
call timer%start("tridiag_real" // M_PRECISSION_SUFFIX)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
...
...
@@ -448,9 +448,7 @@
! else !do not use GPU
#ifdef WITH_OPENMP
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel")
#endif
!$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,l_col_beg,l_col_end,j,l_row_beg,l_row_end)
my_thread = omp_get_thread_num()
...
...
@@ -531,9 +529,7 @@
#ifdef WITH_OPENMP
!$OMP END PARALLEL
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel")
#endif
do i=0,max_threads-1
u_col(1:l_cols) = u_col(1:l_cols) + uc_p(1:l_cols,i)
u_row(1:l_rows) = u_row(1:l_rows) + ur_p(1:l_rows,i)
...
...
@@ -708,6 +704,31 @@
print *,"tridiag_real: error when deallocating uv_stored_cols "//errorMessage
stop
endif
if (useGPU) then
! todo: should we leave a on the device for further use?
successCUDA = cuda_free(a_dev)
call check_dealloc_CUDA("tridiag_real", "a_dev", successCUDA)
successCUDA = cuda_free(v_row_dev)
call check_dealloc_CUDA("tridiag_real", "v_row_dev", successCUDA)
successCUDA = cuda_free(u_row_dev)
call check_dealloc_CUDA("tridiag_real", "u_row_dev", successCUDA)
successCUDA = cuda_free(v_col_dev)
call check_dealloc_CUDA("tridiag_real", "v_col_dev", successCUDA)
successCUDA = cuda_free(u_col_dev)
call check_dealloc_CUDA("tridiag_real", "u_col_dev", successCUDA)
successCUDA = cuda_free(vu_stored_rows_dev)
call check_dealloc_CUDA("tridiag_real", "vu_stored_rows_dev", successCUDA)
successCUDA = cuda_free(uv_stored_cols_dev)
call check_dealloc_CUDA("tridiag_real", "uv_stored_cols_dev", successCUDA)
endif
! todo dealocate at the GPU
...
...
@@ -740,9 +761,7 @@
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("tridiag_real" + M_PRECISSION_SUFFIX)
#endif
call timer%stop("tridiag_real" // M_PRECISSION_SUFFIX)
contains
...
...
src/elpa2_compute_real_template.X90
View file @
f7c5807e
...
...
@@ -152,7 +152,7 @@
integer(kind=ik) :: mystart, myend, m_way, n_way, work_per_thread, m_id, n_id, n_threads, &
ii, pp, transformChunkSize
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("bandred_real"
+
M_PRECISSION_SUFFIX)
call timer%start("bandred_real"
//
M_PRECISSION_SUFFIX)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
...
...
@@ -1206,7 +1206,7 @@
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("bandred_real"
+
M_PRECISSION_SUFFIX)
call timer%stop("bandred_real"
//
M_PRECISSION_SUFFIX)
#endif
end subroutine M_bandred_real_PRECISSION ! slower for gpu on 10000 10000 ???
...
...
@@ -1232,7 +1232,7 @@
real(kind=REAL_DATATYPE) :: h1(n*n), h2(n*n)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("symm_matrix_allreduce"
+
M_PRECISSION_SUFFIX)
call timer%start("symm_matrix_allreduce"
//
M_PRECISSION_SUFFIX)
#endif
nc = 0
...
...
@@ -1275,7 +1275,7 @@
! enddo
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("symm_matrix_allreduce"
+
M_PRECISSION_SUFFIX)
call timer%stop("symm_matrix_allreduce"
//
M_PRECISSION_SUFFIX)
#endif
end subroutine M_symm_matrix_allreduce_PRECISSION
...
...
@@ -1357,7 +1357,7 @@
logical :: successCUDA
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("trans_ev_band_to_full_real"
+
M_PRECISSION_SUFFIX)
call timer%start("trans_ev_band_to_full_real"
//
M_PRECISSION_SUFFIX)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
...
...
@@ -1858,7 +1858,7 @@
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("trans_ev_band_to_full_real"
+
M_PRECISSION_SUFFIX)
call timer%stop("trans_ev_band_to_full_real"
//
M_PRECISSION_SUFFIX)
#endif
end subroutine M_trans_ev_band_to_full_real_PRECISSION
...
...
@@ -1940,7 +1940,7 @@
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("tridiag_band_real"
+
M_PRECISSION_SUFFIX)
call timer%start("tridiag_band_real"
//
M_PRECISSION_SUFFIX)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
...
...
@@ -2309,7 +2309,7 @@
! After the first iteration it is also the place to exchange the last row
! with MPI calls
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
!$omp parallel do private(my_thread, my_block_s, my_block_e, iblk, ns, ne, hv, tau, &
...
...
@@ -2401,7 +2401,7 @@
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
if (iter==1) then
...
...
@@ -2875,7 +2875,7 @@
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("tridiag_band_real"
+
M_PRECISSION_SUFFIX)
call timer%stop("tridiag_band_real"
//
M_PRECISSION_SUFFIX)
#endif
end subroutine M_tridiag_band_real_PRECISSION
...
...
@@ -3027,7 +3027,7 @@
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("trans_ev_tridi_to_band_real"
+
M_PRECISSION_SUFFIX)
call timer%start("trans_ev_tridi_to_band_real"
//
M_PRECISSION_SUFFIX)
#endif
#ifdef WITH_GPU_VERSION
...
...
@@ -3227,7 +3227,7 @@
! This is not for speed, but because we want the matrix a in the memory and
! in the cache of the correct thread (if possible)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
!$omp parallel do private(my_thread), schedule(static, 1)
do my_thread = 1, max_threads
...
...
@@ -3236,7 +3236,7 @@
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
#endif /* WITH_OPENMP */
...
...
@@ -3268,7 +3268,7 @@
#endif /* WITH_MPI */
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
!$omp parallel do private(my_thread), schedule(static, 1)
...
...
@@ -3279,7 +3279,7 @@
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
#else /* WITH_OPENMP */
...
...
@@ -3327,7 +3327,7 @@
#ifdef WITH_OPENMP
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
if (useGPU) then
...
...
@@ -3342,7 +3342,7 @@
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
#else /* WITH_OPENMP */
...
...
@@ -3426,7 +3426,7 @@
#endif /* WITH_MPI */
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
!$omp parallel do private(my_thread), schedule(static, 1)
...
...
@@ -3436,7 +3436,7 @@
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif
#else /* WITH_OPENMP */
...
...
@@ -3858,7 +3858,7 @@
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
!$omp parallel do private(my_thread, n_off, b_len, b_off), schedule(static, 1)
...
...
@@ -3871,7 +3871,7 @@
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
#else /* WITH_OPENMP */
...
...
@@ -4003,7 +4003,7 @@
#ifdef WITH_OPENMP
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
if (useGPU) then
...
...
@@ -4032,7 +4032,7 @@
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%stop("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
#else /* WITH_OPENMP */
...
...
@@ -4133,7 +4133,7 @@
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel"
+
M_PRECISSION_SUFFIX)
call timer%start("OpenMP parallel"
//
M_PRECISSION_SUFFIX)
#endif /* HAVE_DETAILED_TIMINGS */
!$omp parallel do private(my_thread, b_len, b_off), schedule(static, 1)
...
...
@@ -4150,7 +4150,7 @@
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel"
+
M_PRECISSION_SUFFIX)