Commit 5cf0ae52 authored by Andreas Marek
Browse files

Dummy timer in elpa2

parent 65d959e7
......@@ -99,6 +99,8 @@
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use cuda_functions
......@@ -144,24 +146,18 @@
character(200) :: errorMessage
integer(kind=ik) :: istat
logical :: successCUDA
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%start("bandred_complex_double")
#else
call timer%start("bandred_complex_single")
#endif
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
success = .true.
......@@ -439,18 +435,14 @@
#endif
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_allreduce(aux1, aux2, 2, MPI_DOUBLE_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#else
call mpi_allreduce(aux1, aux2, 2, MPI_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
aux2 = aux1
......@@ -485,18 +477,14 @@
vr(lr+1) = tau
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call MPI_Bcast(vr, lr+1, MPI_DOUBLE_COMPLEX, cur_pcol, mpi_comm_cols, mpierr)
#else
call MPI_Bcast(vr, lr+1, MPI_COMPLEX, cur_pcol, mpi_comm_cols, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
vmr(1:lr,lc) = vr(1:lr)
......@@ -523,9 +511,7 @@
! Get global dot products
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
if (nlc>0) call mpi_allreduce(aux1, aux2, nlc, MPI_DOUBLE_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
......@@ -544,9 +530,7 @@
endif
enddo
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! if (nlc>0) aux2=aux1
......@@ -790,18 +774,14 @@
print *,"bandred_complex: error when allocating tmp "//errorMessage
stop
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_allreduce(umc, tmp, l_cols*n_cols, MPI_DOUBLE_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#else
call mpi_allreduce(umc, tmp, l_cols*n_cols, MPI_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
umc(1:l_cols,1:n_cols) = tmp(1:l_cols,1:n_cols)
deallocate(tmp, stat=istat, errmsg=errorMessage)
......@@ -1152,13 +1132,11 @@
endif
endif ! use GPU
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%stop("bandred_complex_double")
#else
call timer%stop("bandred_complex_single")
#endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine bandred_complex_double
......
......@@ -98,6 +98,8 @@
use elpa1_compute
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
#ifdef WITH_OPENMP
use omp_lib
......@@ -151,20 +153,16 @@
integer(kind=ik) :: mystart, myend, m_way, n_way, work_per_thread, m_id, n_id, n_threads, &
ii, pp, transformChunkSize
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("bandred_real" // M_PRECISION_SUFFIX)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
success = .true.
......@@ -488,17 +486,13 @@
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(aux1, aux2, 2, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
aux2 = aux1 ! this should be optimized
#endif
#endif
vnorm2 = aux2(1)
vrl = aux2(2)
......@@ -523,13 +517,9 @@
vr(lr+1) = tau
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call MPI_Bcast(vr, lr+1, M_MPI_REAL_PRECISION, cur_pcol, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
if (useGPU) then
......@@ -574,13 +564,9 @@
!$omp barrier
!$omp single
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
if (mynlc>0) call mpi_allreduce(aux1, aux2, mynlc, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
if (mynlc>0) aux2 = aux1
#endif /* WITH_MPI */
......@@ -619,13 +605,9 @@
! Get global dot products
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
if (nlc>0) call mpi_allreduce(aux1, aux2, nlc, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
if (nlc>0) aux2=aux1
#endif /* WITH_MPI */
......@@ -862,15 +844,11 @@
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(umcCUDA, tmpCUDA, l_cols*n_cols, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, ierr)
umcCUDA(1 : l_cols * n_cols) = tmpCUDA(1 : l_cols * n_cols)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! tmpCUDA(1 : l_cols * n_cols) = umcCUDA(1 : l_cols * n_cols)
......@@ -983,14 +961,10 @@
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(umcCPU, tmpCPU, l_cols*n_cols, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
umcCPU(1:l_cols,1:n_cols) = tmpCPU(1:l_cols,1:n_cols)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! tmpCPU(1:l_cols,1:n_cols) = umcCPU(1:l_cols,1:n_cols)
#endif /* WITH_MPI */
......@@ -1205,9 +1179,7 @@
endif
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("bandred_real" // M_PRECISION_SUFFIX)
#endif
end subroutine M_bandred_real_PRECISION ! slower for gpu on 10000 10000 ???
......
......@@ -73,6 +73,8 @@ module elpa2_workload
subroutine determine_workload(na, nb, nprocs, limits)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
......@@ -82,15 +84,12 @@ module elpa2_workload
integer(kind=ik) :: i
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("determine_workload")
#endif
if (na <= 0) then
limits(:) = 0
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("determine_workload")
#endif
return
endif
......@@ -105,9 +104,7 @@ module elpa2_workload
enddo
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("determine_workload")
#endif
end subroutine
!---------------------------------------------------------------------------------------------------
! divide_band: sets the work distribution in band
......@@ -116,6 +113,8 @@ module elpa2_workload
subroutine divide_band(nblocks_total, n_pes, block_limits)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
......@@ -125,9 +124,7 @@ module elpa2_workload
integer(kind=ik) :: n, nblocks, nblocks_left
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("divide_band")
#endif
block_limits(0) = 0
if (nblocks_total < n_pes) then
......@@ -149,9 +146,7 @@ module elpa2_workload
enddo
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("divide_band")
#endif
end subroutine
end module elpa2_workload
......@@ -7,6 +7,8 @@
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
......@@ -19,9 +21,7 @@
integer(kind=ik) :: i, nc, mpierr
real(kind=REAL_DATATYPE) :: h1(n*n), h2(n*n)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("symm_matrix_allreduce" // M_PRECISION_SUFFIX)
#endif
nc = 0
do i=1,n
......@@ -30,13 +30,9 @@
enddo
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(h1, h2, nc, M_MPI_REAL_PRECISION, MPI_SUM, comm, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
nc = 0
do i=1,n
a(1:i,i) = h2(nc+1:nc+i)
......@@ -62,9 +58,7 @@
! nc = nc+i
! enddo
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("symm_matrix_allreduce" // M_PRECISION_SUFFIX)
#endif
end subroutine M_symm_matrix_allreduce_PRECISION
......
......@@ -42,6 +42,8 @@
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use cuda_functions
use iso_c_binding
......@@ -78,24 +80,18 @@
character(200) :: errorMessage
logical :: successCUDA
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%start("trans_ev_band_to_full_complex_double")
#else
call timer%start("trans_ev_band_to_full_complex_single")
#endif
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
max_blocks_row = ((na -1)/nblk)/np_rows + 1 ! Rows of A
max_blocks_col = ((nqc-1)/nblk)/np_cols + 1 ! Columns of q!
......@@ -237,18 +233,14 @@
if (lc==n_cols .or. mod(ncol,nblk)==0) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call MPI_Bcast(hvb(ns+1), nb-ns, MPI_DOUBLE_COMPLEX, pcol(ncol, nblk, np_cols), mpi_comm_cols, mpierr)
#else
call MPI_Bcast(hvb(ns+1), nb-ns, MPI_COMPLEX, pcol(ncol, nblk, np_cols), mpi_comm_cols, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
ns = nb
......@@ -334,18 +326,14 @@
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_allreduce(tmp1, tmp2, n_cols*l_cols, MPI_DOUBLE_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#else
call mpi_allreduce(tmp1, tmp2, n_cols*l_cols, MPI_COMPLEX, MPI_SUM, mpi_comm_rows, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! tmp2(1:n_cols*l_cols) = tmp1(1:n_cols*l_cols)
......@@ -490,13 +478,11 @@
!print *,"trans_ev_band_to_full_complex: error when deallocating tmat_temp "//errorMessage
!endif
endif ! use GPU
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%stop("trans_ev_band_to_full_complex_double")
#else
call timer%stop("trans_ev_band_to_full_complex_single")
#endif
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
end subroutine trans_ev_band_to_full_complex_double
......
......@@ -36,6 +36,8 @@
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
use cuda_functions
......@@ -75,19 +77,15 @@
character(200) :: errorMessage
logical :: successCUDA
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("trans_ev_band_to_full_real" // M_PRECISION_SUFFIX)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
max_blocks_row = ((na -1)/nblk)/np_rows + 1 ! Rows of A
max_blocks_col = ((nqc-1)/nblk)/np_cols + 1 ! Columns of q!
......@@ -192,13 +190,9 @@
if (lc==n_cols .or. mod(ncol,nblk)==0) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call MPI_Bcast(hvb(ns+1), nb-ns, M_MPI_REAL_PRECISION, pcol(ncol, nblk, np_cols), mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
ns = nb
......@@ -272,13 +266,9 @@
! endif
!#endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(tmp1, tmp2, n_cols*l_cols, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! tmp2(1:n_cols*l_cols) = tmp1(1:n_cols*l_cols)
#endif /* WITH_MPI */
......@@ -393,13 +383,9 @@
if (lc==n_cols .or. mod(ncol,nblk)==0) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call MPI_Bcast(hvb(ns+1), nb-ns, M_MPI_REAL_PRECISION, pcol(ncol, nblk, np_cols), mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
ns = nb
......@@ -432,14 +418,10 @@
call M_PRECISION_GEMM('T', 'N', t_rows, t_cols, l_rows, M_CONST_1_0, hvm(1,1), max_local_rows, hvm(1,(i-1)*nbw+1), &
max_local_rows, M_CONST_0_0, t_tmp, cwy_blocking)
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(t_tmp, t_tmp2, cwy_blocking*nbw, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
call M_PRECISION_TRMM('L', 'U', 'N', 'N', t_rows, t_cols, M_CONST_1_0, tmat_complete, cwy_blocking, t_tmp2, cwy_blocking)
call M_PRECISION_TRMM('R', 'U', 'N', 'N', t_rows, t_cols, -M_CONST_1_0, tmat_complete(t_rows+1,t_rows+1), cwy_blocking, &
......@@ -477,14 +459,10 @@
endif ! l_rows>0
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(tmp1, tmp2, n_cols*l_cols, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows ,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
if (l_rows>0) then
call M_PRECISION_TRMM('L', 'U', 'T', 'N', n_cols, l_cols, M_CONST_1_0, tmat_complete, cwy_blocking, tmp2, n_cols)
......@@ -576,9 +554,7 @@
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("trans_ev_band_to_full_real" // M_PRECISION_SUFFIX)
#endif
end subroutine M_trans_ev_band_to_full_real_PRECISION
......
......@@ -34,6 +34,8 @@
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use elpa2_workload
use precision
......@@ -86,16 +88,12 @@
! ! dummies for calling redist_band
! real*8 :: r_a(1,1), r_ab(1,1)
#ifdef HAVE_DETAILED_TIMINGS
#ifdef DOUBLE_PRECISION_COMPLEX
call timer%start("tridiag_band_complex_double")
#else
call timer%start("tridiag_band_complex_single")
#endif
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm,my_pe,mpierr)
call mpi_comm_size(mpi_comm,n_pes,mpierr)
......@@ -103,9 +101,7 @@
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
!#ifdef WITH_GPU_VERSION
! t_1 = 0
! t_2 = 0
......@@ -120,13 +116,9 @@
global_id(:,:) = 0
global_id(my_prow, my_pcol) = my_pe
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(mpi_in_place, global_id, np_rows*np_cols, mpi_integer, mpi_sum, mpi_comm, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif
! Total number of blocks in the band:
......@@ -261,9 +253,7 @@
if (mod(n-1,np_cols) == my_pcol .and. local_size>0 .and. nx>1) then
num_chunks = num_chunks+1
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_irecv(hh_trans_complex(1,num_hh_vecs+1), nb*local_size, MPI_COMPLEX16, nt, &
10+n-block_limits(nt), mpi_comm, ireq_hhr(num_chunks), mpierr)
......@@ -271,9 +261,7 @@
call mpi_irecv(hh_trans_complex(1,num_hh_vecs+1), nb*local_size, MPI_COMPLEX8, nt, &
10+n-block_limits(nt), mpi_comm, ireq_hhr(num_chunks), mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! careful: non-blocking recv; the data copy must be done at wait or send
! hh_trans_complex(1:nb*local_size,num_hh_vecs+1) = hh_send(1:nb*hh_cnt(iblk),1,iblk)
......@@ -386,17 +374,13 @@
! Only the PE owning the diagonal does that (sending 1 element of the subdiagonal block also)
ab_s(1:nb+1) = ab(1:nb+1,na_s-n_off)
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_isend(ab_s, nb+1, MPI_COMPLEX16, my_pe-1, 1, mpi_comm, ireq_ab, mpierr)
#else
call mpi_isend(ab_s, nb+1, MPI_COMPLEX8, my_pe-1, 1, mpi_comm, ireq_ab, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
endif
......@@ -456,17 +440,13 @@
#ifdef WITH_OPENMP
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
#ifdef DOUBLE_PRECISION_COMPLEX
call mpi_recv(hv, nb, MPI_COMPLEX16, my_pe-1, 2, mpi_comm, MPI_STATUS_IGNORE, mpierr)
#else
call mpi_recv(hv, nb, MPI_COMPLEX8, my_pe-1, 2, mpi_comm, MPI_STATUS_IGNORE, mpierr)
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
hv(1:nb) = hv_s(1:nb)
#endif /* WITH_MPI */
......