Commit eb4e54e1 authored by Pavel Kus
Browse files

unnecessary #ifdef HAVE_DETAILED_TIMINGS removed

parent f4b573dd
......@@ -56,6 +56,8 @@
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
......@@ -116,27 +118,19 @@
endif
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("merge_systems" // M_PRECISION_SUFFIX)
#endif
success = .true.
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
! If my processor column isn't in the requested set, do nothing
if (my_pcol<npc_0 .or. my_pcol>=npc_0+npc_n) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems" // M_PRECISION_SUFFIX)
#endif
return
endif
! Determine number of "next" and "prev" column for ring sends
......@@ -154,16 +148,12 @@
endif
call M_check_monotony_PRECISION(nm,d,'Input1',wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems" // M_PRECISION_SUFFIX)
#endif
return
endif
call M_check_monotony_PRECISION(na-nm,d(nm+1),'Input2',wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems" // M_PRECISION_SUFFIX)
#endif
return
endif
! Get global number of processors and my processor number.
......@@ -248,9 +238,7 @@
! Rearrange eigenvectors
call M_resort_ev_PRECISION(idx, na)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems" // M_PRECISION_SUFFIX)
#endif
return
ENDIF
......@@ -362,16 +350,12 @@
enddo
call M_check_monotony_PRECISION(na1,d1,'Sorted1', wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems" // M_PRECISION_SUFFIX)
#endif
return
endif
call M_check_monotony_PRECISION(na2,d2,'Sorted2', wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems" // M_PRECISION_SUFFIX)
#endif
return
endif
......@@ -423,9 +407,7 @@
info = 0
#ifdef WITH_OPENMP
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel" // M_PRECISION_SUFFIX)
#endif
!$OMP PARALLEL PRIVATE(i,my_thread,delta,s,info,j)
my_thread = omp_get_thread_num()
......@@ -471,9 +453,7 @@
#ifdef WITH_OPENMP
!$OMP END PARALLEL
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel" // M_PRECISION_SUFFIX)
#endif
do i = 0, max_threads-1
z(1:na1) = z(1:na1)*z_p(1:na1,i)
......@@ -492,9 +472,7 @@
#ifdef WITH_OPENMP
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("OpenMP parallel" // M_PRECISION_SUFFIX)
#endif
!$OMP PARALLEL DO PRIVATE(i) SHARED(na1, my_proc, n_procs, &
!$OMP d1,dbase, ddiff, z, ev_scale) &
......@@ -515,9 +493,7 @@
#ifdef WITH_OPENMP
!$OMP END PARALLEL DO
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("OpenMP parallel" // M_PRECISION_SUFFIX)
#endif
#endif
......@@ -536,9 +512,7 @@
call M_check_monotony_PRECISION(na,d,'Output', wantDebug, success)
if (.not.(success)) then
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems" // M_PRECISION_SUFFIX)
#endif
return
endif
! Eigenvector calculations
......@@ -639,15 +613,11 @@
np_rem = np_rem-1
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call MPI_Sendrecv_replace(qtmp1, l_rows*max_local_cols, M_MPI_REAL_PRECISION, &
np_next, 1111, np_prev, 1111, &
mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
call timer%stop("mpi_communication")
#endif /* WITH_MPI */
endif
......@@ -769,10 +739,7 @@
endif
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("merge_systems" // M_PRECISION_SUFFIX)
#endif
return
......@@ -803,6 +770,8 @@
subroutine M_resort_ev_PRECISION(idx_ev, nLength)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
......@@ -844,24 +813,16 @@
qtmp(1:l_rows,nc) = q(l_rqs:l_rqe,lc1)
else
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_send(q(l_rqs,lc1), l_rows, M_MPI_REAL_PRECISION, pc2, mod(i,4096), mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
endif
else if (pc2==my_pcol) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_recv(qtmp(1,nc), l_rows, M_MPI_REAL_PRECISION, pc1, mod(i,4096), mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
qtmp(1:l_rows,nc) = q(l_rqs:l_rqe,lc1)
#endif /* WITH_MPI */
......@@ -893,6 +854,8 @@
subroutine M_transform_columns_PRECISION(col1, col2)
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
......@@ -915,15 +878,11 @@
q(l_rqs:l_rqe,lc1) = tmp(1:l_rows)
else
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_sendrecv(q(l_rqs,lc1), l_rows, M_MPI_REAL_PRECISION, pc2, 1, &
tmp, l_rows, M_MPI_REAL_PRECISION, pc2, 1, &
mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
tmp(1:l_rows) = q(l_rqs:l_rqe,lc1)
#endif /* WITH_MPI */
......@@ -931,15 +890,11 @@
endif
else if (pc2==my_pcol) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_sendrecv(q(l_rqs,lc2), l_rows, M_MPI_REAL_PRECISION, pc1, 1, &
tmp, l_rows, M_MPI_REAL_PRECISION, pc1, 1, &
mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
tmp(1:l_rows) = q(l_rqs:l_rqe,lc2)
#endif /* WITH_MPI */
......@@ -956,6 +911,8 @@
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
implicit none
......@@ -967,13 +924,9 @@
! Do an mpi_allreduce over processor rows
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(z, tmp, n, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
tmp = z
#endif /* WITH_MPI */
......@@ -986,13 +939,9 @@
! If all processor columns are involved, we can use mpi_allreduce
if (npc_n==np_cols) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(tmp, z, n, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
tmp = z
#endif /* WITH_MPI */
......@@ -1005,14 +954,10 @@
do np = 1, npc_n
z(:) = z(:) + tmp(:)
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call MPI_Sendrecv_replace(z, n, M_MPI_REAL_PRECISION, np_next, 1111, np_prev, 1111, &
mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
enddo
end subroutine M_global_gather_PRECISION
......@@ -1022,6 +967,8 @@
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
implicit none
......@@ -1034,13 +981,9 @@
! Do an mpi_allreduce over processor rows
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(z, tmp, n, M_MPI_REAL_PRECISION, MPI_PROD, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
tmp = z
#endif /* WITH_MPI */
......@@ -1053,13 +996,9 @@
! If all processor columns are involved, we can use mpi_allreduce
if (npc_n==np_cols) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(tmp, z, n, M_MPI_REAL_PRECISION, MPI_PROD, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
z = tmp
#endif /* WITH_MPI */
......@@ -1073,13 +1012,9 @@
z(1:n) = tmp(1:n)
do np = npc_0+1, npc_0+npc_n-1
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_recv(tmp, n, M_MPI_REAL_PRECISION, np, 1111, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
tmp(1:n) = z(1:n)
#endif /* WITH_MPI */
......@@ -1087,25 +1022,17 @@
enddo
do np = npc_0+1, npc_0+npc_n-1
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_send(z, n, M_MPI_REAL_PRECISION, np, 1111, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
enddo
else
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_send(tmp, n, M_MPI_REAL_PRECISION, npc_0, 1111, mpi_comm_cols, mpierr)
call mpi_recv(z ,n, M_MPI_REAL_PRECISION, npc_0, 1111, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
z(1:n) = tmp(1:n)
#endif /* WITH_MPI */
......
......@@ -235,6 +235,8 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
implicit none
......@@ -272,28 +274,20 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
nlen = limits(np_off+nprocs) - noff
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
if (my_pcol==np_off) then
do n=np_off+np1,np_off+nprocs-1
call mpi_send(d(noff+1), nmid, M_MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
enddo
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_recv(d(noff+1), nmid, M_MPI_REAL_PRECISION, np_off, 1, mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! d(noff+1:noff+1+nmid-1) = d(noff+1:noff+1+nmid-1)
#endif /* WITH_MPI */
......@@ -302,26 +296,18 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
if (my_pcol==np_off+np1) then
do n=np_off,np_off+np1-1
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_send(d(noff+nmid+1), nlen-nmid, M_MPI_REAL_PRECISION, n, 1, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
enddo
endif
if (my_pcol>=np_off .and. my_pcol<np_off+np1) then
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_recv(d(noff+nmid+1), nlen-nmid, M_MPI_REAL_PRECISION, np_off+np1, 1,mpi_comm_cols, MPI_STATUS_IGNORE, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) = d(noff+nmid+1:noff+nmid+1+nlen-nmid-1)
#endif /* WITH_MPI */
......@@ -380,14 +366,10 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
character(200) :: errorMessage
call timer%start("solve_tridi_col" // M_PRECISION_SUFFIX)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
success = .true.
! Calculate the number of subdivisions needed.
......@@ -487,15 +469,11 @@ subroutine M_solve_tridi_PRECISION( na, nev, d, e, q, ldq, nblk, matrixCols, mpi
noff = limits(np)
nlen = limits(np+1)-noff
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call MPI_Bcast(d(noff+1), nlen, M_MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
qmat2 = qmat1
call MPI_Bcast(qmat2, max_size*max_size, M_MPI_REAL_PRECISION, np, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
! qmat2 = qmat1 ! is this correct
#endif /* WITH_MPI */
......
......@@ -140,6 +140,8 @@
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
......@@ -155,9 +157,7 @@
! Upper and lower bound of the shifted solution interval are a and b
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_secular_equation" // M_PRECISION_SUFFIX)
#endif
if (i==n) then
! Special case: Last eigenvalue
......@@ -221,9 +221,7 @@
dlam = x + dshift
delta(:) = delta(:) - x
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_secular_equation" // M_PRECISION_SUFFIX)
#endif
end subroutine M_solve_secular_equation_PRECISION
!-------------------------------------------------------------------------------
......
......@@ -127,17 +127,13 @@
logical :: successCUDA
call timer%start("trans_ev_complex" // PRECISION_SUFFIX)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
totalblocks = (na-1)/nblk + 1
max_blocks_row = (totalblocks-1)/np_rows + 1
......@@ -233,15 +229,11 @@
enddo
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
if (nb>0) &
call MPI_Bcast(hvb, nb, MPI_COMPLEX_PRECISION, cur_pcol, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
nb = 0
......@@ -270,13 +262,9 @@
nc = nc+n
enddo
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
if (nc>0) call mpi_allreduce(h1, h2, nc, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
if (nc>0) h2=h1
......@@ -342,13 +330,9 @@
check_memcpy_cuda("trans_ev", successCUDA)
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(tmp1, tmp2, nstor*l_cols, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
! copy back tmp2 - after reduction...
if (useGPU) then
......
......@@ -126,16 +126,12 @@
call timer%start("trans_ev_real" // M_PRECISION_SUFFIX)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
totalblocks = (na-1)/nblk + 1
max_blocks_row = (totalblocks-1)/np_rows + 1
max_blocks_col = ((nqc-1)/nblk)/np_cols + 1 ! Columns of q_mat!
......@@ -224,14 +220,10 @@
enddo
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
if (nb>0) &
call MPI_Bcast(hvb, nb, M_MPI_REAL_PRECISION, cur_pcol, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#endif /* WITH_MPI */
nb = 0
do ic=ics,ice
......@@ -262,13 +254,9 @@
nc = nc+n
enddo
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
if (nc>0) call mpi_allreduce( h1, h2, nc, M_MPI_REAL_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
if (nc>0) h2 = h1
#endif /* WITH_MPI */
......@@ -326,9 +314,7 @@
endif !l_rows>0
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
! In the legacy GPU version, this allreduce was omitted. But probably it has to be done for GPU + MPI
! todo: does it need to be copied whole? Wouldn't a part be sufficient?
if (useGPU) then
......@@ -346,9 +332,7 @@
check_memcpy_cuda("trans_ev", successCUDA)
endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else
! tmp2 = tmp1
#endif
......
......@@ -285,13 +285,9 @@
aux1(2) = 0.
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(aux1, aux2, 2, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
aux2 = aux1
#endif /* WITH_MPI */
......@@ -318,14 +314,10 @@
endif !(my_pcol==pcol(istep, nblk, np_cols))
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call timer%start("mpi_communication")
! Broadcast the Householder vector (and tau) along columns
call MPI_Bcast(v_row, l_rows+1, MPI_COMPLEX_PRECISION, pcol(istep, nblk, np_cols), mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
call timer%stop("mpi_communication")
#endif /* WITH_MPI */
......@@ -475,13 +467,9 @@
if (l_cols>0) then
tmp(1:l_cols) = u_col(1:l_cols)
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(tmp, u_col, l_cols, MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_rows, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
u_col = tmp
......@@ -502,13 +490,9 @@
if (l_cols>0) &
xc = dot_product(v_col(1:l_cols),u_col(1:l_cols))
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("mpi_communication")
#endif
call mpi_allreduce(xc, vav, 1 , MPI_COMPLEX_PRECISION, MPI_SUM, mpi_comm_cols, mpierr)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("mpi_communication")
#endif
#else /* WITH_MPI */
vav = xc
......@@ -613,25 +597,17 @@
endif
#ifdef WITH_MPI
#ifdef HAVE_DETAILED_TIMINGS