Commit 0d08507c authored by Andreas Marek

Rename OPENMP preprocessor macro

parent 1aa89171
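
The change is a pure rename: every WITH_OPENMP preprocessor guard in the Fortran templates, together with the matching autoconf conditional and define, becomes WITH_OPENMP_TRADITIONAL. A minimal sketch of the guard pattern being renamed (a hypothetical demo program, not ELPA code; in ELPA the macro is supplied at configure time via AC_DEFINE):

! openmp_guard_demo.F90 -- hypothetical example of the renamed guard.
! Compiled with -DWITH_OPENMP_TRADITIONAL and the compiler's OpenMP flag,
! the threaded branch is used; without the macro the same source builds serially.
program openmp_guard_demo
#ifdef WITH_OPENMP_TRADITIONAL
  use omp_lib
#endif
  implicit none
  integer :: nthreads

#ifdef WITH_OPENMP_TRADITIONAL
  nthreads = omp_get_max_threads()   ! threads available to OpenMP parallel regions
#else
  nthreads = 1                       ! serial fallback when the macro is not defined
#endif
  print *, 'running with', nthreads, 'thread(s)'
end program openmp_guard_demo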
@@ -93,9 +93,9 @@ AC_ARG_ENABLE([openmp],
 ],
 [enable_openmp=no])
 AC_MSG_RESULT([${enable_openmp}])
-AM_CONDITIONAL([WITH_OPENMP],[test x"$enable_openmp" = x"yes"])
+AM_CONDITIONAL([WITH_OPENMP_TRADITIONAL],[test x"$enable_openmp" = x"yes"])
 if test x"${enable_openmp}" = x"yes"; then
-AC_DEFINE([WITH_OPENMP], [1], [use OpenMP threading])
+AC_DEFINE([WITH_OPENMP_TRADITIONAL], [1], [use OpenMP threading])
 fi
...
@@ -64,7 +64,7 @@ subroutine merge_systems_&
 use elpa_abstract_impl
 use elpa_blas_interfaces
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 use omp_lib
 #endif
 implicit none
@@ -93,7 +93,7 @@ subroutine merge_systems_&
 dbase(na), ddiff(na), ev_scale(na), tmp(na)
 real(kind=REAL_DATATYPE) :: d1u(na), zu(na), d1l(na), zl(na)
 real(kind=REAL_DATATYPE), allocatable :: qtmp1(:,:), qtmp2(:,:), ev(:,:)
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 real(kind=REAL_DATATYPE), allocatable :: z_p(:,:)
 #endif
@@ -122,7 +122,7 @@ subroutine merge_systems_&
 &PRECISION&
 &_real
 integer(kind=ik), intent(in) :: max_threads
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 integer(kind=ik) :: my_thread
 allocate(z_p(na,0:max_threads-1), stat=istat, errmsg=errorMessage)
@@ -442,7 +442,7 @@ subroutine merge_systems_&
 ! Solve secular equation
 z(1:na1) = 1
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 z_p(1:na1,:) = 1
 #endif
 dbase(1:na1) = 0
@@ -450,7 +450,7 @@ subroutine merge_systems_&
 info = 0
 infoBLAS = int(info,kind=BLAS_KIND)
-!#ifdef WITH_OPENMP
+!#ifdef WITH_OPENMP_TRADITIONAL
 !
 ! call obj%timer%start("OpenMP parallel" // PRECISION_SUFFIX)
 !!$OMP PARALLEL PRIVATE(i,my_thread,delta,s,info,infoBLAS,j)
@@ -474,7 +474,7 @@ subroutine merge_systems_&
 ! Compute updated z
-!#ifdef WITH_OPENMP
+!#ifdef WITH_OPENMP_TRADITIONAL
 ! do j=1,na1
 ! if (i/=j) z_p(j,my_thread) = z_p(j,my_thread)*( delta(j) / (d1(j)-d1(i)) )
 ! enddo
@@ -500,7 +500,7 @@ subroutine merge_systems_&
 ddiff(i) = delta(i)
 endif
 enddo
-!#ifdef WITH_OPENMP
+!#ifdef WITH_OPENMP_TRADITIONAL
 !!$OMP END PARALLEL
 !
 ! call obj%timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
@@ -526,7 +526,7 @@ subroutine merge_systems_&
 ! Calculate scale factors for eigenvectors
 ev_scale(:) = 0.0_rk
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 call obj%timer%start("OpenMP parallel" // PRECISION_SUFFIX)
@@ -548,7 +548,7 @@ subroutine merge_systems_&
 &(obj, d1, dbase, ddiff, z, ev_scale(i), na1,i)
 ! ev_scale(i) = ev_scale_val
 enddo
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$OMP END PARALLEL DO
 call obj%timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
@@ -888,7 +888,7 @@ subroutine merge_systems_&
 deallocate(ev, qtmp1, qtmp2, stat=istat, errmsg=errorMessage)
 check_deallocate("merge_systems: ev, qtmp1, qtmp2",istat, errorMessage)
 endif !very outer test (na1==1 .or. na1==2)
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 deallocate(z_p, stat=istat, errmsg=errorMessage)
 check_deallocate("merge_systems: z_p",istat, errorMessage)
 #endif
...
@@ -201,7 +201,7 @@ function elpa_solve_evp_&
 call mpi_comm_rank(int(mpi_comm_all,kind=MPI_KIND), my_peMPI, mpierr)
 my_pe = int(my_peMPI,kind=c_int)
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 ! store the number of OpenMP threads used in the calling function
 ! restore this at the end of ELPA 2
 omp_threads_caller = omp_get_max_threads()
@@ -263,7 +263,7 @@ function elpa_solve_evp_&
 endif
 ! restore original OpenMP settings
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 ! store the number of OpenMP threads used in the calling function
 ! restore this at the end of ELPA 2
 call omp_set_num_threads(omp_threads_caller)
@@ -562,7 +562,7 @@ function elpa_solve_evp_&
 call nvtxRangePop()
 #endif
 ! restore original OpenMP settings
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 ! store the number of OpenMP threads used in the calling function
 ! restore this at the end of ELPA 2
 call omp_set_num_threads(omp_threads_caller)
...
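
Several of the routines touched in this commit share the guarded idiom visible above: record the caller's OpenMP thread count on entry and restore it on every exit path once ELPA has adjusted the thread count internally. A small self-contained sketch of that save/restore pattern (a hypothetical subroutine with an illustrative internal thread count, not the ELPA implementation):

! Hypothetical sketch of the save/restore idiom used in the guarded sections above.
subroutine run_with_internal_thread_count()
#ifdef WITH_OPENMP_TRADITIONAL
  use omp_lib
#endif
  implicit none
#ifdef WITH_OPENMP_TRADITIONAL
  integer :: omp_threads_caller

  ! store the number of OpenMP threads used in the calling function
  omp_threads_caller = omp_get_max_threads()
  ! run the body with an internally chosen thread count (value is illustrative)
  call omp_set_num_threads(4)
#endif

  ! ... work that may open its own OpenMP parallel regions ...

#ifdef WITH_OPENMP_TRADITIONAL
  ! restore the caller's original OpenMP setting before returning
  call omp_set_num_threads(omp_threads_caller)
#endif
end subroutine run_with_internal_thread_count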
@@ -145,7 +145,7 @@ subroutine tridiag_&
 integer(kind=c_intptr_t) :: a_offset
 integer(kind=ik), intent(in) :: max_threads
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 integer(kind=ik) :: my_thread, n_threads, n_iter
 #endif
@@ -170,7 +170,7 @@ subroutine tridiag_&
 ! pattern: u1,v1,u2,v2,u3,v3,....
 MATH_DATATYPE(kind=rck), allocatable :: uv_stored_cols(:,:)
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 MATH_DATATYPE(kind=rck), allocatable :: ur_p(:,:), uc_p(:,:)
 #endif
@@ -355,7 +355,7 @@ subroutine tridiag_&
 endif
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 allocate(ur_p(max_local_rows,0:max_threads-1), stat=istat, errmsg=errorMessage)
 call check_alloc("tridiag_&
 &MATH_DATATYPE ", "ur_p", istat, errorMessage)
@@ -363,7 +363,7 @@ subroutine tridiag_&
 allocate(uc_p(max_local_cols,0:max_threads-1), stat=istat, errmsg=errorMessage)
 call check_alloc("tridiag_&
 &MATH_DATATYPE ", "uc_p", istat, errorMessage)
-#endif /* WITH_OPENMP */
+#endif /* WITH_OPENMP_TRADITIONAL */
 tmp = 0
 v_row = 0
@@ -579,7 +579,7 @@ subroutine tridiag_&
 check_memcpy_cuda("tridiag: v_row_dev", successCUDA)
 endif ! useGU
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 call obj%timer%start("OpenMP parallel")
 !$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,l_col_beg,l_col_end,j,l_row_beg,l_row_end)
@@ -592,7 +592,7 @@ subroutine tridiag_&
 ! first calculate A*v part of (A + VU**T + UV**T)*v
 uc_p(1:l_cols,my_thread) = 0.
 ur_p(1:l_rows,my_thread) = 0.
-#endif /* WITH_OPENMP */
+#endif /* WITH_OPENMP_TRADITIONAL */
 do i= 0, (istep-2)/tile_size
 l_col_beg = i*l_cols_per_tile+1
 l_col_end = min(l_cols,(i+1)*l_cols_per_tile)
@@ -601,7 +601,7 @@ subroutine tridiag_&
 l_row_beg = j*l_rows_per_tile+1
 l_row_end = min(l_rows,(j+1)*l_rows_per_tile)
 if (l_row_end < l_row_beg) cycle
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 if (mod(n_iter,n_threads) == my_thread) then
 if (wantDebug) call obj%timer%start("blas")
 call PRECISION_GEMV(BLAS_TRANS_OR_CONJ, &
@@ -628,7 +628,7 @@ subroutine tridiag_&
 if (wantDebug) call obj%timer%stop("blas")
 endif
 n_iter = n_iter+1
-#else /* WITH_OPENMP */
+#else /* WITH_OPENMP_TRADITIONAL */
 ! multiplication by blocks is efficient only for CPU
 ! for GPU we introduced 2 other ways, either by stripes (more simmilar to the original
@@ -658,7 +658,7 @@ subroutine tridiag_&
 if (wantDebug) call obj%timer%stop("blas")
 endif ! not useGPU
-#endif /* WITH_OPENMP */
+#endif /* WITH_OPENMP_TRADITIONAL */
 enddo ! j=0,i
 enddo ! i=0,(istep-2)/tile_size
@@ -738,7 +738,7 @@ subroutine tridiag_&
 check_memcpy_cuda("tridiag: u_row_dev 1", successCUDA)
 endif ! useGPU
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$OMP END PARALLEL
 call obj%timer%stop("OpenMP parallel")
@@ -746,7 +746,7 @@ subroutine tridiag_&
 u_col(1:l_cols) = u_col(1:l_cols) + uc_p(1:l_cols,i)
 u_row(1:l_rows) = u_row(1:l_rows) + ur_p(1:l_rows,i)
 enddo
-#endif /* WITH_OPENMP */
+#endif /* WITH_OPENMP_TRADITIONAL */
 ! second calculate (VU**T + UV**T)*v part of (A + VU**T + UV**T)*v
 if (n_stored_vecs > 0) then
...
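
The tridiag_ hunks above illustrate the strategy the renamed macro labels as "traditional" OpenMP: each thread accumulates partial results into its own column of a work array (ur_p, uc_p) inside the parallel region, and the per-thread slices are summed serially afterwards. A condensed, self-contained sketch of that accumulate-then-reduce pattern (hypothetical names and sizes, not the ELPA routine):

! Hypothetical sketch of the per-thread accumulation + manual reduction pattern.
program threadprivate_reduction_demo
#ifdef WITH_OPENMP_TRADITIONAL
  use omp_lib
#endif
  implicit none
  integer, parameter :: n = 1000
  integer :: i, it, n_threads, my_thread
  real(8) :: u(n)
  real(8), allocatable :: u_p(:,:)

#ifdef WITH_OPENMP_TRADITIONAL
  n_threads = omp_get_max_threads()
#else
  n_threads = 1
#endif
  allocate(u_p(n, 0:n_threads-1))

#ifdef WITH_OPENMP_TRADITIONAL
  !$omp parallel private(my_thread, i)
  my_thread = omp_get_thread_num()
  u_p(:, my_thread) = 0.d0                        ! each thread zeroes its own slice
  !$omp do
  do i = 1, n
    u_p(i, my_thread) = u_p(i, my_thread) + 1.d0  ! stand-in for the real GEMV work
  enddo
  !$omp end do
  !$omp end parallel
#else
  u_p(:, 0) = 1.d0
#endif

  ! manual reduction over the per-thread slices, as done in tridiag_ after END PARALLEL
  u(:) = 0.d0
  do it = 0, n_threads-1
    u(:) = u(:) + u_p(:, it)
  enddo
  print *, 'sum check:', sum(u)                   ! expect n * 1.0
  deallocate(u_p)
end program threadprivate_reduction_demo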
@@ -82,7 +82,7 @@
 &PRECISION&
 &")
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 ! store the number of OpenMP threads used in the calling function
 ! restore this at the end of ELPA 2
 omp_threads_caller = omp_get_max_threads()
@@ -330,7 +330,7 @@
 enddo
 ! restore original OpenMP settings
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 ! store the number of OpenMP threads used in the calling function
 ! restore this at the end of ELPA 2
 call omp_set_num_threads(omp_threads_caller)
...
@@ -75,7 +75,7 @@ subroutine elpa_reduce_add_vectors_&
 !-------------------------------------------------------------------------------
 use precision
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 use omp_lib
 #endif
 use elpa_mpi
@@ -132,7 +132,7 @@ subroutine elpa_reduce_add_vectors_&
 check_allocate("elpa_reduce_add: aux2", istat, errorMessage)
 aux1(:) = 0
 aux2(:) = 0
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !call omp_set_num_threads(nrThreads)
 !$omp parallel private(ips, ipt, auxstride, lc, i, k, ns, nl) num_threads(nrThreads)
@@ -147,7 +147,7 @@ subroutine elpa_reduce_add_vectors_&
 if (myps == ips) then
 ! k = 0
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp do
 #endif
 do lc=1,nvc
@@ -161,7 +161,7 @@ subroutine elpa_reduce_add_vectors_&
 enddo
 k = nvc * auxstride
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp barrier
 !$omp master
 #endif
@@ -184,13 +184,13 @@ subroutine elpa_reduce_add_vectors_&
 if (k>0) aux2 = aux1
 #endif /* WITH_MPI */
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp end master
 !$omp barrier
 #endif
 if (mypt == ipt) then
 ! k = 0
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp do
 #endif
 do lc=1,nvc
@@ -207,7 +207,7 @@ subroutine elpa_reduce_add_vectors_&
 endif
 enddo
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp end parallel
 #endif
...
@@ -92,7 +92,7 @@
 matrixRows = obj%local_nrows
 matrixCols = obj%local_ncols
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 ! store the number of OpenMP threads used in the calling function
 ! restore this at the end of ELPA 2
 omp_threads_caller = omp_get_max_threads()
@@ -135,7 +135,7 @@
 ! restore original OpenMP settings
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 ! store the number of OpenMP threads used in the calling function
 ! restore this at the end of ELPA 2
 call omp_set_num_threads(omp_threads_caller)
...
@@ -87,7 +87,7 @@ subroutine ROUTINE_NAME&
 !-------------------------------------------------------------------------------
 use precision
 use elpa_abstract_impl
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 use omp_lib
 #endif
 use elpa_mpi
@@ -147,7 +147,7 @@ subroutine ROUTINE_NAME&
 allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ), stat=istat, errmsg=errorMessage)
 check_allocate("elpa_transpose_vectors: aux", istat, errorMessage)
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
 #endif
 do n = 0, lcm_s_t-1
@@ -163,7 +163,7 @@ subroutine ROUTINE_NAME&
 if (nblks_comm .ne. 0) then
 if (myps == ips) then
 ! k = 0
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp do
 #endif
 do lc=1,nvc
@@ -177,7 +177,7 @@ subroutine ROUTINE_NAME&
 enddo
 endif
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp barrier
 !$omp master
 #endif
@@ -198,7 +198,7 @@ subroutine ROUTINE_NAME&
 call obj%timer%stop("mpi_communication")
 #endif /* WITH_MPI */
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp end master
 !$omp barrier
@@ -222,7 +222,7 @@ subroutine ROUTINE_NAME&
 endif
 enddo
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp end parallel
 #endif
 deallocate(aux, stat=istat, errmsg=errorMessage)
...
@@ -78,7 +78,7 @@ subroutine elpa_transpose_vectors_ss_&
 !-------------------------------------------------------------------------------
 use precision
 use elpa_abstract_impl
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 use omp_lib
 #endif
 use elpa_mpi
@@ -130,7 +130,7 @@ subroutine elpa_transpose_vectors_ss_&
 allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
 check_allocate("elpa_transpose_vectors_ss: aux", istat, errorMessage)
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
 #endif
 do n = 0, lcm_s_t-1
@@ -146,7 +146,7 @@ subroutine elpa_transpose_vectors_ss_&
 if (nblks_comm .ne. 0) then
 if (myps == ips) then
 ! k = 0
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
 !$omp do
 #endif
 do lc=1,nvc