Commit 0d08507c authored by Andreas Marek
Browse files

Rename OPENMP preprocessor macro

parent 1aa89171
......@@ -93,9 +93,9 @@ AC_ARG_ENABLE([openmp],
],
[enable_openmp=no])
AC_MSG_RESULT([${enable_openmp}])
AM_CONDITIONAL([WITH_OPENMP],[test x"$enable_openmp" = x"yes"])
AM_CONDITIONAL([WITH_OPENMP_TRADITIONAL],[test x"$enable_openmp" = x"yes"])
if test x"${enable_openmp}" = x"yes"; then
AC_DEFINE([WITH_OPENMP], [1], [use OpenMP threading])
AC_DEFINE([WITH_OPENMP_TRADITIONAL], [1], [use OpenMP threading])
fi
......
......@@ -64,7 +64,7 @@ subroutine merge_systems_&
use elpa_abstract_impl
use elpa_blas_interfaces
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
use omp_lib
#endif
implicit none
......@@ -93,7 +93,7 @@ subroutine merge_systems_&
dbase(na), ddiff(na), ev_scale(na), tmp(na)
real(kind=REAL_DATATYPE) :: d1u(na), zu(na), d1l(na), zl(na)
real(kind=REAL_DATATYPE), allocatable :: qtmp1(:,:), qtmp2(:,:), ev(:,:)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
real(kind=REAL_DATATYPE), allocatable :: z_p(:,:)
#endif
......@@ -122,7 +122,7 @@ subroutine merge_systems_&
&PRECISION&
&_real
integer(kind=ik), intent(in) :: max_threads
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
integer(kind=ik) :: my_thread
allocate(z_p(na,0:max_threads-1), stat=istat, errmsg=errorMessage)
......@@ -442,7 +442,7 @@ subroutine merge_systems_&
! Solve secular equation
z(1:na1) = 1
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
z_p(1:na1,:) = 1
#endif
dbase(1:na1) = 0
......@@ -450,7 +450,7 @@ subroutine merge_systems_&
info = 0
infoBLAS = int(info,kind=BLAS_KIND)
!#ifdef WITH_OPENMP
!#ifdef WITH_OPENMP_TRADITIONAL
!
! call obj%timer%start("OpenMP parallel" // PRECISION_SUFFIX)
!!$OMP PARALLEL PRIVATE(i,my_thread,delta,s,info,infoBLAS,j)
......@@ -474,7 +474,7 @@ subroutine merge_systems_&
! Compute updated z
!#ifdef WITH_OPENMP
!#ifdef WITH_OPENMP_TRADITIONAL
! do j=1,na1
! if (i/=j) z_p(j,my_thread) = z_p(j,my_thread)*( delta(j) / (d1(j)-d1(i)) )
! enddo
......@@ -500,7 +500,7 @@ subroutine merge_systems_&
ddiff(i) = delta(i)
endif
enddo
!#ifdef WITH_OPENMP
!#ifdef WITH_OPENMP_TRADITIONAL
!!$OMP END PARALLEL
!
! call obj%timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
......@@ -526,7 +526,7 @@ subroutine merge_systems_&
! Calculate scale factors for eigenvectors
ev_scale(:) = 0.0_rk
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
call obj%timer%start("OpenMP parallel" // PRECISION_SUFFIX)
......@@ -548,7 +548,7 @@ subroutine merge_systems_&
&(obj, d1, dbase, ddiff, z, ev_scale(i), na1,i)
! ev_scale(i) = ev_scale_val
enddo
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$OMP END PARALLEL DO
call obj%timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
......@@ -888,7 +888,7 @@ subroutine merge_systems_&
deallocate(ev, qtmp1, qtmp2, stat=istat, errmsg=errorMessage)
check_deallocate("merge_systems: ev, qtmp1, qtmp2",istat, errorMessage)
endif !very outer test (na1==1 .or. na1==2)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
deallocate(z_p, stat=istat, errmsg=errorMessage)
check_deallocate("merge_systems: z_p",istat, errorMessage)
#endif
......
......@@ -201,7 +201,7 @@ function elpa_solve_evp_&
call mpi_comm_rank(int(mpi_comm_all,kind=MPI_KIND), my_peMPI, mpierr)
my_pe = int(my_peMPI,kind=c_int)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
omp_threads_caller = omp_get_max_threads()
......@@ -263,7 +263,7 @@ function elpa_solve_evp_&
endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
! i.e. set the number of OpenMP threads back to the value that was
! stored from the calling function (omp_threads_caller)
call omp_set_num_threads(omp_threads_caller)
......@@ -562,7 +562,7 @@ function elpa_solve_evp_&
call nvtxRangePop()
#endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
! i.e. set the number of OpenMP threads back to the value that was
! stored from the calling function (omp_threads_caller)
call omp_set_num_threads(omp_threads_caller)
......
......@@ -145,7 +145,7 @@ subroutine tridiag_&
integer(kind=c_intptr_t) :: a_offset
integer(kind=ik), intent(in) :: max_threads
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
integer(kind=ik) :: my_thread, n_threads, n_iter
#endif
......@@ -170,7 +170,7 @@ subroutine tridiag_&
! pattern: u1,v1,u2,v2,u3,v3,....
MATH_DATATYPE(kind=rck), allocatable :: uv_stored_cols(:,:)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
MATH_DATATYPE(kind=rck), allocatable :: ur_p(:,:), uc_p(:,:)
#endif
......@@ -355,7 +355,7 @@ subroutine tridiag_&
endif
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
allocate(ur_p(max_local_rows,0:max_threads-1), stat=istat, errmsg=errorMessage)
call check_alloc("tridiag_&
&MATH_DATATYPE ", "ur_p", istat, errorMessage)
......@@ -363,7 +363,7 @@ subroutine tridiag_&
allocate(uc_p(max_local_cols,0:max_threads-1), stat=istat, errmsg=errorMessage)
call check_alloc("tridiag_&
&MATH_DATATYPE ", "uc_p", istat, errorMessage)
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
tmp = 0
v_row = 0
......@@ -579,7 +579,7 @@ subroutine tridiag_&
check_memcpy_cuda("tridiag: v_row_dev", successCUDA)
endif ! useGU
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
call obj%timer%start("OpenMP parallel")
!$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,l_col_beg,l_col_end,j,l_row_beg,l_row_end)
......@@ -592,7 +592,7 @@ subroutine tridiag_&
! first calculate A*v part of (A + VU**T + UV**T)*v
uc_p(1:l_cols,my_thread) = 0.
ur_p(1:l_rows,my_thread) = 0.
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
do i= 0, (istep-2)/tile_size
l_col_beg = i*l_cols_per_tile+1
l_col_end = min(l_cols,(i+1)*l_cols_per_tile)
......@@ -601,7 +601,7 @@ subroutine tridiag_&
l_row_beg = j*l_rows_per_tile+1
l_row_end = min(l_rows,(j+1)*l_rows_per_tile)
if (l_row_end < l_row_beg) cycle
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
if (mod(n_iter,n_threads) == my_thread) then
if (wantDebug) call obj%timer%start("blas")
call PRECISION_GEMV(BLAS_TRANS_OR_CONJ, &
......@@ -628,7 +628,7 @@ subroutine tridiag_&
if (wantDebug) call obj%timer%stop("blas")
endif
n_iter = n_iter+1
#else /* WITH_OPENMP */
#else /* WITH_OPENMP_TRADITIONAL */
! multiplication by blocks is efficient only for CPU
! for GPU we introduced 2 other ways, either by stripes (more similar to the original
......@@ -658,7 +658,7 @@ subroutine tridiag_&
if (wantDebug) call obj%timer%stop("blas")
endif ! not useGPU
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
enddo ! j=0,i
enddo ! i=0,(istep-2)/tile_size
......@@ -738,7 +738,7 @@ subroutine tridiag_&
check_memcpy_cuda("tridiag: u_row_dev 1", successCUDA)
endif ! useGPU
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$OMP END PARALLEL
call obj%timer%stop("OpenMP parallel")
......@@ -746,7 +746,7 @@ subroutine tridiag_&
u_col(1:l_cols) = u_col(1:l_cols) + uc_p(1:l_cols,i)
u_row(1:l_rows) = u_row(1:l_rows) + ur_p(1:l_rows,i)
enddo
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
! second calculate (VU**T + UV**T)*v part of (A + VU**T + UV**T)*v
if (n_stored_vecs > 0) then
......
......@@ -82,7 +82,7 @@
&PRECISION&
&")
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
omp_threads_caller = omp_get_max_threads()
......@@ -330,7 +330,7 @@
enddo
! restore original OpenMP settings
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
! i.e. set the number of OpenMP threads back to the value that was
! stored from the calling function (omp_threads_caller)
call omp_set_num_threads(omp_threads_caller)
......
......@@ -75,7 +75,7 @@ subroutine elpa_reduce_add_vectors_&
!-------------------------------------------------------------------------------
use precision
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
use omp_lib
#endif
use elpa_mpi
......@@ -132,7 +132,7 @@ subroutine elpa_reduce_add_vectors_&
check_allocate("elpa_reduce_add: aux2", istat, errorMessage)
aux1(:) = 0
aux2(:) = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!call omp_set_num_threads(nrThreads)
!$omp parallel private(ips, ipt, auxstride, lc, i, k, ns, nl) num_threads(nrThreads)
......@@ -147,7 +147,7 @@ subroutine elpa_reduce_add_vectors_&
if (myps == ips) then
! k = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp do
#endif
do lc=1,nvc
......@@ -161,7 +161,7 @@ subroutine elpa_reduce_add_vectors_&
enddo
k = nvc * auxstride
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp barrier
!$omp master
#endif
......@@ -184,13 +184,13 @@ subroutine elpa_reduce_add_vectors_&
if (k>0) aux2 = aux1
#endif /* WITH_MPI */
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp end master
!$omp barrier
#endif
if (mypt == ipt) then
! k = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp do
#endif
do lc=1,nvc
......@@ -207,7 +207,7 @@ subroutine elpa_reduce_add_vectors_&
endif
enddo
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp end parallel
#endif
......
......@@ -92,7 +92,7 @@
matrixRows = obj%local_nrows
matrixCols = obj%local_ncols
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
omp_threads_caller = omp_get_max_threads()
......@@ -135,7 +135,7 @@
! restore original OpenMP settings
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
! i.e. set the number of OpenMP threads back to the value that was
! stored from the calling function (omp_threads_caller)
call omp_set_num_threads(omp_threads_caller)
......
......@@ -87,7 +87,7 @@ subroutine ROUTINE_NAME&
!-------------------------------------------------------------------------------
use precision
use elpa_abstract_impl
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
use omp_lib
#endif
use elpa_mpi
......@@ -147,7 +147,7 @@ subroutine ROUTINE_NAME&
allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ), stat=istat, errmsg=errorMessage)
check_allocate("elpa_transpose_vectors: aux", istat, errorMessage)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
#endif
do n = 0, lcm_s_t-1
......@@ -163,7 +163,7 @@ subroutine ROUTINE_NAME&
if (nblks_comm .ne. 0) then
if (myps == ips) then
! k = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp do
#endif
do lc=1,nvc
......@@ -177,7 +177,7 @@ subroutine ROUTINE_NAME&
enddo
endif
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp barrier
!$omp master
#endif
......@@ -198,7 +198,7 @@ subroutine ROUTINE_NAME&
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp end master
!$omp barrier
......@@ -222,7 +222,7 @@ subroutine ROUTINE_NAME&
endif
enddo
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp end parallel
#endif
deallocate(aux, stat=istat, errmsg=errorMessage)
......
......@@ -78,7 +78,7 @@ subroutine elpa_transpose_vectors_ss_&
!-------------------------------------------------------------------------------
use precision
use elpa_abstract_impl
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
use omp_lib
#endif
use elpa_mpi
......@@ -130,7 +130,7 @@ subroutine elpa_transpose_vectors_ss_&
allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
check_allocate("elpa_transpose_vectors_ss: aux", istat, errorMessage)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
#endif
do n = 0, lcm_s_t-1
......@@ -146,7 +146,7 @@ subroutine elpa_transpose_vectors_ss_&
if (nblks_comm .ne. 0) then
if (myps == ips) then
! k = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp do
#endif
do lc=1,nvc
......@@ -160,7 +160,7 @@ subroutine elpa_transpose_vectors_ss_&
enddo
endif
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp barrier
!$omp master
#endif
......@@ -181,7 +181,7 @@ subroutine elpa_transpose_vectors_ss_&
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp end master
!$omp barrier
......@@ -201,7 +201,7 @@ subroutine elpa_transpose_vectors_ss_&
endif
enddo
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
!$omp end parallel
#endif
deallocate(aux, stat=istat, errmsg=errorMessage)
......
......@@ -45,19 +45,19 @@
subroutine compute_hh_trafo_&
&MATH_DATATYPE&
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
&_openmp_&
#else
&_&
#endif
&PRECISION &
(obj, useGPU, wantDebug, a, a_dev, stripe_width, a_dim2, stripe_count, max_threads, &
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
l_nev, &
#endif
a_off, nbw, max_blk_size, bcast_buffer, bcast_buffer_dev, &
hh_tau_dev, kernel_flops, kernel_time, n_times, off, ncols, istripe, &
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
my_thread, thread_width, &
#else
last_stripe_width, &
......@@ -129,7 +129,7 @@
integer(kind=ik), intent(in) :: stripe_width,a_dim2,stripe_count
integer(kind=ik), intent(in) :: max_threads
#ifndef WITH_OPENMP
#ifndef WITH_OPENMP_TRADITIONAL
integer(kind=ik), intent(in) :: last_stripe_width
#if REALCASE == 1
! real(kind=C_DATATYPE_KIND) :: a(stripe_width,a_dim2,stripe_count)
......@@ -140,7 +140,7 @@
complex(kind=C_DATATYPE_KIND),pointer :: a(:,:,:)
#endif
#else /* WITH_OPENMP */
#else /* WITH_OPENMP_TRADITIONAL */
integer(kind=ik), intent(in) :: l_nev, thread_width
#if REALCASE == 1
! real(kind=C_DATATYPE_KIND) :: a(stripe_width,a_dim2,stripe_count,max_threads)
......@@ -152,7 +152,7 @@
complex(kind=C_DATATYPE_KIND),pointer :: a(:,:,:,:)
#endif
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
integer(kind=ik), intent(in) :: kernel
......@@ -163,7 +163,7 @@
! Private variables in OMP regions (my_thread) should better be in the argument list!
integer(kind=ik) :: off, ncols, istripe
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
integer(kind=ik) :: my_thread, noff
#endif
integer(kind=ik) :: j, nl, jj, jjj, n_times
......@@ -213,7 +213,7 @@
if (wantDebug) call obj%timer%start("compute_hh_trafo_&
&MATH_DATATYPE&
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
&_openmp" // &
#else
&" // &
......@@ -222,15 +222,15 @@
)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
if (my_thread==1) then
#endif
ttt = mpi_wtime()
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
endif
#endif
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
#if REALCASE == 1
if (kernel .eq. ELPA_2STAGE_REAL_GPU) then
......@@ -248,11 +248,11 @@
stop 1
endif
#endif
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
#ifndef WITH_OPENMP
#ifndef WITH_OPENMP_TRADITIONAL
nl = merge(stripe_width, last_stripe_width, istripe<stripe_count)
#else /* WITH_OPENMP */
#else /* WITH_OPENMP_TRADITIONAL */
if (istripe<stripe_count) then
nl = stripe_width
......@@ -262,7 +262,7 @@
if (nl<=0) then
if (wantDebug) call obj%timer%stop("compute_hh_trafo_&
&MATH_DATATYPE&
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
&_openmp" // &
#else
&" // &
......@@ -273,7 +273,7 @@
return
endif
endif
#endif /* not WITH_OPENMP */
#endif /* not WITH_OPENMP_TRADITIONAL */
#if REALCASE == 1
! GPU kernel real
......@@ -340,7 +340,7 @@
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
#ifdef USE_ASSUMED_SIZE
call double_hh_trafo_&
......@@ -358,7 +358,7 @@
nbw, nl, stripe_width, nbw)
#endif
#else /* WITH_OPENMP */
#else /* WITH_OPENMP_TRADITIONAL */
#ifdef USE_ASSUMED_SIZE
call double_hh_trafo_&
......@@ -374,7 +374,7 @@
&PRECISION&
& (a(1:stripe_width,j+off+a_off-1:j+off+a_off+nbw-1,istripe),w(1:nbw,1:6), nbw, nl, stripe_width, nbw)
#endif
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
enddo
......@@ -395,7 +395,7 @@
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
ttt = mpi_wtime()
do j = ncols, 1, -1
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
#ifdef USE_ASSUMED_SIZE
call single_hh_trafo_&
......@@ -412,7 +412,7 @@
bcast_buffer(1:nbw,j+off), nbw, nl, stripe_width)
#endif
#else /* WITH_OPENMP */
#else /* WITH_OPENMP_TRADITIONAL */
#ifdef USE_ASSUMED_SIZE
call single_hh_trafo_&
......@@ -428,7 +428,7 @@
& (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off), &
nbw, nl, stripe_width)
#endif
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
enddo
#ifndef WITH_FIXED_COMPLEX_KERNEL
......@@ -449,7 +449,7 @@
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
#ifdef USE_ASSUMED_SIZE
call double_hh_trafo_&
......@@ -466,7 +466,7 @@
#endif
#else /* WITH_OPENMP */
#else /* WITH_OPENMP_TRADITIONAL */
#ifdef USE_ASSUMED_SIZE
call double_hh_trafo_&
......@@ -483,7 +483,7 @@
#endif
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP_TRADITIONAL */
enddo
#ifndef WITH_FIXED_REAL_KERNEL
......@@ -502,7 +502,7 @@
#endif /* not WITH_FIXED_COMPLEX_KERNEL */
ttt = mpi_wtime()
do j = ncols, 1, -1
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP_TRADITIONAL
#ifdef USE_ASSUMED_SIZE
call single_hh_trafo_&
&MATH_DATATYPE&
......@@ -518,7 +518,7 @@
nbw, nl, stripe_width)
#endif
#else /* WITH_OPENMP */
#else /* WITH_OPENMP_TRADITIONAL */
#ifdef USE_ASSUMED_SIZE
call single_hh_trafo_&
......@@ -535,7 +535,7 @@
nbw, nl,