Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
0d08507c
Commit
0d08507c
authored
Aug 10, 2020
by
Andreas Marek
Browse files
Rename OPENMP preprocessor macro
parent
1aa89171
Changes
34
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
configure.ac
View file @
0d08507c
...
@@ -93,9 +93,9 @@ AC_ARG_ENABLE([openmp],
...
@@ -93,9 +93,9 @@ AC_ARG_ENABLE([openmp],
],
],
[enable_openmp=no])
[enable_openmp=no])
AC_MSG_RESULT([${enable_openmp}])
AC_MSG_RESULT([${enable_openmp}])
AM_CONDITIONAL([WITH_OPENMP],[test x"$enable_openmp" = x"yes"])
AM_CONDITIONAL([WITH_OPENMP
_TRADITIONAL
],[test x"$enable_openmp" = x"yes"])
if test x"${enable_openmp}" = x"yes"; then
if test x"${enable_openmp}" = x"yes"; then
AC_DEFINE([WITH_OPENMP], [1], [use OpenMP threading])
AC_DEFINE([WITH_OPENMP
_TRADITIONAL
], [1], [use OpenMP threading])
fi
fi
...
...
src/elpa1/elpa1_merge_systems_real_template.F90
View file @
0d08507c
...
@@ -64,7 +64,7 @@ subroutine merge_systems_&
...
@@ -64,7 +64,7 @@ subroutine merge_systems_&
use
elpa_abstract_impl
use
elpa_abstract_impl
use
elpa_blas_interfaces
use
elpa_blas_interfaces
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
use
omp_lib
use
omp_lib
#endif
#endif
implicit
none
implicit
none
...
@@ -93,7 +93,7 @@ subroutine merge_systems_&
...
@@ -93,7 +93,7 @@ subroutine merge_systems_&
dbase
(
na
),
ddiff
(
na
),
ev_scale
(
na
),
tmp
(
na
)
dbase
(
na
),
ddiff
(
na
),
ev_scale
(
na
),
tmp
(
na
)
real
(
kind
=
REAL_DATATYPE
)
::
d1u
(
na
),
zu
(
na
),
d1l
(
na
),
zl
(
na
)
real
(
kind
=
REAL_DATATYPE
)
::
d1u
(
na
),
zu
(
na
),
d1l
(
na
),
zl
(
na
)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
qtmp1
(:,:),
qtmp2
(:,:),
ev
(:,:)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
qtmp1
(:,:),
qtmp2
(:,:),
ev
(:,:)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
z_p
(:,:)
real
(
kind
=
REAL_DATATYPE
),
allocatable
::
z_p
(:,:)
#endif
#endif
...
@@ -122,7 +122,7 @@ subroutine merge_systems_&
...
@@ -122,7 +122,7 @@ subroutine merge_systems_&
&
PRECISION
&
&
PRECISION
&
&
_
real
&
_
real
integer
(
kind
=
ik
),
intent
(
in
)
::
max_threads
integer
(
kind
=
ik
),
intent
(
in
)
::
max_threads
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
integer
(
kind
=
ik
)
::
my_thread
integer
(
kind
=
ik
)
::
my_thread
allocate
(
z_p
(
na
,
0
:
max_threads
-1
),
stat
=
istat
,
errmsg
=
errorMessage
)
allocate
(
z_p
(
na
,
0
:
max_threads
-1
),
stat
=
istat
,
errmsg
=
errorMessage
)
...
@@ -442,7 +442,7 @@ subroutine merge_systems_&
...
@@ -442,7 +442,7 @@ subroutine merge_systems_&
! Solve secular equation
! Solve secular equation
z
(
1
:
na1
)
=
1
z
(
1
:
na1
)
=
1
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
z_p
(
1
:
na1
,:)
=
1
z_p
(
1
:
na1
,:)
=
1
#endif
#endif
dbase
(
1
:
na1
)
=
0
dbase
(
1
:
na1
)
=
0
...
@@ -450,7 +450,7 @@ subroutine merge_systems_&
...
@@ -450,7 +450,7 @@ subroutine merge_systems_&
info
=
0
info
=
0
infoBLAS
=
int
(
info
,
kind
=
BLAS_KIND
)
infoBLAS
=
int
(
info
,
kind
=
BLAS_KIND
)
!#ifdef WITH_OPENMP
!#ifdef WITH_OPENMP
_TRADITIONAL
!
!
! call obj%timer%start("OpenMP parallel" // PRECISION_SUFFIX)
! call obj%timer%start("OpenMP parallel" // PRECISION_SUFFIX)
!!$OMP PARALLEL PRIVATE(i,my_thread,delta,s,info,infoBLAS,j)
!!$OMP PARALLEL PRIVATE(i,my_thread,delta,s,info,infoBLAS,j)
...
@@ -474,7 +474,7 @@ subroutine merge_systems_&
...
@@ -474,7 +474,7 @@ subroutine merge_systems_&
! Compute updated z
! Compute updated z
!#ifdef WITH_OPENMP
!#ifdef WITH_OPENMP
_TRADITIONAL
! do j=1,na1
! do j=1,na1
! if (i/=j) z_p(j,my_thread) = z_p(j,my_thread)*( delta(j) / (d1(j)-d1(i)) )
! if (i/=j) z_p(j,my_thread) = z_p(j,my_thread)*( delta(j) / (d1(j)-d1(i)) )
! enddo
! enddo
...
@@ -500,7 +500,7 @@ subroutine merge_systems_&
...
@@ -500,7 +500,7 @@ subroutine merge_systems_&
ddiff
(
i
)
=
delta
(
i
)
ddiff
(
i
)
=
delta
(
i
)
endif
endif
enddo
enddo
!#ifdef WITH_OPENMP
!#ifdef WITH_OPENMP
_TRADITIONAL
!!$OMP END PARALLEL
!!$OMP END PARALLEL
!
!
! call obj%timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
! call obj%timer%stop("OpenMP parallel" // PRECISION_SUFFIX)
...
@@ -526,7 +526,7 @@ subroutine merge_systems_&
...
@@ -526,7 +526,7 @@ subroutine merge_systems_&
! Calculate scale factors for eigenvectors
! Calculate scale factors for eigenvectors
ev_scale
(:)
=
0.0_rk
ev_scale
(:)
=
0.0_rk
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
call
obj
%
timer
%
start
(
"OpenMP parallel"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"OpenMP parallel"
//
PRECISION_SUFFIX
)
...
@@ -548,7 +548,7 @@ subroutine merge_systems_&
...
@@ -548,7 +548,7 @@ subroutine merge_systems_&
&(
obj
,
d1
,
dbase
,
ddiff
,
z
,
ev_scale
(
i
),
na1
,
i
)
&(
obj
,
d1
,
dbase
,
ddiff
,
z
,
ev_scale
(
i
),
na1
,
i
)
! ev_scale(i) = ev_scale_val
! ev_scale(i) = ev_scale_val
enddo
enddo
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$OMP END PARALLEL DO
!$OMP END PARALLEL DO
call
obj
%
timer
%
stop
(
"OpenMP parallel"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
stop
(
"OpenMP parallel"
//
PRECISION_SUFFIX
)
...
@@ -888,7 +888,7 @@ subroutine merge_systems_&
...
@@ -888,7 +888,7 @@ subroutine merge_systems_&
deallocate
(
ev
,
qtmp1
,
qtmp2
,
stat
=
istat
,
errmsg
=
errorMessage
)
deallocate
(
ev
,
qtmp1
,
qtmp2
,
stat
=
istat
,
errmsg
=
errorMessage
)
check_deallocate
(
"merge_systems: ev, qtmp1, qtmp2"
,
istat
,
errorMessage
)
check_deallocate
(
"merge_systems: ev, qtmp1, qtmp2"
,
istat
,
errorMessage
)
endif
!very outer test (na1==1 .or. na1==2)
endif
!very outer test (na1==1 .or. na1==2)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
deallocate
(
z_p
,
stat
=
istat
,
errmsg
=
errorMessage
)
deallocate
(
z_p
,
stat
=
istat
,
errmsg
=
errorMessage
)
check_deallocate
(
"merge_systems: z_p"
,
istat
,
errorMessage
)
check_deallocate
(
"merge_systems: z_p"
,
istat
,
errorMessage
)
#endif
#endif
...
...
src/elpa1/elpa1_template.F90
View file @
0d08507c
...
@@ -201,7 +201,7 @@ function elpa_solve_evp_&
...
@@ -201,7 +201,7 @@ function elpa_solve_evp_&
call
mpi_comm_rank
(
int
(
mpi_comm_all
,
kind
=
MPI_KIND
),
my_peMPI
,
mpierr
)
call
mpi_comm_rank
(
int
(
mpi_comm_all
,
kind
=
MPI_KIND
),
my_peMPI
,
mpierr
)
my_pe
=
int
(
my_peMPI
,
kind
=
c_int
)
my_pe
=
int
(
my_peMPI
,
kind
=
c_int
)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
! restore this at the end of ELPA 2
omp_threads_caller
=
omp_get_max_threads
()
omp_threads_caller
=
omp_get_max_threads
()
...
@@ -263,7 +263,7 @@ function elpa_solve_evp_&
...
@@ -263,7 +263,7 @@ function elpa_solve_evp_&
endif
endif
! restore original OpenMP settings
! restore original OpenMP settings
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
! restore this at the end of ELPA 2
call
omp_set_num_threads
(
omp_threads_caller
)
call
omp_set_num_threads
(
omp_threads_caller
)
...
@@ -562,7 +562,7 @@ function elpa_solve_evp_&
...
@@ -562,7 +562,7 @@ function elpa_solve_evp_&
call
nvtxRangePop
()
call
nvtxRangePop
()
#endif
#endif
! restore original OpenMP settings
! restore original OpenMP settings
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
! restore this at the end of ELPA 2
call
omp_set_num_threads
(
omp_threads_caller
)
call
omp_set_num_threads
(
omp_threads_caller
)
...
...
src/elpa1/elpa1_tridiag_template.F90
View file @
0d08507c
...
@@ -145,7 +145,7 @@ subroutine tridiag_&
...
@@ -145,7 +145,7 @@ subroutine tridiag_&
integer
(
kind
=
c_intptr_t
)
::
a_offset
integer
(
kind
=
c_intptr_t
)
::
a_offset
integer
(
kind
=
ik
),
intent
(
in
)
::
max_threads
integer
(
kind
=
ik
),
intent
(
in
)
::
max_threads
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
integer
(
kind
=
ik
)
::
my_thread
,
n_threads
,
n_iter
integer
(
kind
=
ik
)
::
my_thread
,
n_threads
,
n_iter
#endif
#endif
...
@@ -170,7 +170,7 @@ subroutine tridiag_&
...
@@ -170,7 +170,7 @@ subroutine tridiag_&
! pattern: u1,v1,u2,v2,u3,v3,....
! pattern: u1,v1,u2,v2,u3,v3,....
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
uv_stored_cols
(:,:)
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
uv_stored_cols
(:,:)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
ur_p
(:,:),
uc_p
(:,:)
MATH_DATATYPE
(
kind
=
rck
),
allocatable
::
ur_p
(:,:),
uc_p
(:,:)
#endif
#endif
...
@@ -355,7 +355,7 @@ subroutine tridiag_&
...
@@ -355,7 +355,7 @@ subroutine tridiag_&
endif
endif
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
allocate
(
ur_p
(
max_local_rows
,
0
:
max_threads
-1
),
stat
=
istat
,
errmsg
=
errorMessage
)
allocate
(
ur_p
(
max_local_rows
,
0
:
max_threads
-1
),
stat
=
istat
,
errmsg
=
errorMessage
)
call
check_alloc
(
"tridiag_&
call
check_alloc
(
"tridiag_&
&MATH_DATATYPE "
,
"ur_p"
,
istat
,
errorMessage
)
&MATH_DATATYPE "
,
"ur_p"
,
istat
,
errorMessage
)
...
@@ -363,7 +363,7 @@ subroutine tridiag_&
...
@@ -363,7 +363,7 @@ subroutine tridiag_&
allocate
(
uc_p
(
max_local_cols
,
0
:
max_threads
-1
),
stat
=
istat
,
errmsg
=
errorMessage
)
allocate
(
uc_p
(
max_local_cols
,
0
:
max_threads
-1
),
stat
=
istat
,
errmsg
=
errorMessage
)
call
check_alloc
(
"tridiag_&
call
check_alloc
(
"tridiag_&
&MATH_DATATYPE "
,
"uc_p"
,
istat
,
errorMessage
)
&MATH_DATATYPE "
,
"uc_p"
,
istat
,
errorMessage
)
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP
_TRADITIONAL
*/
tmp
=
0
tmp
=
0
v_row
=
0
v_row
=
0
...
@@ -579,7 +579,7 @@ subroutine tridiag_&
...
@@ -579,7 +579,7 @@ subroutine tridiag_&
check_memcpy_cuda
(
"tridiag: v_row_dev"
,
successCUDA
)
check_memcpy_cuda
(
"tridiag: v_row_dev"
,
successCUDA
)
endif
! useGPU
endif
! useGPU
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
call
obj
%
timer
%
start
(
"OpenMP parallel"
)
call
obj
%
timer
%
start
(
"OpenMP parallel"
)
!$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,l_col_beg,l_col_end,j,l_row_beg,l_row_end)
!$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,l_col_beg,l_col_end,j,l_row_beg,l_row_end)
...
@@ -592,7 +592,7 @@ subroutine tridiag_&
...
@@ -592,7 +592,7 @@ subroutine tridiag_&
! first calculate A*v part of (A + VU**T + UV**T)*v
! first calculate A*v part of (A + VU**T + UV**T)*v
uc_p
(
1
:
l_cols
,
my_thread
)
=
0.
uc_p
(
1
:
l_cols
,
my_thread
)
=
0.
ur_p
(
1
:
l_rows
,
my_thread
)
=
0.
ur_p
(
1
:
l_rows
,
my_thread
)
=
0.
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP
_TRADITIONAL
*/
do
i
=
0
,
(
istep
-2
)/
tile_size
do
i
=
0
,
(
istep
-2
)/
tile_size
l_col_beg
=
i
*
l_cols_per_tile
+1
l_col_beg
=
i
*
l_cols_per_tile
+1
l_col_end
=
min
(
l_cols
,(
i
+1
)
*
l_cols_per_tile
)
l_col_end
=
min
(
l_cols
,(
i
+1
)
*
l_cols_per_tile
)
...
@@ -601,7 +601,7 @@ subroutine tridiag_&
...
@@ -601,7 +601,7 @@ subroutine tridiag_&
l_row_beg
=
j
*
l_rows_per_tile
+1
l_row_beg
=
j
*
l_rows_per_tile
+1
l_row_end
=
min
(
l_rows
,(
j
+1
)
*
l_rows_per_tile
)
l_row_end
=
min
(
l_rows
,(
j
+1
)
*
l_rows_per_tile
)
if
(
l_row_end
<
l_row_beg
)
cycle
if
(
l_row_end
<
l_row_beg
)
cycle
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
if
(
mod
(
n_iter
,
n_threads
)
==
my_thread
)
then
if
(
mod
(
n_iter
,
n_threads
)
==
my_thread
)
then
if
(
wantDebug
)
call
obj
%
timer
%
start
(
"blas"
)
if
(
wantDebug
)
call
obj
%
timer
%
start
(
"blas"
)
call
PRECISION_GEMV
(
BLAS_TRANS_OR_CONJ
,
&
call
PRECISION_GEMV
(
BLAS_TRANS_OR_CONJ
,
&
...
@@ -628,7 +628,7 @@ subroutine tridiag_&
...
@@ -628,7 +628,7 @@ subroutine tridiag_&
if
(
wantDebug
)
call
obj
%
timer
%
stop
(
"blas"
)
if
(
wantDebug
)
call
obj
%
timer
%
stop
(
"blas"
)
endif
endif
n_iter
=
n_iter
+1
n_iter
=
n_iter
+1
#else /* WITH_OPENMP */
#else /* WITH_OPENMP
_TRADITIONAL
*/
! multiplication by blocks is efficient only for CPU
! multiplication by blocks is efficient only for CPU
! for GPU we introduced 2 other ways, either by stripes (more similar to the original
! for GPU we introduced 2 other ways, either by stripes (more similar to the original
...
@@ -658,7 +658,7 @@ subroutine tridiag_&
...
@@ -658,7 +658,7 @@ subroutine tridiag_&
if
(
wantDebug
)
call
obj
%
timer
%
stop
(
"blas"
)
if
(
wantDebug
)
call
obj
%
timer
%
stop
(
"blas"
)
endif
! not useGPU
endif
! not useGPU
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP
_TRADITIONAL
*/
enddo
! j=0,i
enddo
! j=0,i
enddo
! i=0,(istep-2)/tile_size
enddo
! i=0,(istep-2)/tile_size
...
@@ -738,7 +738,7 @@ subroutine tridiag_&
...
@@ -738,7 +738,7 @@ subroutine tridiag_&
check_memcpy_cuda
(
"tridiag: u_row_dev 1"
,
successCUDA
)
check_memcpy_cuda
(
"tridiag: u_row_dev 1"
,
successCUDA
)
endif
! useGPU
endif
! useGPU
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$OMP END PARALLEL
!$OMP END PARALLEL
call
obj
%
timer
%
stop
(
"OpenMP parallel"
)
call
obj
%
timer
%
stop
(
"OpenMP parallel"
)
...
@@ -746,7 +746,7 @@ subroutine tridiag_&
...
@@ -746,7 +746,7 @@ subroutine tridiag_&
u_col
(
1
:
l_cols
)
=
u_col
(
1
:
l_cols
)
+
uc_p
(
1
:
l_cols
,
i
)
u_col
(
1
:
l_cols
)
=
u_col
(
1
:
l_cols
)
+
uc_p
(
1
:
l_cols
,
i
)
u_row
(
1
:
l_rows
)
=
u_row
(
1
:
l_rows
)
+
ur_p
(
1
:
l_rows
,
i
)
u_row
(
1
:
l_rows
)
=
u_row
(
1
:
l_rows
)
+
ur_p
(
1
:
l_rows
,
i
)
enddo
enddo
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP
_TRADITIONAL
*/
! second calculate (VU**T + UV**T)*v part of (A + VU**T + UV**T)*v
! second calculate (VU**T + UV**T)*v part of (A + VU**T + UV**T)*v
if
(
n_stored_vecs
>
0
)
then
if
(
n_stored_vecs
>
0
)
then
...
...
src/elpa1/elpa_cholesky_template.F90
View file @
0d08507c
...
@@ -82,7 +82,7 @@
...
@@ -82,7 +82,7 @@
&PRECISION&
&PRECISION&
&"
)
&"
)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
! restore this at the end of ELPA 2
omp_threads_caller
=
omp_get_max_threads
()
omp_threads_caller
=
omp_get_max_threads
()
...
@@ -330,7 +330,7 @@
...
@@ -330,7 +330,7 @@
enddo
enddo
! restore original OpenMP settings
! restore original OpenMP settings
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
! restore this at the end of ELPA 2
call
omp_set_num_threads
(
omp_threads_caller
)
call
omp_set_num_threads
(
omp_threads_caller
)
...
...
src/elpa1/elpa_reduce_add_vectors.F90
View file @
0d08507c
...
@@ -75,7 +75,7 @@ subroutine elpa_reduce_add_vectors_&
...
@@ -75,7 +75,7 @@ subroutine elpa_reduce_add_vectors_&
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
use
precision
use
precision
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
use
omp_lib
use
omp_lib
#endif
#endif
use
elpa_mpi
use
elpa_mpi
...
@@ -132,7 +132,7 @@ subroutine elpa_reduce_add_vectors_&
...
@@ -132,7 +132,7 @@ subroutine elpa_reduce_add_vectors_&
check_allocate
(
"elpa_reduce_add: aux2"
,
istat
,
errorMessage
)
check_allocate
(
"elpa_reduce_add: aux2"
,
istat
,
errorMessage
)
aux1
(:)
=
0
aux1
(:)
=
0
aux2
(:)
=
0
aux2
(:)
=
0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!call omp_set_num_threads(nrThreads)
!call omp_set_num_threads(nrThreads)
!$omp parallel private(ips, ipt, auxstride, lc, i, k, ns, nl) num_threads(nrThreads)
!$omp parallel private(ips, ipt, auxstride, lc, i, k, ns, nl) num_threads(nrThreads)
...
@@ -147,7 +147,7 @@ subroutine elpa_reduce_add_vectors_&
...
@@ -147,7 +147,7 @@ subroutine elpa_reduce_add_vectors_&
if
(
myps
==
ips
)
then
if
(
myps
==
ips
)
then
! k = 0
! k = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp do
!$omp do
#endif
#endif
do
lc
=
1
,
nvc
do
lc
=
1
,
nvc
...
@@ -161,7 +161,7 @@ subroutine elpa_reduce_add_vectors_&
...
@@ -161,7 +161,7 @@ subroutine elpa_reduce_add_vectors_&
enddo
enddo
k
=
nvc
*
auxstride
k
=
nvc
*
auxstride
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp barrier
!$omp barrier
!$omp master
!$omp master
#endif
#endif
...
@@ -184,13 +184,13 @@ subroutine elpa_reduce_add_vectors_&
...
@@ -184,13 +184,13 @@ subroutine elpa_reduce_add_vectors_&
if
(
k
>
0
)
aux2
=
aux1
if
(
k
>
0
)
aux2
=
aux1
#endif /* WITH_MPI */
#endif /* WITH_MPI */
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp end master
!$omp end master
!$omp barrier
!$omp barrier
#endif
#endif
if
(
mypt
==
ipt
)
then
if
(
mypt
==
ipt
)
then
! k = 0
! k = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp do
!$omp do
#endif
#endif
do
lc
=
1
,
nvc
do
lc
=
1
,
nvc
...
@@ -207,7 +207,7 @@ subroutine elpa_reduce_add_vectors_&
...
@@ -207,7 +207,7 @@ subroutine elpa_reduce_add_vectors_&
endif
endif
enddo
enddo
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp end parallel
!$omp end parallel
#endif
#endif
...
...
src/elpa1/elpa_solve_tridi_impl_public.F90
View file @
0d08507c
...
@@ -92,7 +92,7 @@
...
@@ -92,7 +92,7 @@
matrixRows
=
obj
%
local_nrows
matrixRows
=
obj
%
local_nrows
matrixCols
=
obj
%
local_ncols
matrixCols
=
obj
%
local_ncols
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
! restore this at the end of ELPA 2
omp_threads_caller
=
omp_get_max_threads
()
omp_threads_caller
=
omp_get_max_threads
()
...
@@ -135,7 +135,7 @@
...
@@ -135,7 +135,7 @@
! restore original OpenMP settings
! restore original OpenMP settings
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
! restore this at the end of ELPA 2
call
omp_set_num_threads
(
omp_threads_caller
)
call
omp_set_num_threads
(
omp_threads_caller
)
...
...
src/elpa1/elpa_transpose_vectors.F90
View file @
0d08507c
...
@@ -87,7 +87,7 @@ subroutine ROUTINE_NAME&
...
@@ -87,7 +87,7 @@ subroutine ROUTINE_NAME&
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
use
precision
use
precision
use
elpa_abstract_impl
use
elpa_abstract_impl
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
use
omp_lib
use
omp_lib
#endif
#endif
use
elpa_mpi
use
elpa_mpi
...
@@ -147,7 +147,7 @@ subroutine ROUTINE_NAME&
...
@@ -147,7 +147,7 @@ subroutine ROUTINE_NAME&
allocate
(
aux
(
((
nblks_tot
-
nblks_skip
+
lcm_s_t
-1
)/
lcm_s_t
)
*
nblk
*
nvc
),
stat
=
istat
,
errmsg
=
errorMessage
)
allocate
(
aux
(
((
nblks_tot
-
nblks_skip
+
lcm_s_t
-1
)/
lcm_s_t
)
*
nblk
*
nvc
),
stat
=
istat
,
errmsg
=
errorMessage
)
check_allocate
(
"elpa_transpose_vectors: aux"
,
istat
,
errorMessage
)
check_allocate
(
"elpa_transpose_vectors: aux"
,
istat
,
errorMessage
)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
!$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
#endif
#endif
do
n
=
0
,
lcm_s_t
-1
do
n
=
0
,
lcm_s_t
-1
...
@@ -163,7 +163,7 @@ subroutine ROUTINE_NAME&
...
@@ -163,7 +163,7 @@ subroutine ROUTINE_NAME&
if
(
nblks_comm
.ne.
0
)
then
if
(
nblks_comm
.ne.
0
)
then
if
(
myps
==
ips
)
then
if
(
myps
==
ips
)
then
! k = 0
! k = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp do
!$omp do
#endif
#endif
do
lc
=
1
,
nvc
do
lc
=
1
,
nvc
...
@@ -177,7 +177,7 @@ subroutine ROUTINE_NAME&
...
@@ -177,7 +177,7 @@ subroutine ROUTINE_NAME&
enddo
enddo
endif
endif
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp barrier
!$omp barrier
!$omp master
!$omp master
#endif
#endif
...
@@ -198,7 +198,7 @@ subroutine ROUTINE_NAME&
...
@@ -198,7 +198,7 @@ subroutine ROUTINE_NAME&
call
obj
%
timer
%
stop
(
"mpi_communication"
)
call
obj
%
timer
%
stop
(
"mpi_communication"
)
#endif /* WITH_MPI */
#endif /* WITH_MPI */
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp end master
!$omp end master
!$omp barrier
!$omp barrier
...
@@ -222,7 +222,7 @@ subroutine ROUTINE_NAME&
...
@@ -222,7 +222,7 @@ subroutine ROUTINE_NAME&
endif
endif
enddo
enddo
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp end parallel
!$omp end parallel
#endif
#endif
deallocate
(
aux
,
stat
=
istat
,
errmsg
=
errorMessage
)
deallocate
(
aux
,
stat
=
istat
,
errmsg
=
errorMessage
)
...
...
src/elpa1/elpa_transpose_vectors_ss.F90
View file @
0d08507c
...
@@ -78,7 +78,7 @@ subroutine elpa_transpose_vectors_ss_&
...
@@ -78,7 +78,7 @@ subroutine elpa_transpose_vectors_ss_&
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
use
precision
use
precision
use
elpa_abstract_impl
use
elpa_abstract_impl
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
use
omp_lib
use
omp_lib
#endif
#endif
use
elpa_mpi
use
elpa_mpi
...
@@ -130,7 +130,7 @@ subroutine elpa_transpose_vectors_ss_&
...
@@ -130,7 +130,7 @@ subroutine elpa_transpose_vectors_ss_&
allocate
(
aux
(
((
nblks_tot
-
nblks_skip
+
lcm_s_t
-1
)/
lcm_s_t
)
*
nblk
*
nvc
))
allocate
(
aux
(
((
nblks_tot
-
nblks_skip
+
lcm_s_t
-1
)/
lcm_s_t
)
*
nblk
*
nvc
))
check_allocate
(
"elpa_transpose_vectors_ss: aux"
,
istat
,
errorMessage
)
check_allocate
(
"elpa_transpose_vectors_ss: aux"
,
istat
,
errorMessage
)
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
!$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
#endif
#endif
do
n
=
0
,
lcm_s_t
-1
do
n
=
0
,
lcm_s_t
-1
...
@@ -146,7 +146,7 @@ subroutine elpa_transpose_vectors_ss_&
...
@@ -146,7 +146,7 @@ subroutine elpa_transpose_vectors_ss_&
if
(
nblks_comm
.ne.
0
)
then
if
(
nblks_comm
.ne.
0
)
then
if
(
myps
==
ips
)
then
if
(
myps
==
ips
)
then
! k = 0
! k = 0
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
_TRADITIONAL
!$omp do
!$omp do
#endif
#endif
do
lc
=
1
,
nvc
do