Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
d3a5bab4
Commit
d3a5bab4
authored
Jan 21, 2021
by
Andreas Marek
Browse files
Merge branch 'task-pinning' into 'master_pre_stage'
Task pinning See merge request
!53
parents
91bca5bb
62a4c546
Changes
8
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
d3a5bab4
This diff is collapsed.
Click to expand it.
Makefile.am
View file @
d3a5bab4
...
@@ -69,6 +69,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
...
@@ -69,6 +69,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa1/mod_distribute_global_column.F90
\
src/elpa1/mod_distribute_global_column.F90
\
src/elpa1/mod_v_add_s.F90
\
src/elpa1/mod_v_add_s.F90
\
src/elpa1/mod_solve_secular_equation.F90
\
src/elpa1/mod_solve_secular_equation.F90
\
src/helpers/mod_thread_affinity.F90
\
src/helpers/check_thread_affinity.c
\
src/elpa_index.c
src/elpa_index.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa_c_interface.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa_c_interface.c
...
...
ci_test_scripts/generate_gitlab_ci_tests.py
View file @
d3a5bab4
...
@@ -610,9 +610,14 @@ coverage = {
...
@@ -610,9 +610,14 @@ coverage = {
#"knl" : "--enable-avx512",
#"knl" : "--enable-avx512",
#"power8" : " --enable-vsx --disable-sse --disable-sse-assembly --disable-avx --disable-avx2 --disable-avx512 --disable-mpi-module --with-GPU-compute-capability=sm_60 ",
#"power8" : " --enable-vsx --disable-sse --disable-sse-assembly --disable-avx --disable-avx2 --disable-avx512 --disable-mpi-module --with-GPU-compute-capability=sm_60 ",
#instruction_set = {
# "sse" : " --enable-sse --enable-sse-assembly",
# "avx" : " --enable-avx",
# "avx2" : " --enable-avx2",
# "avx512" : "--enable-avx512",
#}
instruction_set
=
{
instruction_set
=
{
"sse"
:
" --enable-sse --enable-sse-assembly"
,
"avx"
:
" --enable-avx"
,
"avx2"
:
" --enable-avx2"
,
"avx2"
:
" --enable-avx2"
,
"avx512"
:
"--enable-avx512"
,
"avx512"
:
"--enable-avx512"
,
}
}
...
...
src/elpa1/elpa1_template.F90
View file @
d3a5bab4
...
@@ -83,6 +83,7 @@ function elpa_solve_evp_&
...
@@ -83,6 +83,7 @@ function elpa_solve_evp_&
use
elpa_scalapack_interfaces
use
elpa_scalapack_interfaces
#endif
#endif
use
solve_tridi
use
solve_tridi
use
thread_affinity
implicit
none
implicit
none
#include "../general/precision_kinds.F90"
#include "../general/precision_kinds.F90"
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
...
@@ -176,6 +177,8 @@ function elpa_solve_evp_&
...
@@ -176,6 +177,8 @@ function elpa_solve_evp_&
MATH_DATATYPE
(
kind
=
rck
),
allocatable
,
target
::
aIntern
(:,:)
MATH_DATATYPE
(
kind
=
rck
),
allocatable
,
target
::
aIntern
(:,:)
MATH_DATATYPE
(
kind
=
C_DATATYPE_KIND
),
allocatable
,
target
::
qIntern
(:,:)
MATH_DATATYPE
(
kind
=
C_DATATYPE_KIND
),
allocatable
,
target
::
qIntern
(:,:)
#endif
#endif
integer
(
kind
=
c_int
)
::
pinningInfo
logical
::
do_tridiag
,
do_solve
,
do_trans_ev
logical
::
do_tridiag
,
do_solve
,
do_trans_ev
integer
(
kind
=
ik
)
::
nrThreads
integer
(
kind
=
ik
)
::
nrThreads
integer
(
kind
=
ik
)
::
global_index
integer
(
kind
=
ik
)
::
global_index
...
@@ -216,8 +219,19 @@ function elpa_solve_evp_&
...
@@ -216,8 +219,19 @@ function elpa_solve_evp_&
#ifdef WITH_NVTX
#ifdef WITH_NVTX
call
nvtxRangePush
(
"elpa1"
)
call
nvtxRangePush
(
"elpa1"
)
#endif
#endif
call
obj
%
get
(
"output_pinning_information"
,
pinningInfo
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem setting option for debug. Aborting..."
stop
endif
if
(
pinningInfo
.eq.
1
)
then
call
init_thread_affinity
(
nrThreads
)
call
check_thread_affinity
()
if
(
my_pe
.eq.
0
)
call
print_thread_affinity
(
my_pe
)
call
cleanup_thread_affinity
()
endif
success
=
.true.
success
=
.true.
#ifdef REDISTRIBUTE_MATRIX
#ifdef REDISTRIBUTE_MATRIX
...
@@ -619,7 +633,6 @@ function elpa_solve_evp_&
...
@@ -619,7 +633,6 @@ function elpa_solve_evp_&
call
blacs_gridexit
(
blacs_ctxt_
)
call
blacs_gridexit
(
blacs_ctxt_
)
endif
endif
#endif /* REDISTRIBUTE_MATRIX */
#endif /* REDISTRIBUTE_MATRIX */
call
obj
%
timer
%
stop
(
"elpa_solve_evp_&
call
obj
%
timer
%
stop
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&MATH_DATATYPE&
&_1stage_&
&_1stage_&
...
...
src/elpa2/elpa2_template.F90
View file @
d3a5bab4
...
@@ -89,6 +89,7 @@
...
@@ -89,6 +89,7 @@
use
elpa_scalapack_interfaces
use
elpa_scalapack_interfaces
#endif
#endif
use
solve_tridi
use
solve_tridi
use
thread_affinity
use
,
intrinsic
::
iso_c_binding
use
,
intrinsic
::
iso_c_binding
implicit
none
implicit
none
#include "../general/precision_kinds.F90"
#include "../general/precision_kinds.F90"
...
@@ -200,6 +201,7 @@
...
@@ -200,6 +201,7 @@
#endif
#endif
integer
(
kind
=
ik
)
::
global_index
integer
(
kind
=
ik
)
::
global_index
logical
::
reDistributeMatrix
,
doRedistributeMatrix
logical
::
reDistributeMatrix
,
doRedistributeMatrix
integer
(
kind
=
ik
)
::
pinningInfo
#if REALCASE == 1
#if REALCASE == 1
#undef GPU_KERNEL
#undef GPU_KERNEL
...
@@ -238,6 +240,20 @@
...
@@ -238,6 +240,20 @@
nrThreads
=
1
nrThreads
=
1
#endif
#endif
call
obj
%
get
(
"output_pinning_information"
,
pinningInfo
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem setting option for debug. Aborting..."
stop
endif
if
(
pinningInfo
.eq.
1
)
then
call
init_thread_affinity
(
nrThreads
)
call
check_thread_affinity
()
if
(
my_pe
.eq.
0
)
call
print_thread_affinity
(
my_pe
)
call
cleanup_thread_affinity
()
endif
success
=
.true.
success
=
.true.
#ifdef REDISTRIBUTE_MATRIX
#ifdef REDISTRIBUTE_MATRIX
...
...
src/elpa_index.c
View file @
d3a5bab4
...
@@ -271,6 +271,7 @@ static const elpa_index_int_entry_t int_entries[] = {
...
@@ -271,6 +271,7 @@ static const elpa_index_int_entry_t int_entries[] = {
BOOL_ENTRY
(
"print_flops"
,
"Print FLOP rates on task 0"
,
0
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
BOOL_ENTRY
(
"print_flops"
,
"Print FLOP rates on task 0"
,
0
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
BOOL_ENTRY
(
"measure_performance"
,
"Also measure with flops (via papi) with the timings"
,
0
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
BOOL_ENTRY
(
"measure_performance"
,
"Also measure with flops (via papi) with the timings"
,
0
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
BOOL_ENTRY
(
"check_pd"
,
"Check eigenvalues to be positive"
,
0
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
BOOL_ENTRY
(
"check_pd"
,
"Check eigenvalues to be positive"
,
0
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
BOOL_ENTRY
(
"output_pinning_information"
,
"Print the pinning information"
,
0
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
BOOL_ENTRY
(
"cannon_for_generalized"
,
"Whether to use Cannons algorithm for the generalized EVP"
,
1
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
BOOL_ENTRY
(
"cannon_for_generalized"
,
"Whether to use Cannons algorithm for the generalized EVP"
,
1
,
ELPA_AUTOTUNE_NOT_TUNABLE
,
0
,
PRINT_YES
),
};
};
...
...
src/helpers/check_thread_affinity.c
0 → 100644
View file @
d3a5bab4
// Copyright 2021, A. Marek
//
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
#define _GNU_SOURCE
#include
<stdio.h>
#include
<stdlib.h>
#include
<sched.h>
#include
<sys/types.h>
#include
<unistd.h>
// Store the CPU number reported by sched_getcpu() for the calling thread.
//
// cpu_id: output argument; must point to writable storage. It receives
//         whatever sched_getcpu() returns, unmodified.
void get_thread_affinity(int *cpu_id)
{
  const int current_cpu = sched_getcpu();

  *cpu_id = current_cpu;
}
// Scan the CPU affinity mask of the calling process and remember the
// highest-numbered CPU contained in it (9999999 if the mask is empty or
// cannot be queried).
//
// NOTE(review): cpu_id is passed BY VALUE, so the result computed here never
// reaches the caller. The signature is deliberately left unchanged because the
// Fortran binding for "get_process_affinity" declares its argument with the
// `value` attribute; actually returning the CPU id requires a coordinated
// change on both sides (e.g. `int *cpu_id` here and dropping `value` there).
//
// cpu_id: local copy of the caller's value; only used as scratch storage.
void get_process_affinity(int cpu_id)
{
  cpu_set_t set;
  int i;

  // Sentinel meaning "no CPU found".
  cpu_id = 9999999;

  // Start from a defined mask so nothing uninitialised is ever inspected.
  CPU_ZERO(&set);

  // pid 0 queries the calling thread/process. On failure the mask content is
  // not meaningful, so report the problem and keep the sentinel.
  if (sched_getaffinity(0, sizeof(cpu_set_t), &set) != 0) {
    perror("sched_getaffinity");
    return;
  }

  for (i = 0; i < CPU_SETSIZE; i++) {
    // CPU_ISSET is only documented to return nonzero for a member of the set,
    // so test for truth instead of comparing against 1.
    if (CPU_ISSET(i, &set)) {
      cpu_id = i;
    }
  }

  // Silence "set but not used" diagnostics; see the NOTE above.
  (void) cpu_id;
}
// Report the process id and the parent process id of the calling process.
//
// process_id:  output; receives getpid() converted to int.
// pprocess_id: output; receives getppid() converted to int.
void get_process_id(int *process_id, int *pprocess_id)
{
  const pid_t own_pid = getpid();
  const pid_t parent_pid = getppid();

  *process_id = (int) own_pid;
  *pprocess_id = (int) parent_pid;
}
src/helpers/mod_thread_affinity.F90
0 → 100644
View file @
d3a5bab4
! Copyright 2021, A. Marek
!
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! This particular source code file contains additions, changes and
! enhancements authored by Intel Corporation which is not part of
! the ELPA consortium.
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
! Author: Andreas Marek, MPCDF
#include "config-f90.h"
!> Helpers to query and print where a task and its OpenMP threads are running.
!>
!> The public routines are meant to be called in this order:
!>   init_thread_affinity    - remember the thread count, allocate the result buffer
!>   check_thread_affinity   - query the process CPU id and the CPU id of every thread
!>   print_thread_affinity   - write the collected information to standard output
!>   cleanup_thread_affinity - release the result buffer
!>
!> The per-thread part is only compiled when WITH_OPENMP_TRADITIONAL is defined.
module thread_affinity
  use precision
  implicit none

  public :: check_thread_affinity, &
            init_thread_affinity, cleanup_thread_affinity, print_thread_affinity
  private

  ! Marker stored wherever no CPU id has been obtained.
  integer(kind=ik), parameter   :: cpu_id_unknown = -1_ik

  ! Number of threads passed to the last init_thread_affinity call.
  integer(kind=ik)              :: thread_max = 0_ik
  ! CPU id of the process as obtained by the last check_thread_affinity call.
  integer(kind=ik)              :: process_cpu_id = cpu_id_unknown
  ! CPU id per OpenMP thread, indexed 0 .. thread_max-1.
  integer(kind=ik), allocatable :: cpu_ids(:)

  interface
    ! C helper: returns getpid() and getppid().
    subroutine get_process_id_c(process_id, pprocess_id) bind(C, name="get_process_id")
      use, intrinsic :: iso_c_binding
      implicit none
      integer(kind=C_INT), intent(out) :: process_id, pprocess_id
    end subroutine
  end interface

  interface
    ! C helper: returns sched_getcpu() for the calling thread.
    subroutine get_thread_affinity_c(cpu_id) bind(C, name="get_thread_affinity")
      use, intrinsic :: iso_c_binding
      implicit none
      integer(kind=C_INT), intent(out) :: cpu_id
    end subroutine
  end interface

  interface
    ! C helper taking its argument BY VALUE (it is declared `int cpu_id` on the
    ! C side). NOTE(review): because of that, nothing can be returned through
    ! this argument; see get_process_affinity below.
    subroutine get_process_affinity_c(cpu_id) bind(C, name="get_process_affinity")
      use, intrinsic :: iso_c_binding
      implicit none
      integer(kind=C_INT), value :: cpu_id
    end subroutine
  end interface

contains

  !> Return the CPU id the calling thread is reported to run on.
  subroutine get_thread_affinity(cpu_id)
    use, intrinsic :: iso_c_binding
    implicit none
    integer(kind=ik), intent(out) :: cpu_id
    integer(kind=C_INT)           :: cpu_id_c

    call get_thread_affinity_c(cpu_id_c)
    cpu_id = int(cpu_id_c, kind=ik)
  end subroutine get_thread_affinity

  !> Return the CPU id of the process.
  !>
  !> NOTE(review): the C routine receives its argument by value, so cpu_id_c is
  !> not modified by the call. It is therefore initialised to cpu_id_unknown so
  !> that the result is at least well defined instead of undefined. Returning
  !> the real CPU id needs a coordinated interface change in the C helper.
  subroutine get_process_affinity(cpu_id)
    use, intrinsic :: iso_c_binding
    implicit none
    integer(kind=ik), intent(out) :: cpu_id
    integer(kind=C_INT)           :: cpu_id_c

    cpu_id_c = int(cpu_id_unknown, kind=C_INT)
    call get_process_affinity_c(cpu_id_c)
    cpu_id = int(cpu_id_c, kind=ik)
  end subroutine get_process_affinity

  !> Return the process id and the parent process id of this task.
  subroutine get_process_id(process_id, pprocess_id)
    use, intrinsic :: iso_c_binding
    implicit none
    integer(kind=ik), intent(out) :: process_id, pprocess_id
    integer(kind=C_INT)           :: process_id_c, pprocess_id_c

    call get_process_id_c(process_id_c, pprocess_id_c)
    process_id  = int(process_id_c,  kind=ik)
    pprocess_id = int(pprocess_id_c, kind=ik)
  end subroutine get_process_id

  !> Remember the number of threads and make sure the per-thread buffer
  !> matches it.
  !>
  !> @param nrThreads number of OpenMP threads used per task
  subroutine init_thread_affinity(nrThreads)
    implicit none
    integer(kind=ik), intent(in) :: nrThreads
#ifdef WITH_OPENMP_TRADITIONAL
    integer(kind=ik)             :: istat
#endif

    thread_max = nrThreads

#ifdef WITH_OPENMP_TRADITIONAL
    ! A buffer left over from an earlier call with a different thread count
    ! would be indexed out of bounds later on, so drop it first.
    if (allocated(cpu_ids)) then
      if (size(cpu_ids) .ne. thread_max) then
        deallocate(cpu_ids, stat=istat)
        if (istat .ne. 0) then
          print *,"Error when deallocating init_thread_affinity"
        endif
      endif
    endif

    if (.not.(allocated(cpu_ids))) then
      allocate(cpu_ids(0:thread_max-1), stat=istat)
      if (istat .ne. 0) then
        print *,"Error when allocating init_thread_affinity"
      endif
    endif
#endif
  end subroutine init_thread_affinity

  !> Release the per-thread buffer (no-op if it is not allocated).
  subroutine cleanup_thread_affinity
    implicit none
    integer(kind=ik) :: istat

    if (allocated(cpu_ids)) then
      deallocate(cpu_ids, stat=istat)
      if (istat .ne. 0) then
        print *,"Error when deallocating cpu_ids in cleanup_thread_affinity"
      endif
    endif
  end subroutine cleanup_thread_affinity

  !> Query the CPU id of the process and, with OpenMP, of every thread.
  !>
  !> Results are stored in the module variables process_cpu_id and cpu_ids.
  subroutine check_thread_affinity()
#ifdef WITH_OPENMP_TRADITIONAL
    ! omp_lib is only needed (and only guaranteed to exist) in OpenMP builds.
    use omp_lib
#endif
    implicit none
#ifdef WITH_OPENMP_TRADITIONAL
    integer(kind=ik) :: thread_cpu_id
    integer(kind=ik) :: i, my_thread
#endif

    call get_process_affinity(process_cpu_id)

#ifdef WITH_OPENMP_TRADITIONAL
    ! Nothing to fill in if init_thread_affinity was not called or failed.
    if (.not.(allocated(cpu_ids))) return

    ! Slots of threads that do not take part in the loop stay "unknown"
    ! instead of holding undefined values.
    cpu_ids(:) = cpu_id_unknown

!$OMP PARALLEL DO &
!$OMP DEFAULT(NONE) &
!$OMP PRIVATE(i,thread_cpu_id,my_thread) &
!$OMP SHARED(thread_max,cpu_ids) &
!$OMP SCHEDULE(STATIC)
    do i = 0, thread_max-1
      call get_thread_affinity(thread_cpu_id)
      ! The slot is chosen by the executing thread, not by the loop index.
      my_thread = int(omp_get_thread_num(), kind=ik)
      if (my_thread .ge. 0 .and. my_thread .le. ubound(cpu_ids, 1)) then
        cpu_ids(my_thread) = thread_cpu_id
      endif
    enddo
#endif
  end subroutine check_thread_affinity

  !> Write the collected pinning information to standard output.
  !>
  !> @param mype rank of the calling task (only used for the output)
  subroutine print_thread_affinity(mype)
    implicit none
    integer(kind=ik), intent(in) :: mype
    integer(kind=ik)             :: pid, ppid
#ifdef WITH_OPENMP_TRADITIONAL
    integer(kind=ik)             :: i
#endif

    call get_process_id(pid, ppid)

    ! pid/ppid regularly exceed four digits, so they use the minimal-width i0
    ! edit descriptor instead of i4 (which would print "****").
    write(*,'("Task ",i4," runs on process id: ",i4," with pid ",i0," and ppid ",i0)') &
      mype, process_cpu_id, pid, ppid

#ifdef WITH_OPENMP_TRADITIONAL
    write(*,'("Each task uses ",i4," threads")') thread_max
    if (allocated(cpu_ids)) then
      do i = 0, min(thread_max, size(cpu_ids, kind=ik)) - 1
        write(*,'("Thread ",i4," is running on logical CPU-ID ",i4)') i, cpu_ids(i)
      enddo
    endif
#endif
  end subroutine print_thread_affinity

end module thread_affinity
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment