Commit d3a5bab4 authored by Andreas Marek's avatar Andreas Marek
Browse files

Merge branch 'task-pinning' into 'master_pre_stage'

Task pinning

See merge request !53
parents 91bca5bb 62a4c546
This diff is collapsed.
......@@ -69,6 +69,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa1/mod_distribute_global_column.F90 \
src/elpa1/mod_v_add_s.F90 \
src/elpa1/mod_solve_secular_equation.F90 \
src/helpers/mod_thread_affinity.F90 \
src/helpers/check_thread_affinity.c \
src/elpa_index.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c
......
......@@ -610,9 +610,14 @@ coverage = {
#"knl" : "--enable-avx512",
#"power8" : " --enable-vsx --disable-sse --disable-sse-assembly --disable-avx --disable-avx2 --disable-avx512 --disable-mpi-module --with-GPU-compute-capability=sm_60 ",
#instruction_set = {
# "sse" : " --enable-sse --enable-sse-assembly",
# "avx" : " --enable-avx",
# "avx2" : " --enable-avx2",
# "avx512" : "--enable-avx512",
#}
instruction_set = {
"sse" : " --enable-sse --enable-sse-assembly",
"avx" : " --enable-avx",
"avx2" : " --enable-avx2",
"avx512" : "--enable-avx512",
}
......
......@@ -83,6 +83,7 @@ function elpa_solve_evp_&
use elpa_scalapack_interfaces
#endif
use solve_tridi
use thread_affinity
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -176,6 +177,8 @@ function elpa_solve_evp_&
MATH_DATATYPE(kind=rck), allocatable, target :: aIntern(:,:)
MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable, target :: qIntern(:,:)
#endif
integer(kind=c_int) :: pinningInfo
logical :: do_tridiag, do_solve, do_trans_ev
integer(kind=ik) :: nrThreads
integer(kind=ik) :: global_index
......@@ -216,8 +219,19 @@ function elpa_solve_evp_&
#ifdef WITH_NVTX
call nvtxRangePush("elpa1")
#endif
call obj%get("output_pinning_information", pinningInfo, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option for debug. Aborting..."
stop
endif
if (pinningInfo .eq. 1) then
call init_thread_affinity(nrThreads)
call check_thread_affinity()
if (my_pe .eq. 0) call print_thread_affinity(my_pe)
call cleanup_thread_affinity()
endif
success = .true.
#ifdef REDISTRIBUTE_MATRIX
......@@ -619,7 +633,6 @@ function elpa_solve_evp_&
call blacs_gridexit(blacs_ctxt_)
endif
#endif /* REDISTRIBUTE_MATRIX */
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
......
......@@ -89,6 +89,7 @@
use elpa_scalapack_interfaces
#endif
use solve_tridi
use thread_affinity
use, intrinsic :: iso_c_binding
implicit none
#include "../general/precision_kinds.F90"
......@@ -200,6 +201,7 @@
#endif
integer(kind=ik) :: global_index
logical :: reDistributeMatrix, doRedistributeMatrix
integer(kind=ik) :: pinningInfo
#if REALCASE == 1
#undef GPU_KERNEL
......@@ -238,6 +240,20 @@
nrThreads = 1
#endif
call obj%get("output_pinning_information", pinningInfo, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option for debug. Aborting..."
stop
endif
if (pinningInfo .eq. 1) then
call init_thread_affinity(nrThreads)
call check_thread_affinity()
if (my_pe .eq. 0) call print_thread_affinity(my_pe)
call cleanup_thread_affinity()
endif
success = .true.
#ifdef REDISTRIBUTE_MATRIX
......
......@@ -271,6 +271,7 @@ static const elpa_index_int_entry_t int_entries[] = {
BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
BOOL_ENTRY("measure_performance", "Also measure with flops (via papi) with the timings", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
BOOL_ENTRY("output_pinning_information", "Print the pinning information", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
};
......
// Copyright 2021, A. Marek
//
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
// Report the logical CPU on which the calling thread is currently running.
//
// cpu_id: output; receives the value returned by sched_getcpu(), which is
//         the CPU number on success and -1 if the call fails.
void get_thread_affinity(int *cpu_id) {
  const int current_cpu = sched_getcpu();

  *cpu_id = current_cpu;
}
// Scan the CPU affinity mask of the calling process and determine the
// highest-numbered logical CPU the process is allowed to run on.
//
// cpu_id: received BY VALUE. The result is therefore only stored in the
//         local copy of the argument and never reaches the caller.
//
// NOTE(review): the Fortran binding for this routine declares the argument
// with the `value` attribute, so the signature cannot be switched to `int *`
// here without changing that binding in the same commit. Until both sides are
// changed together, this routine has no observable result for its caller.
void get_process_affinity(int cpu_id) {
  cpu_set_t set;
  int i;

  // Sentinel meaning "no CPU found in the affinity mask".
  cpu_id = 9999999;

  // Start from an empty mask and bail out if the query fails, so that the
  // loop below never inspects an uninitialised cpu_set_t.
  CPU_ZERO(&set);
  if (sched_getaffinity(0, sizeof(cpu_set_t), &set) != 0) {
    return;
  }

  // Keep the last (i.e. highest-numbered) CPU that is part of the mask.
  for (i = 0; i < CPU_SETSIZE; i++) {
    if (CPU_ISSET(i, &set)) {
      cpu_id = i;
    }
  }

  // The value cannot be handed back (see note above); mark it as used.
  (void) cpu_id;
}
// Return the process id and the parent process id of the calling process.
//
// process_id:  output; receives the result of getpid().
// pprocess_id: output; receives the result of getppid().
void get_process_id(int *process_id, int *pprocess_id) {
  const pid_t own_pid = getpid();
  const pid_t parent_pid = getppid();

  *process_id = (int) own_pid;
  *pprocess_id = (int) parent_pid;
}
! Copyright 2021, A. Marek
!
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! This particular source code file contains additions, changes and
! enhancements authored by Intel Corporation which is not part of
! the ELPA consortium.
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
! Author: Andreas Marek, MPCDF
#include "config-f90.h"
!> Helpers to query and print on which logical CPUs the calling process and
!> (when built with WITH_OPENMP_TRADITIONAL) its OpenMP threads are running.
!>
!> Intended call sequence:
!>   call init_thread_affinity(nrThreads)
!>   call check_thread_affinity()
!>   call print_thread_affinity(my_pe)
!>   call cleanup_thread_affinity()
module thread_affinity
  use precision
  implicit none

  public :: check_thread_affinity, &
            init_thread_affinity, cleanup_thread_affinity, print_thread_affinity
  private

  ! Value stored for every CPU id that has not been (or could not be) determined
  integer(kind=ik), parameter   :: cpu_id_unknown = -1_ik

  ! Number of threads for which affinity information is collected
  integer(kind=ik)              :: thread_max = 0_ik
  ! CPU id reported for the process as a whole
  integer(kind=ik)              :: process_cpu_id = cpu_id_unknown
  ! CPU id per OpenMP thread, indexed 0:thread_max-1 (OpenMP builds only)
  integer(kind=ik), allocatable :: cpu_ids(:)

  interface
    subroutine get_process_id_c(process_id, pprocess_id) bind(C, name="get_process_id")
      use, intrinsic :: iso_c_binding, only : C_INT
      implicit none
      integer(kind=C_INT), intent(out) :: process_id, pprocess_id
    end subroutine get_process_id_c
  end interface

  interface
    subroutine get_thread_affinity_c(cpu_id) bind(C, name="get_thread_affinity")
      use, intrinsic :: iso_c_binding, only : C_INT
      implicit none
      integer(kind=C_INT), intent(out) :: cpu_id
    end subroutine get_thread_affinity_c
  end interface

  interface
    subroutine get_process_affinity_c(cpu_id) bind(C, name="get_process_affinity")
      use, intrinsic :: iso_c_binding, only : C_INT
      implicit none
      ! passed by value: the C routine cannot hand a result back through it
      integer(kind=C_INT), value :: cpu_id
    end subroutine get_process_affinity_c
  end interface

  contains

  !> Return the logical CPU the calling thread currently runs on.
  !> @param cpu_id  out: CPU id as delivered by the C helper get_thread_affinity
  subroutine get_thread_affinity(cpu_id)
    use, intrinsic :: iso_c_binding, only : C_INT
    implicit none
    integer(kind=ik), intent(out) :: cpu_id
    integer(kind=C_INT)           :: cpu_id_c

    call get_thread_affinity_c(cpu_id_c)
    cpu_id = int(cpu_id_c, kind=ik)
  end subroutine get_thread_affinity

  !> Query the CPU id of the process via the C helper get_process_affinity.
  !> @param cpu_id  out: CPU id, or cpu_id_unknown if none was delivered
  subroutine get_process_affinity(cpu_id)
    use, intrinsic :: iso_c_binding, only : C_INT
    implicit none
    integer(kind=ik), intent(out) :: cpu_id
    integer(kind=C_INT)           :: cpu_id_c

    ! The argument is bound with the "value" attribute, so the C side cannot
    ! modify cpu_id_c. Give it a defined value up front; otherwise an
    ! uninitialised variable would be converted and reported.
    ! NOTE(review): as long as the binding stays by-value this always yields
    ! cpu_id_unknown; returning the real id needs a by-reference interface
    ! on both the C and the Fortran side.
    cpu_id_c = int(cpu_id_unknown, kind=C_INT)
    call get_process_affinity_c(cpu_id_c)
    cpu_id = int(cpu_id_c, kind=ik)
  end subroutine get_process_affinity

  !> Return the process id and parent process id of the calling process.
  !> @param process_id   out: pid
  !> @param pprocess_id  out: parent pid
  subroutine get_process_id(process_id, pprocess_id)
    use, intrinsic :: iso_c_binding, only : C_INT
    implicit none
    integer(kind=ik), intent(out) :: process_id, pprocess_id
    integer(kind=C_INT)           :: process_id_c, pprocess_id_c

    call get_process_id_c(process_id_c, pprocess_id_c)
    process_id  = int(process_id_c, kind=ik)
    pprocess_id = int(pprocess_id_c, kind=ik)
  end subroutine get_process_id

  !> Remember the number of threads and (OpenMP builds) set up the per-thread
  !> CPU-id buffer, filled with cpu_id_unknown.
  !> @param nrThreads  in: number of threads to collect information for
  subroutine init_thread_affinity(nrThreads)
    implicit none
    integer(kind=ik), intent(in) :: nrThreads
#ifdef WITH_OPENMP_TRADITIONAL
    integer(kind=ik)             :: istat
#endif

    thread_max = nrThreads

#ifdef WITH_OPENMP_TRADITIONAL
    ! A buffer left over from an earlier call with a different thread count
    ! must not be reused: it would be indexed out of bounds later on.
    if (allocated(cpu_ids)) then
      if (size(cpu_ids, kind=ik) .ne. max(thread_max, 0_ik)) then
        deallocate(cpu_ids, stat=istat)
        if (istat .ne. 0) then
          print *,"Error when allocating init_thread_affinity"
          return
        endif
      endif
    endif

    if (.not.(allocated(cpu_ids))) then
      allocate(cpu_ids(0:thread_max-1), stat=istat)
      if (istat .ne. 0) then
        print *,"Error when allocating init_thread_affinity"
        return
      endif
    endif

    ! Entries that no thread fills in later are reported as "unknown"
    cpu_ids(:) = cpu_id_unknown
#endif
  end subroutine init_thread_affinity

  !> Release the per-thread CPU-id buffer, if it is allocated.
  subroutine cleanup_thread_affinity
    implicit none
    integer(kind=ik) :: istat

    if (allocated(cpu_ids)) then
      deallocate(cpu_ids, stat=istat)
      if (istat .ne. 0) then
        print *,"Error when deallocating cleanup_thread_affinity"
      endif
    endif
  end subroutine cleanup_thread_affinity

  !> Collect the CPU id of the process and (OpenMP builds) of each thread of a
  !> team of at most thread_max threads. Results are stored in module state
  !> and printed by print_thread_affinity.
  subroutine check_thread_affinity()
#ifdef WITH_OPENMP_TRADITIONAL
    use omp_lib
#endif
    implicit none
#ifdef WITH_OPENMP_TRADITIONAL
    integer(kind=ik) :: thread_cpu_id
    integer(kind=ik) :: actual_num
#endif

    call get_process_affinity(process_cpu_id)

#ifdef WITH_OPENMP_TRADITIONAL
    ! Nothing to record without a buffer (init not called or allocation
    ! failed); num_threads below also requires a positive thread count.
    if (.not.(allocated(cpu_ids))) return
    if (thread_max .lt. 1_ik) return

    cpu_ids(:) = cpu_id_unknown

    ! Every thread of the team records its own CPU exactly once, in the slot
    ! given by its thread number. This does not depend on how a work-sharing
    ! loop would distribute iterations among the threads.
    !$omp parallel &
    !$omp num_threads(thread_max) &
    !$omp default(none) &
    !$omp private(thread_cpu_id, actual_num) &
    !$omp shared(cpu_ids)
    call get_thread_affinity(thread_cpu_id)
    actual_num = omp_get_thread_num()
    if (actual_num .ge. lbound(cpu_ids, 1) .and. actual_num .le. ubound(cpu_ids, 1)) then
      cpu_ids(actual_num) = thread_cpu_id
    endif
    !$omp end parallel
#endif
  end subroutine check_thread_affinity

  !> Print the information gathered by check_thread_affinity to stdout.
  !> @param mype  in: task number shown in the output
  subroutine print_thread_affinity(mype)
    implicit none
    integer(kind=ik), intent(in) :: mype
    integer(kind=ik)             :: pid, ppid
#ifdef WITH_OPENMP_TRADITIONAL
    integer(kind=ik)             :: i
#endif

    call get_process_id(pid, ppid)
    ! pid/ppid routinely have more than four digits; i0 avoids "****" output
    write(*,'("Task ",i4," runs on process id: ",i4," with pid ",i0," and ppid ",i0)') &
      mype, process_cpu_id, pid, ppid

#ifdef WITH_OPENMP_TRADITIONAL
    write(*,'("Each task uses ",i4," threads")') thread_max
    if (.not.(allocated(cpu_ids))) return
    do i = lbound(cpu_ids, 1), ubound(cpu_ids, 1)
      write(*,'("Thread ",i4," is running on logical CPU-ID ",i4)') i, cpu_ids(i)
    enddo
#endif
  end subroutine print_thread_affinity

end module thread_affinity
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment