Commit 1d2b3806 authored by Andreas Marek

Merge branch 'master_pre_stage' into 'master'

Master pre stage

See merge request !54
parents e6767067 1996d441
@@ -27,7 +27,7 @@ autotools procedure. This is the **only supported way** how to build and install
 If you obtained *ELPA* from the official git repository, you will not find
-the needed configure script! You will have to create the configure script with autoconf.
+the needed configure script! You will have to create the configure script with autoconf. You can also run the `autogen.sh` script that does this step for you.
 ## (A): Installing *ELPA* as library with configure ##
...
@@ -69,6 +69,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
   src/elpa1/mod_distribute_global_column.F90 \
   src/elpa1/mod_v_add_s.F90 \
   src/elpa1/mod_solve_secular_equation.F90 \
+  src/helpers/mod_thread_affinity.F90 \
+  src/helpers/check_thread_affinity.c \
   src/elpa_index.c
 
 libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c
...
@@ -99,7 +99,7 @@ Nonetheless, we are grateful if you cite the following publications:
   Yu, V.; Moussa, J.; Kus, P.; Marek, A.; Messmer, P.; Yoon, M.; Lederer, H.; Blum, V.
   "GPU-Acceleration of the ELPA2 Distributed Eigensolver for Dense Symmetric and Hermitian Eigenproblems",
-  https://arxiv.org/abs/2002.10991
+  Computer Physics Communications, 262, 2021
 
 If you use the new API and/or autotuning:
...
@@ -610,9 +610,14 @@ coverage = {
 #"knl" : "--enable-avx512",
 #"power8" : " --enable-vsx --disable-sse --disable-sse-assembly --disable-avx --disable-avx2 --disable-avx512 --disable-mpi-module --with-GPU-compute-capability=sm_60 ",
+#instruction_set = {
+# "sse" : " --enable-sse --enable-sse-assembly",
+# "avx" : " --enable-avx",
+# "avx2" : " --enable-avx2",
+# "avx512" : "--enable-avx512",
+#}
 instruction_set = {
-  "sse" : " --enable-sse --enable-sse-assembly",
-  "avx" : " --enable-avx",
   "avx2" : " --enable-avx2",
   "avx512" : "--enable-avx512",
 }
...
@@ -308,6 +308,7 @@ if test x"${FC_does_infer_interfaces}" = x"yes"; then
   #    argument checking or fail
   # 2. no MPI case: switch off PACK_REAL_TO_COMPLEX
   if test x"${with_mpi}" = x"yes"; then
+    AC_MSG_CHECKING(whether MPI module defines all interfaces )
     # check whether MPI module defines all interfaces; not the case for Intel MPI!
     AC_COMPILE_IFELSE([AC_LANG_SOURCE([
       program test_mpi_interfaces
@@ -316,6 +317,8 @@ if test x"${FC_does_infer_interfaces}" = x"yes"; then
       integer :: rank
       integer :: buf(10)
      integer :: ierr
+      real*8 :: a(2)
+      complex*16 :: b(2)
 
       call MPI_Init(ierr)
       call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
@@ -324,19 +327,27 @@ if test x"${FC_does_infer_interfaces}" = x"yes"; then
        buf(:) = 42;
      end if
 
-      ! this is OK
-      call MPI_Bcast(buf, 10, MPI_INT, 0, MPI_COMM_WORLD, ierr)
-      ! Oops, wrong order here:
-      call MPI_Bcast(10, buf, MPI_INT, 0, MPI_COMM_WORLD, ierr)
+      !! this is OK
+      !call MPI_Bcast(buf, 10, MPI_INT, 0, MPI_COMM_WORLD, ierr)
+      !! Oops, wrong order here:
+      !call MPI_Bcast(10, buf, MPI_INT, 0, MPI_COMM_WORLD, ierr)
+
+      ! if the correct interfaces exist in the MPI module
+      ! this must work. If not, and the compiler infers interfaces,
+      ! this will fail
+      call MPI_Bcast(a, 2, MPI_REAL8, 0, MPI_COMM_WORLD, ierr)
+      call MPI_Bcast(b, 2, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)
 
       call MPI_Finalize(ierr)
     end program
 ])],
-  [MPImodule_has_interfaces=no],
-  [MPImodule_has_interfaces=yes]
+  [MPImodule_has_interfaces=yes],
+  [MPImodule_has_interfaces=no]
 )
+AC_MSG_RESULT([${MPImodule_has_interfaces}])
 
 if test x"${MPImodule_has_interfaces}" = x"no"; then
+  AC_MSG_CHECKING(whether we can cure missing interfaces by allowing argument mismatch )
   # check whether we can cure this by disabling argument checking
   FCFLAGS_SAVE2="$FCFLAGS"
   FCFLAGS="$FCFLAGS -fallow-argument-mismatch"
@@ -353,10 +364,11 @@ if test x"${FC_does_infer_interfaces}" = x"yes"; then
     end program
 ])],
-  [FC_does_still_infer_interfaces=no],
-  [FC_does_still_infer_interfaces=yes]
+  [FC_infer_interfaces_cured=yes],
+  [FC_infer_interfaces_cured=no]
 )
-  if test x"${FC_does_still_infer_interfaces}" = x"yes"; then
+  AC_MSG_RESULT([${FC_infer_interfaces_cured}])
+  if test x"${FC_infer_interfaces_cured}" = x"no"; then
     FCFLAGS = "$FCFLAGS_SAVE2"
     AC_MSG_ERROR([Fortran compiler infers interfaces; but MPI module does not supply all of them])
   fi
...
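The check above compiles a small probe program that calls MPI_Bcast with explicitly typed real*8 and complex*16 buffers: if the MPI module exports the corresponding explicit interfaces the probe compiles and MPImodule_has_interfaces is set to "yes"; if it does not, a compiler that infers interfaces rejects the calls and configure then tries -fallow-argument-mismatch as a cure. For reference, a standalone sketch of the probe, reassembled from the fragments above (the "use mpi" line and the exact layout are assumptions; the authoritative text is the one embedded in the configure check):

    ! Sketch only: reassembled probe program, not the generated configure test.
    program test_mpi_interfaces
      use mpi
      implicit none
      integer :: rank
      integer :: buf(10)
      integer :: ierr
      real*8 :: a(2)
      complex*16 :: b(2)

      call MPI_Init(ierr)
      call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
      if (rank == 0) then
        buf(:) = 42
      end if

      ! If the mpi module exports explicit interfaces, these typed calls compile;
      ! if it does not, a compiler that infers interfaces rejects them.
      call MPI_Bcast(a, 2, MPI_REAL8, 0, MPI_COMM_WORLD, ierr)
      call MPI_Bcast(b, 2, MPI_DOUBLE_COMPLEX, 0, MPI_COMM_WORLD, ierr)

      call MPI_Finalize(ierr)
    end program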
@@ -106,7 +106,7 @@
 !>
 !> \code{.f90}
 !>   use elpa
-!>   class(elpa_t), pointer :: elpa
+!>   class(elpa_t), pointer :: elpaInstance
 !>   integer :: success
 !>
 !>   ! We urge the user to always check the error code of all ELPA functions
@@ -121,34 +121,34 @@
 !>   endif
 !>
 !>   ! set parameters describing the matrix and its MPI distribution
-!>   call elpa%set("na", na, success, success)
+!>   call elpaInstance%set("na", na, success, success)
 !>   if (success /= ELPA_OK) then
 !>     print *,"Could not set entry"
 !>   endif
-!>   call elpa%set("nev", nev, success, success)
+!>   call elpaInstance%set("nev", nev, success, success)
 !>   ! check success code ...
 !>
-!>   call elpa%set("local_nrows", na_rows, success)
+!>   call elpaInstance%set("local_nrows", na_rows, success)
 !>   ! check success code ...
 !>
-!>   call elpa%set("local_ncols", na_cols, success)
-!>   call elpa%set("nblk", nblk, success)
-!>   call elpa%set("mpi_comm_parent", MPI_COMM_WORLD, success)
-!>   call elpa%set("process_row", my_prow, success)
-!>   call elpa%set("process_col", my_pcol, success)
+!>   call elpaInstance%set("local_ncols", na_cols, success)
+!>   call elpaInstance%set("nblk", nblk, success)
+!>   call elpaInstance%set("mpi_comm_parent", MPI_COMM_WORLD, success)
+!>   call elpaInstance%set("process_row", my_prow, success)
+!>   call elpaInstance%set("process_col", my_pcol, success)
 !>
 !>   ! set up the elpa object
-!>   success = elpa%setup()
+!>   success = elpaInstance%setup()
 !>   if (success /= ELPA_OK) then
 !>     print *,"Could not setup ELPA object"
 !>   endif
 !>
 !>   ! if desired, set tunable run-time options
 !>   ! here we want to use the 2-stage solver
-!>   call elpa%set("solver", ELPA_SOLVER_2STAGE, success)
+!>   call elpaInstance%set("solver", ELPA_SOLVER_2STAGE, success)
 !>
 !>   ! and set a specific kernel (must be supported on the machine)
-!>   call elpa%set("real_kernel", ELPA_2STAGE_REAL_AVX_BLOCK2)
+!>   call elpaInstance%set("real_kernel", ELPA_2STAGE_REAL_AVX_BLOCK2)
 !> \endcode
 !>   ... set and get all other options that are desired
 !> \code{.f90}
@@ -159,7 +159,7 @@
 !>   ! use method solve to solve the eigenvalue problem to obtain eigenvalues
 !>   ! and eigenvectors
 !>   ! other possible methods are described in \ref elpa_api::elpa_t derived type
-!>   call elpa%eigenvectors(a, ev, z, success)
+!>   call elpaInstance%eigenvectors(a, ev, z, success)
 !>
 !>   ! cleanup
 !>   call elpa_deallocate(e, success)
...
@@ -83,6 +83,7 @@ function elpa_solve_evp_&
   use elpa_scalapack_interfaces
 #endif
   use solve_tridi
+  use thread_affinity
   implicit none
 #include "../general/precision_kinds.F90"
   class(elpa_abstract_impl_t), intent(inout) :: obj
@@ -176,6 +177,8 @@ function elpa_solve_evp_&
   MATH_DATATYPE(kind=rck), allocatable, target :: aIntern(:,:)
   MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable, target :: qIntern(:,:)
 #endif
+  integer(kind=c_int) :: pinningInfo
   logical :: do_tridiag, do_solve, do_trans_ev
   integer(kind=ik) :: nrThreads
   integer(kind=ik) :: global_index
@@ -216,8 +219,19 @@ function elpa_solve_evp_&
 #ifdef WITH_NVTX
   call nvtxRangePush("elpa1")
 #endif
+  call obj%get("output_pinning_information", pinningInfo, error)
+  if (error .ne. ELPA_OK) then
+    print *,"Problem getting option for debug. Aborting..."
+    stop
+  endif
+  if (pinningInfo .eq. 1) then
+    call init_thread_affinity(nrThreads)
+    call check_thread_affinity()
+    if (my_pe .eq. 0) call print_thread_affinity(my_pe)
+    call cleanup_thread_affinity()
+  endif
 
   success = .true.
 
 #ifdef REDISTRIBUTE_MATRIX
@@ -619,7 +633,6 @@ function elpa_solve_evp_&
     call blacs_gridexit(blacs_ctxt_)
   endif
 #endif /* REDISTRIBUTE_MATRIX */
-
   call obj%timer%stop("elpa_solve_evp_&
   &MATH_DATATYPE&
   &_1stage_&
...
@@ -581,8 +581,17 @@ subroutine tridiag_&
 #ifdef WITH_OPENMP_TRADITIONAL
       call obj%timer%start("OpenMP parallel")
-      !$OMP PARALLEL PRIVATE(my_thread,n_threads,n_iter,i,l_col_beg,l_col_end,j,l_row_beg,l_row_end)
+      !todo : check whether GPU implementation with large matrix multiply is beneficial
+      ! for a larger number of threads; could be addressed with autotuning if this
+      ! is the case
+      !$omp parallel &
+      !$omp num_threads(max_threads) &
+      !$omp default(none) &
+      !$omp private(my_thread,n_threads,n_iter,i,l_col_beg,l_col_end,j,l_row_beg,l_row_end) &
+      !$omp shared(useGPU, isSkewsymmetric, cudaMemcpyDeviceToHost, successCuda, u_row, u_row_dev, &
+      !$omp&       v_row, v_row_dev, v_col, v_col_dev, u_col, u_col_dev, a_dev, a_offset, &
+      !$omp&       max_local_cols, max_local_rows, obj, wantDebug, l_rows_per_tile, l_cols_per_tile, &
+      !$omp&       matrixRows, istep, tile_size, l_rows, l_cols, ur_p, uc_p, a_mat)
       my_thread = omp_get_thread_num()
       n_threads = omp_get_num_threads()
...
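The rewritten directive uses default(none), so every variable referenced inside the parallel region must be given an explicit data-sharing attribute; missing shared/private declarations then become compile-time errors instead of silent data races. A small self-contained illustration of the pattern, using toy variables that are not part of ELPA:

    ! Sketch only: minimal default(none) parallel region in the same style.
    program omp_default_none_demo
      use omp_lib
      implicit none
      integer :: i, my_thread
      integer, parameter :: n = 8
      real(kind=8) :: partial(n)

      ! default(none): the compiler rejects any variable whose data-sharing
      ! attribute is not stated explicitly.
      !$omp parallel &
      !$omp default(none) &
      !$omp private(my_thread, i) &
      !$omp shared(partial)
      my_thread = omp_get_thread_num()
      !$omp do
      do i = 1, n
        partial(i) = real(i, kind=8) * my_thread
      end do
      !$omp end do
      !$omp end parallel

      print *, sum(partial)
    end program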
@@ -93,7 +93,7 @@
   matrixRows = obj%local_nrows
   matrixCols = obj%local_ncols
 
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   omp_threads_caller = omp_get_max_threads()
@@ -141,7 +141,7 @@
   ! restore original OpenMP settings
-#ifdef WITH_OPENMP
+#ifdef WITH_OPENMP_TRADITIONAL
   ! store the number of OpenMP threads used in the calling function
   ! restore this at the end of ELPA 2
   call omp_set_num_threads(omp_threads_caller)
...
@@ -693,7 +693,11 @@ max_threads)
       !This does not help performance due to the addition of two openmp barriers around the MPI call,
       !But in the future this may be beneficial if these barriers are replaced with a faster implementation
-      !$omp parallel private(mynlc, j, lcx, ii, pp ) shared(aux1)
+      !$omp parallel &
+      !$omp default(none) &
+      !$omp shared(lc, istep, nbw, my_pcol, np_cols, nblk, &
+      !$omp&       lr, vr, a_mat, transformChunkSize, tau, aux1, aux2, wantDebug, mpi_comm_rows, obj) &
+      !$omp private(mynlc, j, lcx, ii, pp, mpierr )
       mynlc = 0 ! number of local columns
 
       !This loop does not have independent iterations,
@@ -941,22 +945,20 @@ max_threads)
   n_way = 1
 #ifdef WITH_OPENMP_TRADITIONAL
-#if REALCASE == 1
   n_way = max_threads
-  !$omp parallel private( i,lcs,lce,lrs,lre)
-#endif
   if (n_way > 1) then
-#if REALCASE == 1
-    !$omp do
-#endif
+    !$omp parallel do &
+    !$omp default(none) &
+    !$omp private(i) &
+    !$omp shared(l_cols_tile, l_cols, umcCPU, n_cols)
     do i=1,min(l_cols_tile, l_cols)
       umcCPU(i,1:n_cols) = 0.0_rck
     enddo
-#if REALCASE == 1
-    !$omp do
-#endif
+    !$omp parallel do &
+    !$omp default(none) &
+    !$omp private(i) &
+    !$omp shared(l_rows, vmrCPU, n_cols)
     do i=1,l_rows
       vmrCPU(i,n_cols+1:2*n_cols) = 0.0_rck
     enddo
@@ -977,9 +979,11 @@ max_threads)
     !This algorithm chosen because in this algorithm, the loop around the dgemm calls
     !is easily parallelized, and regardless of choice of algorithm,
    !the startup cost for parallelizing the dgemms inside the loop is too great
-#if REALCASE == 1
-    !$omp do schedule(static,1)
-#endif
+    !$omp parallel do schedule(static,1) &
+    !$omp default(none) &
+    !$omp private(i, lcs, lce, lrs, lre) &
+    !$omp shared(istep, nbw, tile_size, obj, l_cols, l_cols_tile, l_rows, isSkewsymmetric, &
+    !$omp&       n_cols, l_rows_tile, umcCPU, vmrCPU, a_mat)
     do i=0,(istep*nbw-1)/tile_size
       lcs = i*l_cols_tile+1 ! local column start
       lce = min(l_cols, (i+1)*l_cols_tile) ! local column end
@@ -1139,9 +1143,6 @@ max_threads)
 #ifdef WITH_OPENMP_TRADITIONAL
   endif ! n_way > 1
-#if REALCASE == 1
-  !$omp end parallel
-#endif
 #endif
   ! Sum up all ur(:) parts along rows and add them to the uc(:) parts
   ! on the processors containing the diagonal
...
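The comments retained above state the design choice: parallelize the loop over tiles around the (serial) GEMM calls, rather than relying on a threaded GEMM inside the loop, whose per-call startup cost is too high. A toy sketch of that structure, with matmul standing in for the BLAS calls and purely illustrative names:

    ! Sketch only: outer-loop parallelism over tiles, serial product per tile.
    program tiles_outer_parallel_demo
      use omp_lib
      implicit none
      integer, parameter :: ntiles = 4, m = 64
      real(kind=8) :: a(m, m, ntiles), b(m, m), c(m, m, ntiles)
      integer :: i

      call random_number(a)
      call random_number(b)

      ! Each iteration performs one independent matrix product, mirroring the
      ! "loop around the dgemm calls" pattern in the hunk above.
      !$omp parallel do default(none) private(i) shared(a, b, c)
      do i = 1, ntiles
        c(:, :, i) = matmul(a(:, :, i), b)
      end do
      !$omp end parallel do

      print *, sum(c)
    end program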
@@ -89,6 +89,7 @@
   use elpa_scalapack_interfaces
 #endif
   use solve_tridi
+  use thread_affinity
   use, intrinsic :: iso_c_binding
   implicit none
 #include "../general/precision_kinds.F90"
@@ -200,6 +201,7 @@
 #endif
   integer(kind=ik) :: global_index
   logical :: reDistributeMatrix, doRedistributeMatrix
+  integer(kind=ik) :: pinningInfo
 
 #if REALCASE == 1
 #undef GPU_KERNEL
@@ -238,6 +240,20 @@
   nrThreads = 1
 #endif
 
+  call obj%get("output_pinning_information", pinningInfo, error)
+  if (error .ne. ELPA_OK) then
+    print *,"Problem getting option for debug. Aborting..."
+    stop
+  endif
+
+  if (pinningInfo .eq. 1) then
+    call init_thread_affinity(nrThreads)
+    call check_thread_affinity()
+    if (my_pe .eq. 0) call print_thread_affinity(my_pe)
+    call cleanup_thread_affinity()
+  endif
+
   success = .true.
 
 #ifdef REDISTRIBUTE_MATRIX
...
@@ -271,6 +271,7 @@ static const elpa_index_int_entry_t int_entries[] = {
         BOOL_ENTRY("print_flops", "Print FLOP rates on task 0", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
         BOOL_ENTRY("measure_performance", "Also measure with flops (via papi) with the timings", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
         BOOL_ENTRY("check_pd", "Check eigenvalues to be positive", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
+        BOOL_ENTRY("output_pinning_information", "Print the pinning information", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
         BOOL_ENTRY("cannon_for_generalized", "Whether to use Cannons algorithm for the generalized EVP", 1, ELPA_AUTOTUNE_NOT_TUNABLE, 0, PRINT_YES),
 };
...
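With this entry in place, the option can be switched on through the regular key/value interface before calling a solver, in the same style as the documented elpaInstance example above (a sketch; the instance and variable names are taken from that example, not from this merge request):

    ! Sketch only: enable printing of pinning information on an existing ELPA instance.
    call elpaInstance%set("output_pinning_information", 1, success)
    if (success /= ELPA_OK) then
      print *,"Could not enable output_pinning_information"
    endif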
// Copyright 2021, A. Marek
//
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <sys/types.h>
#include <unistd.h>
// return the CPU id on which the calling thread is currently running
void get_thread_affinity(int *cpu_id) {
	*cpu_id = sched_getcpu();
}

// return the highest CPU id contained in the affinity mask of the calling
// process; the sentinel value is kept if the mask cannot be obtained
void get_process_affinity(int *cpu_id) {
	cpu_set_t set;
	int ret, i;

	*cpu_id = 9999999;

	ret = sched_getaffinity(0, sizeof(cpu_set_t), &set);
	if (ret != 0) {
		return;
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		if (CPU_ISSET(i, &set)) {
			*cpu_id = i;
		}
	}
}
void get_process_id(int *process_id, int *pprocess_id) {
int id;
*process_id = 0;
*pprocess_id = 0;
id = getpid();
*process_id = id ;
id = getppid();
*pprocess_id = id ;
//printf("My pid %d \n",*process_id);
//printf("My ppid %d \n",*pprocess_id);
}
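The Fortran module added to the build system in the hunk above presumably exposes these helpers via ISO_C_BINDING. A minimal sketch of such a binding, written as an assumption about the module's shape rather than a copy of its actual contents:

    ! Sketch only: the authoritative interfaces live in src/helpers/mod_thread_affinity.F90.
    module thread_affinity_binding_sketch
      use, intrinsic :: iso_c_binding, only : c_int
      implicit none

      interface
        ! arguments are passed by reference, matching "int *" on the C side
        subroutine get_thread_affinity(cpu_id) bind(C, name="get_thread_affinity")
          import :: c_int
          integer(kind=c_int), intent(out) :: cpu_id
        end subroutine

        subroutine get_process_affinity(cpu_id) bind(C, name="get_process_affinity")
          import :: c_int
          integer(kind=c_int), intent(out) :: cpu_id
        end subroutine

        subroutine get_process_id(process_id, pprocess_id) bind(C, name="get_process_id")
          import :: c_int
          integer(kind=c_int), intent(out) :: process_id, pprocess_id
        end subroutine
      end interface
    end module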
! Copyright 2021, A. Marek
!
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and