Commit f3144dd9 authored by Pavel Kus's avatar Pavel Kus

changed logic of setting mpi communicators

mpi_comm_parent is allways requred (it was not required before, but
actually the code internals expected it to be supplied, at least for
ELPA 2 calculation OR whenever GPU was used)
parent 56c5e772
......@@ -123,14 +123,16 @@ for lang, m, g, q, t, p, d, s, lay, spl in product(sorted(language_flag.keys()),
if (q == 1 and (s != "2stage" or d != "real" or t != "eigenvectors" or g == 1 or m != "random")):
continue
# one test with split communicator myself should be enough
if(spl == "myself" and (s!="1stage" or d != "real" or p != "double" or g == 1 or m != "random" or t != "eigenvectors" or lang != "Fortran" or lay != "square")):
if(spl == "myself" and (d != "real" or p != "double" or q != 0 or m != "random" or (t != "eigenvectors" and t != "cholesky") or lang != "Fortran" or lay != "square")):
continue
for kernel in ["all_kernels", "default_kernel"] if s == "2stage" else ["nokernel"]:
endifs = 0
extra_flags = []
if(spl == "myself" and kernel == "all_kernels"):
continue
if(spl == "myself"):
print("if WITH_MPI")
endifs += 1
......
......@@ -116,6 +116,12 @@
stop
endif
call e%set("legacy_api", 1, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop 1
endif
!! the elpa object needs nev to be set (in case the EVP-solver is
!! called later. Thus it is set by user, do nothing, otherwise,
!! set it to na as default
......
......@@ -138,6 +138,12 @@
print *,"Problem setting option. Aborting..."
stop
endif
call e%set("legacy_api", 1, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
if (e%setup() .ne. ELPA_OK) then
print *, "Cannot setup ELPA instance"
......
......@@ -154,6 +154,12 @@
stop
endif
call e%set("legacy_api", 1, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop 1
endif
if (e%setup() .ne. ELPA_OK) then
print *, "Cannot setup ELPA instance"
success = .false.
......
......@@ -131,6 +131,11 @@
print *,"Problem setting option. Aborting..."
stop
endif
call obj%set("legacy_api", 1, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
if (obj%setup() .ne. ELPA_OK) then
print *, "Cannot setup ELPA instance"
......
......@@ -258,128 +258,6 @@ module elpa_impl
end subroutine
#endif
! we want to ensure, that my_prow(col) and np_rows(cols) values are allways accessible trhough
! the elpa object, no matter whether the user provides communicators or communicators are created
! by elpa. If the walues are present already, they are checked for consistency with the communicators.
subroutine set_or_check_missing_comm_params(self)
implicit none
class(elpa_impl_t), intent(inout) :: self
integer :: mpi_comm_parent, mpi_comm_rows, mpi_comm_cols, mpierr, error, &
my_prow, my_pcol, my_id, present_my_prow, present_my_pcol, present_my_id, &
np_rows, np_cols, np_total, present_np_rows, present_np_cols, present_np_total, &
is_process_id_zero, gpu
if (.not. (self%is_set("mpi_comm_rows") == 1 .and. self%is_set("mpi_comm_cols") == 1) ) then
print *,"MPI row and column communicators not set correctly. Aborting..."
stop
endif
call self%get("mpi_comm_rows", mpi_comm_rows, error)
call self%get("mpi_comm_cols", mpi_comm_cols, error)
call mpi_comm_size(mpi_comm_rows, np_rows, mpierr)
if(self%is_set("num_process_rows") == 1) then
call self%get("num_process_rows", present_np_rows, error)
if(np_rows .ne. present_np_rows) then
print *,"MPI row communicator not set correctly. Aborting..."
stop
endif
else
call self%set("num_process_rows", np_rows, error)
endif
call mpi_comm_size(mpi_comm_cols, np_cols, mpierr)
if(self%is_set("num_process_cols") == 1) then
call self%get("num_process_cols", present_np_cols, error)
if(np_cols .ne. present_np_cols) then
print *,"MPI column communicator not set correctly. Aborting..."
stop
endif
else
call self%set("num_process_cols", np_cols, error)
endif
call mpi_comm_rank(mpi_comm_rows, my_prow, mpierr)
if(self%is_set("process_row") == 1) then
call self%get("process_row", present_my_prow, error)
if(my_prow .ne. present_my_prow) then
print *,"MPI row communicator not set correctly. Aborting..."
stop
endif
else
call self%set("process_row", my_prow, error)
endif
call mpi_comm_rank(mpi_comm_cols, my_pcol, mpierr)
if(self%is_set("process_col") == 1) then
call self%get("process_col", present_my_pcol, error)
if(my_pcol .ne. present_my_pcol) then
print *,"MPI column communicator not set correctly. Aborting..."
stop
endif
else
call self%set("process_col", my_pcol, error)
endif
! sadly, at the moment, the parent mpi communicator is not required to be set, e.g. in legacy tests
! we thus cannot obtain process_id
! we can, however, determine the number of prcesses and determine, whether the given process has id 0,
! assuming, that that is the wan with row and column ids == 0
is_process_id_zero = 0
if (self%is_set("mpi_comm_parent") == 1) then
call self%get("mpi_comm_parent", mpi_comm_parent, error)
call mpi_comm_size(mpi_comm_parent, np_total, mpierr)
if(self%is_set("num_processes") == 1) then
call self%get("num_processes", present_np_total, error)
if(np_total .ne. present_np_total) then
print *,"MPI parent communicator not set correctly. Aborting..."
stop
endif
else
call self%set("num_processes", np_total, error)
endif
if(np_total .ne. np_rows * np_cols) then
print *,"MPI parent communicator and row/col communicators do not match. Aborting..."
stop
endif
call mpi_comm_rank(mpi_comm_parent, my_id, mpierr)
if(self%is_set("process_id") == 1) then
call self%get("process_id", present_my_id, error)
if(my_id .ne. present_my_id) then
print *,"MPI parent communicator not set correctly. Aborting..."
stop
endif
else
call self%set("process_id", my_id, error)
endif
if(my_id == 0) &
is_process_id_zero = 1
else
! for the GPU calculation, the parent communicator has to be set
call self%get("gpu",gpu,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
if(gpu == 1) then
print *,"MPI parent communicator has to be set for GPU calculation. Aborting..."
stop
endif
! we can set number of processes and whether process id is zero, but not the process id.
! we assume, that my_pcol == 0 && my_prow == 0 <==> my_id == 0
call self%set("num_process", np_rows * np_cols, error)
if((my_prow == 0) .and. (my_pcol == 0)) &
is_process_id_zero = 1
endif
call self%set("is_process_id_zero", is_process_id_zero, error)
end subroutine
!> \brief function to setup an ELPA object and to store the MPI communicators internally
!> Parameters
!> \param self class(elpa_impl_t), the allocated ELPA object
......@@ -389,9 +267,13 @@ module elpa_impl
integer :: error, timings
#ifdef WITH_MPI
integer :: mpi_comm_parent, mpi_comm_rows, mpi_comm_cols, &
mpierr, mpierr2, process_row, process_col, mpi_string_length
integer :: mpi_comm_parent, mpi_comm_rows, mpi_comm_cols, np_rows, np_cols, my_id, &
mpierr, mpierr2, process_row, process_col, mpi_string_length, &
present_np_rows, present_np_cols, is_process_id_zero, np_total, legacy_api
character(len=MPI_MAX_ERROR_STRING) :: mpierr_string
character(*), parameter :: MPI_CONSISTENCY_MSG = &
"Provide mpi_comm_parent and EITHER process_row and process_col OR mpi_comm_rows and mpi_comm_cols. Aborting..."
#endif
#ifdef HAVE_DETAILED_TIMINGS
......@@ -403,15 +285,59 @@ module elpa_impl
error = ELPA_OK
! In most cases, we actually need the parent communicator to be supplied,
! ELPA internally requires it when either GPU is enabled or when ELPA2 is
! used. It thus seems reasonable that we should ALLWAYS require it. It
! should then be accompanied by EITHER process_row and process_col
! indices, OR mpi_comm_rows and mpi_comm_cols communicators, but NOT both.
! This assumption will significanlty simplify the logic, avoid possible
! inconsistencies and is rather natural from the user point of view
#ifdef WITH_MPI
! Create communicators ourselves
if (self%is_set("mpi_comm_parent") == 1 .and. &
self%is_set("process_row") == 1 .and. &
self%is_set("process_col") == 1) then
if(self%is_set("legacy_api") == 1) then
call self%get("legacy_api", legacy_api, error)
call check_elpa_get(error)
endif
if (self%is_set("mpi_comm_parent") == 1) then
call self%get("mpi_comm_parent", mpi_comm_parent, error)
call check_elpa_get(error)
call mpi_comm_rank(mpi_comm_parent, my_id, mpierr)
call self%set("process_id", my_id, error)
call check_elpa_set(error)
call mpi_comm_size(mpi_comm_parent, np_total, mpierr)
call self%set("num_processes", np_total, error)
call check_elpa_set(error)
is_process_id_zero = 0
if(my_id == 0) &
is_process_id_zero = 1
call self%set("is_process_id_zero", is_process_id_zero, error)
else
if(legacy_api .ne. 1) then
write(error_unit,*) MPI_CONSISTENCY_MSG
error = ELPA_ERROR
return
endif
endif
! Create communicators ourselves
if (self%is_set("process_row") == 1 .and. self%is_set("process_col") == 1) then
if(self%is_set("mpi_comm_rows") == 1 .or. self%is_set("mpi_comm_cols") == 1) then
write(error_unit,*) MPI_CONSISTENCY_MSG
error = ELPA_ERROR
return
endif
call self%get("process_row", process_row, error)
call check_elpa_get(error)
call self%get("process_col", process_col, error)
call check_elpa_get(error)
! mpi_comm_rows is used for communicating WITHIN rows, i.e. all processes
! having the same column coordinate share one mpi_comm_rows.
......@@ -419,7 +345,6 @@ module elpa_impl
! Analogous for mpi_comm_cols
call mpi_comm_split(mpi_comm_parent,process_col,process_row,mpi_comm_rows,mpierr)
if (mpierr .ne. MPI_SUCCESS) then
call MPI_ERROR_STRING(mpierr,mpierr_string, mpi_string_length, mpierr2)
write(error_unit,*) "MPI ERROR occured during mpi_comm_split for row communicator: ", trim(mpierr_string)
......@@ -434,62 +359,84 @@ module elpa_impl
endif
call self%set("mpi_comm_rows", mpi_comm_rows,error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
call self%set("mpi_comm_cols", mpi_comm_cols,error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
call check_elpa_set(error)
call set_or_check_missing_comm_params(self)
call self%set("mpi_comm_cols", mpi_comm_cols,error)
call check_elpa_set(error)
! remember that we created those communicators and we need to free them later
self%communicators_owned = 1
error = ELPA_OK
! Externally supplied communicators
else if ( self%is_set("mpi_comm_rows") == 1 .and. self%is_set("mpi_comm_cols") == 1) then
if(self%is_set("process_row") == 1 .or. self%is_set("process_col") == 1) then
write(error_unit,*) MPI_CONSISTENCY_MSG
error = ELPA_ERROR
return
endif
! Externally supplied communicators
if (self%is_set("mpi_comm_rows") == 1 .and. self%is_set("mpi_comm_cols") == 1) then
call self%get("mpi_comm_rows", mpi_comm_rows,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
call check_elpa_get(error)
call self%get("mpi_comm_cols", mpi_comm_cols,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
!write(error_unit, '(A,2I15)') "comms provided", mpi_comm_rows, mpi_comm_cols
call check_elpa_get(error)
call set_or_check_missing_comm_params(self)
call mpi_comm_rank(mpi_comm_rows, process_row, mpierr)
call self%set("process_row", process_row, error)
call check_elpa_set(error)
call mpi_comm_rank(mpi_comm_cols, process_col, mpierr)
call self%set("process_col", process_col, error)
call check_elpa_set(error)
! remember that we DID NOT created those communicators and we WILL NOT free them later
self%communicators_owned = 0
error = ELPA_OK
else
! Otherwise parameters are missing
write(error_unit,*) MPI_CONSISTENCY_MSG
error = ELPA_ERROR
return
endif
call self%get("mpi_comm_rows", mpi_comm_rows,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
! set num_process_rows (and cols), if they are not supplied. Check them
! for consistency if they are. Maybe we could instead require, that they
! are never supplied?
call mpi_comm_size(mpi_comm_rows, np_rows, mpierr)
if(self%is_set("num_process_rows") == 1) then
call self%get("num_process_rows", present_np_rows, error)
call check_elpa_get(error)
if(np_rows .ne. present_np_rows) then
print *,"MPI row communicator not set correctly. Aborting..."
stop
endif
call self%get("mpi_comm_cols", mpi_comm_cols,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
else
call self%set("num_process_rows", np_rows, error)
call check_elpa_set(error)
endif
call mpi_comm_size(mpi_comm_cols, np_cols, mpierr)
if(self%is_set("num_process_cols") == 1) then
call self%get("num_process_cols", present_np_cols, error)
call check_elpa_get(error)
if(np_cols .ne. present_np_cols) then
print *,"MPI column communicator not set correctly. Aborting..."
stop
endif
call self%get("process_row", process_row, error)
call self%get("process_col", process_col, error)
!write(error_unit, '(A,2I15,2I4)') "comms provided II ", mpi_comm_rows, mpi_comm_cols, process_row, process_col
else
call self%set("num_process_cols", np_cols, error)
call check_elpa_set(error)
endif
return
if(legacy_api .ne. 1) then
if(np_total .ne. np_rows * np_cols) then
print *,"MPI parent communicator and row/col communicators do not match. Aborting..."
stop
endif
endif
! Otherwise parameters are missing
error = ELPA_ERROR
#else
call self%set("process_row", 0, error)
call self%set("process_col", 0, error)
......@@ -1557,6 +1504,23 @@ module elpa_impl
call self%autotune_print_best(tune_state, error)
end subroutine
subroutine check_elpa_get(error)
integer :: error
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
end subroutine
subroutine check_elpa_set(error)
integer :: error
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
end subroutine
#endif
......
......@@ -190,6 +190,7 @@ static const elpa_index_int_entry_t int_entries[] = {
INT_ANY_ENTRY("mpi_comm_cols", "Communicator for inter-column communication", PRINT_NO),
INT_ANY_ENTRY("mpi_comm_parent", "Parent communicator", PRINT_NO),
INT_ANY_ENTRY("blacs_context", "BLACS context", PRINT_NO),
INT_ANY_ENTRY("legacy_api", "This object has been created through the legacy api. Parameter for internal use only", PRINT_NO),
INT_ENTRY("solver", "Solver to use", ELPA_SOLVER_1STAGE, ELPA_AUTOTUNE_FAST, ELPA_AUTOTUNE_DOMAIN_ANY, \
number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
......
......@@ -561,6 +561,8 @@ program test
stop 1
endif
call e%set("mpi_comm_parent", MPI_COMM_WORLD, error)
assert_elpa_ok(error)
call e%set("mpi_comm_rows", mpi_comm_rows, error)
assert_elpa_ok(error)
call e%set("mpi_comm_cols", mpi_comm_cols, error)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment