Commit 999c1804 authored by Andreas Marek

Move global_gather to a module

parent fc9ee4e9
Makefile.am
@@ -58,6 +58,7 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/helpers/matrix_plot.F90 \
src/general/mod_elpa_skewsymmetric_blas.F90 \
src/solve_tridi/mod_global_product.F90 \
src/solve_tridi/mod_global_gather.F90 \
src/elpa_index.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c
@@ -679,6 +680,7 @@ EXTRA_DIST = \
src/elpa_impl_generalized_transform_template.F90 \
src/elpa1/elpa1_compute_template.F90 \
src/solve_tridi/global_product_template.F90 \
src/solve_tridi/global_gather_template.F90 \
src/elpa1/elpa1_merge_systems_real_template.F90 \
src/elpa1/elpa1_solve_tridi_real_template.F90 \
src/elpa1/elpa1_template.F90 \
src/elpa1/elpa1_merge_systems_real_template.F90
@@ -64,6 +64,7 @@
use elpa_abstract_impl
use elpa_blas_interfaces
use global_product
use global_gather
#ifdef WITH_OPENMP
use omp_lib
#endif
@@ -231,7 +232,7 @@
call global_gather_&
&PRECISION&
&(obj, z, na)
&(obj, z, na, mpi_comm_rows, mpi_comm_cols, npc_n, np_prev, np_next)
! Normalize z so that norm(z) = 1. Since z is the concatenation of
! two normalized vectors, norm2(z) = sqrt(2).
z = z/sqrt(2.0_rk)
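A quick check of the normalization comment above: z is the concatenation of two unit vectors z_1 and z_2, so

\[ \lVert z \rVert_2 = \sqrt{\lVert z_1 \rVert_2^2 + \lVert z_2 \rVert_2^2} = \sqrt{1 + 1} = \sqrt{2}, \]

and dividing by sqrt(2) restores norm(z) = 1, which is exactly what z = z/sqrt(2.0_rk) does.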
@@ -517,10 +518,10 @@
call global_gather_&
&PRECISION&
&(obj, dbase, na1)
&(obj, dbase, na1, mpi_comm_rows, mpi_comm_cols, npc_n, np_prev, np_next)
call global_gather_&
&PRECISION&
&(obj, ddiff, na1)
&(obj, ddiff, na1, mpi_comm_rows, mpi_comm_cols, npc_n, np_prev, np_next)
d(1:na1) = dbase(1:na1) - ddiff(1:na1)
! Calculate scale factors for eigenvectors
@@ -557,7 +558,7 @@
call global_gather_&
&PRECISION&
&(obj, ev_scale, na1)
&(obj, ev_scale, na1, mpi_comm_rows, mpi_comm_cols, npc_n, np_prev, np_next)
! Add the deflated eigenvalues
d(na1+1:na) = d2(1:na2)
@@ -1058,6 +1059,7 @@
end subroutine transform_columns_&
&PRECISION
#if 0
subroutine global_gather_&
&PRECISION&
&(obj, z, n)
@@ -1116,7 +1118,7 @@
enddo
end subroutine global_gather_&
&PRECISION
#endif
#if 0
subroutine global_product_&
&PRECISION&
src/solve_tridi/global_gather_template.F90 (new file)
subroutine global_gather_&
&PRECISION&
&(obj, z, n, mpi_comm_rows, mpi_comm_cols, npc_n, np_prev, np_next)
! This routine sums up z over all processors.
! It should only be used for gathering distributed results,
! i.e. z(i) should be nonzero on exactly one processor column,
! otherwise the results may be numerically different on different columns
use precision
use elpa_abstract_impl
use elpa_mpi
#ifdef WITH_OPENMP
use elpa_omp
#endif
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: mpi_comm_cols, mpi_comm_rows
integer(kind=ik), intent(in) :: npc_n, np_prev, np_next
#ifdef WITH_MPI
integer(kind=MPI_KIND) :: mpierr, np_rowsMPI, np_colsMPI
#endif
integer(kind=ik) :: n, np_rows, np_cols
real(kind=REAL_DATATYPE) :: z(n)
real(kind=REAL_DATATYPE) :: tmp(n)
integer(kind=ik) :: np
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND) ,np_rowsMPI, mpierr)
np_rows = int(np_rowsMPI,kind=c_int)
!call mpi_comm_rank(int(mpi_comm_cols,kind=MPI_KIND) ,my_pcolMPI, mpierr)
call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND) ,np_colsMPI, mpierr)
np_cols = int(np_colsMPI,kind=c_int)
call obj%timer%stop("mpi_communication")
#else
! without MPI there is only one task in each direction
np_rows = 1
np_cols = 1
#endif
if (npc_n==1 .and. np_rows==1) return ! nothing to do
! Do an mpi_allreduce over processor rows
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
call mpi_allreduce(z, tmp, int(n,kind=MPI_KIND), MPI_REAL_PRECISION, MPI_SUM, int(mpi_comm_rows,kind=MPI_KIND), mpierr)
call obj%timer%stop("mpi_communication")
#else /* WITH_MPI */
tmp = z
#endif /* WITH_MPI */
! If only 1 processor column, we are done
if (npc_n==1) then
z(:) = tmp(:)
return
endif
! If all processor columns are involved, we can use mpi_allreduce
if (npc_n==np_cols) then
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
call mpi_allreduce(tmp, z, int(n,kind=MPI_KIND), MPI_REAL_PRECISION, MPI_SUM, int(mpi_comm_cols,kind=MPI_KIND), mpierr)
call obj%timer%stop("mpi_communication")
#else /* WITH_MPI */
z(:) = tmp(:)
#endif /* WITH_MPI */
return
endif
! Do a ring send over processor columns
z(:) = 0
do np = 1, npc_n
z(:) = z(:) + tmp(:)
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
call mpi_sendrecv_replace(z, int(n,kind=MPI_KIND), MPI_REAL_PRECISION, int(np_next,kind=MPI_KIND), &
1112_MPI_KIND+int(np,kind=MPI_KIND), &
int(np_prev,kind=MPI_KIND), 1112_MPI_KIND+int(np,kind=MPI_KIND), &
int(mpi_comm_cols,kind=MPI_KIND), MPI_STATUS_IGNORE, mpierr)
call obj%timer%stop("mpi_communication")
#else /* WITH_MPI */
#endif /* WITH_MPI */
enddo
end subroutine global_gather_&
&PRECISION
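For readers tracing the ring exchange above: the following is a minimal, self-contained sketch (not ELPA code) of the same accumulate-and-shift pattern. It assumes MPI_COMM_WORLD stands in for the column communicator, and the names ring_gather_demo, np_prev and np_next (here computed from the rank) are illustrative. After nprocs sendrecv_replace steps every rank holds the sum of all contributions, which is the invariant global_gather relies on when npc_n < np_cols.

! Hypothetical standalone demo of the accumulate-and-shift ring;
! build with e.g. mpifort ring_gather_demo.f90
program ring_gather_demo
  use mpi
  implicit none
  integer, parameter :: n = 4
  integer :: ierr, np, rank, nprocs, np_prev, np_next
  real(8) :: z(n), tmp(n)

  call mpi_init(ierr)
  call mpi_comm_rank(MPI_COMM_WORLD, rank, ierr)
  call mpi_comm_size(MPI_COMM_WORLD, nprocs, ierr)

  ! ring neighbours, analogous to np_prev/np_next in the template
  np_prev = mod(rank - 1 + nprocs, nprocs)
  np_next = mod(rank + 1, nprocs)

  ! each rank contributes one nonzero entry, mimicking the
  ! "z(i) is nonzero on exactly one column" precondition
  tmp = 0.0d0
  tmp(mod(rank, n) + 1) = real(rank + 1, 8)

  ! accumulate tmp into z while shifting z around the ring; after
  ! nprocs steps every rank has seen every contribution exactly once
  z = 0.0d0
  do np = 1, nprocs
    z = z + tmp
    call mpi_sendrecv_replace(z, n, MPI_DOUBLE_PRECISION, np_next, 1112 + np, &
                              np_prev, 1112 + np, MPI_COMM_WORLD,             &
                              MPI_STATUS_IGNORE, ierr)
  enddo

  print *, 'rank', rank, ': z =', z
  call mpi_finalize(ierr)
end program ring_gather_demo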
#include "config-f90.h"
module global_gather
use precision
implicit none
private
public :: global_gather_double
#if defined(WANT_SINGLE_PRECISION_REAL) || defined(WANT_SINGLE_PRECISION_COMPLEX)
public :: global_gather_single
#endif
contains
! real double precision
#define DOUBLE_PRECISION_REAL
#define REALCASE
#define DOUBLE_PRECISION
#include "../general/precision_macros.h"
#include "./global_gather_template.F90"
#undef DOUBLE_PRECISION_REAL
#undef REALCASE
#undef DOUBLE_PRECISION
#if defined(WANT_SINGLE_PRECISION_REAL) || defined(WANT_SINGLE_PRECISION_COMPLEX)
! real single precision
#define SINGLE_PRECISION_REAL
#define REALCASE
#define SINGLE_PRECISION
#include "../general/precision_macros.h"
#include "./global_gather_template.F90"
#undef SINGLE_PRECISION_REAL
#undef REALCASE
#undef SINGLE_PRECISION
#endif
end module global_gather
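A note on the template mechanism the module uses: global_gather_template.F90 is included twice, and precision_macros.h redefines the PRECISION token (plus the kind constants) between the two passes, so the Fortran continuation splice global_gather_& / &PRECISION& / &(...) expands to global_gather_double and then global_gather_single. Below is a minimal sketch of the same idiom with made-up names (say_, RK) instead of ELPA's macro set, assuming kind values 4/8 as on common compilers:

! demo.F90 -- build with a preprocessing compiler, e.g. gfortran -cpp demo.F90
module say_mod
implicit none
contains

#define PRECISION double
#define RK 8
! the & continuations splice "say_" + "double" into one token after cpp runs
subroutine say_&
&PRECISION&
&(x)
real(kind=RK), intent(in) :: x
print *, 'double precision:', x
end subroutine
#undef PRECISION
#undef RK

#define PRECISION single
#define RK 4
subroutine say_&
&PRECISION&
&(x)
real(kind=RK), intent(in) :: x
print *, 'single precision:', x
end subroutine
#undef PRECISION
#undef RK

end module say_mod

program demo
use say_mod
call say_double(1.0d0)
call say_single(1.0)
end program demo

The #undef between the two inclusions is what lets the same template text be re-read with a new meaning; it is also why Makefile.am ships global_gather_template.F90 via EXTRA_DIST rather than compiling it as a standalone source.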