Optional build of ELPA without MPI

The configure flag "--enable-shared-memory-only" triggers a build
of ELPA without MPI support:

- all MPI calls are skipped or overloaded by serial stubs (see the
  sketch after this list)
- all calls to ScaLAPACK functions are replaced by the corresponding
  LAPACK calls
- all calls to BLACS are skipped
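
A minimal sketch of what such a serial stub layer can look like
(hypothetical module and names, not the actual contents of
src/mod_mpi_stubs.F90): communicator queries report a single task with
rank 0, and the wall-clock timer comes from the Fortran runtime instead
of an MPI library.

    ! Hypothetical serial stub module (illustration only, not ELPA source).
    module mpi_stubs_sketch
      implicit none
      integer, parameter :: MPI_COMM_WORLD = 0
      integer, parameter :: MPI_SUCCESS    = 0
    contains
      subroutine mpi_comm_rank(comm, rank, ierr)
        integer, intent(in)  :: comm
        integer, intent(out) :: rank, ierr
        rank = 0              ! the one and only task
        ierr = MPI_SUCCESS
      end subroutine
      subroutine mpi_comm_size(comm, nprocs, ierr)
        integer, intent(in)  :: comm
        integer, intent(out) :: nprocs, ierr
        nprocs = 1            ! shared-memory-only build: exactly one task
        ierr = MPI_SUCCESS
      end subroutine
      function mpi_wtime() result(t)
        real(kind=8)    :: t
        integer(kind=8) :: counts, rate
        call system_clock(counts, rate)   ! wall clock from the Fortran runtime
        t = real(counts, kind=8) / real(rate, kind=8)
      end function
    end module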

Using ELPA without MPI gives the same results as using ELPA with 1 MPI
task!

This version is not yet optimized for performance; here and there some
unnecessary copies are made.

This version is intended for users who do not have MPI in their
application but would still like to use ELPA on a single compute node.
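
In the diff below this is implemented mostly through the preprocessor:
genuinely collective operations are compiled only when WITH_MPI is
defined (see the guarded mpi_bcast calls), and reductions such as
mpi_allreduce fall back to a local copy of the send buffer. A
hypothetical, self-contained illustration of that guard pattern (not
ELPA source) could look like this:

    ! Sketch of the WITH_MPI guard pattern; compile with an MPI wrapper
    ! and -DWITH_MPI, or serially without either.
    program guarded_bcast_sketch
    #ifdef WITH_MPI
      use mpi
    #endif
      implicit none
      integer      :: mpierr
      real(kind=8) :: ev(4)
    #ifdef WITH_MPI
      call mpi_init(mpierr)
    #endif
      ev = [1.0d0, 2.0d0, 3.0d0, 4.0d0]  ! pretend eigenvalues from task 0
    #ifdef WITH_MPI
      ! in the MPI build every task needs the values computed on task 0
      call mpi_bcast(ev, size(ev), MPI_REAL8, 0, MPI_COMM_WORLD, mpierr)
    #endif
      print *, ev    ! identical with and without MPI, as stated above
    #ifdef WITH_MPI
      call mpi_finalize(mpierr)
    #endif
    end program guarded_bcast_sketch
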
@@ -10,6 +10,8 @@ lib_LTLIBRARIES = libelpa@SUFFIX@.la
 libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION) -lstdc++
 libelpa@SUFFIX@_la_SOURCES = src/mod_precision.f90 \
+src/mod_mpi.F90 \
+src/mod_mpi_stubs.F90 \
 src/elpa_utilities.F90 \
 src/elpa1_compute.F90 \
 src/elpa1.F90 \
@@ -22,9 +24,9 @@ libelpa@SUFFIX@_la_SOURCES = src/mod_precision.f90 \
 src/elpa2_compute.F90 \
 src/elpa2.F90 \
 src/elpa_c_interface.F90 \
-src/elpa_qr/qr_utils.f90 \
+src/elpa_qr/qr_utils.F90 \
 src/elpa_qr/elpa_qrkernels.f90 \
-src/elpa_qr/elpa_pdlarfb.f90 \
+src/elpa_qr/elpa_pdlarfb.F90 \
 src/elpa_qr/elpa_pdgeqrf.F90
 if HAVE_DETAILED_TIMINGS
 libelpa@SUFFIX@_la_SOURCES += src/timer.F90 \
@@ -38,6 +40,13 @@ if HAVE_DETAILED_TIMINGS
 src/ftimings/papi.c
 endif
+if !WITH_MPI
+libelpa@SUFFIX@_la_SOURCES += src/mod_time_c.F90
+if !HAVE_DETAILED_TIMINGS
+libelpa@SUFFIX@_la_SOURCES += src/ftimings/time.c
+endif
+endif
 if WITH_REAL_GENERIC_KERNEL
 libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real.F90
 endif

@@ -68,37 +68,48 @@ if test x"${enable_openmp}" = x"yes"; then
   AC_DEFINE([WITH_OPENMP], [1], [use OpenMP threading])
 fi
+AC_MSG_CHECKING(whether --enable-shared-memory-only is specified)
+AC_ARG_ENABLE([shared-memory-only],
+  AS_HELP_STRING([--enable-shared-memory-only],
+    [do not use MPI; ELPA will be build for one node shared-memory runs only]),
+  [],
+  [enable_shared_memory_only=no])
+AC_MSG_RESULT([${enable_shared_memory_only}])
+AM_CONDITIONAL([WITH_MPI],[test x"$enable_shared_memory_only" = x"no"])
+if test x"${enable_shared_memory_only}" = x"no"; then
+  AC_DEFINE([WITH_MPI], [1], [use MPI])
+fi
 dnl check whether mpi compilers are available;
 dnl if not abort since it is mandatory
 # C
 AC_LANG([C])
-m4_include([m4/ax_prog_cc_mpi.m4])
-AX_PROG_CC_MPI([true],[],[AC_MSG_ERROR([no MPI C wrapper found])])
+AX_PROG_CC_MPI([test x"$enable_shared_memory_only" = xno],[use_mpi=yes],[use_mpi=no])
 if test x"${enable_openmp}" = x"yes"; then
   AX_ELPA_OPENMP
   if test "$ac_cv_prog_cc_openmp" = unsupported; then
     AC_MSG_ERROR([Could not compile a C program with OpenMP, adjust CFLAGS])
   fi
   CFLAGS="$OPENMP_CFLAGS $CFLAGS"
 fi
 AC_PROG_INSTALL
 AM_PROG_AR
 AM_PROG_AS
 # Fortran
 AC_LANG([Fortran])
 m4_include([m4/ax_prog_fc_mpi.m4])
-AX_PROG_FC_MPI([],[],[AC_MSG_ERROR([no MPI Fortran wrapper found])])
+AX_PROG_FC_MPI([test x"$enable_shared_memory_only" = xno],[use_mpi=yes],[use_mpi=no])
 if test x"${enable_openmp}" = x"yes"; then
   AX_ELPA_OPENMP
   if test "$ac_cv_prog_fc_openmp" = unsupported; then
     AC_MSG_ERROR([Could not compile a Fortran program with OpenMP, adjust FCFLAGS])
   fi
   FCFLAGS="$OPENMP_FCFLAGS $FCFLAGS"
 fi
 # C++
@@ -106,11 +117,11 @@ AC_LANG([C++])
 AC_PROG_CXX
 if test x"${enable_openmp}" = x"yes"; then
   AX_ELPA_OPENMP
   if test "$ac_cv_prog_cxx_openmp" = unsupported; then
     AC_MSG_ERROR([Could not compile a C++ program with OpenMP, adjust CXXFLAGS])
   fi
   CXXFLAGS="$OPENMP_CXXFLAGS $CXXFLAGS"
 fi
@@ -386,35 +397,37 @@ else
   AC_MSG_ERROR([could not link with lapack: specify path])
 fi
+if test x"${enable_shared_memory_only}" = x"no"; then
   dnl test whether scalapack already contains blacs
   scalapack_libs="mpiscalapack scalapack"
   old_LIBS="$LIBS"
   for lib in ${scalapack_libs}; do
     LIBS="-l${lib} ${old_LIBS}"
     AC_MSG_CHECKING([whether -l${lib} already contains a BLACS implementation])
     AC_LINK_IFELSE([AC_LANG_FUNC_LINK_TRY([blacs_gridinit])],[blacs_in_scalapack=yes],[blacs_in_scalapack=no])
     AC_MSG_RESULT([${blacs_in_scalapack}])
     if test x"${blacs_in_scalapack}" = x"yes"; then
       break
     fi
   done
   if test x"${blacs_in_scalapack}" = x"no"; then
     LIBS="${old_LIBS}"
     dnl Test for stand-alone blacs
     AC_SEARCH_LIBS([bi_f77_init],[mpiblacsF77init],[],[],[-lmpiblacs])
     AC_SEARCH_LIBS([blacs_gridinit],[mpiblacs blacs],[have_blacs=yes],[have_blacs=no])
     if test x"${have_blacs}" = x"no"; then
       AC_MSG_ERROR([No usable BLACS found. If installed in a non-standard place, please specify suitable LDFLAGS and FCFLAGS as arguments to configure])
     fi
   fi
   AC_SEARCH_LIBS([pdtran],[$scalapack_libs],[have_scalapack=yes],[have_scalapack=no])
   if test x"${have_scalapack}" = x"no" ; then
     AC_MSG_ERROR([could not link with scalapack: specify path])
   fi
+fi
 dnl check whether we can link alltogehter
@@ -655,7 +668,7 @@ if test x"${use_specific_complex_kernel}" = x"no" ; then
 fi
 if test x"${use_specific_real_kernel}" = x"no" ; then
   AC_DEFINE([WITH_NO_SPECIFIC_REAL_KERNEL],[1],[do not use only one specific real kernel (set at compile time)])
 fi
 LT_INIT
@@ -667,7 +680,7 @@ DX_HTML_FEATURE(ON)
 DX_INIT_DOXYGEN([ELPA], [Doxyfile], [docs])
 DESPERATELY_WANT_ASSUMED_SIZE=0
-if text x"${DESPERATELY_WANT_ASSUMED_SIZE}" = x"yes" ; then
+if test x"${DESPERATELY_WANT_ASSUMED_SIZE}" = x"yes" ; then
   AC_DEFINE([DESPERATELY_WANT_ASSUMED_SIZE],[1],[use assumed size arrays, even if not debuggable])
 fi

@@ -86,8 +86,10 @@ module ELPA1
   use elpa1_compute
 #ifdef HAVE_DETAILED_TIMINGS
   use timings
 #endif
+  use elpa_mpi
   implicit none
   PRIVATE ! By default, all routines contained are private
@@ -110,7 +112,6 @@ module ELPA1
   logical, public :: elpa_print_times = .false. !< Set elpa_print_times to .true. for explicit timing outputs
-  include 'mpif.h'
 !> \brief get_elpa_row_col_comms: old, deprecated Fortran function to create the MPI communicators for ELPA. Better use "elpa_get_communicators"
 !> \detail
@@ -328,6 +329,7 @@ function solve_evp_real_1stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mp
   ttt0 = MPI_Wtime()
   call tridiag_real(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau)
   ttt1 = MPI_Wtime()
   if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time tridiag_real :',ttt1-ttt0
   time_evp_fwd = ttt1-ttt0

@@ -73,6 +73,7 @@ module ELPA2
   use elpa2_compute
   use elpa_pdgeqrf
+  use elpa_mpi
   implicit none
   PRIVATE ! By default, all routines contained are private
@@ -82,7 +83,6 @@ module ELPA2
   public :: solve_evp_real_2stage
   public :: solve_evp_complex_2stage
-  include 'mpif.h'
 !******
 contains
@@ -170,7 +170,6 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
   wantDebug = .false.
   if (firstCall) then
     ! are debug messages desired?
@@ -269,10 +268,10 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
   ttt1 = MPI_Wtime()
   if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
     write(error_unit,*) 'Time tridiag_band_real :',ttt1-ttt0
+#ifdef WITH_MPI
   call mpi_bcast(ev,na,MPI_REAL8,0,mpi_comm_all,mpierr)
   call mpi_bcast(e,na,MPI_REAL8,0,mpi_comm_all,mpierr)
+#endif
   ttt1 = MPI_Wtime()
   time_evp_fwd = ttt1-ttts
@@ -399,7 +398,6 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
   call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
   call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
   call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
   wantDebug = .false.
   if (firstCall) then
     ! are debug messages desired?
@@ -473,10 +471,10 @@
   ttt1 = MPI_Wtime()
   if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
     write(error_unit,*) 'Time tridiag_band_complex :',ttt1-ttt0
+#ifdef WITH_MPI
   call mpi_bcast(ev,na,MPI_REAL8,0,mpi_comm_all,mpierr)
   call mpi_bcast(e,na,MPI_REAL8,0,mpi_comm_all,mpierr)
+#endif
   ttt1 = MPI_Wtime()
   time_evp_fwd = ttt1-ttts

@@ -110,9 +110,9 @@
 subroutine hh_trafo_kernel_10_bgp(q, hh, nb, ldq, ldh, s)
   use precision
+  use elpa_mpi
   implicit none
-  include 'mpif.h'
   integer(kind=ik), intent(in) :: nb, ldq, ldh
   complex(kind=ck), intent(inout) :: q(ldq/2,*)
@@ -387,9 +387,9 @@
 subroutine hh_trafo_kernel_8_bgp(q, hh, nb, ldq, ldh, s)
   use precision
+  use elpa_mpi
   implicit none
-  include 'mpif.h'
   integer(kind=ik), intent(in) :: nb, ldq, ldh
   complex(kind=ck), intent(inout) :: q(ldq/2,*)
@@ -629,9 +629,9 @@
 subroutine hh_trafo_kernel_4_bgp(q, hh, nb, ldq, ldh, s)
   use precision
+  use elpa_mpi
   implicit none
-  include 'mpif.h'
   integer(kind=ik), intent(in) :: nb, ldq, ldh
   complex(kind=ck), intent(inout) :: q(ldq/2,*)

@@ -48,7 +48,7 @@ module elpa_pdgeqrf
   use elpa1_compute
   use elpa_pdlarfb
   use qr_utils_mod
+  use elpa_mpi
   implicit none
   PRIVATE
@@ -57,7 +57,6 @@ module elpa_pdgeqrf
   public :: qr_pqrparam_init
   public :: qr_pdlarfg2_1dcomm_check
-  include 'mpif.h'
 contains
@@ -120,7 +119,6 @@ module elpa_pdgeqrf
   ! copy value before we are going to filter it
   total_cols = n
   call mpi_comm_rank(mpicomm_cols,mpirank_cols,mpierr)
   call mpi_comm_rank(mpicomm_rows,mpirank_rows,mpierr)
   call mpi_comm_size(mpicomm_cols,mpiprocs_cols,mpierr)
@@ -235,9 +233,10 @@ module elpa_pdgeqrf
   !end if
   ! initiate broadcast (send part)
+#ifdef WITH_MPI
   call MPI_Bcast(work(broadcast_offset),broadcast_size,mpi_real8, &
                  mpirank_cols_qr,mpicomm_cols,mpierr)
+#endif
   ! copy tau parts into temporary tau buffer
   work(temptau_offset+voffset-1:temptau_offset+(voffset-1)+lcols-1) = tau(offset:offset+lcols-1)
@@ -257,9 +256,10 @@ module elpa_pdgeqrf
   broadcast_size = dbroadcast_size(1) + dtmat_bcast_size(1)
   ! initiate broadcast (recv part)
+#ifdef WITH_MPI
   call MPI_Bcast(work(broadcast_offset),broadcast_size,mpi_real8, &
                  mpirank_cols_qr,mpicomm_cols,mpierr)
+#endif
   ! last n*n elements in buffer are (still empty) T matrix elements
   ! fetch from first process in each column
@@ -530,10 +530,8 @@ module elpa_pdgeqrf
   maxrank = min(PQRPARAM(1),n)
   updatemode = PQRPARAM(2)
   hgmode = PQRPARAM(4)
   call MPI_Comm_rank(mpicomm, mpirank, mpierr)
   call MPI_Comm_size(mpicomm, mpiprocs, mpierr)
   if (trans .eq. 1) then
     incx = lda
   else
@@ -713,10 +711,8 @@ module elpa_pdgeqrf
 #endif
     return
   end if
   call MPI_Comm_rank(mpi_comm, mpirank, mpierr)
   call MPI_Comm_size(mpi_comm, mpiprocs, mpierr)
   ! calculate expected work size and store in work(1)
   if (hgmode .eq. ichar('s')) then
     ! allreduce (MPI_SUM)
@@ -770,11 +766,13 @@ module elpa_pdgeqrf
   work(1) = alpha
   work(2) = dot
+#ifdef WITH_MPI
   call mpi_allreduce(work(1),work(sendsize+1), &
                      sendsize,mpi_real8,mpi_sum, &
                      mpi_comm,mpierr)
+#else
+  work(sendsize+1:sendsize+1+sendsize-1) = work(1:sendsize)
+#endif
   alpha = work(sendsize+1)
   xnorm = sqrt(work(sendsize+2))
   else if (hgmode .eq. ichar('x')) then
@@ -790,11 +788,13 @@ module elpa_pdgeqrf
   work(2*iproc+1) = alpha
   work(2*iproc+2) = xnorm
   end do
+#ifdef WITH_MPI
   call mpi_alltoall(work(1),2,mpi_real8, &
                     work(sendsize+1),2,mpi_real8, &
                     mpi_comm,mpierr)
+#else
+  work(sendsize+1:sendsize+1+2-1) = work(1:2)
+#endif
   ! extract alpha value
   alpha = work(sendsize+1+mpirank_top*2)
@@ -816,10 +816,13 @@ module elpa_pdgeqrf
   work(2) = xnorm
   ! allgather
+#ifdef WITH_MPI
   call mpi_allgather(work(1),sendsize,mpi_real8, &
                      work(sendsize+1),sendsize,mpi_real8, &
                      mpi_comm,mpierr)
+#else
+  work(sendsize+1:sendsize+1+sendsize-1) = work(1:sendsize)
+#endif
   ! extract alpha value
   alpha = work(sendsize+1+mpirank_top*2)
@@ -1040,10 +1043,8 @@ module elpa_pdgeqrf
 #endif
   return
   end if
   call MPI_Comm_rank(mpicomm, mpirank, mpierr)
   call MPI_Comm_size(mpicomm, mpiprocs, mpierr)