Commit d9011373 authored by Andreas Marek

Merge branch 'master' into ELPA_GPU

parents bb63bd9e 9af253bc
@@ -10,6 +10,8 @@ lib_LTLIBRARIES = libelpa@SUFFIX@.la
 libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION) -lstdc++
 libelpa@SUFFIX@_la_SOURCES = src/mod_precision.F90 \
+                             src/mod_mpi.F90 \
+                             src/mod_mpi_stubs.F90 \
                              src/elpa_utilities.F90 \
                              src/elpa1_compute.F90 \
                              src/elpa1.F90 \
@@ -26,9 +28,9 @@ libelpa@SUFFIX@_la_SOURCES = src/mod_precision.F90 \
                              src/elpa2_compute.F90 \
                              src/elpa2.F90 \
                              src/elpa_c_interface.F90 \
-                             src/elpa_qr/qr_utils.f90 \
+                             src/elpa_qr/qr_utils.F90 \
                              src/elpa_qr/elpa_qrkernels.f90 \
-                             src/elpa_qr/elpa_pdlarfb.f90 \
+                             src/elpa_qr/elpa_pdlarfb.F90 \
                              src/elpa_qr/elpa_pdgeqrf.F90
 if HAVE_DETAILED_TIMINGS
 libelpa@SUFFIX@_la_SOURCES += src/timer.F90 \
@@ -47,6 +49,13 @@ if WITH_GPU_VERSION
 #src/interface_cuda.F90 src/interface_c_kernel.F90 src/ev_tridi_band_gpu_c_v2.cu src/cuUtils.cu
 endif
+if !WITH_MPI
+  libelpa@SUFFIX@_la_SOURCES += src/mod_time_c.F90
+if !HAVE_DETAILED_TIMINGS
+  libelpa@SUFFIX@_la_SOURCES += src/ftimings/time.c
+endif
+endif
 if WITH_REAL_GENERIC_KERNEL
 libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real.F90
 endif
@@ -370,6 +379,7 @@ distclean-local:
 EXTRA_DIST = \
   fdep/fortran_dependencies.pl \
   fdep/fortran_dependencies.mk \
+  test/fortran_test_programs/elpa_test_programs_print_headers.X90 \
   src/elpa_reduce_add_vectors.X90 \
   src/elpa_transpose_vectors.X90 \
   src/redist_band.X90 \
......
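Note: the new files src/mod_mpi.F90 and src/mod_mpi_stubs.F90 themselves are not shown in this diff. The sketch below is only an illustration of the wrapper-module pattern that lets the rest of the code say "use elpa_mpi" instead of "include 'mpif.h'" (its names and contents are assumptions, not the actual ELPA sources): an MPI build simply re-exports the MPI definitions, while a shared-memory-only build supplies the few symbols the library references, with single-process semantics.

!  Hypothetical sketch only -- the real src/mod_mpi.F90 / src/mod_mpi_stubs.F90
!  are not part of this diff; all names and contents here are assumptions.
#ifdef WITH_MPI
module elpa_mpi
  ! MPI build: re-export the MPI interface so that "use elpa_mpi" provides
  ! MPI_COMM_WORLD, MPI_Wtime, mpi_allreduce, mpi_bcast, ...
  implicit none
  public
  include 'mpif.h'
end module elpa_mpi
#else
module elpa_mpi
  ! Shared-memory-only build: provide the handful of symbols the sources
  ! reference, with trivial single-process semantics.
  implicit none
  public
  integer, parameter :: MPI_COMM_WORLD = 1

contains

  subroutine mpi_comm_rank(comm, rank, mpierr)
    integer, intent(in)  :: comm
    integer, intent(out) :: rank, mpierr
    rank = 0; mpierr = 0      ! there is exactly one "rank"
  end subroutine mpi_comm_rank

  subroutine mpi_comm_size(comm, nranks, mpierr)
    integer, intent(in)  :: comm
    integer, intent(out) :: nranks, mpierr
    nranks = 1; mpierr = 0    ! ... and it is the whole "communicator"
  end subroutine mpi_comm_size

  function mpi_wtime() result(t)
    ! stand-in for MPI_Wtime(); here via the system_clock intrinsic
    ! (the diff adds src/mod_time_c.F90 / src/ftimings/time.c for this purpose)
    real(kind=8)    :: t
    integer(kind=8) :: counts, count_rate
    call system_clock(counts, count_rate)
    t = real(counts, kind=8) / real(count_rate, kind=8)
  end function mpi_wtime

end module elpa_mpi
#endif

With such a module in place, the per-file change from "include 'mpif.h'" to "use elpa_mpi" seen in the hunks below keeps the MPI build unchanged, while an --enable-shared-memory-only build can compile without any MPI library.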
@@ -68,37 +68,48 @@ if test x"${enable_openmp}" = x"yes"; then
   AC_DEFINE([WITH_OPENMP], [1], [use OpenMP threading])
 fi
+AC_MSG_CHECKING(whether --enable-shared-memory-only is specified)
+AC_ARG_ENABLE([shared-memory-only],
+                AS_HELP_STRING([--enable-shared-memory-only],
+                               [do not use MPI; ELPA will be build for one node shared-memory runs only]),
+              [],
+              [enable_shared_memory_only=no])
+AC_MSG_RESULT([${enable_shared_memory_only}])
+AM_CONDITIONAL([WITH_MPI],[test x"$enable_shared_memory_only" = x"no"])
+if test x"${enable_shared_memory_only}" = x"no"; then
+   AC_DEFINE([WITH_MPI], [1], [use MPI])
+fi
 dnl check whether mpi compilers are available;
 dnl if not abort since it is mandatory
 # C
 AC_LANG([C])
-m4_include([m4/ax_prog_cc_mpi.m4])
-AX_PROG_CC_MPI([true],[],[AC_MSG_ERROR([no MPI C wrapper found])])
+AX_PROG_CC_MPI([test x"$enable_shared_memory_only" = xno],[use_mpi=yes],[use_mpi=no])
 if test x"${enable_openmp}" = x"yes"; then
   AX_ELPA_OPENMP
   if test "$ac_cv_prog_cc_openmp" = unsupported; then
     AC_MSG_ERROR([Could not compile a C program with OpenMP, adjust CFLAGS])
   fi
   CFLAGS="$OPENMP_CFLAGS $CFLAGS"
 fi
 AC_PROG_INSTALL
 AM_PROG_AR
 AM_PROG_AS
 # Fortran
 AC_LANG([Fortran])
 m4_include([m4/ax_prog_fc_mpi.m4])
-AX_PROG_FC_MPI([],[],[AC_MSG_ERROR([no MPI Fortran wrapper found])])
+AX_PROG_FC_MPI([test x"$enable_shared_memory_only" = xno],[use_mpi=yes],[use_mpi=no])
 if test x"${enable_openmp}" = x"yes"; then
   AX_ELPA_OPENMP
   if test "$ac_cv_prog_fc_openmp" = unsupported; then
     AC_MSG_ERROR([Could not compile a Fortran program with OpenMP, adjust FCFLAGS])
   fi
   FCFLAGS="$OPENMP_FCFLAGS $FCFLAGS"
 fi
 # C++
@@ -106,11 +117,11 @@ AC_LANG([C++])
 AC_PROG_CXX
 if test x"${enable_openmp}" = x"yes"; then
   AX_ELPA_OPENMP
   if test "$ac_cv_prog_cxx_openmp" = unsupported; then
     AC_MSG_ERROR([Could not compile a C++ program with OpenMP, adjust CXXFLAGS])
   fi
   CXXFLAGS="$OPENMP_CXXFLAGS $CXXFLAGS"
 fi
@@ -386,35 +397,37 @@ else
   AC_MSG_ERROR([could not link with lapack: specify path])
 fi
-dnl test whether scalapack already contains blacs
-scalapack_libs="mpiscalapack scalapack"
-old_LIBS="$LIBS"
-for lib in ${scalapack_libs}; do
-  LIBS="-l${lib} ${old_LIBS}"
-  AC_MSG_CHECKING([whether -l${lib} already contains a BLACS implementation])
-  AC_LINK_IFELSE([AC_LANG_FUNC_LINK_TRY([blacs_gridinit])],[blacs_in_scalapack=yes],[blacs_in_scalapack=no])
-  AC_MSG_RESULT([${blacs_in_scalapack}])
-  if test x"${blacs_in_scalapack}" = x"yes"; then
-    break
-  fi
-done
-if test x"${blacs_in_scalapack}" = x"no"; then
-  LIBS="${old_LIBS}"
-  dnl Test for stand-alone blacs
-  AC_SEARCH_LIBS([bi_f77_init],[mpiblacsF77init],[],[],[-lmpiblacs])
-  AC_SEARCH_LIBS([blacs_gridinit],[mpiblacs blacs],[have_blacs=yes],[have_blacs=no])
-  if test x"${have_blacs}" = x"no"; then
-    AC_MSG_ERROR([No usable BLACS found. If installed in a non-standard place, please specify suitable LDFLAGS and FCFLAGS as arguments to configure])
-  fi
-fi
-AC_SEARCH_LIBS([pdtran],[$scalapack_libs],[have_scalapack=yes],[have_scalapack=no])
-if test x"${have_scalapack}" = x"no" ; then
-  AC_MSG_ERROR([could not link with scalapack: specify path])
-fi
+if test x"${enable_shared_memory_only}" = x"no"; then
+  dnl test whether scalapack already contains blacs
+  scalapack_libs="mpiscalapack scalapack"
+  old_LIBS="$LIBS"
+  for lib in ${scalapack_libs}; do
+    LIBS="-l${lib} ${old_LIBS}"
+    AC_MSG_CHECKING([whether -l${lib} already contains a BLACS implementation])
+    AC_LINK_IFELSE([AC_LANG_FUNC_LINK_TRY([blacs_gridinit])],[blacs_in_scalapack=yes],[blacs_in_scalapack=no])
+    AC_MSG_RESULT([${blacs_in_scalapack}])
+    if test x"${blacs_in_scalapack}" = x"yes"; then
+      break
+    fi
+  done
+  if test x"${blacs_in_scalapack}" = x"no"; then
+    LIBS="${old_LIBS}"
+    dnl Test for stand-alone blacs
+    AC_SEARCH_LIBS([bi_f77_init],[mpiblacsF77init],[],[],[-lmpiblacs])
+    AC_SEARCH_LIBS([blacs_gridinit],[mpiblacs blacs],[have_blacs=yes],[have_blacs=no])
+    if test x"${have_blacs}" = x"no"; then
+      AC_MSG_ERROR([No usable BLACS found. If installed in a non-standard place, please specify suitable LDFLAGS and FCFLAGS as arguments to configure])
+    fi
+  fi
+  AC_SEARCH_LIBS([pdtran],[$scalapack_libs],[have_scalapack=yes],[have_scalapack=no])
+  if test x"${have_scalapack}" = x"no" ; then
+    AC_MSG_ERROR([could not link with scalapack: specify path])
+  fi
+fi
 dnl check whether we can link alltogehter
@@ -718,7 +731,7 @@ if test x"${use_specific_complex_kernel}" = x"no" ; then
 fi
 if test x"${use_specific_real_kernel}" = x"no" ; then
   AC_DEFINE([WITH_NO_SPECIFIC_REAL_KERNEL],[1],[do not use only one specific real kernel (set at compile time)])
 fi
 LT_INIT
......
@@ -48,8 +48,9 @@ module mod_check_for_gpu
   function check_for_gpu(myid, numberOfDevices, wantDebug) result(gpuAvailable)
     use cuda_functions
     use precision
+    use elpa_mpi
     implicit none
-    include 'mpif.h'
     integer(kind=ik), intent(in)  :: myid
     logical, optional, intent(in) :: wantDebug
     logical                       :: success, wantDebugMessage
@@ -80,6 +81,7 @@ module mod_check_for_gpu
       ! make sure that all nodes have the same number of GPU's, otherwise
       ! we run into loadbalancing trouble
+#ifdef WITH_MPI
       call mpi_allreduce(numberOfDevices, maxNumberOfDevices, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, mpierr)
       if (maxNumberOfDevices .ne. numberOfDevices) then
@@ -88,7 +90,7 @@
         gpuAvailable = .false.
         return
       endif
+#endif
       if (numberOfDevices .ne. 0) then
         gpuAvailable = .true.
         ! Usage of GPU is possible since devices have been detected
......
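Note: the #ifdef WITH_MPI guard added above compiles the cross-rank GPU-count consistency check out of a shared-memory-only build, so the locally detected numberOfDevices is used as-is. A minimal, hypothetical sketch of the same guard pattern in isolation (not taken from the ELPA sources):

!  Hypothetical illustration of the WITH_MPI guard pattern used throughout this merge.
subroutine global_max_devices(n_local, n_global)
  use elpa_mpi                  ! real MPI symbols or stubs, depending on the build
  implicit none
  integer, intent(in)  :: n_local
  integer, intent(out) :: n_global
  integer              :: mpierr

#ifdef WITH_MPI
  ! MPI build: agree on the maximum device count across all ranks
  call mpi_allreduce(n_local, n_global, 1, MPI_INTEGER, MPI_MAX, MPI_COMM_WORLD, mpierr)
#else
  ! single-process build: the local count already is the global maximum
  n_global = n_local
#endif
end subroutine global_max_devices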
@@ -86,9 +86,11 @@ module ELPA1
   use elpa1_compute
 #ifdef HAVE_DETAILED_TIMINGS
   use timings
 #endif
   use iso_c_binding
+  use elpa_mpi
   implicit none
   PRIVATE ! By default, all routines contained are private
@@ -111,7 +113,6 @@ module ELPA1
   logical, public :: elpa_print_times = .false. !< Set elpa_print_times to .true. for explicit timing outputs
-  include 'mpif.h'
 !> \brief get_elpa_row_col_comms: old, deprecated Fortran function to create the MPI communicators for ELPA. Better use "elpa_get_communicators"
 !> \detail
@@ -330,6 +331,7 @@ function solve_evp_real_1stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mp
       ttt0 = MPI_Wtime()
       call tridiag_real(na, a, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, ev, e, tau)
       ttt1 = MPI_Wtime()
       if(my_prow==0 .and. my_pcol==0 .and. elpa_print_times) write(error_unit,*) 'Time tridiag_real :',ttt1-ttt0
       time_evp_fwd = ttt1-ttt0
......
This diff is collapsed.
@@ -80,6 +80,8 @@ module ELPA2
 !  use cuda_c_kernel
 !  use iso_c_binding
 #endif
+  use elpa_mpi
   implicit none
   PRIVATE ! By default, all routines contained are private
@@ -88,7 +90,6 @@ module ELPA2
   public :: solve_evp_real_2stage
   public :: solve_evp_complex_2stage
-  include 'mpif.h'
 !******
 contains
@@ -177,6 +178,7 @@ contains
 #ifdef HAVE_DETAILED_TIMINGS
       call timer%start("solve_evp_real_2stage")
 #endif
       call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
       call mpi_comm_size(mpi_comm_all,n_pes,mpierr)
@@ -295,7 +297,7 @@
       ttts = ttt0
       call bandred_real(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
                         tmat, wantDebug, useGPU, success, useQRActual)
+      print *, "hier 1:", q(10,10)
       if (.not.(success)) return
       ttt1 = MPI_Wtime()
       if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
@@ -312,10 +314,14 @@
       ttt0 = MPI_Wtime()
       call tridiag_band_real(na, nbw, nblk, a, lda, ev, e, matrixCols, hh_trans_real, &
                              mpi_comm_rows, mpi_comm_cols, mpi_comm_all)
+      print *, "hier 2:", q(10,10)
       ttt1 = MPI_Wtime()
       if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
          write(error_unit,*) 'Time tridiag_band_real :',ttt1-ttt0
+#ifdef WITH_MPI
 #ifdef DOUBLE_PRECISION_REAL
       call mpi_bcast(ev,na,MPI_REAL8,0,mpi_comm_all,mpierr)
       call mpi_bcast(e,na,MPI_REAL8,0,mpi_comm_all,mpierr)
@@ -324,6 +330,7 @@
       call mpi_bcast(e,na,MPI_REAL4,0,mpi_comm_all,mpierr)
 #endif
+#endif /* WITH_MPI */
       ttt1 = MPI_Wtime()
       time_evp_fwd = ttt1-ttts
@@ -332,6 +339,8 @@
       ttt0 = MPI_Wtime()
       call solve_tridi(na, nev, ev, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
                        mpi_comm_cols, wantDebug, success)
+      print *, "hier 3:", q(10,10)
       if (.not.(success)) return
       ttt1 = MPI_Wtime()
@@ -348,9 +357,12 @@
       ! Backtransform stage 1
       ttt0 = MPI_Wtime()
+      print *, "hier 4a:", q(10,10)
       call trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, matrixCols, hh_trans_real, &
                                        mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, &
                                        THIS_REAL_ELPA_KERNEL)
+      print *, "hier 4:", q(10,10)
       if (.not.(success)) return
       ttt1 = MPI_Wtime()
       if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
@@ -369,6 +381,8 @@
       ttt0 = MPI_Wtime()
       call trans_ev_band_to_full_real(na, nev, nblk, nbw, a, lda, tmat, q, ldq, matrixCols, num_blocks, mpi_comm_rows, &
                                       mpi_comm_cols, useGPU, useQRActual)
+      print *, "hier 5:", q(10,10)
       ttt1 = MPI_Wtime()
       if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
          write(error_unit,*) 'Time trans_ev_band_to_full_real :',ttt1-ttt0
@@ -461,6 +475,7 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
 #ifdef HAVE_DETAILED_TIMINGS
       call timer%start("solve_evp_complex_2stage")
 #endif
       call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
       call mpi_comm_size(mpi_comm_all,n_pes,mpierr)
@@ -545,6 +560,7 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
       call bandred_complex(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
                            tmat, wantDebug, useGPU, success)
       if (.not.(success)) then
 #ifdef HAVE_DETAILED_TIMINGS
         call timer%stop()
 #endif
@@ -566,10 +582,13 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
       ttt0 = MPI_Wtime()
       call tridiag_band_complex(na, nbw, nblk, a, lda, ev, e, matrixCols, hh_trans_complex, &
                                 mpi_comm_rows, mpi_comm_cols, mpi_comm_all)
       ttt1 = MPI_Wtime()
       if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
          write(error_unit,*) 'Time tridiag_band_complex :',ttt1-ttt0
+#ifdef WITH_MPI
 #ifdef DOUBLE_PRECISION_COMPLEX
       call mpi_bcast(ev, na, mpi_real8, 0, mpi_comm_all, mpierr)
       call mpi_bcast(e, na, mpi_real8, 0, mpi_comm_all, mpierr)
@@ -577,6 +596,8 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
       call mpi_bcast(ev, na, mpi_real4, 0, mpi_comm_all, mpierr)
       call mpi_bcast(e, na, mpi_real4, 0, mpi_comm_all, mpierr)
 #endif
+#endif /* WITH_MPI */
       ttt1 = MPI_Wtime()
       time_evp_fwd = ttt1-ttts
@@ -631,8 +652,6 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
         stop
       endif
       ! Backtransform stage 2
      ttt0 = MPI_Wtime()
......
This diff is collapsed.
@@ -110,9 +110,9 @@
     subroutine hh_trafo_kernel_10_bgp(q, hh, nb, ldq, ldh, s)
       use precision
+      use elpa_mpi
       implicit none
-      include 'mpif.h'
       integer(kind=ik), intent(in)    :: nb, ldq, ldh
       complex(kind=ck), intent(inout) :: q(ldq/2,*)
@@ -387,9 +387,9 @@
     subroutine hh_trafo_kernel_8_bgp(q, hh, nb, ldq, ldh, s)
       use precision
+      use elpa_mpi
       implicit none
-      include 'mpif.h'
       integer(kind=ik), intent(in)    :: nb, ldq, ldh
       complex(kind=ck), intent(inout) :: q(ldq/2,*)
@@ -629,9 +629,9 @@
     subroutine hh_trafo_kernel_4_bgp(q, hh, nb, ldq, ldh, s)
       use precision
+      use elpa_mpi
       implicit none
-      include 'mpif.h'
       integer(kind=ik), intent(in)    :: nb, ldq, ldh
       complex(kind=ck), intent(inout) :: q(ldq/2,*)
......
@@ -48,7 +48,7 @@ module elpa_pdgeqrf
   use elpa1_compute
   use elpa_pdlarfb
   use qr_utils_mod
+  use elpa_mpi
   implicit none
   PRIVATE
@@ -57,7 +57,6 @@ module elpa_pdgeqrf
   public :: qr_pqrparam_init
   public :: qr_pdlarfg2_1dcomm_check
-  include 'mpif.h'