Commit 612ed368 authored by Andreas Marek's avatar Andreas Marek
Browse files

Merge branch 'ELPA_2016.05.003'

parents e6ca1d19 ff7f5dd7
This diff is collapsed.
...@@ -45,7 +45,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \ ...@@ -45,7 +45,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/redist_band.X90 src/redist_band.X90
lib_LTLIBRARIES = libelpa@SUFFIX@.la lib_LTLIBRARIES = libelpa@SUFFIX@.la
libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION) -lstdc++ libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION)
libelpa@SUFFIX@_la_LIBADD = libelpa@SUFFIX@_public.la libelpa@SUFFIX@_private.la libelpa@SUFFIX@_la_LIBADD = libelpa@SUFFIX@_public.la libelpa@SUFFIX@_private.la
libelpa@SUFFIX@_la_SOURCES = libelpa@SUFFIX@_la_SOURCES =
...@@ -219,14 +219,12 @@ noinst_PROGRAMS = \ ...@@ -219,14 +219,12 @@ noinst_PROGRAMS = \
elpa2_test_real_api@SUFFIX@ \ elpa2_test_real_api@SUFFIX@ \
elpa2_test_complex_api@SUFFIX@ \ elpa2_test_complex_api@SUFFIX@ \
elpa1_real_toeplitz@SUFFIX@ \ elpa1_real_toeplitz@SUFFIX@ \
elpa1_test_real_with_c@SUFFIX@ elpa1_test_real_with_c@SUFFIX@ \
if !WITH_OPENMP
noinst_PROGRAMS += \
elpa1_test_real_c_version@SUFFIX@ \ elpa1_test_real_c_version@SUFFIX@ \
elpa1_test_complex_c_version@SUFFIX@ \ elpa1_test_complex_c_version@SUFFIX@ \
elpa2_test_real_c_version@SUFFIX@ \ elpa2_test_real_c_version@SUFFIX@ \
elpa2_test_complex_c_version@SUFFIX@ elpa2_test_complex_c_version@SUFFIX@
endif
build_lib = libelpa@SUFFIX@.la libelpatest@SUFFIX@.la build_lib = libelpa@SUFFIX@.la libelpatest@SUFFIX@.la
...@@ -250,31 +248,25 @@ libelpatest@SUFFIX@_la_SOURCES += \ ...@@ -250,31 +248,25 @@ libelpatest@SUFFIX@_la_SOURCES += \
test/shared/redirect.F90 test/shared/redirect.F90
endif endif
if !WITH_OPENMP
elpa1_test_real_c_version@SUFFIX@_SOURCES = test/C/elpa1_test_real_c_version.c elpa1_test_real_c_version@SUFFIX@_SOURCES = test/C/elpa1_test_real_c_version.c
elpa1_test_real_c_version@SUFFIX@_LDADD = $(build_lib) elpa1_test_real_c_version@SUFFIX@_LDADD = $(build_lib) $(FCLIBS)
elpa1_test_real_c_version@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules elpa1_test_real_c_version@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
elpa1_test_real_c_version@SUFFIX@_LINK = $(LINK) $(FCLIBS)
EXTRA_elpa1_test_real_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa1_test_real_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa1_test_complex_c_version@SUFFIX@_SOURCES = test/C/elpa1_test_complex_c_version.c elpa1_test_complex_c_version@SUFFIX@_SOURCES = test/C/elpa1_test_complex_c_version.c
elpa1_test_complex_c_version@SUFFIX@_LDADD = $(build_lib) elpa1_test_complex_c_version@SUFFIX@_LDADD = $(build_lib) $(FCLIBS)
elpa1_test_complex_c_version@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules elpa1_test_complex_c_version@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
elpa1_test_complex_c_version@SUFFIX@_LINK = $(LINK) $(FCLIBS)
EXTRA_elpa1_test_complex_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa1_test_complex_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa2_test_real_c_version@SUFFIX@_SOURCES = test/C/elpa2_test_real_c_version.c elpa2_test_real_c_version@SUFFIX@_SOURCES = test/C/elpa2_test_real_c_version.c
elpa2_test_real_c_version@SUFFIX@_LDADD = $(build_lib) elpa2_test_real_c_version@SUFFIX@_LDADD = $(build_lib) $(FCLIBS)
elpa2_test_real_c_version@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules elpa2_test_real_c_version@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
elpa2_test_real_c_version@SUFFIX@_LINK = $(LINK) $(FCLIBS)
EXTRA_elpa2_test_real_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa2_test_real_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
elpa2_test_complex_c_version@SUFFIX@_SOURCES = test/C/elpa2_test_complex_c_version.c elpa2_test_complex_c_version@SUFFIX@_SOURCES = test/C/elpa2_test_complex_c_version.c
elpa2_test_complex_c_version@SUFFIX@_LDADD = $(build_lib) elpa2_test_complex_c_version@SUFFIX@_LDADD = $(build_lib) $(FCLIBS)
elpa2_test_complex_c_version@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules elpa2_test_complex_c_version@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
elpa2_test_complex_c_version@SUFFIX@_LINK = $(LINK) $(FCLIBS)
EXTRA_elpa2_test_complex_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 EXTRA_elpa2_test_complex_c_version@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
endif
elpa1_test_real@SUFFIX@_SOURCES = test/Fortran/test_real.F90 elpa1_test_real@SUFFIX@_SOURCES = test/Fortran/test_real.F90
elpa1_test_real@SUFFIX@_LDADD = $(build_lib) elpa1_test_real@SUFFIX@_LDADD = $(build_lib)
...@@ -349,16 +341,11 @@ check_SCRIPTS = \ ...@@ -349,16 +341,11 @@ check_SCRIPTS = \
elpa2_test_real_api@SUFFIX@.sh \ elpa2_test_real_api@SUFFIX@.sh \
elpa2_test_complex_api@SUFFIX@.sh \ elpa2_test_complex_api@SUFFIX@.sh \
elpa1_real_toeplitz@SUFFIX@.sh \ elpa1_real_toeplitz@SUFFIX@.sh \
elpa2_print_kernels@SUFFIX@ elpa2_print_kernels@SUFFIX@ \
if !WITH_OPENMP
check_SCRIPTS += \
elpa1_test_real_c_version@SUFFIX@.sh \ elpa1_test_real_c_version@SUFFIX@.sh \
elpa1_test_complex_c_version@SUFFIX@.sh \ elpa1_test_complex_c_version@SUFFIX@.sh \
elpa2_test_real_c_version@SUFFIX@.sh \ elpa2_test_real_c_version@SUFFIX@.sh \
elpa2_test_complex_c_version@SUFFIX@.sh elpa2_test_complex_c_version@SUFFIX@.sh
endif
# test scripts # test scripts
......
...@@ -272,26 +272,26 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ ...@@ -272,26 +272,26 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
) )
AC_MSG_RESULT([${can_compile_avx}]) AC_MSG_RESULT([${can_compile_avx}])
#if test "${can_compile_avx}" = "yes" ; then dnl if test "${can_compile_avx}" = "yes" ; then
# AC_MSG_CHECKING([whether we can compile AVX intrinsics in C++]) dnl AC_MSG_CHECKING([whether we can compile AVX intrinsics in C++])
# AC_LANG_PUSH([C++]) dnl AC_LANG_PUSH([C++])
# AC_COMPILE_IFELSE([AC_LANG_SOURCE([ dnl AC_COMPILE_IFELSE([AC_LANG_SOURCE([
# #include <x86intrin.h> dnl #include <x86intrin.h>
# int main(int argc, char **argv){ dnl int main(int argc, char **argv){
# double* q; dnl double* q;
# __m256d a1_1 = _mm256_load_pd(q); dnl __m256d a1_1 = _mm256_load_pd(q);
# return 0; dnl return 0;
# } dnl }
# ])], dnl ])],
# [can_compile_avx=yes], dnl [can_compile_avx=yes],
# [can_compile_avx=no] dnl [can_compile_avx=no]
# ) dnl )
# AC_LANG_POP([C++]) dnl AC_LANG_POP([C++])
# AC_MSG_RESULT([${can_compile_avx}]) dnl AC_MSG_RESULT([${can_compile_avx}])
# if test "${can_compile_avx}" = "no" ; then dnl if test "${can_compile_avx}" = "no" ; then
# AC_MSG_WARN([Cannot compile C++ with AVX: disabling AVX altogether]) dnl AC_MSG_WARN([Cannot compile C++ with AVX: disabling AVX altogether])
# fi dnl fi
#fi dnl fi
AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C]) AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
...@@ -307,27 +307,28 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ ...@@ -307,27 +307,28 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
[can_compile_avx2=no] [can_compile_avx2=no]
) )
AC_MSG_RESULT([${can_compile_avx2}]) AC_MSG_RESULT([${can_compile_avx2}])
#if test "${can_compile_avx2}" = "yes" ; then
# AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C++]) dnl if test "${can_compile_avx2}" = "yes" ; then
# AC_LANG_PUSH([C++]) dnl AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C++])
# AC_COMPILE_IFELSE([AC_LANG_SOURCE([ dnl AC_LANG_PUSH([C++])
# #include <x86intrin.h> dnl AC_COMPILE_IFELSE([AC_LANG_SOURCE([
# int main(int argc, char **argv){ dnl #include <x86intrin.h>
# double* q; dnl int main(int argc, char **argv){
# __m256d q1 = _mm256_load_pd(q); dnl double* q;
# __m256d y1 = _mm256_fmadd_pd(q1, q1, q1); dnl __m256d q1 = _mm256_load_pd(q);
# return 0; dnl __m256d y1 = _mm256_fmadd_pd(q1, q1, q1);
# } dnl return 0;
# ])], dnl }
# [can_compile_avx2=yes], dnl ])],
# [can_compile_avx2=no] dnl [can_compile_avx2=yes],
# ) dnl [can_compile_avx2=no]
# AC_LANG_POP([C++]) dnl )
# AC_MSG_RESULT([${can_compile_avx2}]) dnl AC_LANG_POP([C++])
# if test "${can_compile_avx2}" = "no" ; then dnl AC_MSG_RESULT([${can_compile_avx2}])
# AC_MSG_WARN([Cannot compile C++ with AVX2!]) dnl if test "${can_compile_avx2}" = "no" ; then
# fi dnl AC_MSG_WARN([Cannot compile C++ with AVX2!])
#fi dnl fi
dnl fi
if test "${can_compile_avx}" = "yes" ; then if test "${can_compile_avx}" = "yes" ; then
install_real_avx_block2=yes install_real_avx_block2=yes
...@@ -558,6 +559,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ ...@@ -558,6 +559,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
[fortran_can_check_environment=no] [fortran_can_check_environment=no]
) )
AC_MSG_RESULT([${fortran_can_check_environment}]) AC_MSG_RESULT([${fortran_can_check_environment}])
if test x"${fortran_can_check_environment}" = x"yes" ; then
AC_DEFINE([HAVE_ENVIRONMENT_CHECKING],[1],[Fortran can query environment variables])
fi
dnl now check which kernels can be compiled dnl now check which kernels can be compiled
...@@ -612,10 +616,32 @@ else ...@@ -612,10 +616,32 @@ else
install_complex_bgq=no install_complex_bgq=no
fi fi
dnl Test possibility of 'use mpi', if requested
dnl environment variable setting of kernel if test x"${with_mpi}" = x"yes" ; then
if test x"${fortran_can_check_environment}" = x"yes" ; then AC_ARG_ENABLE([mpi-module],
AC_DEFINE([HAVE_ENVIRONMENT_CHECKING],[1],[Fortran can querry environment variables]) AS_HELP_STRING([--disable-mpi-module],
[Do not use the Fortran MPI module, get interfaces by 'include "mpif.h"']),
[],
[enable_mpi_module=yes])
if test x"${enable_mpi_module}" = x"yes" ; then
AC_MSG_CHECKING(whether Fortran mpi module can be used)
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
program test_mpi_module
use mpi
real :: time
time = MPI_WTime()
end program
])],
[can_use_fortran_mpi_module=yes],
[can_use_fortran_mpi_module=no]
)
AC_MSG_RESULT([${can_use_fortran_mpi_module}])
if test x"${can_use_fortran_mpi_module}" = x"yes" ; then
AC_DEFINE([HAVE_MPI_MODULE],[1],[can use the Fortran mpi module])
else
AC_MSG_ERROR([Could not compile a Fortran program with a 'use mpi' statement. You can try again with --disable-mpi-module])
fi
fi
fi fi
dnl last check whether user wants to compile only a specific kernel dnl last check whether user wants to compile only a specific kernel
......
...@@ -39,17 +39,15 @@ License: LGPL-3.0 ...@@ -39,17 +39,15 @@ License: LGPL-3.0
Group: System/Libraries Group: System/Libraries
Url: https://elpa.rzg.mpg.de/ Url: https://elpa.rzg.mpg.de/
Source0: https://elpa.mpcdf.mpg.de/html/Releases/%{version}/%{name}-%{version}.tar.gz Source0: https://elpa.mpcdf.mpg.de/html/Releases/%{version}/%{name}-%{version}.tar.gz
Requires: openmpi BuildRequires: c_compiler
# For SLE_11_SP4: # For SLE_11_SP4:
%if %{sle_11_sp4} == 1 %if %{sle_11_sp4} == 1
BuildRequires: gcc48-c++
BuildRequires: gcc48-fortran BuildRequires: gcc48-fortran
%else %else
BuildRequires: gcc-c++ >= 4.8
BuildRequires: gcc-fortran >= 4.8 BuildRequires: gcc-fortran >= 4.8
%endif %endif
BuildRequires: strace
BuildRequires: openmpi-devel BuildRequires: openmpi-devel
Requires: openmpi
BuildRequires: blas-devel BuildRequires: blas-devel
BuildRequires: lapack-devel BuildRequires: lapack-devel
BuildRequires: pkg-config BuildRequires: pkg-config
...@@ -123,7 +121,6 @@ Summary: Development files for %{name} ...@@ -123,7 +121,6 @@ Summary: Development files for %{name}
Group: Development/Libraries Group: Development/Libraries
Requires: %{name} = %{version} Requires: %{name} = %{version}
Requires: openmpi Requires: openmpi
Requires: libstdc++-devel
Requires: lapack-devel Requires: lapack-devel
Requires: blas-devel Requires: blas-devel
Requires: libscalapack2-openmpi-devel Requires: libscalapack2-openmpi-devel
...@@ -169,7 +166,6 @@ Summary: Development files for %{name}_openmp ...@@ -169,7 +166,6 @@ Summary: Development files for %{name}_openmp
Group: Development/Libraries Group: Development/Libraries
Requires: %{name}_openmp = %{version} Requires: %{name}_openmp = %{version}
Requires: openmpi Requires: openmpi
Requires: libstdc++-devel
Requires: lapack-devel Requires: lapack-devel
Requires: blas-devel Requires: blas-devel
Requires: libscalapack2-openmpi-devel Requires: libscalapack2-openmpi-devel
...@@ -229,7 +225,12 @@ pushd build ...@@ -229,7 +225,12 @@ pushd build
ln -s ../configure . ln -s ../configure .
%endif %endif
%configure --docdir=%{_docdir}/%{name}-%{version} %configure \
%if %{sle_11_sp4} == 1
--disable-mpi-module \
%endif
--docdir=%{_docdir}/%{name}-%{version}
make %{?_smp_mflags} V=1 make %{?_smp_mflags} V=1
popd popd
...@@ -245,10 +246,17 @@ pushd build_openmp ...@@ -245,10 +246,17 @@ pushd build_openmp
ln -s ../configure . ln -s ../configure .
%endif %endif
%configure --docdir=%{_docdir}/%{name}_openmp-%{version} --enable-openmp %configure \
%if %{sle_11_sp4} == 1
--disable-mpi-module \
%endif
--docdir=%{_docdir}/%{name}_openmp-%{version} \
--enable-openmp
make %{?_smp_mflags} V=1 make %{?_smp_mflags} V=1
popd popd
%endif
%endif # OpenMP
%check %check
......
...@@ -11,6 +11,9 @@ endef ...@@ -11,6 +11,9 @@ endef
_f90_verbose = $(_f90_verbose_$(V)) _f90_verbose = $(_f90_verbose_$(V))
_f90_verbose_ = $(_f90_verbose_$(AM_DEFAULT_VERBOSITY)) _f90_verbose_ = $(_f90_verbose_$(AM_DEFAULT_VERBOSITY))
_f90_verbose_0 = @echo " $1"; _f90_verbose_0 = @echo " $1";
_f90_only_verbose = $(_f90_only_verbose_$(V))
_f90_only_verbose_ = @
_f90_only_verbose_0 = @
_f90_targets = $(call translate_name,$(PROGRAMS) $(LTLIBRARIES)) _f90_targets = $(call translate_name,$(PROGRAMS) $(LTLIBRARIES))
FORTRAN_CPP ?= cpp -P -traditional -Wall -Werror FORTRAN_CPP ?= cpp -P -traditional -Wall -Werror
...@@ -87,8 +90,26 @@ endef ...@@ -87,8 +90,26 @@ endef
ifneq ($(call is_clean),1) ifneq ($(call is_clean),1)
include $(_f90_depfile) include $(_f90_depfile)
endif endif
# append_to: expands to one shell command that appends a string, as a line of
# its own, to a file.
# $1 string (inserted between single quotes for the shell)
#    NOTE(review): a string that itself contains a single quote would presumably
#    break the quoting -- confirm that callers never pass one.
# $2 file (opened in append mode through '>>', so existing content is kept)
# The command is prefixed with $(_f90_only_verbose), which is '@' when V is
# unset or 0, so the echo command itself is not printed in those cases.
define append_to
$(_f90_only_verbose)echo '$1' >> $2
endef
# program_dependencies: recipe commands that add the dependency information of
# one target to the dependency file currently being built ($@).
# $1 program
# NOTE(review): the body never references $1; it reads the variable 'p'
# directly, so it appears to rely on being expanded inside a $(foreach p,...)
# loop -- confirm before calling it from anywhere else.
# Steps performed by the expanded commands:
#  1. remove a possibly stale argument file .$p.dep.args
#  2. write every entry of the target's _use_mods/_def_mods lists, plus those of
#     each library returned by recursive_lib_deps, to .$p.dep.args through
#     append_to
#  3. run fdep/fortran_dependencies.pl for the target with that file on stdin
#     and append its output to $@; if the script fails, $@ is removed and the
#     recipe exits with status 1
#  4. remove the temporary argument file again
define program_dependencies
$(_f90_only_verbose)rm -f .$p.dep.args
$(foreach argument,$(_$p_use_mods) $(_$p_def_mods) $(foreach l,$(call recursive_lib_deps,$p),$(_$l_use_mods) $(_$l_def_mods)),$(call append_to,$(argument),.$p.dep.args))
$(_f90_only_verbose)$(top_srcdir)/fdep/fortran_dependencies.pl $p < .$p.dep.args >> $@ || { rm $@; exit 1; }
$(_f90_only_verbose)rm -f .$p.dep.args
endef
$(_f90_depfile): $(top_srcdir)/fdep/fortran_dependencies.pl $(foreach p,$(_f90_targets),$(_$p_use_mods) $(_$p_def_mods)) | $(foreach p,$(_f90_targets),$(_f90_depdir)/$p) $(_f90_depfile): $(top_srcdir)/fdep/fortran_dependencies.pl $(foreach p,$(_f90_targets),$(_$p_use_mods) $(_$p_def_mods)) | $(foreach p,$(_f90_targets),$(_f90_depdir)/$p)
$(call _f90_verbose,F90 DEPS $@)echo > $@; $(foreach p,$(_f90_targets),$(top_srcdir)/fdep/fortran_dependencies.pl $p $(_$p_use_mods) $(_$p_def_mods) $(foreach l,$(call recursive_lib_deps,$p),$(_$l_use_mods) $(_$l_def_mods)) >> $@; ) $(call _f90_verbose,F90 DEPS $@)echo > $@;
$(foreach p,$(_f90_targets),$(call program_dependencies,$p))
$(_f90_depdir): $(_f90_depdir):
@mkdir $@ @mkdir $@
......
...@@ -42,7 +42,8 @@ sub add_def { ...@@ -42,7 +42,8 @@ sub add_def {
my $target = shift; my $target = shift;
foreach my $file (@ARGV) { foreach my $file (<>) {
chomp($file);
if (exists $files{$file}) { if (exists $files{$file}) {
next; next;
} else { } else {
......
...@@ -78,7 +78,7 @@ AC_DEFUN([AX_ELPA_OPENMP], ...@@ -78,7 +78,7 @@ AC_DEFUN([AX_ELPA_OPENMP],
dnl will fail (since we know that it failed without the option), dnl will fail (since we know that it failed without the option),
dnl therefore the loop will continue searching for an option, and dnl therefore the loop will continue searching for an option, and
dnl no output file called 'penmp' or 'mp' is created. dnl no output file called 'penmp' or 'mp' is created.
for ac_option in -qopenmp -openmp -fopenmp -xopenmp -mp -omp -qsmp=omp; do for ac_option in -fopenmp -qopenmp -xopenmp -mp -omp -qsmp=omp -openmp; do
ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
AC_LINK_IFELSE([AC_LANG_SOURCE([_AX_ELPA_LANG_OPENMP])], AC_LINK_IFELSE([AC_LANG_SOURCE([_AX_ELPA_LANG_OPENMP])],
......
...@@ -1617,7 +1617,7 @@ module ELPA1_compute ...@@ -1617,7 +1617,7 @@ module ELPA1_compute
integer(kind=ik) :: np_off, nprocs integer(kind=ik) :: np_off, nprocs
integer(kind=ik) :: np1, np2, noff, nlen, nmid, n integer(kind=ik) :: np1, np2, noff, nlen, nmid, n
#ifdef WITH_MPI #ifdef WITH_MPI
integer(kind=ik) :: mpi_status(mpi_status_size) integer(kind=ik) :: my_mpi_status(mpi_status_size)
#endif #endif
logical, intent(in) :: wantDebug logical, intent(in) :: wantDebug
logical, intent(out) :: success logical, intent(out) :: success
...@@ -1654,7 +1654,7 @@ module ELPA1_compute ...@@ -1654,7 +1654,7 @@ module ELPA1_compute
if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs) then if (my_pcol>=np_off+np1 .and. my_pcol<np_off+nprocs) then
#ifdef WITH_MPI #ifdef WITH_MPI
call mpi_recv(d(noff+1),nmid,MPI_REAL8,np_off,1,mpi_comm_cols,mpi_status,mpierr) call mpi_recv(d(noff+1),nmid,MPI_REAL8,np_off,1,mpi_comm_cols,my_mpi_status,mpierr)
#else #else
d(noff+1:noff+1+nmid-1) = d(noff+1:noff+1+nmid-1) d(noff+1:noff+1+nmid-1) = d(noff+1:noff+1+nmid-1)
#endif #endif
...@@ -1669,7 +1669,7 @@ module ELPA1_compute ...@@ -1669,7 +1669,7 @@ module ELPA1_compute
endif endif
if (my_pcol>=np_off .and. my_pcol<np_off+np1) then if (my_pcol>=np_off .and. my_pcol<np_off+np1) then
#ifdef WITH_MPI #ifdef WITH_MPI
call mpi_recv(d(noff+nmid+1),nlen-nmid,MPI_REAL8,np_off+np1,1,mpi_comm_cols,mpi_status,mpierr) call mpi_recv(d(noff+nmid+1),nlen-nmid,MPI_REAL8,np_off+np1,1,mpi_comm_cols,my_mpi_status,mpierr)
#else #else
d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) = d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) d(noff+nmid+1:noff+nmid+1+nlen-nmid-1) = d(noff+nmid+1:noff+nmid+1+nlen-nmid-1)
#endif #endif
...@@ -2066,7 +2066,7 @@ module ELPA1_compute ...@@ -2066,7 +2066,7 @@ module ELPA1_compute
integer(kind=ik) :: my_proc, n_procs, my_prow, my_pcol, np_rows, & integer(kind=ik) :: my_proc, n_procs, my_prow, my_pcol, np_rows, &
np_cols, mpierr np_cols, mpierr
#ifdef WITH_MPI #ifdef WITH_MPI
integer(kind=ik) :: mpi_status(mpi_status_size) integer(kind=ik) :: my_mpi_status(mpi_status_size)
#endif #endif
integer(kind=ik) :: np_next, np_prev, np_rem integer(kind=ik) :: np_next, np_prev, np_rem
integer(kind=ik) :: idx(na), idx1(na), idx2(na) integer(kind=ik) :: idx(na), idx1(na), idx2(na)
...@@ -2623,7 +2623,7 @@ module ELPA1_compute ...@@ -2623,7 +2623,7 @@ module ELPA1_compute
#ifdef WITH_MPI #ifdef WITH_MPI
call MPI_Sendrecv_replace(qtmp1, l_rows*max_local_cols, MPI_REAL8, & call MPI_Sendrecv_replace(qtmp1, l_rows*max_local_cols, MPI_REAL8, &
np_next, 1111, np_prev, 1111, & np_next, 1111, np_prev, 1111, &
mpi_comm_cols, mpi_status, mpierr) mpi_comm_cols, my_mpi_status, mpierr)
#endif #endif
endif endif
...@@ -2810,7 +2810,7 @@ module ELPA1_compute ...@@ -2810,7 +2810,7 @@ module ELPA1_compute
endif endif
else if (pc2==my_pcol) then else if (pc2==my_pcol) then
#ifdef WITH_MPI #ifdef WITH_MPI
call mpi_recv(qtmp(1,nc),l_rows,MPI_REAL8,pc1,mod(i,4096),mpi_comm_cols,mpi_status,mpierr) call mpi_recv(qtmp(1,nc),l_rows,MPI_REAL8,pc1,mod(i,4096),mpi_comm_cols,my_mpi_status,mpierr)
#else #else
qtmp(1:l_rows,nc) = q(l_rqs:l_rqe,nc) qtmp(1:l_rows,nc) = q(l_rqs:l_rqe,nc)
#endif #endif
...@@ -2863,7 +2863,7 @@ module ELPA1_compute ...@@ -2863,7 +2863,7 @@ module ELPA1_compute
#ifdef WITH_MPI #ifdef WITH_MPI
call mpi_sendrecv(q(l_rqs,lc1),l_rows,MPI_REAL8,pc2,1, & call mpi_sendrecv(q(l_rqs,lc1),l_rows,MPI_REAL8,pc2,1, &
tmp,l_rows,MPI_REAL8,pc2,1, & tmp,l_rows,MPI_REAL8,pc2,1, &
mpi_comm_cols,mpi_status,mpierr) mpi_comm_cols,my_mpi_status,mpierr)
#else #else
tmp(1:l_rows) = q(l_rqs:l_rqe,lc1) tmp(1:l_rows) = q(l_rqs:l_rqe,lc1)
#endif #endif
...@@ -2873,7 +2873,7 @@ module ELPA1_compute ...@@ -2873,7 +2873,7 @@ module ELPA1_compute
#ifdef WITH_MPI #ifdef WITH_MPI
call mpi_sendrecv(q(l_rqs,lc2),l_rows,MPI_REAL8,pc1,1, & call mpi_sendrecv(q(l_rqs,lc2),l_rows,MPI_REAL8,pc1,1, &
tmp,l_rows,MPI_REAL8,pc1,1, & tmp,l_rows,MPI_REAL8,pc1,1, &
mpi_comm_cols,mpi_status,mpierr) mpi_comm_cols,my_mpi_status,mpierr)
#else #else
tmp(1:l_rows) = q(l_rqs:l_rqe,lc2) tmp(1:l_rows) = q(l_rqs:l_rqe,lc2)
#endif #endif
...@@ -2925,7 +2925,7 @@ module ELPA1_compute ...@@ -2925,7 +2925,7 @@ module ELPA1_compute
z(:) = z(:) + tmp(:) z(:) = z(:) + tmp(:)
#ifdef WITH_MPI #ifdef WITH_MPI
call MPI_Sendrecv_replace(z, n, MPI_REAL8, np_next, 1111, np_prev, 1111, & call MPI_Sendrecv_replace(z, n, MPI_REAL8, np_next, 1111, np_prev, 1111, &
mpi_comm_cols, mpi_status, mpierr) mpi_comm_cols, my_mpi_status, mpierr)
#endif #endif
enddo enddo
...@@ -2973,7 +2973,7 @@ module ELPA1_compute ...@@ -2973,7 +2973,7 @@ module ELPA1_compute
z(1:n) = tmp(1:n) z(1:n) = tmp(1:n)
do np = npc_0+1, npc_0+npc_n-1 do np = npc_0+1, npc_0+npc_n-1
#ifdef WITH_MPI #ifdef WITH_MPI
call mpi_recv(tmp,n,MPI_REAL8,np,1111,mpi_comm_cols,mpi_status,mpierr) call mpi_recv(tmp,n,MPI_REAL8,np,1111,mpi_comm_cols,my_mpi_status,mpierr)
#else #else
tmp(1:n) = z(1:n) tmp(1:n) = z(1:n)
#endif #endif
...@@ -2987,7 +2987,7 @@ module ELPA1_compute ...@@ -2987,7 +2987,7 @@ module ELPA1_compute
else else
#ifdef WITH_MPI #ifdef WITH_MPI
call mpi_send(tmp,n,MPI_REAL8,npc_0,1111,mpi_comm_cols,mpierr) call mpi_send(tmp,n,MPI_REAL8,npc_0,1111,mpi_comm_cols,mpierr)
call mpi_recv(z ,n,MPI_REAL8,npc_0,1111,mpi_comm_cols,mpi_status,mpierr) call mpi_recv(z ,n,MPI_REAL8,npc_0,1111,mpi_comm_cols,my_mpi_status,mpierr)
#else #else
z(1:n) = tmp(1:n) z(1:n) = tmp(1:n)
#endif #endif
......
This diff is collapsed.
...@@ -109,10 +109,9 @@ contains ...@@ -109,10 +109,9 @@ contains
subroutine hh_trafo_kernel_24_bgq(q, hh, nb, ldq, ldh, s) subroutine hh_trafo_kernel_24_bgq(q, hh, nb, ldq, ldh, s)
use precision use precision
use elpa_mpi
implicit none implicit none
include 'mpif.h'
integer(kind=ik), intent(in) :: nb, ldq, ldh integer(kind=ik), intent(in) :: nb, ldq, ldh
real(kind=rk), intent(inout) :: q(ldq,*) real(kind=rk), intent(inout) :: q(ldq,*)
...@@ -317,10 +316,9 @@ contains ...@@ -317,10 +316,9 @@ contains
subroutine hh_trafo_kernel_16_bgq(q, hh, nb, ldq, ldh, s) subroutine hh_trafo_kernel_16_bgq(q, hh, nb, ldq, ldh, s)
use precision use precision
use elpa_mpi
implicit none implicit none