Commit f0c7cb0d authored by Andreas Marek
Browse files

ELPA_2013.11.006 bugfix for kernels real blocking 6 and 4

Due to an error in a preprocessor statement, the results for
real matrices were wrong if the kernels "avx-real-block6" or
"avx-real-block4" were chosen. No other kernels are affected.

The test programs always correctly reported that the results for
these kernels were wrong.
parent 191ad3a5
...@@ -120,17 +120,23 @@ endif ...@@ -120,17 +120,23 @@ endif
if WITH_AVX_REAL_BLOCK4 if WITH_AVX_REAL_BLOCK4
if WITH_OPENMP if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
else else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
endif endif
endif endif
if WITH_AVX_REAL_BLOCK6 if WITH_AVX_REAL_BLOCK6
if WITH_OPENMP if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
else else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
endif endif
endif endif
......
...@@ -131,10 +131,20 @@ host_triplet = @host@ ...@@ -131,10 +131,20 @@ host_triplet = @host@
@WITH_AVX_REAL_BLOCK2_TRUE@@WITH_OPENMP_TRUE@am__append_19 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c @WITH_AVX_REAL_BLOCK2_TRUE@@WITH_OPENMP_TRUE@am__append_19 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
@WITH_AVX_REAL_BLOCK2_TRUE@@WITH_OPENMP_FALSE@am__append_20 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c @WITH_AVX_REAL_BLOCK2_TRUE@@WITH_OPENMP_FALSE@am__append_20 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
@WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_TRUE@am__append_21 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c @WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_TRUE@am__append_21 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
@WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_FALSE@am__append_22 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c @WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_TRUE@am__append_23 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_FALSE@am__append_24 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c @WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_FALSE@am__append_22 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
@WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_FALSE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_TRUE@am__append_23 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_FALSE@am__append_24 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_FALSE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_FALSE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
bin_PROGRAMS = test_real$(EXEEXT) test_real2$(EXEEXT) \ bin_PROGRAMS = test_real$(EXEEXT) test_real2$(EXEEXT) \
test_complex$(EXEEXT) test_complex2$(EXEEXT) test_complex$(EXEEXT) test_complex2$(EXEEXT)
subdir = . subdir = .
...@@ -146,9 +156,9 @@ DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ ...@@ -146,9 +156,9 @@ DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
ltmain.sh ltmain.sh
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \ am__aclocal_m4_deps = $(top_srcdir)/m4/ax_check_gnu_make.m4 \
$(top_srcdir)/m4/libtool.m4 $(top_srcdir)/m4/ltoptions.m4 \ $(top_srcdir)/m4/ax_elpa_openmp.m4 $(top_srcdir)/m4/libtool.m4 \
$(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
$(top_srcdir)/m4/lt~obsolete.m4 \ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
$(top_srcdir)/fdep/fortran_dependencies.m4 \ $(top_srcdir)/fdep/fortran_dependencies.m4 \
$(top_srcdir)/m4/ax_prog_fc_mpi.m4 $(top_srcdir)/configure.ac $(top_srcdir)/m4/ax_prog_fc_mpi.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
...@@ -223,8 +233,11 @@ am__dirstamp = $(am__leading_dot)dirstamp ...@@ -223,8 +233,11 @@ am__dirstamp = $(am__leading_dot)dirstamp
@WITH_AVX_COMPLEX_BLOCK2_TRUE@@WITH_OPENMP_FALSE@am__objects_9 = src/elpa2_kernels/elpa2_kernels_complex_sse-avx_2hv.lo \ @WITH_AVX_COMPLEX_BLOCK2_TRUE@@WITH_OPENMP_FALSE@am__objects_9 = src/elpa2_kernels/elpa2_kernels_complex_sse-avx_2hv.lo \
@WITH_AVX_COMPLEX_BLOCK2_TRUE@@WITH_OPENMP_FALSE@ src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.lo @WITH_AVX_COMPLEX_BLOCK2_TRUE@@WITH_OPENMP_FALSE@ src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.lo
@WITH_AVX_REAL_BLOCK2_TRUE@@WITH_OPENMP_FALSE@am__objects_10 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo @WITH_AVX_REAL_BLOCK2_TRUE@@WITH_OPENMP_FALSE@am__objects_10 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
@WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_FALSE@am__objects_11 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo @WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_FALSE@am__objects_11 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_FALSE@am__objects_12 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.lo @WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_FALSE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_FALSE@am__objects_12 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.lo \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_FALSE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_FALSE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
@WITH_OPENMP_FALSE@am_libelpa_la_OBJECTS = src/elpa1.lo src/elpa2.lo \ @WITH_OPENMP_FALSE@am_libelpa_la_OBJECTS = src/elpa1.lo src/elpa2.lo \
@WITH_OPENMP_FALSE@ $(am__objects_1) $(am__objects_2) \ @WITH_OPENMP_FALSE@ $(am__objects_1) $(am__objects_2) \
@WITH_OPENMP_FALSE@ $(am__objects_3) $(am__objects_4) \ @WITH_OPENMP_FALSE@ $(am__objects_3) $(am__objects_4) \
...@@ -273,8 +286,11 @@ am__libelpa_mt_la_SOURCES_DIST = src/elpa1.F90 src/elpa2.F90 \ ...@@ -273,8 +286,11 @@ am__libelpa_mt_la_SOURCES_DIST = src/elpa1.F90 src/elpa2.F90 \
@WITH_AVX_COMPLEX_BLOCK2_TRUE@@WITH_OPENMP_TRUE@am__objects_21 = src/elpa2_kernels/elpa2_kernels_complex_sse-avx_2hv.lo \ @WITH_AVX_COMPLEX_BLOCK2_TRUE@@WITH_OPENMP_TRUE@am__objects_21 = src/elpa2_kernels/elpa2_kernels_complex_sse-avx_2hv.lo \
@WITH_AVX_COMPLEX_BLOCK2_TRUE@@WITH_OPENMP_TRUE@ src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.lo @WITH_AVX_COMPLEX_BLOCK2_TRUE@@WITH_OPENMP_TRUE@ src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.lo
@WITH_AVX_REAL_BLOCK2_TRUE@@WITH_OPENMP_TRUE@am__objects_22 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo @WITH_AVX_REAL_BLOCK2_TRUE@@WITH_OPENMP_TRUE@am__objects_22 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
@WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_TRUE@am__objects_23 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo @WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_TRUE@am__objects_23 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_TRUE@am__objects_24 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.lo @WITH_AVX_REAL_BLOCK4_TRUE@@WITH_OPENMP_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_TRUE@am__objects_24 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.lo \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo \
@WITH_AVX_REAL_BLOCK6_TRUE@@WITH_OPENMP_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
@WITH_OPENMP_TRUE@am_libelpa_mt_la_OBJECTS = src/elpa1.lo src/elpa2.lo \ @WITH_OPENMP_TRUE@am_libelpa_mt_la_OBJECTS = src/elpa1.lo src/elpa2.lo \
@WITH_OPENMP_TRUE@ $(am__objects_13) $(am__objects_14) \ @WITH_OPENMP_TRUE@ $(am__objects_13) $(am__objects_14) \
@WITH_OPENMP_TRUE@ $(am__objects_15) $(am__objects_16) \ @WITH_OPENMP_TRUE@ $(am__objects_15) $(am__objects_16) \
...@@ -679,6 +695,7 @@ NM = @NM@ ...@@ -679,6 +695,7 @@ NM = @NM@
NMEDIT = @NMEDIT@ NMEDIT = @NMEDIT@
OBJDUMP = @OBJDUMP@ OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@ OBJEXT = @OBJEXT@
OPENMP_FCFLAGS = @OPENMP_FCFLAGS@
OTOOL = @OTOOL@ OTOOL = @OTOOL@
OTOOL64 = @OTOOL64@ OTOOL64 = @OTOOL64@
PACKAGE = @PACKAGE@ PACKAGE = @PACKAGE@
......
...@@ -1149,6 +1149,7 @@ AC_SUBST([am__untar]) ...@@ -1149,6 +1149,7 @@ AC_SUBST([am__untar])
]) # _AM_PROG_TAR ]) # _AM_PROG_TAR
m4_include([m4/ax_check_gnu_make.m4]) m4_include([m4/ax_check_gnu_make.m4])
m4_include([m4/ax_elpa_openmp.m4])
m4_include([m4/libtool.m4]) m4_include([m4/libtool.m4])
m4_include([m4/ltoptions.m4]) m4_include([m4/ltoptions.m4])
m4_include([m4/ltsugar.m4]) m4_include([m4/ltsugar.m4])
......
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for elpa 2013.11.005. # Generated by GNU Autoconf 2.69 for elpa 2013.11.006.
# #
# Report bugs to <elpa-library@rzg.mpg.de>. # Report bugs to <elpa-library@rzg.mpg.de>.
# #
...@@ -590,8 +590,8 @@ MAKEFLAGS= ...@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='elpa' PACKAGE_NAME='elpa'
PACKAGE_TARNAME='elpa' PACKAGE_TARNAME='elpa'
PACKAGE_VERSION='2013.11.005' PACKAGE_VERSION='2013.11.006'
PACKAGE_STRING='elpa 2013.11.005' PACKAGE_STRING='elpa 2013.11.006'
PACKAGE_BUGREPORT='elpa-library@rzg.mpg.de' PACKAGE_BUGREPORT='elpa-library@rzg.mpg.de'
PACKAGE_URL='' PACKAGE_URL=''
   
...@@ -672,6 +672,7 @@ build_vendor ...@@ -672,6 +672,7 @@ build_vendor
build_cpu build_cpu
build build
LIBTOOL LIBTOOL
OPENMP_FCFLAGS
WITH_OPENMP_FALSE WITH_OPENMP_FALSE
WITH_OPENMP_TRUE WITH_OPENMP_TRUE
FC_MODOUT FC_MODOUT
...@@ -823,6 +824,7 @@ with_avx_real_block4 ...@@ -823,6 +824,7 @@ with_avx_real_block4
with_avx_real_block6 with_avx_real_block6
with_avx_optimization with_avx_optimization
with_openmp with_openmp
enable_openmp
enable_shared enable_shared
enable_static enable_static
with_pic with_pic
...@@ -1388,7 +1390,7 @@ if test "$ac_init_help" = "long"; then ...@@ -1388,7 +1390,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures elpa 2013.11.005 to adapt to many kinds of systems. \`configure' configures elpa 2013.11.006 to adapt to many kinds of systems.
   
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
   
...@@ -1458,7 +1460,7 @@ fi ...@@ -1458,7 +1460,7 @@ fi
   
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of elpa 2013.11.005:";; short | recursive ) echo "Configuration of elpa 2013.11.006:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
   
...@@ -1472,6 +1474,7 @@ Optional Features: ...@@ -1472,6 +1474,7 @@ Optional Features:
do not reject slow dependency extractors do not reject slow dependency extractors
--disable-dependency-tracking --disable-dependency-tracking
speeds up one-time build speeds up one-time build
--disable-openmp do not use OpenMP
--enable-shared[=PKGS] build shared libraries [default=yes] --enable-shared[=PKGS] build shared libraries [default=yes]
--enable-static[=PKGS] build static libraries [default=yes] --enable-static[=PKGS] build static libraries [default=yes]
--enable-fast-install[=PKGS] --enable-fast-install[=PKGS]
...@@ -1596,7 +1599,7 @@ fi ...@@ -1596,7 +1599,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
elpa configure 2013.11.005 elpa configure 2013.11.006
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
   
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
...@@ -2079,7 +2082,7 @@ cat >config.log <<_ACEOF ...@@ -2079,7 +2082,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
   
It was created by elpa $as_me 2013.11.005, which was It was created by elpa $as_me 2013.11.006, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
   
$ $0 $@ $ $0 $@
...@@ -2944,7 +2947,7 @@ fi ...@@ -2944,7 +2947,7 @@ fi
   
# Define the identity of the package. # Define the identity of the package.
PACKAGE='elpa' PACKAGE='elpa'
VERSION='2013.11.005' VERSION='2013.11.006'
   
   
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
...@@ -5911,7 +5914,7 @@ fi ...@@ -5911,7 +5914,7 @@ fi
   
   
   
ELPA_LIB_VERSION=2013.11.005 ELPA_LIB_VERSION=2013.11.006
   
# this is the version of the API, should be changed in the major revision # this is the version of the API, should be changed in the major revision
# if and only if the actual API changes # if and only if the actual API changes
...@@ -6144,7 +6147,61 @@ fi ...@@ -6144,7 +6147,61 @@ fi
   
$as_echo "#define WITH_OPENMP 1" >>confdefs.h $as_echo "#define WITH_OPENMP 1" >>confdefs.h
   
AX_ELPA_OPENMP
OPENMP_FCFLAGS=
# Check whether --enable-openmp was given.
if test "${enable_openmp+set}" = set; then :
enableval=$enable_openmp;
fi
if test "$enable_openmp" != no; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5
$as_echo_n "checking for $CC option to support OpenMP... " >&6; }
if ${ac_cv_prog_fc_openmp+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.$ac_ext <<_ACEOF
program main
call omp_get_num_threads
end
_ACEOF
if ac_fn_fc_try_link "$LINENO"; then :
ac_cv_prog_fc_openmp='none needed'
else
ac_cv_prog_fc_openmp='unsupported'
for ac_option in -openmp -fopenmp -xopenmp -mp -omp -qsmp=omp; do
ac_save_FCFLAGS=$FCFLAGS
FCFLAGS="$FCFLAGS $ac_option"
cat > conftest.$ac_ext <<_ACEOF
program main
call omp_get_num_threads
end
_ACEOF
if ac_fn_fc_try_link "$LINENO"; then :
ac_cv_prog_fc_openmp=$ac_option
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
FCFLAGS=$ac_save_FCFLAGS
if test "$ac_cv_prog_fc_openmp" != unsupported; then
break
fi
done
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_openmp" >&5
$as_echo "$ac_cv_prog_fc_openmp" >&6; }
case $ac_cv_prog_fc_openmp in #(
"none needed" | unsupported)
;; #(
*)
OPENMP_FCFLAGS=$ac_cv_prog_fc_openmp ;;
esac
fi
fi fi
   
FCFLAGS="$FCFLAGS $OPENMP_FCFLAGS $OPENMP_FFFLAGS" FCFLAGS="$FCFLAGS $OPENMP_FCFLAGS $OPENMP_FFFLAGS"
...@@ -20726,7 +20783,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ...@@ -20726,7 +20783,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by elpa $as_me 2013.11.005, which was This file was extended by elpa $as_me 2013.11.006, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
   
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
...@@ -20792,7 +20849,7 @@ _ACEOF ...@@ -20792,7 +20849,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
elpa config.status 2013.11.005 elpa config.status 2013.11.006
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"
   
......
AC_PREREQ([2.69]) AC_PREREQ([2.69])
AC_INIT([elpa],[2013.11.005], elpa-library@rzg.mpg.de) AC_INIT([elpa],[2013.11.006], elpa-library@rzg.mpg.de)
AC_CONFIG_SRCDIR([src/elpa1.F90]) AC_CONFIG_SRCDIR([src/elpa1.F90])
AM_INIT_AUTOMAKE([foreign -Wall subdir-objects]) AM_INIT_AUTOMAKE([foreign -Wall subdir-objects])
...@@ -196,7 +196,7 @@ AX_PROG_FC_MPI([],[have_mpi=yes],[have_mpi=no ...@@ -196,7 +196,7 @@ AX_PROG_FC_MPI([],[have_mpi=yes],[have_mpi=no
fi]) fi])
AC_SUBST([ELPA_LIB_VERSION], [2013.11.005]) AC_SUBST([ELPA_LIB_VERSION], [2013.11.006])
# this is the version of the API, should be changed in the major revision # this is the version of the API, should be changed in the major revision
# if and only if the actual API changes # if and only if the actual API changes
AC_SUBST([ELPA_SO_VERSION], [0:0:0]) AC_SUBST([ELPA_SO_VERSION], [0:0:0])
......
...@@ -2431,7 +2431,7 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows ...@@ -2431,7 +2431,7 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
#endif #endif
#endif #endif
#if (defined(WITH_AVX_REAL_BLOCK4) && defined(WITH_AVX_REAL_BLOCK2)) || defined(WITH_AMD_BULLDOZER) #if defined(WITH_AVX_REAL_BLOCK4) || defined(WITH_AMD_BULLDOZER)
! X86 INTRINSIC CODE, USING 4 HOUSEHOLDER VECTORS ! X86 INTRINSIC CODE, USING 4 HOUSEHOLDER VECTORS
do j = ncols, 4, -4 do j = ncols, 4, -4
...@@ -2470,7 +2470,7 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows ...@@ -2470,7 +2470,7 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
#endif #endif
#if (defined(WITH_AVX_REAL_BLOCK6) && defined(WITH_AVX_REAL_BLOCK4) && defined(WITH_AVX_REAL_BLOCK2)) #if defined(WITH_AVX_REAL_BLOCK6)
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS ! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do j = ncols, 6, -6 do j = ncols, 6, -6
w(:,1) = bcast_buffer(1:nbw,j+off) w(:,1) = bcast_buffer(1:nbw,j+off)
......
...@@ -112,12 +112,16 @@ Several ...@@ -112,12 +112,16 @@ Several
CFLAGS and CXXFLAGS automatically. CFLAGS and CXXFLAGS automatically.
On Intel Sandybridge architectures the On Intel Sandybridge architectures the
configure option "--with-intel-sandybride" configure option "--with-avx-sandybride"
use the best combination. uses the best combination, which is a
combination of block2 for real matrices
and block1 for complex matrices.
On AMD Bulldozer architectures the On AMD Bulldozer architectures the
configure option "--with-amd-bulldozer" configure option "--with-amd-bulldozer"
use the best combination. uses the best combination, which is a
combination of block4 for real matrices
and block1 for complex matrices.
Otherwise, you can try out your own Otherwise, you can try out your own
combinations with the configure options combinations with the configure options
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment