Commit 191ad3a5 authored by Andreas Marek

ELPA_2013.08.005: bugfix for kernels real blocking 6 and 4

Due to an error in a preprocessor statement, the results for
real matrices were wrong if the kernels "avx-real-block6" or
"avx-real-block4" were chosen. No other kernels are affected.

The test programs always correctly reported that the results for
these kernels were wrong.
parent 94020ebc
......@@ -65,11 +65,14 @@ if WITH_AVX_REAL_BLOCK2
endif
if WITH_AVX_REAL_BLOCK4
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
endif
if WITH_AVX_REAL_BLOCK6
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
endif
......
......@@ -106,8 +106,13 @@ host_triplet = @host@
@WITH_AVX_COMPLEX_BLOCK2_TRUE@ src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
@WITH_AVX_REAL_BLOCK2_TRUE@am__append_10 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
@WITH_AVX_REAL_BLOCK4_TRUE@am__append_11 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c
@WITH_AVX_REAL_BLOCK6_TRUE@am__append_12 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c
@WITH_AVX_REAL_BLOCK4_TRUE@am__append_11 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
@WITH_AVX_REAL_BLOCK4_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
@WITH_AVX_REAL_BLOCK6_TRUE@am__append_12 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c \
@WITH_AVX_REAL_BLOCK6_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
@WITH_AVX_REAL_BLOCK6_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
bin_PROGRAMS = test_real$(EXEEXT) test_real2$(EXEEXT) \
test_complex$(EXEEXT) test_complex2$(EXEEXT)
subdir = .
......@@ -199,8 +204,11 @@ am__dirstamp = $(am__leading_dot)dirstamp
@WITH_AVX_COMPLEX_BLOCK2_TRUE@am__objects_9 = src/elpa2_kernels/elpa2_kernels_complex_sse-avx_2hv.lo \
@WITH_AVX_COMPLEX_BLOCK2_TRUE@ src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.lo
@WITH_AVX_REAL_BLOCK2_TRUE@am__objects_10 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
@WITH_AVX_REAL_BLOCK4_TRUE@am__objects_11 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo
@WITH_AVX_REAL_BLOCK6_TRUE@am__objects_12 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.lo
@WITH_AVX_REAL_BLOCK4_TRUE@am__objects_11 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo \
@WITH_AVX_REAL_BLOCK4_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
@WITH_AVX_REAL_BLOCK6_TRUE@am__objects_12 = src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.lo \
@WITH_AVX_REAL_BLOCK6_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.lo \
@WITH_AVX_REAL_BLOCK6_TRUE@ src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.lo
am_libelpa_la_OBJECTS = src/elpa1.lo src/elpa2.lo $(am__objects_1) \
$(am__objects_2) $(am__objects_3) $(am__objects_4) \
$(am__objects_5) $(am__objects_6) $(am__objects_7) \
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for elpa 2013.08.004.
# Generated by GNU Autoconf 2.69 for elpa 2013.08.005.
#
# Report bugs to <elpa-library@rzg.mpg.de>.
#
......@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='elpa'
PACKAGE_TARNAME='elpa'
PACKAGE_VERSION='2013.08.004'
PACKAGE_STRING='elpa 2013.08.004'
PACKAGE_VERSION='2013.08.005'
PACKAGE_STRING='elpa 2013.08.005'
PACKAGE_BUGREPORT='elpa-library@rzg.mpg.de'
PACKAGE_URL=''
......@@ -1385,7 +1385,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures elpa 2013.08.004 to adapt to many kinds of systems.
\`configure' configures elpa 2013.08.005 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
......@@ -1455,7 +1455,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of elpa 2013.08.004:";;
short | recursive ) echo "Configuration of elpa 2013.08.005:";;
esac
cat <<\_ACEOF
......@@ -1592,7 +1592,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
elpa configure 2013.08.004
elpa configure 2013.08.005
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
......@@ -2075,7 +2075,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by elpa $as_me 2013.08.004, which was
It was created by elpa $as_me 2013.08.005, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
......@@ -2940,7 +2940,7 @@ fi
# Define the identity of the package.
PACKAGE='elpa'
VERSION='2013.08.004'
VERSION='2013.08.005'
cat >>confdefs.h <<_ACEOF
......@@ -5903,7 +5903,7 @@ fi
ELPA_LIB_VERSION=2013.08.004
ELPA_LIB_VERSION=2013.08.005
# this is the version of the API, should be changed in the major revision
# if and only if the actual API changes
......@@ -20673,7 +20673,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by elpa $as_me 2013.08.004, which was
This file was extended by elpa $as_me 2013.08.005, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
......@@ -20739,7 +20739,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
elpa config.status 2013.08.004
elpa config.status 2013.08.005
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
......
AC_PREREQ([2.69])
AC_INIT([elpa],[2013.08.004], elpa-library@rzg.mpg.de)
AC_INIT([elpa],[2013.08.005], elpa-library@rzg.mpg.de)
AC_CONFIG_SRCDIR([src/elpa1.f90])
AM_INIT_AUTOMAKE([foreign -Wall subdir-objects])
......@@ -192,7 +192,7 @@ AX_PROG_FC_MPI([],[have_mpi=yes],[have_mpi=no
fi])
AC_SUBST([ELPA_LIB_VERSION], [2013.08.004])
AC_SUBST([ELPA_LIB_VERSION], [2013.08.005])
# this is the version of the API, should be changed in the major revision
# if and only if the actual API changes
AC_SUBST([ELPA_SO_VERSION], [0:0:0])
......
......@@ -1785,7 +1785,7 @@ contains
#endif
#if (defined(WITH_AVX_REAL_BLOCK4) && defined(WITH_AVX_REAL_BLOCK2)) || defined(WITH_AMD_BULLDOZER)
#if defined(WITH_AVX_REAL_BLOCK4) || defined(WITH_AMD_BULLDOZER)
! X86 INTRINSIC CODE, USING 4 HOUSEHOLDER VECTORS
do j = ncols, 4, -4
w(:,1) = bcast_buffer(1:nbw,j+off)
......@@ -1803,7 +1803,7 @@ contains
#endif
#if (defined(WITH_AVX_REAL_BLOCK6) && defined(WITH_AVX_REAL_BLOCK4) && defined(WITH_AVX_REAL_BLOCK2))
#if defined(WITH_AVX_REAL_BLOCK6)
! X86 INTRINSIC CODE, USING 6 HOUSEHOLDER VECTORS
do j = ncols, 6, -6
w(:,1) = bcast_buffer(1:nbw,j+off)
......
......@@ -96,12 +96,16 @@ Several
CFLAGS and CXXFLAGS automatically.
On Intel Sandybridge architectures the
configure option "--with-intel-sandybride"
use the best combination.
configure option "--with-avx-sandybridge"
uses the best combination, which is a
combination of block2 for real matrices
and block1 for complex matrices.
On AMD Bulldozer architectures the
configure option "--with-amd-bulldozer"
use the best combination.
uses the best combination, which is a
combination of block4 for real matrices
and block1 for complex matrices.
Otherwise, you can try out your own
combinations with the configure options
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment