Commit 48e712ef authored by Andreas Marek's avatar Andreas Marek

Introducing OpenMP functionality in ELPA_development_version_OpenMP

This commit introduces OpenMP functionality in the
ELPA_development_version_OpenMP branch.

It contains several bugfixes to the OpenMP functionality in the
branch "ELPA_development_version", the later will soon be deleted
since the new branch is the new reference implementation.

The current branch contains the following features/bugfixes:
- building of the OpenMP version of ELPA via configure and the
  "--with-openmp" flag. The build library contains a "_mt"
  (multi-threaded) in its name.
  The configure procedure should (hopefully) determine for each
  compiler the neccessary OpenMP flags.
  If the "--with-openmp" flag is ommitted exactly the same code
  as in the ELPA 2013.08.001 release is used and build in the
  same way
- The example test cases print which kernels have been used and
  how many OpenMP threads are used at runtime
- correct handling of OpenMP stack arrays: the previous implementation
  caused compiler dependent segmentation faults
- OpenMP capability with all available kernels: the correctness of
  the computations have been checked for all kernels except the
  Bluegene (P/Q) versions
parent bc9a3d07
......@@ -7,73 +7,138 @@ AM_LDFLAGS = @AM_LDFLAGS@ @BLACS_LDFLAGS@
BLACS_LDFLAGS = @BLACS_LDFLAGS@
# libelpa
if WITH_OPENMP
lib_LTLIBRARIES = libelpa_mt.la
else
lib_LTLIBRARIES = libelpa.la
endif
##rule to produce fortran config file:
#config_f90.h: ./config.h
# grep "^#define" ./config.h > $@
libelpa_la_SOURCES = src/elpa1.f90 src/elpa2.F90
if WITH_OPENMP
libelpa_mt_la_SOURCES = src/elpa1.F90 src/elpa2.F90
else
libelpa_la_SOURCES = src/elpa1.F90 src/elpa2.F90
endif
if WITH_GENERIC_SIMPLE
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_simple.f90 \
src/elpa2_kernels/elpa2_kernels_real_simple.f90
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_simple.f90 \
src/elpa2_kernels/elpa2_kernels_real_simple.f90
src/elpa2_kernels/elpa2_kernels_real_simple.f90
endif
endif
if WITH_GENERIC
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex.f90 \
src/elpa2_kernels/elpa2_kernels_real.f90
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex.f90 \
src/elpa2_kernels/elpa2_kernels_real.f90
src/elpa2_kernels/elpa2_kernels_real.f90
endif
endif
if WITH_BGP
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_bgp.f90 \
src/elpa2_kernels/elpa2_kernels_complex.f90
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_bgp.f90 \
src/elpa2_kernels/elpa2_kernels_complex.f90
src/elpa2_kernels/elpa2_kernels_complex.f90
endif
endif
if WITH_BGQ
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_bgq.f90 \
src/elpa2_kernels/elpa2_kernels_complex.f90
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_bgq.f90 \
src/elpa2_kernels/elpa2_kernels_complex.f90
src/elpa2_kernels/elpa2_kernels_complex.f90
endif
endif
if WITH_SSE_AS
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_asm_x86_64.s
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_asm_x86_64.s
endif
endif
if WITH_AVX_SANDYBRIDGE
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c \
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c \
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
endif
endif
if WITH_AMD_BULLDOZER
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c \
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c \
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c \
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c \
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
endif
endif
if WITH_AVX_COMPLEX_BLOCK1
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
endif
endif
if WITH_AVX_COMPLEX_BLOCK2
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse-avx_2hv.cpp \
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse-avx_2hv.cpp \
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
endif
endif
if WITH_AVX_REAL_BLOCK2
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
endif
endif
if WITH_AVX_REAL_BLOCK4
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c
endif
endif
if WITH_AVX_REAL_BLOCK6
if WITH_OPENMP
libelpa_mt_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c
else
libelpa_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c
endif
endif
if WITH_OPENMP
libelpa_la_LDFLAGS = -version-info $(ELPA_SO_VERSION)
else
libelpa_mt_la_LDFLAGS = -version-info $(ELPA_SO_VERSION)
endif
# install any .mod files in the include/ dir
elpa_includedir = $(includedir)/elpa
......@@ -84,11 +149,11 @@ filesdir = $(datarootdir)
files_DATA = \
test/read_real.f90 \
test/read_real_gen.f90 \
test/test_complex2.f90 \
test/test_complex.f90 \
test/test_complex2.F90 \
test/test_complex.F90 \
test/test_complex_gen.f90 \
test/test_real2.f90 \
test/test_real.f90 \
test/test_real2.F90 \
test/test_real.F90 \
test/test_real_gen.f90
# pkg-config stuff
......@@ -96,20 +161,25 @@ pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = elpa.pc
# test programs
if WITH_OPENMP
build_lib = libelpa_mt.la
else
build_lib = libelpa.la
endif
noinst_bindir = $(abs_top_builddir)
noinst_bin_PROGRAMS = test_real test_real2 test_complex test_complex2
test_real_SOURCES = test/test_real.f90
test_real_LDADD = libelpa.la
test_real_SOURCES = test/test_real.F90
test_real_LDADD = $(build_lib)
test_real2_SOURCES = test/test_real2.f90
test_real2_LDADD = libelpa.la
test_real2_SOURCES = test/test_real2.F90
test_real2_LDADD = $(build_lib)
test_complex_SOURCES = test/test_complex.f90
test_complex_LDADD = libelpa.la
test_complex_SOURCES = test/test_complex.F90
test_complex_LDADD = $(build_lib)
test_complex2_SOURCES = test/test_complex2.f90
test_complex2_LDADD = libelpa.la
test_complex2_SOURCES = test/test_complex2.F90
test_complex2_LDADD = $(build_lib)
check_SCRIPTS = test_real.sh test_real2.sh test_complex.sh test_complex2.sh
......
This diff is collapsed.
......@@ -101,5 +101,8 @@
optimizations) */
#undef WITH_GENERIC_SIMPLE
/* use OpenMP threading */
#undef WITH_OPENMP
/* use kernel tuned for SSE (written in gcc assembler) */
#undef WITH_SSE_AS
......@@ -595,7 +595,7 @@ PACKAGE_STRING='elpa 2013.08.001'
PACKAGE_BUGREPORT='elpa-library@rzg.mpg.de'
PACKAGE_URL=''
ac_unique_file="src/elpa1.f90"
ac_unique_file="src/elpa1.F90"
# Factoring default headers for most tests.
ac_includes_default="\
#include <stdio.h>
......@@ -672,6 +672,9 @@ build_vendor
build_cpu
build
LIBTOOL
OPENMP_FCFLAGS
WITH_OPENMP_FALSE
WITH_OPENMP_TRUE
WITH_AVX_REAL_BLOCK6_FALSE
WITH_AVX_REAL_BLOCK6_TRUE
WITH_AVX_REAL_BLOCK4_FALSE
......@@ -811,6 +814,8 @@ with_avx_complex_block2
with_avx_real_block2
with_avx_real_block4
with_avx_real_block6
with_openmp
enable_openmp
enable_shared
enable_static
with_pic
......@@ -1458,6 +1463,7 @@ Optional Features:
do not reject slow dependency extractors
--disable-dependency-tracking
speeds up one-time build
--disable-openmp do not use OpenMP
--enable-shared[=PKGS] build shared libraries [default=yes]
--enable-static[=PKGS] build static libraries [default=yes]
--enable-fast-install[=PKGS]
......@@ -1491,6 +1497,7 @@ Optional Packages:
(written in gcc assembler), default no.
--with-avx-real-block6 use AVX optimized real kernel with blocking 6
(written in gcc assembler), default no.
--with-openmp use OpenMP threading, default no.
--with-pic[=PKGS] try to use only PIC/non-PIC objects [default=use
both]
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
......@@ -5794,6 +5801,101 @@ $as_echo "#define WITH_AVX_REAL_BLOCK6 1" >>confdefs.h
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether OpenMP usage is specified" >&5
$as_echo_n "checking whether OpenMP usage is specified... " >&6; }
# Check whether --with-openmp was given.
if test "${with_openmp+set}" = set; then :
withval=$with_openmp; with_openmp=yes
else
with_openmp=no
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_openmp}" >&5
$as_echo "${with_openmp}" >&6; }
if test x"$with_openmp" = x"yes"; then
WITH_OPENMP_TRUE=
WITH_OPENMP_FALSE='#'
else
WITH_OPENMP_TRUE='#'
WITH_OPENMP_FALSE=
fi
if test "x${with_openmp}" = xyes; then
$as_echo "#define WITH_OPENMP 1" >>confdefs.h
OPENMP_FCFLAGS=
# Check whether --enable-openmp was given.
if test "${enable_openmp+set}" = set; then :
enableval=$enable_openmp;
fi
if test "$enable_openmp" != no; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $FC option to support OpenMP" >&5
$as_echo_n "checking for $FC option to support OpenMP... " >&6; }
if ${ac_cv_prog_fc_openmp+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.$ac_ext <<_ACEOF
program main
implicit none
!$ integer tid
tid = 42
call omp_set_num_threads(2)
end
_ACEOF
if ac_fn_fc_try_link "$LINENO"; then :
ac_cv_prog_fc_openmp='none needed'
else
ac_cv_prog_fc_openmp='unsupported'
for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \
-Popenmp --openmp; do
ac_save_FCFLAGS=$FCFLAGS
FCFLAGS="$FCFLAGS $ac_option"
cat > conftest.$ac_ext <<_ACEOF
program main
implicit none
!$ integer tid
tid = 42
call omp_set_num_threads(2)
end
_ACEOF
if ac_fn_fc_try_link "$LINENO"; then :
ac_cv_prog_fc_openmp=$ac_option
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
FCFLAGS=$ac_save_FCFLAGS
if test "$ac_cv_prog_fc_openmp" != unsupported; then
break
fi
done
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_openmp" >&5
$as_echo "$ac_cv_prog_fc_openmp" >&6; }
case $ac_cv_prog_fc_openmp in #(
"none needed" | unsupported)
;; #(
*)
OPENMP_FCFLAGS=$ac_cv_prog_fc_openmp ;;
esac
fi
fi
FCFLAGS="$FCFLAGS $OPENMP_FCFLAGS $OPENMP_FFFLAGS"
#LDFLAGS="$LDFLAGS $OPENMP_FCFLAGS $OPENMP_FFFLAGS"
save_FCFLAGS=$FCFLAGS
save_LDFLAGS=$LDFLAGS
......@@ -19759,6 +19861,8 @@ ac_compiler_gnu=$ac_cv_fc_compiler_gnu
#AC_SUBST(OPT_FCFLAGS)
mkdir modules
#gl_VISIBILITY
......@@ -19967,6 +20071,10 @@ if test -z "${WITH_AVX_REAL_BLOCK6_TRUE}" && test -z "${WITH_AVX_REAL_BLOCK6_FAL
as_fn_error $? "conditional \"WITH_AVX_REAL_BLOCK6\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${WITH_OPENMP_TRUE}" && test -z "${WITH_OPENMP_FALSE}"; then
as_fn_error $? "conditional \"WITH_OPENMP\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
: "${CONFIG_STATUS=./config.status}"
ac_write_fail=0
......
AC_PREREQ([2.69])
AC_INIT([elpa],[2013.08.001], elpa-library@rzg.mpg.de)
AC_CONFIG_SRCDIR([src/elpa1.f90])
AC_CONFIG_SRCDIR([src/elpa1.F90])
AM_INIT_AUTOMAKE([foreign -Wall subdir-objects])
AC_CONFIG_MACRO_DIR([m4])
......@@ -102,6 +102,22 @@ DEFINE_OPTION([WITH_AVX_REAL_BLOCK6], [avx-real-block6],
[no],[])
AC_MSG_CHECKING(whether OpenMP usage is specified)
AC_ARG_WITH([openmp],
AS_HELP_STRING([--with-openmp],
[use OpenMP threading, default no.]),
[with_openmp=yes],
[with_openmp=no])
AC_MSG_RESULT([${with_openmp}])
AM_CONDITIONAL([WITH_OPENMP],[test x"$with_openmp" = x"yes"])
if test "x${with_openmp}" = xyes; then
AC_DEFINE([WITH_OPENMP], [1], [use OpenMP threading])
AC_OPENMP
fi
FCFLAGS="$FCFLAGS $OPENMP_FCFLAGS $OPENMP_FFFLAGS"
#LDFLAGS="$LDFLAGS $OPENMP_FCFLAGS $OPENMP_FFFLAGS"
save_FCFLAGS=$FCFLAGS
save_LDFLAGS=$LDFLAGS
......@@ -231,6 +247,8 @@ AC_SUBST([FC_MODOUT])
AC_SUBST(BLACS_LDFLAGS)
AC_SUBST(BLACS_FCFLAGS)
#AC_SUBST(OPT_FCFLAGS)
mkdir modules
#gl_VISIBILITY
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment