Commit b360f7ae authored by Andreas Marek

ELPA_2013.11.007: OpenMP for complex block2 kernel

By mistake the complex kernel with blocking 2 was not called
in parallel if OpenMP was used.
parent be241bd8
#! /bin/sh #! /bin/sh
# Guess values for system-dependent variables and create Makefiles. # Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for elpa 2013.11.006. # Generated by GNU Autoconf 2.69 for elpa 2013.11.007.
# #
# Report bugs to <elpa-library@rzg.mpg.de>. # Report bugs to <elpa-library@rzg.mpg.de>.
# #
...@@ -590,8 +590,8 @@ MAKEFLAGS= ...@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package. # Identity of this package.
PACKAGE_NAME='elpa' PACKAGE_NAME='elpa'
PACKAGE_TARNAME='elpa' PACKAGE_TARNAME='elpa'
PACKAGE_VERSION='2013.11.006' PACKAGE_VERSION='2013.11.007'
PACKAGE_STRING='elpa 2013.11.006' PACKAGE_STRING='elpa 2013.11.007'
PACKAGE_BUGREPORT='elpa-library@rzg.mpg.de' PACKAGE_BUGREPORT='elpa-library@rzg.mpg.de'
PACKAGE_URL='' PACKAGE_URL=''
...@@ -1390,7 +1390,7 @@ if test "$ac_init_help" = "long"; then ...@@ -1390,7 +1390,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing. # Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh. # This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF cat <<_ACEOF
\`configure' configures elpa 2013.11.006 to adapt to many kinds of systems. \`configure' configures elpa 2013.11.007 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]... Usage: $0 [OPTION]... [VAR=VALUE]...
...@@ -1460,7 +1460,7 @@ fi ...@@ -1460,7 +1460,7 @@ fi
if test -n "$ac_init_help"; then if test -n "$ac_init_help"; then
case $ac_init_help in case $ac_init_help in
short | recursive ) echo "Configuration of elpa 2013.11.006:";; short | recursive ) echo "Configuration of elpa 2013.11.007:";;
esac esac
cat <<\_ACEOF cat <<\_ACEOF
...@@ -1599,7 +1599,7 @@ fi ...@@ -1599,7 +1599,7 @@ fi
test -n "$ac_init_help" && exit $ac_status test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then if $ac_init_version; then
cat <<\_ACEOF cat <<\_ACEOF
elpa configure 2013.11.006 elpa configure 2013.11.007
generated by GNU Autoconf 2.69 generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc. Copyright (C) 2012 Free Software Foundation, Inc.
...@@ -2082,7 +2082,7 @@ cat >config.log <<_ACEOF ...@@ -2082,7 +2082,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake. running configure, to aid debugging if configure makes a mistake.
It was created by elpa $as_me 2013.11.006, which was It was created by elpa $as_me 2013.11.007, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@ $ $0 $@
...@@ -2947,7 +2947,7 @@ fi ...@@ -2947,7 +2947,7 @@ fi
# Define the identity of the package. # Define the identity of the package.
PACKAGE='elpa' PACKAGE='elpa'
VERSION='2013.11.006' VERSION='2013.11.007'
cat >>confdefs.h <<_ACEOF cat >>confdefs.h <<_ACEOF
...@@ -20783,7 +20783,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ...@@ -20783,7 +20783,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their # report actual input values of CONFIG_FILES etc. instead of their
# values after options handling. # values after options handling.
ac_log=" ac_log="
This file was extended by elpa $as_me 2013.11.006, which was This file was extended by elpa $as_me 2013.11.007, which was
generated by GNU Autoconf 2.69. Invocation command line was generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES CONFIG_FILES = $CONFIG_FILES
...@@ -20849,7 +20849,7 @@ _ACEOF ...@@ -20849,7 +20849,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\ ac_cs_version="\\
elpa config.status 2013.11.006 elpa config.status 2013.11.007
configured by $0, generated by GNU Autoconf 2.69, configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\" with options \\"\$ac_cs_config\\"
......
AC_PREREQ([2.69]) AC_PREREQ([2.69])
AC_INIT([elpa],[2013.11.006], elpa-library@rzg.mpg.de) AC_INIT([elpa],[2013.11.007], elpa-library@rzg.mpg.de)
AC_CONFIG_SRCDIR([src/elpa1.F90]) AC_CONFIG_SRCDIR([src/elpa1.F90])
AM_INIT_AUTOMAKE([foreign -Wall subdir-objects]) AM_INIT_AUTOMAKE([foreign -Wall subdir-objects])
......
...@@ -4580,14 +4580,31 @@ contains ...@@ -4580,14 +4580,31 @@ contains
complex*16 w(nbw,2) complex*16 w(nbw,2)
#if defined(WITH_AVX_COMPLEX_BLOCK2) #if defined(WITH_AVX_COMPLEX_BLOCK2)
ttt = mpi_wtime() ttt = mpi_wtime()
#ifdef WITH_OPENMP
if(istripe<stripe_count) then
nl = stripe_width
else
noff = (my_thread-1)*thread_width + (istripe-1)*stripe_width
nl = min(my_thread*thread_width-noff, l_nev-noff)
if(nl<=0) return
endif
#else
nl = merge(stripe_width, last_stripe_width, istripe<stripe_count) nl = merge(stripe_width, last_stripe_width, istripe<stripe_count)
#endif
do j = ncols, 2, -2 do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off) w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1) w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo_complex(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_complex(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw) call double_hh_trafo_complex(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#endif
enddo enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo_complex(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width,my_thread)
#else
if(j==1) call single_hh_trafo_complex(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width) if(j==1) call single_hh_trafo_complex(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
#endif
#endif #endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment