Commit 91174de5 authored by Andreas Marek

Bugfix in OpenMP handling and a few kernels

A few kernels could not be used together with OpenMP:
an interface error prevented compilation.
parent 15dcc76b
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for elpa 2013.11.003.
# Generated by GNU Autoconf 2.69 for elpa 2013.11.004.
#
# Report bugs to <elpa-library@rzg.mpg.de>.
#
......@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='elpa'
PACKAGE_TARNAME='elpa'
PACKAGE_VERSION='2013.11.003'
PACKAGE_STRING='elpa 2013.11.003'
PACKAGE_VERSION='2013.11.004'
PACKAGE_STRING='elpa 2013.11.004'
PACKAGE_BUGREPORT='elpa-library@rzg.mpg.de'
PACKAGE_URL=''
......@@ -1390,7 +1390,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures elpa 2013.11.003 to adapt to many kinds of systems.
\`configure' configures elpa 2013.11.004 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
......@@ -1460,7 +1460,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of elpa 2013.11.003:";;
short | recursive ) echo "Configuration of elpa 2013.11.004:";;
esac
cat <<\_ACEOF
......@@ -1599,7 +1599,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
elpa configure 2013.11.003
elpa configure 2013.11.004
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
......@@ -2082,7 +2082,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by elpa $as_me 2013.11.003, which was
It was created by elpa $as_me 2013.11.004, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
......@@ -2947,7 +2947,7 @@ fi
# Define the identity of the package.
PACKAGE='elpa'
VERSION='2013.11.003'
VERSION='2013.11.004'
cat >>confdefs.h <<_ACEOF
......@@ -5891,7 +5891,7 @@ fi
ELPA_LIB_VERSION=2013.11.002
ELPA_LIB_VERSION=2013.11.004
# this is the version of the API, should be changed in the major revision
# if and only if the actual API changes
......@@ -20771,7 +20771,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by elpa $as_me 2013.11.003, which was
This file was extended by elpa $as_me 2013.11.004, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
......@@ -20837,7 +20837,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
elpa config.status 2013.11.003
elpa config.status 2013.11.004
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
......
AC_PREREQ([2.69])
AC_INIT([elpa],[2013.11.003], elpa-library@rzg.mpg.de)
AC_INIT([elpa],[2013.11.004], elpa-library@rzg.mpg.de)
AC_CONFIG_SRCDIR([src/elpa1.F90])
AM_INIT_AUTOMAKE([foreign -Wall subdir-objects])
......@@ -184,7 +184,7 @@ AX_PROG_FC_MPI([],[have_mpi=yes],[have_mpi=no
fi])
AC_SUBST([ELPA_LIB_VERSION], [2013.11.002])
AC_SUBST([ELPA_LIB_VERSION], [2013.11.004])
# this is the version of the API, should be changed in the major revision
# if and only if the actual API changes
AC_SUBST([ELPA_SO_VERSION], [0:0:0])
......
......@@ -2336,15 +2336,21 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
......@@ -2355,15 +2361,21 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
......@@ -2374,15 +2386,21 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
......@@ -2394,10 +2412,23 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe, my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
#if (defined(WITH_AVX_REAL_BLOCK4) && defined(WITH_AVX_REAL_BLOCK2)) || defined(WITH_AMD_BULLDOZER)
......@@ -2409,24 +2440,32 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,3) = bcast_buffer(1:nbw,j+off-2)
w(:,4) = bcast_buffer(1:nbw,j+off-3)
#ifdef WITH_OPENMP
call quad_hh_trafo(a(1,j+off+a_off-3,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call quad_hh_trafo(a(1,j+off+a_off-3,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call quad_hh_trafo(a(1,j+off+a_off-3,istripe), w, nbw, nl, stripe_width, nbw)
call quad_hh_trafo(a(1,j+off+a_off-3,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jj = j, 2, -2
w(:,1) = bcast_buffer(1:nbw,jj+off)
w(:,2) = bcast_buffer(1:nbw,jj+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,jj+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,jj+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(jj==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(jj==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
......@@ -2440,21 +2479,47 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,4) = bcast_buffer(1:nbw,j+off-3)
w(:,5) = bcast_buffer(1:nbw,j+off-4)
w(:,6) = bcast_buffer(1:nbw,j+off-5)
call hexa_hh_trafo(a(1,j+off+a_off-5,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call hexa_hh_trafo(a(1,j+off+a_off-5,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call hexa_hh_trafo(a(1,j+off+a_off-5,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jj = j, 4, -4
w(:,1) = bcast_buffer(1:nbw,jj+off)
w(:,2) = bcast_buffer(1:nbw,jj+off-1)
w(:,3) = bcast_buffer(1:nbw,jj+off-2)
w(:,4) = bcast_buffer(1:nbw,jj+off-3)
call quad_hh_trafo(a(1,jj+off+a_off-3,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call quad_hh_trafo(a(1,jj+off+a_off-3,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call quad_hh_trafo(a(1,jj+off+a_off-3,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jjj = jj, 2, -2
w(:,1) = bcast_buffer(1:nbw,jjj+off)
w(:,2) = bcast_buffer(1:nbw,jjj+off-1)
call double_hh_trafo(a(1,jjj+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,jjj+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,jjj+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
if(jjj==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
#ifdef WITH_OPENMP
if(jjj==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(jjj==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
#ifdef WITH_OPENMP
......@@ -4543,9 +4608,13 @@ contains
#endif
do j = ncols, 1, -1
#ifdef WITH_OPENMP
call single_hh_trafo_complex(a(1,j+off+a_off,istripe,my_thread),bcast_buffer(1,j+off),nbw,nl,stripe_width)
call single_hh_trafo_complex(a(1,j+off+a_off,istripe,my_thread), &
bcast_buffer(1,j+off),nbw,nl, &
stripe_width)
#else
call single_hh_trafo_complex(a(1,j+off+a_off,istripe),bcast_buffer(1,j+off),nbw,nl,stripe_width)
call single_hh_trafo_complex(a(1,j+off+a_off,istripe), &
bcast_buffer(1,j+off),nbw,nl, &
stripe_width)
#endif
enddo
#ifdef WITH_OPENMP
......
......@@ -2464,15 +2464,21 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
......@@ -2483,15 +2489,21 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
......@@ -2502,15 +2514,21 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
......@@ -2522,10 +2540,23 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe, my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,j+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
#if (defined(WITH_AVX_REAL_BLOCK4) && defined(WITH_AVX_REAL_BLOCK2)) || defined(WITH_AMD_BULLDOZER)
......@@ -2537,24 +2568,32 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,3) = bcast_buffer(1:nbw,j+off-2)
w(:,4) = bcast_buffer(1:nbw,j+off-3)
#ifdef WITH_OPENMP
call quad_hh_trafo(a(1,j+off+a_off-3,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call quad_hh_trafo(a(1,j+off+a_off-3,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call quad_hh_trafo(a(1,j+off+a_off-3,istripe), w, nbw, nl, stripe_width, nbw)
call quad_hh_trafo(a(1,j+off+a_off-3,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jj = j, 2, -2
w(:,1) = bcast_buffer(1:nbw,jj+off)
w(:,2) = bcast_buffer(1:nbw,jj+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,j+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,jj+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
call double_hh_trafo(a(1,jj+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(j==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(jj==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
if(jj==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
......@@ -2568,21 +2607,47 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
w(:,4) = bcast_buffer(1:nbw,j+off-3)
w(:,5) = bcast_buffer(1:nbw,j+off-4)
w(:,6) = bcast_buffer(1:nbw,j+off-5)
call hexa_hh_trafo(a(1,j+off+a_off-5,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call hexa_hh_trafo(a(1,j+off+a_off-5,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call hexa_hh_trafo(a(1,j+off+a_off-5,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jj = j, 4, -4
w(:,1) = bcast_buffer(1:nbw,jj+off)
w(:,2) = bcast_buffer(1:nbw,jj+off-1)
w(:,3) = bcast_buffer(1:nbw,jj+off-2)
w(:,4) = bcast_buffer(1:nbw,jj+off-3)
call quad_hh_trafo(a(1,jj+off+a_off-3,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call quad_hh_trafo(a(1,jj+off+a_off-3,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call quad_hh_trafo(a(1,jj+off+a_off-3,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
do jjj = jj, 2, -2
w(:,1) = bcast_buffer(1:nbw,jjj+off)
w(:,2) = bcast_buffer(1:nbw,jjj+off-1)
call double_hh_trafo(a(1,jjj+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call double_hh_trafo(a(1,jjj+off+a_off-1,istripe,my_thread), w, &
nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo(a(1,jjj+off+a_off-1,istripe), w, &
nbw, nl, stripe_width, nbw)
#endif
enddo
if(jjj==1) call single_hh_trafo(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
#ifdef WITH_OPENMP
if(jjj==1) call single_hh_trafo(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#else
if(jjj==1) call single_hh_trafo(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, &
stripe_width)
#endif
#endif
#ifdef WITH_OPENMP
......@@ -2594,7 +2659,6 @@ subroutine trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, mpi_comm_rows
endif
#endif
end subroutine compute_hh_trafo
end subroutine trans_ev_tridi_to_band_real
!-------------------------------------------------------------------------------
......@@ -4647,7 +4711,13 @@ contains
do j = ncols, 2, -2
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
call double_hh_trafo_complex(a(1,j+off+a_off-1,istripe), w, nbw, nl, stripe_width, nbw)
#ifdef WITH_OPENMP
call double_hh_trafo_complex(a(1,j+off+a_off-1,istripe,my_thread), &
w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_complex(a(1,j+off+a_off-1,istripe), &
w, nbw, nl, stripe_width, nbw)
#endif
enddo
if(j==1) call single_hh_trafo_complex(a(1,1+off+a_off,istripe),bcast_buffer(1,off+1), nbw, nl, stripe_width)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment