Unverified Commit ff7beab2 authored by Andreas Marek's avatar Andreas Marek
Browse files

Prepare release of ELPA_2015.02.001

The qr decomposition is now available as a runtime choice.
Some testing has still to be done
parent 2d84e981
......@@ -11,8 +11,9 @@ If you want to build (or have to since no packages are available) ELPA yourself,
please note that ELPA is shipped with a typical "configure" and "make"
procedure. It is recommended to use this way to install ELPA, see (A).
If you do not want to install ELPA as library, but to include it in your
source code, please refer to point (B).
An example makefile "Makefile.example" can be found in ./test,
source code, please refer to point (B). Note, that this is not recommended
and no support whatsoever can be given for this approach !
However, an example makefile "Makefile.example" can be found in ./test,
to give some hints how this is done. Please then distribute all files of ELPA
with your code. Please note, that usage of ELPA as described in Section (B)
requires advanced knowledge about compilers, preprocessor flags, and
......@@ -35,7 +36,7 @@ The configure installation is best done in four steps
1.1) Choice of ELPA2 kernels
With this release of ELPA (2014.06 or newer) it is _not_
With the release of ELPA (2014.06 or newer) it is _not_
mandatory anymore to define the (real and complex) kernels
at build time. The configure procedure will build all the
kernels which can be used on the build system. The choice of
......
......@@ -10,7 +10,10 @@ lib_LTLIBRARIES = libelpa@SUFFIX@.la
libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION) -lstdc++
libelpa@SUFFIX@_la_SOURCES = src/elpa1.F90 src/elpa2.F90
libelpa@SUFFIX@_la_SOURCES += src/elpa_qr/qr_utils.f90 \
src/elpa_qr/elpa_qrkernels.f90 \
src/elpa_qr/elpa_pdlarfb.f90 \
src/elpa_qr/elpa_pdgeqrf.f90
if HAVE_DETAILED_TIMINGS
libelpa@SUFFIX@_la_SOURCES += src/timer.F90 \
src/ftimings/ftimings.F90 \
......@@ -23,13 +26,6 @@ if HAVE_DETAILED_TIMINGS
src/ftimings/papi.c
endif
if WITH_QR
libelpa@SUFFIX@_la_SOURCES += src/elpa_qr/qr_utils.f90 \
src/elpa_qr/elpa_qrkernels.f90 \
src/elpa_qr/elpa_pdlarfb.f90 \
src/elpa_qr/elpa_pdgeqrf.f90
endif
if WITH_REAL_GENERIC_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real.f90
endif
......@@ -103,6 +99,7 @@ dist_files_DATA = \
test/test_complex_gen.F90 \
test/test_real2.F90 \
test/test_real2_default_kernel.F90 \
test/test_real2_default_kernel_qr_decomposition.F90 \
test/test_real2_choose_kernel_with_api.F90 \
src/print_available_elpa2_kernels.F90 \
test/test_real.F90 \
......@@ -124,6 +121,7 @@ bin_PROGRAMS = \
noinst_PROGRAMS = \
elpa2_test_real_default_kernel@SUFFIX@ \
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@ \
elpa2_test_complex_default_kernel@SUFFIX@ \
elpa2_test_real_choose_kernel_with_api@SUFFIX@ \
elpa2_test_complex_choose_kernel_with_api@SUFFIX@
......@@ -147,6 +145,10 @@ elpa2_test_real_default_kernel@SUFFIX@_SOURCES = test/test_real2_default_kernel.
elpa2_test_real_default_kernel@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@_SOURCES = test/test_real2_default_kernel_qr_decomposition.F90 test/util.F90 $(redirect_sources)
elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@_LDADD = $(build_lib)
elpa2_test_real_choose_kernel_with_api@SUFFIX@_SOURCES = test/test_real2_choose_kernel_with_api.F90 test/util.F90 $(redirect_sources)
elpa2_test_real_choose_kernel_with_api@SUFFIX@_LDADD = $(build_lib)
......@@ -178,6 +180,7 @@ check_SCRIPTS = \
elpa1_test_complex.sh \
elpa2_test_complex.sh \
elpa2_test_complex_default_kernel.sh \
elpa2_test_real_default_kernel_qr_decomposition.sh \
elpa2_test_real_choose_kernel_with_api.sh \
elpa2_test_complex_choose_kernel_with_api.sh \
elpa2_print_kernels@SUFFIX@
......@@ -196,6 +199,10 @@ elpa2_test_real_default_kernel.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_default_kernel@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_default_kernel.sh
chmod +x elpa2_test_real_default_kernel.sh
elpa2_test_real_default_kernel_qr_decomposition.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_default_kernel_qr_decomposition.sh
chmod +x elpa2_test_real_default_kernel_qr_decomposition.sh
elpa2_test_real_choose_kernel_with_api.sh:
echo 'mpiexec -n 2 ./elpa2_test_real_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS' > elpa2_test_real_choose_kernel_with_api.sh
chmod +x elpa2_test_real_choose_kernel_with_api.sh
......@@ -227,6 +234,7 @@ CLEANFILES = \
elpa1_test_complex.sh \
elpa2_test_real.sh \
elpa2_test_real_default_kernel.sh \
elpa2_test_real_default_kernel_qr_decomposition.sh \
elpa2_test_complex.sh \
elpa2_test_complex_default_kernel.sh \
elpa2_test_real_choose_kernel_with_api.sh \
......
This diff is collapsed.
......@@ -85,82 +85,8 @@ as library to your system.
*** Structure of this repository:
* README file - this file. Please also consult the ELPA Wiki, and
consider adding any useful information that you may have.
* COPYING directory - the copyright and licensing information for ELPA.
* src directory - contains all the files that are needed for the
actual ELPA subroutines. If you are attempting to use ELPA in your
own application, these are the files which you need.
* test directory
- Contains the Makefile that demonstrates how to compile and link to
the ELPA routines
- All test programs solve an eigenvalue problem and check the correctness
of the result by evaluating || A*x - x*lambda || and checking the
orthogonality of the eigenvectors
elpa1_test_real Real eigenvalue problem, 1 stage solver
test_real_gen Real generalized eigenvalue problem, 1 stage solver
elpa1test_complex Complex eigenvalue problem, 1 stage solver
test_complex_gen Complex generalized eigenvalue problem, 1 stage solver
elpa2_test_real Real eigenvalue problem, 2 stage solver
elpa2test_complex Complex eigenvalue problem, 2 stage solver
- There are two programs which read matrices from a file, solve the
eigenvalue problem, print the eigenvalues and check the correctness
of the result (all using elpa1 only)
read_real for the real eigenvalue problem
read_real_gen for the real generalized eigenvalue problem
A*x - B*x*lambda = 0
read_real has to be called with 1 command line argument (the file
containing the matrix). The file must be in ASCII (formatted) form.
read_real_gen has to be called with 3 command line arguments. The
first argument is either 'asc' or 'bin' (without quotes) and
determines the format of the following files. 'asc' refers to ASCII
(formatted) and 'bin' to binary (unformatted). Command line
arguments 2 and 3 are the names of the files which contain matrices
A and B.
The structure of the matrix files for read_real and read_real_gen
depends on the format of the files:
* ASCII format (both read_real and read_real_gen):
The files must contain the following lines:
- 1st line containing the matrix size
- then following the upper half of the matrix in column-major
(i.e. Fortran) order, one number per line:
a(1,1)
a(1,2)
a(2,2)
...
a(1,i)
...
a(i,i)
...
a(1,n)
...
a(n,n)
* Binary format (read_real_gen only):
The files must contain the following records:
- 1st record: matrix size (type integer)
- 2nd record: a(1,1)
- 3rd record: a(1,2) a(2,2)
- ...
- ... a(1,i) ... a(i,i)
- ...
- ... a(1,n) ... a(n,n)
The type of the matrix elements a(i,j) is real*8.
As in most git repositories, also this repository contains different branches.
The branch "master" is always identical to the one representing the latest release
of ELPA. All other branches, either represent development work, or previous releases of
ELPA.
.
This file contains the release notes for the ELPA 2014.06.001 version
This file contains the release notes for the ELPA 2015.02.001 version
......@@ -10,56 +10,31 @@ a)
Most importantly, the ABI of the ELPA library changed!
A rebuild/relink of the user code using the ELPA library is mandatory!
Furthermore, the name of the installed library has changed, in order to allow the
possibility to have several versions of ELPA installed with
different ABIs. In order to have an unique identifier, the library will from
now on be called "libelpa.so.[versioning]" (for single threaded version)
and "libelpa_openmp.so.[versioning]" (for the hybrid MPI/OpenMP version).
"Versioning" is implementation (i.e. operating system) dependent and
uniquely defines the library version. In this release this is most
probably "elpa{_openmp}.so.1.0.0"
b)
With this release (and newer) it is not mandatory anymore to specify the real
and complex kernels at build-time! Instead the choice of kernel is now a
run-time option
The kernels can either be chosen by environment variables "REAL_ELPA_KERNEL"
and "COMPLEX_ELPA_KERNEL", or in the code with an additional argument in
the call to the library (see the examples in ./test for more details)
It is still possible to build ELPA with a specific real and complex kernel, if
one wants to obtain the old behaviour (see configure --help for the exact
options)
The only major change (which results in point a) is in the ELPA-2
part of the library for real matrices:
c)
the fully blocked QR decomposition has been moved from the development part
to the release!
At build time, configure now expects variables "SCALAPACK_FCFLAGS" and
"SCALAPACK_LDFLAGS" to be set, which replace the previous "BLACS_FCFLAGS" and
"BLACS_LDFLAGS".
It is now possible to use this QR decomposition by either setting an
environment variable "ELPA_QR_DECOMPOSITION" to "yes", or to call the
"solve_evp_real_2stage" solver with the additional (optional) argument
"useQR=.true."
d)
Binaries names for the test programs have been renamed: instead of
"test_real1" (for ELPA 1) and "test_real2" (for ELPA 2) and so forth, now
the binary names are "elpa1_test_real" and "elpa2_test_real" ...
Note, that the environment variable always takes precedence over the setting in
the API call.
Furthermore, note that if neither the environment variable nor the API keyword
is set, or if they are set to "no" or ".false.", respectively, then no QR
decomposition is used (i.e. the previous behaviour is maintained).
Any incompatibles to previous version?
---------------------------------------
As mentioned before, the ABI of ELPA has changed! It will be necessary
to rebuild the programs using ELPA, if this new version should be used.
Beware, using the new library with code which was build with an older version
Beware, using the new library with code which was built with an older version
should not even run. If it does, the results will be wrong !
Among others, the ELPA drivers are now functions, which return a logical
"success" value, which is false in case that an error occurred.
Please, catch this error flag in your user code! See the examples in the
subdirectory "./test".
......@@ -100,9 +100,6 @@
/* use OpenMP threading */
#undef WITH_OPENMP
/* use QR decomposition */
#undef WITH_QR
/* can use real_avx_block2 kernel */
#undef WITH_REAL_AVX_BLOCK2_KERNEL
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for elpa 2014.06.004.
# Generated by GNU Autoconf 2.69 for elpa 2015.02.001.
#
# Report bugs to <elpa-library@rzg.mpg.de>.
#
......@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='elpa'
PACKAGE_TARNAME='elpa'
PACKAGE_VERSION='2014.06.004'
PACKAGE_STRING='elpa 2014.06.004'
PACKAGE_VERSION='2015.02.001'
PACKAGE_STRING='elpa 2015.02.001'
PACKAGE_BUGREPORT='elpa-library@rzg.mpg.de'
PACKAGE_URL=''
 
......@@ -700,8 +700,6 @@ WITH_REAL_GENERIC_KERNEL_FALSE
WITH_REAL_GENERIC_KERNEL_TRUE
SCALAPACK_FCFLAGS
SCALAPACK_LDFLAGS
WITH_QR_FALSE
WITH_QR_TRUE
OPENMP_FCFLAGS
WITH_OPENMP_FALSE
WITH_OPENMP_TRUE
......@@ -830,7 +828,6 @@ enable_papi
with_avx_optimization
with_openmp
enable_openmp
with_qr
with_only_real_generic_kernel
with_only_real_generic_simple_kernel
with_only_real_sse_kernel
......@@ -1414,7 +1411,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures elpa 2014.06.004 to adapt to many kinds of systems.
\`configure' configures elpa 2015.02.001 to adapt to many kinds of systems.
 
Usage: $0 [OPTION]... [VAR=VALUE]...
 
......@@ -1484,7 +1481,7 @@ fi
 
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of elpa 2014.06.004:";;
short | recursive ) echo "Configuration of elpa 2015.02.001:";;
esac
cat <<\_ACEOF
 
......@@ -1519,7 +1516,6 @@ Optional Packages:
ftimings), default no.
--with-avx-optimization use AVX optimization, default no.
--with-openmp use OpenMP threading, default no.
--with-qr use QR decomposition, default no.
--with-only-real-generic-kernel
only compile generic-kernel for real case
--with-only-real-generic-simple-kernel
......@@ -1647,7 +1643,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
elpa configure 2014.06.004
elpa configure 2015.02.001
generated by GNU Autoconf 2.69
 
Copyright (C) 2012 Free Software Foundation, Inc.
......@@ -2130,7 +2126,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
 
It was created by elpa $as_me 2014.06.004, which was
It was created by elpa $as_me 2015.02.001, which was
generated by GNU Autoconf 2.69. Invocation command line was
 
$ $0 $@
......@@ -2997,7 +2993,7 @@ fi
 
# Define the identity of the package.
PACKAGE='elpa'
VERSION='2014.06.004'
VERSION='2015.02.001'
 
 
cat >>confdefs.h <<_ACEOF
......@@ -3178,7 +3174,7 @@ rm -rf config.h config-f90.h
# by the current interface, as they are ABI compatible (e.g. only new symbols
# were added by the new interface)
#
ELPA_SO_VERSION=1:0:0
ELPA_SO_VERSION=2:0:0
 
#
 
......@@ -6049,32 +6045,6 @@ $as_echo "$ac_cv_prog_fc_openmp" >&6; }
LDFLAGS="$OPENMP_FCFLAGS $LDFLAGS"
fi
 
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether QR-decomposition usage is specified" >&5
$as_echo_n "checking whether QR-decomposition usage is specified... " >&6; }
# Check whether --with-qr was given.
if test "${with_qr+set}" = set; then :
withval=$with_qr; with_qr=yes
else
with_qr=no
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_qr}" >&5
$as_echo "${with_qr}" >&6; }
if test x"$with_qr" = x"yes"; then
WITH_QR_TRUE=
WITH_QR_FALSE='#'
else
WITH_QR_TRUE='#'
WITH_QR_FALSE=
fi
if test "x${with_qr}" = xyes; then
$as_echo "#define WITH_QR 1" >>confdefs.h
fi
save_FCFLAGS=$FCFLAGS
save_LDFLAGS=$LDFLAGS
 
......@@ -21479,10 +21449,6 @@ if test -z "${WITH_OPENMP_TRUE}" && test -z "${WITH_OPENMP_FALSE}"; then
as_fn_error $? "conditional \"WITH_OPENMP\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${WITH_QR_TRUE}" && test -z "${WITH_QR_FALSE}"; then
as_fn_error $? "conditional \"WITH_QR\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${WITH_REAL_GENERIC_KERNEL_TRUE}" && test -z "${WITH_REAL_GENERIC_KERNEL_FALSE}"; then
as_fn_error $? "conditional \"WITH_REAL_GENERIC_KERNEL\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
......@@ -21932,7 +21898,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by elpa $as_me 2014.06.004, which was
This file was extended by elpa $as_me 2015.02.001, which was
generated by GNU Autoconf 2.69. Invocation command line was
 
CONFIG_FILES = $CONFIG_FILES
......@@ -21998,7 +21964,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
elpa config.status 2014.06.004
elpa config.status 2015.02.001
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
 
......
AC_PREREQ([2.69])
AC_INIT([elpa],[2014.06.004], [elpa-library@rzg.mpg.de])
AC_INIT([elpa],[2015.02.001], [elpa-library@rzg.mpg.de])
AC_SUBST([PACKAGE_VERSION])
AC_CONFIG_SRCDIR([src/elpa1.F90])
......@@ -34,7 +34,7 @@ rm -rf config.h config-f90.h
# by the current interface, as they are ABI compatible (e.g. only new symbols
# were added by the new interface)
#
AC_SUBST([ELPA_SO_VERSION], [1:0:0])
AC_SUBST([ELPA_SO_VERSION], [2:0:0])
#
......@@ -254,18 +254,6 @@ AC_ARG_WITH([openmp],
LDFLAGS="$OPENMP_FCFLAGS $LDFLAGS"
fi
AC_MSG_CHECKING(whether QR-decomposition usage is specified)
AC_ARG_WITH([qr],
AS_HELP_STRING([--with-qr],
[use QR decomposition, default no.]),
[with_qr=yes],
[with_qr=no])
AC_MSG_RESULT([${with_qr}])
AM_CONDITIONAL([WITH_QR],[test x"$with_qr" = x"yes"])
if test "x${with_qr}" = xyes; then
AC_DEFINE([WITH_QR], [1], [use QR decomposition])
fi
save_FCFLAGS=$FCFLAGS
save_LDFLAGS=$LDFLAGS
......
......@@ -68,9 +68,7 @@ module ELPA2
use iso_fortran_env, only : error_unit
#endif
#ifdef WITH_QR
use elpa_pdgeqrf
#endif
implicit none
......@@ -241,15 +239,12 @@ module ELPA2
#endif
/)
#ifdef WITH_QR
public :: band_band_real
public :: divide_band
integer, public :: which_qr_decomposition = 1 ! defines, which QR-decomposition algorithm will be used
! 0 for unblocked
! 1 for blocked (maxrank: nblk)
#endif
!-------------------------------------------------------------------------------
! The following array contains the Householder vectors of the
......@@ -374,6 +369,29 @@ function check_allowed_complex_kernels(THIS_COMPLEX_ELPA_KERNEL) result(err)
if (AVAILABLE_COMPLEX_ELPA_KERNELS(THIS_COMPLEX_ELPA_KERNEL) .ne. 1) err=.true.
end function check_allowed_complex_kernels
!-------------------------------------------------------------------------------
! qr_decomposition_via_environment_variable:
!   Checks whether the environment variable "ELPA_QR_DECOMPOSITION" requests
!   or forbids the usage of the QR decomposition.
!
! Parameters
!
!   useQR   (out) .true.  if the variable is set to "yes",
!                 .false. otherwise (including when the variable is unset,
!                 holds any other value, or environment checking is not
!                 compiled in). Only meaningful if the result is .true.
!
! Result
!
!   isSet   .true. if the variable is set to exactly "yes" or "no",
!           .false. in all other cases.
!-------------------------------------------------------------------------------
function qr_decomposition_via_environment_variable(useQR) result(isSet)
  implicit none
  logical, intent(out) :: useQR
  logical              :: isSet
  character(len=255)   :: env_value

  ! Define every output up front: an intent(out) dummy must not be left
  ! undefined on the "not set" path.
  isSet = .false.
  useQR = .false.

  ! Without HAVE_ENVIRONMENT_CHECKING the variable is never queried; blank the
  ! buffer so the comparisons below do not read undefined memory.
  env_value = ""

#if defined(HAVE_ENVIRONMENT_CHECKING)
  call get_environment_variable("ELPA_QR_DECOMPOSITION",env_value)
#endif

  if (trim(env_value) .eq. "yes") then
    useQR = .true.
    isSet = .true.
  else if (trim(env_value) .eq. "no") then
    useQR = .false.
    isSet = .true.
  endif

end function qr_decomposition_via_environment_variable
function real_kernel_via_environment_variable() result(kernel)
implicit none
integer :: kernel
......@@ -417,7 +435,8 @@ end function complex_kernel_via_environment_variable
function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, THIS_REAL_ELPA_KERNEL_API) result(success)
mpi_comm_all, THIS_REAL_ELPA_KERNEL_API,&
useQR) result(success)
!-------------------------------------------------------------------------------
! solve_evp_real_2stage: Solves the real eigenvalue problem with a 2 stage approach
......@@ -457,6 +476,8 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
use timings
#endif
implicit none
logical, intent(in), optional :: useQR
logical :: useQRActual, useQREnvironment
integer, intent(in), optional :: THIS_REAL_ELPA_KERNEL_API
integer :: THIS_REAL_ELPA_KERNEL
......@@ -470,6 +491,7 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
real*8 :: ttt0, ttt1, ttts
integer :: i
logical :: success
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_evp_real_2stage")
#endif
......@@ -483,6 +505,20 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
success = .true.
useQRActual = .false.
! set usage of qr decomposition via API call
if (present(useQR)) then
if (useQR) useQRActual = .true.
if (useQR .and. na .lt. 800) useQRActual = .false.
if (.not.(useQR)) useQRACtual = .false.
endif
! overwrite this with environment variable settings
if (qr_decomposition_via_environment_variable(useQREnvironment)) then
useQRActual = useQREnvironment
endif
if (present(THIS_REAL_ELPA_KERNEL_API)) then
! user defined kernel via the optional argument in the API call
THIS_REAL_ELPA_KERNEL = THIS_REAL_ELPA_KERNEL_API
......@@ -528,7 +564,7 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
ttt0 = MPI_Wtime()
ttts = ttt0
call bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, &
tmat, success)
tmat, success, useQRActual)
if (.not.(success)) return
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
......@@ -582,7 +618,8 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
! Backtransform stage 2
ttt0 = MPI_Wtime()
call trans_ev_band_to_full_real(na, nev, nblk, nbw, a, lda, tmat, q, ldq, mpi_comm_rows, mpi_comm_cols)
call trans_ev_band_to_full_real(na, nev, nblk, nbw, a, lda, tmat, q, ldq, mpi_comm_rows, &
mpi_comm_cols, useQRActual)
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time trans_ev_band_to_full_real :',ttt1-ttt0
......@@ -797,7 +834,7 @@ end function solve_evp_complex_2stage
!-------------------------------------------------------------------------------
subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, &
tmat, success)
tmat, success, useQR)
!-------------------------------------------------------------------------------
! bandred_real: Reduces a distributed symmetric matrix to band form
......@@ -846,18 +883,18 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, &
integer :: pcol, prow
#ifdef WITH_QR
! needed for blocked QR decomposition
integer :: PQRPARAM(11), work_size
real*8 :: dwork_size(1)
real*8, allocatable :: work_blocked(:), tauvector(:), blockheuristic(:)
#endif
pcol(i) = MOD((i-1)/nblk,np_cols) !Processor col for global col number
prow(i) = MOD((i-1)/nblk,np_rows) !Processor row for global row number
logical, intent(out):: success
logical, intent(in) :: useQR
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("bandred_real")
#endif
......@@ -886,8 +923,7 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, &
l_rows_tile = tile_size/np_rows ! local rows of a tile
l_cols_tile = tile_size/np_cols ! local cols of a tile
#ifdef WITH_QR
if (useQR) then
if (which_qr_decomposition == 1) then
call qr_pqrparam_init(pqrparam, nblk,'M',0, nblk,'M',0, nblk,'M',1,'s')
allocate(tauvector(na))
......@@ -903,8 +939,7 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, &
work_blocked = 0.0d0
deallocate(vmr)
endif
#endif
endif
do istep = (na-1)/nbw, 1, -1
......@@ -926,7 +961,8 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, &
tmat(:,:,istep) = 0
! Reduce current block to lower triangular form
#ifdef WITH_QR
if (useQR) then
if (which_qr_decomposition == 1) then
call qr_pdgeqrf_2dcomm(a, lda, vmr, max(l_rows,1), tauvector(1), &
tmat(1,1,istep), nbw, work_blocked, &
......@@ -934,9 +970,9 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, &
istep*nbw+n_cols-nbw, istep*nbw+n_cols, 1,&
0, PQRPARAM, mpi_comm_rows, mpi_comm_cols,&
blockheuristic)
endif
else
#endif
do lc = n_cols, 1, -1
ncol = istep*nbw + lc ! absolute column number of householder vector
......@@ -1043,10 +1079,7 @@ subroutine bandred_real(na, a, lda, nblk, nbw, mpi_comm_rows, mpi_comm_cols, &
tmat(lc,lc+1:n_cols,istep) = -tau * vav(lc+1:n_cols,lc)
endif
enddo
#ifdef WITH_QR
endif
#endif