Commit 69972fbf authored by Andreas Marek's avatar Andreas Marek

Some cleanup

parent 03d44007
...@@ -46,7 +46,7 @@ AC_DEFINE([EARLIEST_AUTOTUNE_VERSION], [20171201], [Earliest ELPA API version, w ...@@ -46,7 +46,7 @@ AC_DEFINE([EARLIEST_AUTOTUNE_VERSION], [20171201], [Earliest ELPA API version, w
AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20200417], [Current ELPA autotune version]) AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20200417], [Current ELPA autotune version])
AC_DEFINE_SUBST(CURRENT_AUTOTUNE_VERSION, 20200417, "Current ELPA autotune version") AC_DEFINE_SUBST(CURRENT_AUTOTUNE_VERSION, 20200417, "Current ELPA autotune version")
AC_DEFINE_UNQUOTED([ELPA_BUILDTIME], [$ELPA_BUILDTIME], ["Time of build"]) AC_DEFINE_UNQUOTED([ELPA_BUILDTIME], [$ELPA_BUILDTIME], ["Time of build"])
AX_COMPARE_VERSION([$ELPA_BUILDTIME], [gt], [1604905771],[old_elpa_version=yes],[old_elpa_version=no])
AX_CHECK_GNU_MAKE() AX_CHECK_GNU_MAKE()
if test x$_cv_gnu_make_command = x ; then if test x$_cv_gnu_make_command = x ; then
...@@ -1776,6 +1776,9 @@ else ...@@ -1776,6 +1776,9 @@ else
echo "build config should be compiled into the library: no" echo "build config should be compiled into the library: no"
fi fi
if test x"$have_loop_blocking" = x"yes"; then
AC_DEFINE([LOOP_BLOCKING],[1],[use blocking in loops])
fi
AC_SUBST([SUFFIX]) AC_SUBST([SUFFIX])
AC_SUBST([PKG_CONFIG_FILE],[elpa${SUFFIX}-${PACKAGE_VERSION}.pc]) AC_SUBST([PKG_CONFIG_FILE],[elpa${SUFFIX}-${PACKAGE_VERSION}.pc])
...@@ -1986,4 +1989,10 @@ else ...@@ -1986,4 +1989,10 @@ else
make -f $srcdir/generated_headers.am generated-headers top_srcdir="$srcdir" CPP="$CPP" make -f $srcdir/generated_headers.am generated-headers top_srcdir="$srcdir" CPP="$CPP"
fi fi
if test x"$old_elpa_version" = x"yes"; then
echo " "
echo " It is possible that your current version of ELPA is not the latest one."
echo " You might want to have a look at https://elpa.mpcdf.mpg.de, whether a more recent"
echo " version has been released already"
echo " "
fi
AC_DEFUN([AX_GCC_VERSION], [
GCC_VERSION=""
echo "calling gcc"
echo $CC
$CC | grep gcc
echo $?
AX_CHECK_COMPILE_FLAG([-dumpversion],
[ax_gcc_version_option=yes],
[ax_gcc_version_option=no])
AS_IF([test "x$GCC" = "xyes"],[
AS_IF([test "x$ax_gcc_version_option" != "xno"],[
AC_CACHE_CHECK([gcc version],[ax_cv_gcc_version],[
ax_cv_gcc_version="`$CC -dumpversion`"
AS_IF([test "x$ax_cv_gcc_version" = "x"],[
ax_cv_gcc_version=""
])
])
GCC_VERSION=$ax_cv_gcc_version
])
])
AC_SUBST([GCC_VERSION])
])
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_compare_version.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_COMPARE_VERSION(VERSION_A, OP, VERSION_B, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
#
# DESCRIPTION
#
# This macro compares two version strings. Due to the various number of
# minor-version numbers that can exist, and the fact that string
# comparisons are not compatible with numeric comparisons, this is not
# necessarily trivial to do in a autoconf script. This macro makes doing
# these comparisons easy.
#
# The six basic comparisons are available, as well as checking equality
# limited to a certain number of minor-version levels.
#
# The operator OP determines what type of comparison to do, and can be one
# of:
#
# eq - equal (test A == B)
# ne - not equal (test A != B)
# le - less than or equal (test A <= B)
# ge - greater than or equal (test A >= B)
# lt - less than (test A < B)
# gt - greater than (test A > B)
#
# Additionally, the eq and ne operator can have a number after it to limit
# the test to that number of minor versions.
#
# eq0 - equal up to the length of the shorter version
# ne0 - not equal up to the length of the shorter version
# eqN - equal up to N sub-version levels
# neN - not equal up to N sub-version levels
#
# When the condition is true, shell commands ACTION-IF-TRUE are run,
# otherwise shell commands ACTION-IF-FALSE are run. The environment
# variable 'ax_compare_version' is always set to either 'true' or 'false'
# as well.
#
# Examples:
#
# AX_COMPARE_VERSION([3.15.7],[lt],[3.15.8])
# AX_COMPARE_VERSION([3.15],[lt],[3.15.8])
#
# would both be true.
#
# AX_COMPARE_VERSION([3.15.7],[eq],[3.15.8])
# AX_COMPARE_VERSION([3.15],[gt],[3.15.8])
#
# would both be false.
#
# AX_COMPARE_VERSION([3.15.7],[eq2],[3.15.8])
#
# would be true because it is only comparing two minor versions.
#
# AX_COMPARE_VERSION([3.15.7],[eq0],[3.15])
#
# would be true because it is only comparing the lesser number of minor
# versions of the two values.
#
# Note: The characters that separate the version numbers do not matter. An
# empty string is the same as version 0. OP is evaluated by autoconf, not
# configure, so must be a string, not a variable.
#
# The author would like to acknowledge Guido Draheim whose advice about
# the m4_case and m4_ifvaln functions make this macro only include the
# portions necessary to perform the specific comparison specified by the
# OP argument in the final configure script.
#
# LICENSE
#
# Copyright (c) 2008 Tim Toolan <toolan@ele.uri.edu>
#
# Copying and distribution of this file, with or without modification, are
# permitted in any medium without royalty provided the copyright notice
# and this notice are preserved. This file is offered as-is, without any
# warranty.
#serial 13
dnl #########################################################################
AC_DEFUN([AX_COMPARE_VERSION], [
AC_REQUIRE([AC_PROG_AWK])
# Used to indicate true or false condition
ax_compare_version=false
# Convert the two version strings to be compared into a format that
# allows a simple string comparison. The end result is that a version
# string of the form 1.12.5-r617 will be converted to the form
# 0001001200050617. In other words, each number is zero padded to four
# digits, and non digits are removed.
AS_VAR_PUSHDEF([A],[ax_compare_version_A])
A=`echo "$1" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
-e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
-e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
-e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
-e 's/[[^0-9]]//g'`
AS_VAR_PUSHDEF([B],[ax_compare_version_B])
B=`echo "$3" | sed -e 's/\([[0-9]]*\)/Z\1Z/g' \
-e 's/Z\([[0-9]]\)Z/Z0\1Z/g' \
-e 's/Z\([[0-9]][[0-9]]\)Z/Z0\1Z/g' \
-e 's/Z\([[0-9]][[0-9]][[0-9]]\)Z/Z0\1Z/g' \
-e 's/[[^0-9]]//g'`
dnl # In the case of le, ge, lt, and gt, the strings are sorted as necessary
dnl # then the first line is used to determine if the condition is true.
dnl # The sed right after the echo is to remove any indented white space.
m4_case(m4_tolower($2),
[lt],[
ax_compare_version=`echo "x$A
x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/false/;s/x${B}/true/;1q"`
],
[gt],[
ax_compare_version=`echo "x$A
x$B" | sed 's/^ *//' | sort | sed "s/x${A}/false/;s/x${B}/true/;1q"`
],
[le],[
ax_compare_version=`echo "x$A
x$B" | sed 's/^ *//' | sort | sed "s/x${A}/true/;s/x${B}/false/;1q"`
],
[ge],[
ax_compare_version=`echo "x$A
x$B" | sed 's/^ *//' | sort -r | sed "s/x${A}/true/;s/x${B}/false/;1q"`
],[
dnl Split the operator from the subversion count if present.
m4_bmatch(m4_substr($2,2),
[0],[
# A count of zero means use the length of the shorter version.
# Determine the number of characters in A and B.
ax_compare_version_len_A=`echo "$A" | $AWK '{print(length)}'`
ax_compare_version_len_B=`echo "$B" | $AWK '{print(length)}'`
# Set A to no more than B's length and B to no more than A's length.
A=`echo "$A" | sed "s/\(.\{$ax_compare_version_len_B\}\).*/\1/"`
B=`echo "$B" | sed "s/\(.\{$ax_compare_version_len_A\}\).*/\1/"`
],
[[0-9]+],[
# A count greater than zero means use only that many subversions
A=`echo "$A" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
B=`echo "$B" | sed "s/\(\([[0-9]]\{4\}\)\{m4_substr($2,2)\}\).*/\1/"`
],
[.+],[
AC_WARNING(
[invalid OP numeric parameter: $2])
],[])
# Pad zeros at end of numbers to make same length.
ax_compare_version_tmp_A="$A`echo $B | sed 's/./0/g'`"
B="$B`echo $A | sed 's/./0/g'`"
A="$ax_compare_version_tmp_A"
# Check for equality or inequality as necessary.
m4_case(m4_tolower(m4_substr($2,0,2)),
[eq],[
test "x$A" = "x$B" && ax_compare_version=true
],
[ne],[
test "x$A" != "x$B" && ax_compare_version=true
],[
AC_WARNING([invalid OP parameter: $2])
])
])
AS_VAR_POPDEF([A])dnl
AS_VAR_POPDEF([B])dnl
dnl # Execute ACTION-IF-TRUE / ACTION-IF-FALSE.
if test "$ax_compare_version" = "true" ; then
have_loop_blocking=yes
m4_ifvaln([$4],[$4],[:])dnl
m4_ifvaln([$5],[else $5])dnl
fi
]) dnl AX_COMPARE_VERSION
...@@ -56,304 +56,304 @@ ...@@ -56,304 +56,304 @@
#if REALCASE == 1 #if REALCASE == 1
subroutine v_add_s_& subroutine v_add_s_&
&PRECISION& &PRECISION&
&(obj, v,n,s) &(obj, v,n,s)
use precision use precision
use elpa_abstract_impl use elpa_abstract_impl
implicit none implicit none
#include "../general/precision_kinds.F90" #include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n integer(kind=ik) :: n
real(kind=rk) :: v(n),s real(kind=rk) :: v(n),s
v(:) = v(:) + s v(:) = v(:) + s
end subroutine v_add_s_& end subroutine v_add_s_&
&PRECISION &PRECISION
subroutine distribute_global_column_& subroutine distribute_global_column_&
&PRECISION& &PRECISION&
&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk) &(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision use precision
use elpa_abstract_impl use elpa_abstract_impl
implicit none implicit none
#include "../general/precision_kinds.F90" #include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
real(kind=rk) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size real(kind=rk) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
nbs = noff/(nblk*np_rows) nbs = noff/(nblk*np_rows)
nbe = (noff+nlen-1)/(nblk*np_rows) nbe = (noff+nlen-1)/(nblk*np_rows)
do jb = nbs, nbe do jb = nbs, nbe
g_off = jb*nblk*np_rows + nblk*my_prow g_off = jb*nblk*np_rows + nblk*my_prow
l_off = jb*nblk l_off = jb*nblk
js = MAX(noff+1-g_off,1) js = MAX(noff+1-g_off,1)
je = MIN(noff+nlen-g_off,nblk) je = MIN(noff+nlen-g_off,nblk)
if (je<js) cycle if (je<js) cycle
l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff) l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
enddo enddo
end subroutine distribute_global_column_& end subroutine distribute_global_column_&
&PRECISION &PRECISION
subroutine solve_secular_equation_& subroutine solve_secular_equation_&
&PRECISION& &PRECISION&
&(obj, n, i, d, z, delta, rho, dlam) &(obj, n, i, d, z, delta, rho, dlam)
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified ! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix: ! diagonal matrix:
! !
! 1. + rho*SUM(z(:)**2/(d(:)-x)) = 0 ! 1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
! !
! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique ! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
! which is more robust (it always yields a solution) but also slower ! which is more robust (it always yields a solution) but also slower
! than the algorithm used in DLAED4. ! than the algorithm used in DLAED4.
! !
! The same restictions than in DLAED4 hold, namely: ! The same restictions than in DLAED4 hold, namely:
! !
! rho > 0 and d(i+1) > d(i) ! rho > 0 and d(i+1) > d(i)
! !
! but this routine will not terminate with error if these are not satisfied ! but this routine will not terminate with error if these are not satisfied
! (it will normally converge to a pole in this case). ! (it will normally converge to a pole in this case).
! !
! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases ! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
! N=1 and N=2 which is not compatible with DLAED4. ! N=1 and N=2 which is not compatible with DLAED4.
! Thus this routine shouldn't be used for these cases as a simple replacement ! Thus this routine shouldn't be used for these cases as a simple replacement
! of DLAED4. ! of DLAED4.
! !
! The arguments are the same as in DLAED4 (with the exception of the INFO argument): ! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
! !
! !
! N (input) INTEGER ! N (input) INTEGER
! The length of all arrays. ! The length of all arrays.
! !
! I (input) INTEGER ! I (input) INTEGER
! The index of the eigenvalue to be computed. 1 <= I <= N. ! The index of the eigenvalue to be computed. 1 <= I <= N.
! !
! D (input) DOUBLE PRECISION array, dimension (N) ! D (input) DOUBLE PRECISION array, dimension (N)
! The original eigenvalues. It is assumed that they are in ! The original eigenvalues. It is assumed that they are in
! order, D(I) < D(J) for I < J. ! order, D(I) < D(J) for I < J.
! !
! Z (input) DOUBLE PRECISION array, dimension (N) ! Z (input) DOUBLE PRECISION array, dimension (N)
! The components of the updating Vector. ! The components of the updating Vector.
! !
! DELTA (output) DOUBLE PRECISION array, dimension (N) ! DELTA (output) DOUBLE PRECISION array, dimension (N)
! DELTA contains (D(j) - lambda_I) in its j-th component. ! DELTA contains (D(j) - lambda_I) in its j-th component.
! See remark above about DLAED4 compatibility! ! See remark above about DLAED4 compatibility!
! !
! RHO (input) DOUBLE PRECISION ! RHO (input) DOUBLE PRECISION
! The scalar in the symmetric updating formula. ! The scalar in the symmetric updating formula.
! !
! DLAM (output) DOUBLE PRECISION ! DLAM (output) DOUBLE PRECISION
! The computed lambda_I, the I-th updated eigenvalue. ! The computed lambda_I, the I-th updated eigenvalue.
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
use precision use precision
use elpa_abstract_impl use elpa_abstract_impl
implicit none implicit none
#include "../../src/general/precision_kinds.F90" #include "../../src/general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n, i integer(kind=ik) :: n, i
real(kind=rk) :: d(n), z(n), delta(n), rho, dlam real(kind=rk) :: d(n), z(n), delta(n), rho, dlam
integer(kind=ik) :: iter integer(kind=ik) :: iter
real(kind=rk) :: a, b, x, y, dshift real(kind=rk) :: a, b, x, y, dshift
! In order to obtain sufficient numerical accuracy we have to shift the problem ! In order to obtain sufficient numerical accuracy we have to shift the problem
! either by d(i) or d(i+1), whichever is closer to the solution ! either by d(i) or d(i+1), whichever is closer to the solution
! Upper and lower bound of the shifted solution interval are a and b ! Upper and lower bound of the shifted solution interval are a and b
call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX) call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX)
if (i==n) then if (i==n) then
! Special case: Last eigenvalue ! Special case: Last eigenvalue
! We shift always by d(n), lower bound is d(n), ! We shift always by d(n), lower bound is d(n),
! upper bound is determined by a guess: ! upper bound is determined by a guess:
dshift = d(n) dshift = d(n)
delta(:) = d(:) - dshift delta(:) = d(:) - dshift
a = 0.0_rk ! delta(n) a = 0.0_rk ! delta(n)
b = rho*SUM(z(:)**2) + 1.0_rk ! rho*SUM(z(:)**2) is the lower bound for the guess b = rho*SUM(z(:)**2) + 1.0_rk ! rho*SUM(z(:)**2) is the lower bound for the guess
else else
! Other eigenvalues: lower bound is d(i), upper bound is d(i+1) ! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
! We check the sign of the function in the midpoint of the interval ! We check the sign of the function in the midpoint of the interval
! in order to determine if eigenvalue is more close to d(i) or d(i+1) ! in order to determine if eigenvalue is more close to d(i) or d(i+1)
x = 0.5_rk*(d(i)+d(i+1)) x = 0.5_rk*(d(i)+d(i+1))
y = 1.0_rk + rho*SUM(z(:)**2/(d(:)-x)) y = 1.0_rk + rho*SUM(z(:)**2/(d(:)-x))
if (y>0) then if (y>0) then
! solution is next to d(i) ! solution is next to d(i)
dshift = d(i) dshift = d(i)
else else
! solution is next to d(i+1) ! solution is next to d(i+1)
dshift = d(i+1) dshift = d(i+1)
endif endif
delta(:) = d(:) - dshift delta(:) = d(:) - dshift
a = delta(i) a = delta(i)
b = delta(i+1) b = delta(i+1)
endif endif
! Bisection: ! Bisection:
do iter=1,200 do iter=1,200
! Interval subdivision ! Interval subdivision
x = 0.5_rk*(a+b) x = 0.5_rk*(a+b)
if (x==a .or. x==b) exit ! No further interval subdivisions possible if (x==a .or. x==b) exit ! No further interval subdivisions possible
#ifdef DOUBLE_PRECISION_REAL #ifdef DOUBLE_PRECISION_REAL
if (abs(x) < 1.e-200_rk8) exit ! x next to pole if (abs(x) < 1.e-200_rk8) exit ! x next to pole
#else #else
if (abs(x) < 1.e-20_rk4) exit ! x next to pole if (abs(x) < 1.e-20_rk4) exit ! x next to pole
#endif #endif
! evaluate value at x ! evaluate value at x
y = 1. + rho*SUM(z(:)**2/(delta(:)-x)) y = 1. + rho*SUM(z(:)**2/(delta(:)-x))
if (y==0) then if (y==0) then
! found exact solution ! found exact solution
exit exit
elseif (y>0) then elseif (y>0) then
b = x b = x
else else
a = x a = x
endif endif
enddo enddo
! Solution: ! Solution:
dlam = x + dshift dlam = x + dshift
delta(:) = delta(:) - x delta(:) = delta(:) - x
call obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX) call obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
end subroutine solve_secular_equation_& end subroutine solve_secular_equation_&
&PRECISION &PRECISION
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
#endif #endif
#if REALCASE == 1 #if REALCASE == 1
subroutine hh_transform_real_& subroutine hh_transform_real_&
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
subroutine hh_transform_complex_& subroutine hh_transform_complex_&
#endif #endif
&PRECISION & &PRECISION &
(obj, alpha, xnorm_sq, xf, tau, wantDebug) (obj, alpha, xnorm_sq, xf, tau, wantDebug)
#if REALCASE == 1 #if REALCASE == 1
! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:) ! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! Similar to LAPACK routine ZLARFP, but uses ||x||**2 instead of x(:) ! Similar to LAPACK routine ZLARFP, but uses ||x||**2 instead of x(:)
#endif #endif
! and returns the factor xf by which x has to be scaled. ! and returns the factor xf by which x has to be scaled.
! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150 ! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
! since this would be expensive for the parallel implementation. ! since this would be expensive for the parallel implementation.
use precision use precision
use elpa_abstract_impl use elpa_abstract_impl
implicit none implicit none
#include "../general/precision_kinds.F90" #include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
logical, intent(in) :: wantDebug logical, intent(in) :: wantDebug
#if REALCASE == 1 #if REALCASE == 1
real(kind=rk), intent(inout) :: alpha real(kind=rk), intent(inout) :: alpha
#endif #endif
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
complex(kind=ck), intent(inout) :: alpha complex(kind=ck), intent(inout) :: alpha
#endif #endif