Commit f59e2758 authored by Pavel Kus's avatar Pavel Kus
Browse files

elpa1_template splitted

some intent (in) or (out) added
some comments added

Conflicts:
	src/elpa1.F90
	src/elpa1_compute_real_template.X90
parent 1605e66c
This diff is collapsed.
...@@ -844,6 +844,11 @@ EXTRA_DIST = \ ...@@ -844,6 +844,11 @@ EXTRA_DIST = \
src/elpa_reduce_add_vectors.X90 \ src/elpa_reduce_add_vectors.X90 \
src/elpa_transpose_vectors.X90 \ src/elpa_transpose_vectors.X90 \
src/elpa1_compute_real_template.X90 \ src/elpa1_compute_real_template.X90 \
src/elpa1_merge_systems_real_template.X90 \
src/elpa1_solve_tridi_real_template.X90 \
src/elpa1_tools_real_template.X90 \
src/elpa1_trans_ev_real_template.X90 \
src/elpa1_tridiag_real_template.X90 \
src/elpa1_compute_complex_template.X90 \ src/elpa1_compute_complex_template.X90 \
src/elpa2_compute_real_template.X90 \ src/elpa2_compute_real_template.X90 \
src/elpa2_compute_complex_template.X90 \ src/elpa2_compute_complex_template.X90 \
......
...@@ -215,7 +215,6 @@ module ELPA1_COMPUTE ...@@ -215,7 +215,6 @@ module ELPA1_COMPUTE
#define DOUBLE_PRECISION_REAL 1 #define DOUBLE_PRECISION_REAL 1
#define REAL_DATATYPE rk8 #define REAL_DATATYPE rk8
#include "precision_macros.h"
#include "elpa1_compute_real_template.X90" #include "elpa1_compute_real_template.X90"
#undef DOUBLE_PRECISION_REAL #undef DOUBLE_PRECISION_REAL
...@@ -227,7 +226,6 @@ module ELPA1_COMPUTE ...@@ -227,7 +226,6 @@ module ELPA1_COMPUTE
#undef DOUBLE_PRECISION_REAL #undef DOUBLE_PRECISION_REAL
#define REAL_DATATYPE rk4 #define REAL_DATATYPE rk4
#include "precision_macros.h"
#include "elpa1_compute_real_template.X90" #include "elpa1_compute_real_template.X90"
#undef DOUBLE_PRECISION_REAL #undef DOUBLE_PRECISION_REAL
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#if 0
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
! This particular source code file contains additions, changes and
! enhancements authored by Intel Corporation which is not part of
! the ELPA consortium.
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif
subroutine M_v_add_s_PRECISSION(v,n,s)
use precision
implicit none
integer(kind=ik) :: n
real(kind=REAL_DATATYPE) :: v(n),s
v(:) = v(:) + s
end subroutine M_v_add_s_PRECISSION
subroutine M_distribute_global_column_PRECISSION(g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision
implicit none
real(kind=REAL_DATATYPE) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
nbs = noff/(nblk*np_rows)
nbe = (noff+nlen-1)/(nblk*np_rows)
do jb = nbs, nbe
g_off = jb*nblk*np_rows + nblk*my_prow
l_off = jb*nblk
js = MAX(noff+1-g_off,1)
je = MIN(noff+nlen-g_off,nblk)
if (je<js) cycle
l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
enddo
end subroutine M_distribute_global_column_PRECISSION
subroutine M_solve_secular_equation_PRECISSION(n, i, d, z, delta, rho, dlam)
!-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix:
!
! 1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
!
! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
! which is more robust (it always yields a solution) but also slower
! than the algorithm used in DLAED4.
!
! The same restictions than in DLAED4 hold, namely:
!
! rho > 0 and d(i+1) > d(i)
!
! but this routine will not terminate with error if these are not satisfied
! (it will normally converge to a pole in this case).
!
! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
! N=1 and N=2 which is not compatible with DLAED4.
! Thus this routine shouldn't be used for these cases as a simple replacement
! of DLAED4.
!
! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
!
!
! N (input) INTEGER
! The length of all arrays.
!
! I (input) INTEGER
! The index of the eigenvalue to be computed. 1 <= I <= N.
!
! D (input) DOUBLE PRECISION array, dimension (N)
! The original eigenvalues. It is assumed that they are in
! order, D(I) < D(J) for I < J.
!
! Z (input) DOUBLE PRECISION array, dimension (N)
! The components of the updating vector.
!
! DELTA (output) DOUBLE PRECISION array, dimension (N)
! DELTA contains (D(j) - lambda_I) in its j-th component.
! See remark above about DLAED4 compatibility!
!
! RHO (input) DOUBLE PRECISION
! The scalar in the symmetric updating formula.
!
! DLAM (output) DOUBLE PRECISION
! The computed lambda_I, the I-th updated eigenvalue.
!-------------------------------------------------------------------------------
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use precision
implicit none
integer(kind=ik) :: n, i
real(kind=REAL_DATATYPE) :: d(n), z(n), delta(n), rho, dlam
integer(kind=ik) :: iter
real(kind=REAL_DATATYPE) :: a, b, x, y, dshift
! In order to obtain sufficient numerical accuracy we have to shift the problem
! either by d(i) or d(i+1), whichever is closer to the solution
! Upper and lower bound of the shifted solution interval are a and b
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_secular_equation" + M_PRECISSION_SUFFIX)
#endif
if (i==n) then
! Special case: Last eigenvalue
! We shift always by d(n), lower bound is d(n),
! upper bound is determined by a guess:
dshift = d(n)
delta(:) = d(:) - dshift
a = M_CONST_0_0 ! delta(n)
b = rho*SUM(z(:)**2) + M_CONST_1_0 ! rho*SUM(z(:)**2) is the lower bound for the guess
else
! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
! We check the sign of the function in the midpoint of the interval
! in order to determine if eigenvalue is more close to d(i) or d(i+1)
x = M_CONST_0_5*(d(i)+d(i+1))
y = M_CONST_1_0 + rho*SUM(z(:)**2/(d(:)-x))
if (y>0) then
! solution is next to d(i)
dshift = d(i)
else
! solution is next to d(i+1)
dshift = d(i+1)
endif
delta(:) = d(:) - dshift
a = delta(i)
b = delta(i+1)
endif
! Bisection:
do iter=1,200
! Interval subdivision
x = M_CONST_0_5*(a+b)
if (x==a .or. x==b) exit ! No further interval subdivisions possible
#ifdef DOUBLE_PRECISION_REAL
if (abs(x) < 1.e-200_rk8) exit ! x next to pole
#else
if (abs(x) < 1.e-20_rk4) exit ! x next to pole
#endif
! evaluate value at x
y = 1. + rho*SUM(z(:)**2/(delta(:)-x))
if (y==0) then
! found exact solution
exit
elseif (y>0) then
b = x
else
a = x
endif
enddo
! Solution:
dlam = x + dshift
delta(:) = delta(:) - x
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("solve_secular_equation" + M_PRECISSION_SUFFIX)
#endif
end subroutine M_solve_secular_equation_PRECISSION
!-------------------------------------------------------------------------------
#ifndef ALREADY_DEFINED
integer function local_index(idx, my_proc, num_procs, nblk, iflag)
!-------------------------------------------------------------------------------
! local_index: returns the local index for a given global index
! If the global index has no local index on the
! processor my_proc behaviour is defined by iflag
!
! Parameters
!
! idx Global index
!
! my_proc Processor row/column for which to calculate the local index
!
! num_procs Total number of processors along row/column
!
! nblk Blocksize
!
! iflag Controls the behaviour if idx is not on local processor
! iflag< 0 : Return last local index before that row/col
! iflag==0 : Return 0
! iflag> 0 : Return next local index after that row/col
!-------------------------------------------------------------------------------
use precision
implicit none
integer(kind=ik) :: idx, my_proc, num_procs, nblk, iflag
integer(kind=ik) :: iblk
iblk = (idx-1)/nblk ! global block number, 0 based
if (mod(iblk,num_procs) == my_proc) then
! block is local, always return local row/col number
local_index = (iblk/num_procs)*nblk + mod(idx-1,nblk) + 1
else
! non local block
if (iflag == 0) then
local_index = 0
else
local_index = (iblk/num_procs)*nblk
if (mod(iblk,num_procs) > my_proc) local_index = local_index + nblk
if (iflag>0) local_index = local_index + 1
endif
endif
end function local_index
#endif /* ALREADY_DEFINED */
#ifndef ALREADY_DEFINED
integer function least_common_multiple(a, b)
! Returns the least common multiple of a and b
! There may be more efficient ways to do this, we use the most simple approach
use precision
implicit none
integer(kind=ik), intent(in) :: a, b
do least_common_multiple = a, a*(b-1), a
if(mod(least_common_multiple,b)==0) exit
enddo
! if the loop is left regularly, least_common_multiple = a*b
end function least_common_multiple
#endif /* ALREADY_DEFINED */
subroutine M_hh_transform_real_PRECISSION(alpha, xnorm_sq, xf, tau)
! Similar to LAPACK routine DLARFP, but uses ||x||**2 instead of x(:)
! and returns the factor xf by which x has to be scaled.
! It also hasn't the special handling for numbers < 1.d-300 or > 1.d150
! since this would be expensive for the parallel implementation.
use precision
implicit none
real(kind=REAL_DATATYPE), intent(inout) :: alpha
real(kind=REAL_DATATYPE), intent(in) :: xnorm_sq
real(kind=REAL_DATATYPE), intent(out) :: xf, tau
real(kind=REAL_DATATYPE) :: BETA
if ( XNORM_SQ==0. ) then
if ( ALPHA>=0. ) then
TAU = 0.
else
TAU = 2.
ALPHA = -ALPHA
endif
XF = 0.
else
BETA = SIGN( SQRT( ALPHA**2 + XNORM_SQ ), ALPHA )
ALPHA = ALPHA + BETA
IF ( BETA<0 ) THEN
BETA = -BETA
TAU = -ALPHA / BETA
ELSE
ALPHA = XNORM_SQ / ALPHA
TAU = ALPHA / BETA
ALPHA = -ALPHA
END IF
XF = 1./ALPHA
ALPHA = BETA
endif
end subroutine M_hh_transform_real_PRECISSION
#define ALREADY_DEFINED 1
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment