Commit a9367d8c authored by Andreas Marek's avatar Andreas Marek
Browse files

Move transform_columns to a module

parent b4b78f42
......@@ -60,6 +60,15 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/solve_tridi/mod_global_product.F90 \
src/solve_tridi/mod_global_gather.F90 \
src/solve_tridi/mod_resort_ev.F90 \
src/solve_tridi/mod_transform_columns.F90 \
src/solve_tridi/mod_check_monotony.F90 \
src/solve_tridi/mod_add_tmp.F90 \
src/solve_tridi/mod_merge_systems.F90 \
src/solve_tridi/mod_merge_recursive.F90 \
src/solve_tridi/mod_solve_tridi.F90 \
src/elpa1/mod_distribute_global_column.F90 \
src/elpa1/mod_v_add_s.F90 \
src/elpa1/mod_solve_secular_equation.F90 \
src/elpa_index.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c
......@@ -683,8 +692,15 @@ EXTRA_DIST = \
src/solve_tridi/global_product_template.F90 \
src/solve_tridi/global_gather_template.F90 \
src/solve_tridi/resort_ev_template.F90 \
src/elpa1/elpa1_merge_systems_real_template.F90 \
src/elpa1/elpa1_solve_tridi_real_template.F90 \
src/solve_tridi/transform_columns_template.F90 \
src/solve_tridi/check_monotony_template.F90 \
src/solve_tridi/add_tmp_template.F90 \
src/elpa1/v_add_s_template.F90 \
src/elpa1/solve_secular_equation_template.F90 \
src/elpa1/distribute_global_column_template.F90 \
src/solve_tridi/merge_systems_template.F90 \
src/solve_tridi/merge_recursive_template.F90 \
src/solve_tridi/solve_tridi_template.F90 \
src/elpa1/elpa1_template.F90 \
src/elpa1/elpa1_tools_template.F90 \
src/elpa1/elpa1_trans_ev_template.F90 \
......
subroutine distribute_global_column_&
&PRECISION&
&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
real(kind=rk) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
nbs = noff/(nblk*np_rows)
nbe = (noff+nlen-1)/(nblk*np_rows)
do jb = nbs, nbe
g_off = jb*nblk*np_rows + nblk*my_prow
l_off = jb*nblk
js = MAX(noff+1-g_off,1)
je = MIN(noff+nlen-g_off,nblk)
if (je<js) cycle
l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
enddo
end subroutine distribute_global_column_&
&PRECISION
......@@ -64,7 +64,7 @@ module elpa1_compute
public :: trans_ev_real_double ! Transform real eigenvectors of a tridiagonal matrix back
public :: trans_ev_real
public :: solve_tridi_double
!public :: solve_tridi_double
public :: solve_tridi_double_impl
interface tridiag_real
......@@ -78,7 +78,7 @@ module elpa1_compute
#ifdef WANT_SINGLE_PRECISION_REAL
public :: tridiag_real_single ! Transform real single-precision symmetric matrix to tridiagonal form
public :: trans_ev_real_single ! Transform real single-precision eigenvectors of a tridiagonal matrix back
public :: solve_tridi_single
!public :: solve_tridi_single
public :: solve_tridi_single_impl
#endif
......
......@@ -75,16 +75,16 @@
#else
#define PRECISION_AND_SUFFIX single
#endif
#include "elpa1_solve_tridi_real_template.F90"
!#include "elpa1_solve_tridi_real_template.F90"
#undef PRECISION_AND_SUFFIX
#ifdef DOUBLE_PRECISION_REAL
#define PRECISION_AND_SUFFIX double_impl
#else
#define PRECISION_AND_SUFFIX single_impl
#endif
#include "elpa1_solve_tridi_real_template.F90"
#include "../solve_tridi/solve_tridi_template.F90"
#undef PRECISION_AND_SUFFIX
#include "elpa1_merge_systems_real_template.F90"
!#include "elpa1_merge_systems_real_template.F90"
#include "elpa1_tools_template.F90"
#endif
......
......@@ -82,7 +82,7 @@ function elpa_solve_evp_&
#ifdef REDISTRIBUTE_MATRIX
use elpa_scalapack_interfaces
#endif
use solve_tridi
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -446,7 +446,8 @@ function elpa_solve_evp_&
#if COMPLEXCASE == 1
q_real, l_rows, &
#endif
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, success, nrThreads)
nblk, matrixCols, mpi_comm_all, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, &
success, nrThreads)
#ifdef WITH_NVTX
call nvtxRangePop()
......
......@@ -54,8 +54,10 @@
#include "../general/sanity.F90"
#if REALCASE == 1
#if 0
subroutine v_add_s_&
&PRECISION&
&(obj, v,n,s)
......@@ -70,7 +72,9 @@
v(:) = v(:) + s
end subroutine v_add_s_&
&PRECISION
#endif
#if 0
subroutine distribute_global_column_&
&PRECISION&
&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
......@@ -103,7 +107,9 @@
enddo
end subroutine distribute_global_column_&
&PRECISION
#endif
#if 0
subroutine solve_secular_equation_&
&PRECISION&
&(obj, n, i, d, z, delta, rho, dlam)
......@@ -242,6 +248,7 @@
&PRECISION
!-------------------------------------------------------------------------------
#endif
#endif
#if REALCASE == 1
subroutine hh_transform_real_&
......
......@@ -64,10 +64,11 @@
use precision
use elpa_abstract_impl
use elpa_omp
use solve_tridi
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: na, nev, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
integer(kind=ik) :: mpi_comm_all
real(kind=REAL_DATATYPE) :: d(obj%na), e(obj%na)
#ifdef USE_ASSUMED_SIZE
real(kind=REAL_DATATYPE) :: q(obj%local_nrows,*)
......@@ -114,6 +115,11 @@
print *,"Problem getting option for mpi_comm_cols. Aborting..."
stop
endif
call obj%get("mpi_comm_parent", mpi_comm_all,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for mpi_comm_all. Aborting..."
stop
endif
call obj%get("debug",debug,error)
if (error .ne. ELPA_OK) then
......@@ -130,7 +136,7 @@
call solve_tridi_&
&PRECISION&
&_private_impl(obj, na, nev, d, e, q, matrixRows, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols,.false., wantDebug, success, &
mpi_comm_all, mpi_comm_rows, mpi_comm_cols,.false., wantDebug, success, &
nrThreads)
......
#include "config-f90.h"
module distribute_global_column
use precision
implicit none
private
public :: distribute_global_column_double
#if defined(WANT_SINGLE_PRECISION_REAL) || defined(WANT_SINGLE_PRECISION_COMPLEX)
public :: distribute_global_column_single
#endif
contains
! real double precision first
#define REALCASE
#define DOUBLE_PRECISION
#include "../general/precision_macros.h"
#define _rk _c_double
#include "./distribute_global_column_template.F90"
#undef REALCASE
#undef DOUBLE_PRECISION
#undef _rk
#ifdef WANT_SINGLE_PRECISION_REAL
! real single precision first
#define REALCASE
#define SINGLE_PRECISION
#include "../general/precision_macros.h"
#define _rk _c_float
#include "./distribute_global_column_template.F90"
#undef REALCASE
#undef SINGLE_PRECISION
#undef _rk
#endif
end module
#include "config-f90.h"
module solve_secular_equation
use precision
implicit none
private
public :: solve_secular_equation_double
#if defined(WANT_SINGLE_PRECISION_REAL) || defined(WANT_SINGLE_PRECISION_COMPLEX)
public :: solve_secular_equation_single
#endif
contains
! real double precision first
#define REALCASE
#define DOUBLE_PRECISION
#include "../general/precision_macros.h"
#define _rk _c_double
#include "./solve_secular_equation_template.F90"
#undef REALCASE
#undef DOUBLE_PRECISION
#undef _rk
#ifdef WANT_SINGLE_PRECISION_REAL
! real single precision first
#define REALCASE
#define SINGLE_PRECISION
#include "../general/precision_macros.h"
#define _rk _c_float
#include "./solve_secular_equation_template.F90"
#undef REALCASE
#undef SINGLE_PRECISION
#undef _rk
#endif
end module
#include "config-f90.h"
module v_add_s
use precision
implicit none
private
public :: v_add_s_double
#if defined(WANT_SINGLE_PRECISION_REAL) || defined(WANT_SINGLE_PRECISION_COMPLEX)
public :: v_add_s_single
#endif
contains
! real double precision first
#define REALCASE
#define DOUBLE_PRECISION
#include "../general/precision_macros.h"
#define _rk _c_double
#include "./v_add_s_template.F90"
#undef REALCASE
#undef DOUBLE_PRECISION
#undef _rk
#ifdef WANT_SINGLE_PRECISION_REAL
! real single precision first
#define REALCASE
#define SINGLE_PRECISION
#include "../general/precision_macros.h"
#define _rk _c_float
#include "./v_add_s_template.F90"
#undef REALCASE
#undef SINGLE_PRECISION
#undef _rk
#endif
end module
subroutine solve_secular_equation_&
&PRECISION&
&(obj, n, i, d, z, delta, rho, dlam)
!-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix:
!
! 1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
!
! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
! which is more robust (it always yields a solution) but also slower
! than the algorithm used in DLAED4.
!
! The same restictions than in DLAED4 hold, namely:
!
! rho > 0 and d(i+1) > d(i)
!
! but this routine will not terminate with error if these are not satisfied
! (it will normally converge to a pole in this case).
!
! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
! N=1 and N=2 which is not compatible with DLAED4.
! Thus this routine shouldn't be used for these cases as a simple replacement
! of DLAED4.
!
! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
!
!
! N (input) INTEGER
! The length of all arrays.
!
! I (input) INTEGER
! The index of the eigenvalue to be computed. 1 <= I <= N.
!
! D (input) DOUBLE PRECISION array, dimension (N)
! The original eigenvalues. It is assumed that they are in
! order, D(I) < D(J) for I < J.
!
! Z (input) DOUBLE PRECISION array, dimension (N)
! The components of the updating Vector.
!
! DELTA (output) DOUBLE PRECISION array, dimension (N)
! DELTA contains (D(j) - lambda_I) in its j-th component.
! See remark above about DLAED4 compatibility!
!
! RHO (input) DOUBLE PRECISION
! The scalar in the symmetric updating formula.
!
! DLAM (output) DOUBLE PRECISION
! The computed lambda_I, the I-th updated eigenvalue.
!-------------------------------------------------------------------------------
use precision
use elpa_abstract_impl
implicit none
#include "../../src/general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n, i
real(kind=rk) :: d(n), z(n), delta(n), rho, dlam
integer(kind=ik) :: iter
real(kind=rk) :: a, b, x, y, dshift
! In order to obtain sufficient numerical accuracy we have to shift the problem
! either by d(i) or d(i+1), whichever is closer to the solution
! Upper and lower bound of the shifted solution interval are a and b
call obj%timer%start("solve_secular_equation" // PRECISION_SUFFIX)
if (i==n) then
! Special case: Last eigenvalue
! We shift always by d(n), lower bound is d(n),
! upper bound is determined by a guess:
dshift = d(n)
delta(:) = d(:) - dshift
a = 0.0_rk ! delta(n)
b = rho*SUM(z(:)**2) + 1.0_rk ! rho*SUM(z(:)**2) is the lower bound for the guess
else
! Other eigenvalues: lower bound is d(i), upper bound is d(i+1)
! We check the sign of the function in the midpoint of the interval
! in order to determine if eigenvalue is more close to d(i) or d(i+1)
x = 0.5_rk*(d(i)+d(i+1))
y = 1.0_rk + rho*SUM(z(:)**2/(d(:)-x))
if (y>0) then
! solution is next to d(i)
dshift = d(i)
else
! solution is next to d(i+1)
dshift = d(i+1)
endif
delta(:) = d(:) - dshift
a = delta(i)
b = delta(i+1)
endif
! Bisection:
do iter=1,200
! Interval subdivision
x = 0.5_rk*(a+b)
if (x==a .or. x==b) exit ! No further interval subdivisions possible
#ifdef DOUBLE_PRECISION_REAL
if (abs(x) < 1.e-200_rk8) exit ! x next to pole
#else
if (abs(x) < 1.e-20_rk4) exit ! x next to pole
#endif
! evaluate value at x
y = 1. + rho*SUM(z(:)**2/(delta(:)-x))
if (y==0) then
! found exact solution
exit
elseif (y>0) then
b = x
else
a = x
endif
enddo
! Solution:
dlam = x + dshift
delta(:) = delta(:) - x
call obj%timer%stop("solve_secular_equation" // PRECISION_SUFFIX)
end subroutine solve_secular_equation_&
&PRECISION
subroutine v_add_s_&
&PRECISION&
&(obj, v,n,s)
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n
real(kind=rk) :: v(n),s
v(:) = v(:) + s
end subroutine v_add_s_&
&PRECISION
......@@ -88,6 +88,7 @@
#ifdef REDISTRIBUTE_MATRIX
use elpa_scalapack_interfaces
#endif
use solve_tridi
use iso_c_binding
implicit none
#include "../general/precision_kinds.F90"
......@@ -796,7 +797,8 @@
#if COMPLEXCASE == 1
q_real, ubound(q_real,dim=1), &
#endif
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, success, nrThreads)
nblk, matrixCols, mpi_comm_all, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, &
success, nrThreads)
#ifdef HAVE_LIKWID
call likwid_markerStopRegion("solve")
#endif
......
......@@ -1340,6 +1340,8 @@
subroutine elpa_solve_tridiagonal_&
&ELPA_IMPL_SUFFIX&
& (self, d, e, q, error)
use solve_tridi
implicit none
class(elpa_impl_t) :: self
real(kind=C_REAL_DATATYPE) :: d(self%na), e(self%na)
#ifdef USE_ASSUMED_SIZE
......
subroutine add_tmp_&
&PRECISION&
&(obj, d1, dbase, ddiff, z, ev_scale_value, na1,i)
use precision
use v_add_s
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: na1, i
real(kind=REAL_DATATYPE), intent(in) :: d1(:), dbase(:), ddiff(:), z(:)
real(kind=REAL_DATATYPE), intent(inout) :: ev_scale_value
real(kind=REAL_DATATYPE) :: tmp(1:na1)
! tmp(1:na1) = z(1:na1) / delta(1:na1,i) ! original code
! tmp(1:na1) = z(1:na1) / (d1(1:na1)-d(i))! bad results
! All we want to calculate is tmp = (d1(1:na1)-dbase(i))+ddiff(i)
! in exactly this order, but we want to prevent compiler optimization
tmp(1:na1) = d1(1:na1) -dbase(i)
call v_add_s_&
&PRECISION&
&(obj, tmp(1:na1),na1,ddiff(i))
tmp(1:na1) = z(1:na1) / tmp(1:na1)
ev_scale_value = 1.0_rk/sqrt(dot_product(tmp(1:na1),tmp(1:na1)))
end subroutine add_tmp_&
&PRECISION
subroutine check_monotony_&
&PRECISION&
&(obj, n,d,text, wantDebug, success)
! This is a test routine for checking if the eigenvalues are monotonically increasing.
! It is for debug purposes only, an error should never be triggered!
use precision
use ELPA_utilities
use elpa_abstract_impl
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n
real(kind=REAL_DATATYPE) :: d(n)
character*(*) :: text
integer(kind=ik) :: i
logical, intent(in) :: wantDebug
logical, intent(out) :: success
success = .true.
do i=1,n-1
if (d(i+1)<d(i)) then
if (wantDebug) write(error_unit,'(a,a,i8,2g25.17)') 'ELPA1_check_monotony: Monotony error on ',text,i,d(i),d(i+1)
success = .false.
return
endif
enddo
end subroutine check_monotony_&
&PRECISION
recursive subroutine merge_recursive_&
&PRECISION &
(obj, np_off, nprocs, ldq, matrixCols, nblk, &
l_col, p_col, l_col_bc, p_col_bc, limits, &
np_cols, na, q, d, e, &
mpi_comm_all, mpi_comm_rows, mpi_comm_cols, &
useGPU, wantDebug, success, max_threads)
use precision
use elpa_abstract_impl
#ifdef WITH_OPENMP
use elpa_omp
#endif
use elpa_mpi
use merge_systems
use ELPA_utilities
implicit none
! noff is always a multiple of nblk_ev
! nlen-noff is always > nblk_ev
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: max_threads
integer(kind=ik), intent(in) :: mpi_comm_all, mpi_comm_rows, mpi_comm_cols
integer(kind=ik), intent(in) :: ldq, matrixCols, nblk, na, np_cols
integer(kind=ik), intent(in) :: l_col_bc(na), p_col_bc(na), l_col(na), p_col(na), &
limits(0:np_cols)
#ifdef USE_ASSUMED_SIZE
real(kind=REAL_DATATYPE), intent(inout) :: q(ldq,*)
#else
real(kind=REAL_DATATYPE), intent(inout) :: q(ldq,matrixCols)
#endif
#ifdef WITH_MPI
integer(kind=MPI_KIND) :: mpierr, my_pcolMPI
#endif
integer(kind=ik) :: my_pcol
real(kind=REAL_DATATYPE), intent(inout) :: d(na), e(na)
integer(kind=ik) :: np_off, nprocs
integer(kind=ik) :: np1, np2, noff, nlen, nmid, n
logical, intent(in) :: useGPU, wantDebug
logical, intent(out) :: success
success = .true.
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
call mpi_comm_rank(</