Commit a7123fc0 authored by Andreas Marek's avatar Andreas Marek

Merge branch 'master_pre_stage' into reproducible_code

parents c7fb95e3 eaa78017
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -57,6 +57,18 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa_generalized/cannon.c \
src/helpers/matrix_plot.F90 \
src/general/mod_elpa_skewsymmetric_blas.F90 \
src/solve_tridi/mod_global_product.F90 \
src/solve_tridi/mod_global_gather.F90 \
src/solve_tridi/mod_resort_ev.F90 \
src/solve_tridi/mod_transform_columns.F90 \
src/solve_tridi/mod_check_monotony.F90 \
src/solve_tridi/mod_add_tmp.F90 \
src/solve_tridi/mod_merge_systems.F90 \
src/solve_tridi/mod_merge_recursive.F90 \
src/solve_tridi/mod_solve_tridi.F90 \
src/elpa1/mod_distribute_global_column.F90 \
src/elpa1/mod_v_add_s.F90 \
src/elpa1/mod_solve_secular_equation.F90 \
src/elpa_index.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c
......@@ -680,8 +692,18 @@ EXTRA_DIST = \
src/helpers/elpa_redistribute_template.F90 \
src/elpa_impl_generalized_transform_template.F90 \
src/elpa1/elpa1_compute_template.F90 \
src/elpa1/elpa1_merge_systems_real_template.F90 \
src/elpa1/elpa1_solve_tridi_real_template.F90 \
src/solve_tridi/global_product_template.F90 \
src/solve_tridi/global_gather_template.F90 \
src/solve_tridi/resort_ev_template.F90 \
src/solve_tridi/transform_columns_template.F90 \
src/solve_tridi/check_monotony_template.F90 \
src/solve_tridi/add_tmp_template.F90 \
src/elpa1/v_add_s_template.F90 \
src/elpa1/solve_secular_equation_template.F90 \
src/elpa1/distribute_global_column_template.F90 \
src/solve_tridi/merge_systems_template.F90 \
src/solve_tridi/merge_recursive_template.F90 \
src/solve_tridi/solve_tridi_template.F90 \
src/elpa1/elpa1_template.F90 \
src/elpa1/elpa1_tools_template.F90 \
src/elpa1/elpa1_trans_ev_template.F90 \
......
#!/usr/bin/python3
#!/usr/bin/env python3
from itertools import product
......@@ -775,9 +775,9 @@ for cc, fc, m, o, p, a, b, g, instr, addr, na in product(
print("# " + cc + "-" + fc + "-" + m + "-" + o + "-" + p + "-" + a + "-" + b + "-" +g + "-" + cov + "-" + instr + "-" + addr)
print(cc + "-" + fc + "-" + m + "-" + o + "-" + p + "-" +a + "-" +b + "-" +g + "-" + cov + "-" + instr + "-" + addr + "-jobs:")
#if (MasterOnly):
# print(" only:")
# print(" - /.*master.*/")
if (MasterOnly):
print(" only:")
print(" - /.*master.*/")
if (instr == "power8"):
print(" allow_failure: true")
print(" tags:")
......
#!/usr/bin/python3
#!/usr/bin/env python3
simple_tokens = [
"PRECISION",
......
#!/usr/bin/python3
#!/usr/bin/env python3
from itertools import product
language_flag = {
......
#!/usr/bin/python3
#!/usr/bin/env python3
from __future__ import print_function
import os
import sys
......
#!/usr/bin/python3
#!/usr/bin/env python3
import numpy as np
from pyelpa import DistributedMatrix
import sys
......
subroutine distribute_global_column_&
&PRECISION&
&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: noff, nlen, my_prow, np_rows, nblk
real(kind=rk) :: g_col(nlen), l_col(*) ! chnage this to proper 2d 1d matching ! remove assumed size
integer(kind=ik) :: nbs, nbe, jb, g_off, l_off, js, je
nbs = noff/(nblk*np_rows)
nbe = (noff+nlen-1)/(nblk*np_rows)
do jb = nbs, nbe
g_off = jb*nblk*np_rows + nblk*my_prow
l_off = jb*nblk
js = MAX(noff+1-g_off,1)
je = MIN(noff+nlen-g_off,nblk)
if (je<js) cycle
l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
enddo
end subroutine distribute_global_column_&
&PRECISION
......@@ -64,7 +64,7 @@ module elpa1_compute
public :: trans_ev_real_double ! Transform real eigenvectors of a tridiagonal matrix back
public :: trans_ev_real
public :: solve_tridi_double
!public :: solve_tridi_double
public :: solve_tridi_double_impl
interface tridiag_real
......@@ -78,7 +78,7 @@ module elpa1_compute
#ifdef WANT_SINGLE_PRECISION_REAL
public :: tridiag_real_single ! Transform real single-precision symmetric matrix to tridiagonal form
public :: trans_ev_real_single ! Transform real single-precision eigenvectors of a tridiagonal matrix back
public :: solve_tridi_single
!public :: solve_tridi_single
public :: solve_tridi_single_impl
#endif
......
......@@ -75,16 +75,16 @@
#else
#define PRECISION_AND_SUFFIX single
#endif
#include "elpa1_solve_tridi_real_template.F90"
!#include "elpa1_solve_tridi_real_template.F90"
#undef PRECISION_AND_SUFFIX
#ifdef DOUBLE_PRECISION_REAL
#define PRECISION_AND_SUFFIX double_impl
#else
#define PRECISION_AND_SUFFIX single_impl
#endif
#include "elpa1_solve_tridi_real_template.F90"
#include "../solve_tridi/solve_tridi_template.F90"
#undef PRECISION_AND_SUFFIX
#include "elpa1_merge_systems_real_template.F90"
!#include "elpa1_merge_systems_real_template.F90"
#include "elpa1_tools_template.F90"
#endif
......
This diff is collapsed.
This diff is collapsed.
......@@ -82,7 +82,7 @@ function elpa_solve_evp_&
#ifdef REDISTRIBUTE_MATRIX
use elpa_scalapack_interfaces
#endif
use solve_tridi
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -158,6 +158,7 @@ function elpa_solve_evp_&
integer(kind=ik) :: na, nev, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, check_pd, i, error, matrixRows
real(kind=C_DATATYPE_KIND) :: thres_pd
#ifdef REDISTRIBUTE_MATRIX
integer(kind=ik) :: nblkInternal, matrixOrder
......@@ -180,7 +181,7 @@ function elpa_solve_evp_&
integer(kind=ik) :: global_index
logical :: reDistributeMatrix, doRedistributeMatrix
call obj%timer%start("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
......@@ -191,7 +192,7 @@ function elpa_solve_evp_&
matrixRows = obj%local_nrows
matrixCols = obj%local_ncols
call obj%get("mpi_comm_parent", mpi_comm_all, error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
......@@ -291,15 +292,15 @@ function elpa_solve_evp_&
else
useGPU = .false.
endif
call obj%get("is_skewsymmetric",skewsymmetric,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for skewsymmetric. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
call obj%timer%start("mpi_communication")
call mpi_comm_rank(int(mpi_comm_rows,kind=MPI_KIND), my_prowMPI, mpierr)
......@@ -324,7 +325,7 @@ function elpa_solve_evp_&
wantDebug = debug == 1
do_useGPU = .false.
if (useGPU) then
call obj%timer%start("check_for_gpu")
......@@ -375,7 +376,7 @@ function elpa_solve_evp_&
do_useGPU_trans_ev = (gpu == 1)
endif
! for elpa1 the easy thing is, that the individual phases of the algorithm
! do not share any data on the GPU.
! do not share any data on the GPU.
! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
......@@ -446,7 +447,8 @@ function elpa_solve_evp_&
#if COMPLEXCASE == 1
q_real, l_rows, &
#endif
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, success, nrThreads)
nblk, matrixCols, mpi_comm_all, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, &
success, nrThreads)
#ifdef WITH_NVTX
call nvtxRangePop()
......@@ -467,9 +469,19 @@ function elpa_solve_evp_&
stop
endif
if (check_pd .eq. 1) then
call obj%get("thres_pd_&
&PRECISION&
&",thres_pd,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for thres_pd_&
&PRECISION&
&. Aborting..."
stop
endif
check_pd = 0
do i = 1, na
if (ev(i) .gt. THRESHOLD) then
if (ev(i) .gt. thres_pd) then
check_pd = check_pd + 1
endif
enddo
......@@ -489,7 +501,7 @@ function elpa_solve_evp_&
#endif
if (isSkewsymmetric) then
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
! This makes the eigenvectors complex.
! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
q(1:matrixRows, matrixCols+1:2*matrixCols) = 0.0
do i = 1, matrixRows
......@@ -509,7 +521,7 @@ function elpa_solve_evp_&
q(i,matrixCols+1:2*matrixCols) = -q(i,1:matrixCols)
q(i,1:matrixCols) = 0
end if
end do
end do
endif
call obj%timer%start("back")
......
......@@ -54,29 +54,33 @@
#include "../general/sanity.F90"
#if REALCASE == 1
subroutine v_add_s_&
&PRECISION&
&(obj, v,n,s)
use precision
use elpa_abstract_impl
implicit none
#if 0
subroutine v_add_s_&
&PRECISION&
&(obj, v,n,s)
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n
real(kind=rk) :: v(n),s
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n
real(kind=rk) :: v(n),s
v(:) = v(:) + s
end subroutine v_add_s_&
&PRECISION
v(:) = v(:) + s
end subroutine v_add_s_&
&PRECISION
#endif
subroutine distribute_global_column_&
&PRECISION&
&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision
use elpa_abstract_impl
implicit none
#if 0
subroutine distribute_global_column_&
&PRECISION&
&(obj, g_col, l_col, noff, nlen, my_prow, np_rows, nblk)
use precision
use elpa_abstract_impl
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
......@@ -100,65 +104,67 @@ subroutine distribute_global_column_&
l_col(l_off+js:l_off+je) = g_col(g_off+js-noff:g_off+je-noff)
enddo
end subroutine distribute_global_column_&
&PRECISION
subroutine solve_secular_equation_&
&PRECISION&
&(obj, n, i, d, z, delta, rho, dlam)
!-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix:
!
! 1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
!
! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
! which is more robust (it always yields a solution) but also slower
! than the algorithm used in DLAED4.
!
! The same restictions than in DLAED4 hold, namely:
!
! rho > 0 and d(i+1) > d(i)
!
! but this routine will not terminate with error if these are not satisfied
! (it will normally converge to a pole in this case).
!
! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
! N=1 and N=2 which is not compatible with DLAED4.
! Thus this routine shouldn't be used for these cases as a simple replacement
! of DLAED4.
!
! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
!
!
! N (input) INTEGER
! The length of all arrays.
!
! I (input) INTEGER
! The index of the eigenvalue to be computed. 1 <= I <= N.
!
! D (input) DOUBLE PRECISION array, dimension (N)
! The original eigenvalues. It is assumed that they are in
! order, D(I) < D(J) for I < J.
!
! Z (input) DOUBLE PRECISION array, dimension (N)
! The components of the updating Vector.
!
! DELTA (output) DOUBLE PRECISION array, dimension (N)
! DELTA contains (D(j) - lambda_I) in its j-th component.
! See remark above about DLAED4 compatibility!
!
! RHO (input) DOUBLE PRECISION
! The scalar in the symmetric updating formula.
!
! DLAM (output) DOUBLE PRECISION
! The computed lambda_I, the I-th updated eigenvalue.
!-------------------------------------------------------------------------------
enddo
end subroutine distribute_global_column_&
&PRECISION
#endif
use precision
use elpa_abstract_impl
implicit none
#if 0
subroutine solve_secular_equation_&
&PRECISION&
&(obj, n, i, d, z, delta, rho, dlam)
!-------------------------------------------------------------------------------
! This routine solves the secular equation of a symmetric rank 1 modified
! diagonal matrix:
!
! 1. + rho*SUM(z(:)**2/(d(:)-x)) = 0
!
! It does the same as the LAPACK routine DLAED4 but it uses a bisection technique
! which is more robust (it always yields a solution) but also slower
! than the algorithm used in DLAED4.
!
! The same restictions than in DLAED4 hold, namely:
!
! rho > 0 and d(i+1) > d(i)
!
! but this routine will not terminate with error if these are not satisfied
! (it will normally converge to a pole in this case).
!
! The output in DELTA(j) is always (D(j) - lambda_I), even for the cases
! N=1 and N=2 which is not compatible with DLAED4.
! Thus this routine shouldn't be used for these cases as a simple replacement
! of DLAED4.
!
! The arguments are the same as in DLAED4 (with the exception of the INFO argument):
!
!
! N (input) INTEGER
! The length of all arrays.
!
! I (input) INTEGER
! The index of the eigenvalue to be computed. 1 <= I <= N.
!
! D (input) DOUBLE PRECISION array, dimension (N)
! The original eigenvalues. It is assumed that they are in
! order, D(I) < D(J) for I < J.
!
! Z (input) DOUBLE PRECISION array, dimension (N)
! The components of the updating Vector.
!
! DELTA (output) DOUBLE PRECISION array, dimension (N)
! DELTA contains (D(j) - lambda_I) in its j-th component.
! See remark above about DLAED4 compatibility!
!
! RHO (input) DOUBLE PRECISION
! The scalar in the symmetric updating formula.
!
! DLAM (output) DOUBLE PRECISION
! The computed lambda_I, the I-th updated eigenvalue.
!-------------------------------------------------------------------------------
use precision
use elpa_abstract_impl
implicit none
#include "../../src/general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: n, i
......@@ -242,6 +248,7 @@ end subroutine solve_secular_equation_&
&PRECISION
!-------------------------------------------------------------------------------
#endif
#endif
#if REALCASE == 1
subroutine hh_transform_real_&
......
......@@ -56,97 +56,103 @@
#include "../general/sanity.F90"
use elpa1_compute, solve_tridi_&
&PRECISION&
&_private_impl => solve_tridi_&
&PRECISION&
&_impl
use precision
use elpa_abstract_impl
use elpa_omp
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: na, nev, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
real(kind=REAL_DATATYPE) :: d(obj%na), e(obj%na)
use elpa1_compute, solve_tridi_&
&PRECISION&
&_private_impl => solve_tridi_&
&PRECISION&
&_impl
use precision
use elpa_abstract_impl
use elpa_omp
use solve_tridi
implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: na, nev, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
integer(kind=ik) :: mpi_comm_all
real(kind=REAL_DATATYPE) :: d(obj%na), e(obj%na)
#ifdef USE_ASSUMED_SIZE
real(kind=REAL_DATATYPE) :: q(obj%local_nrows,*)
real(kind=REAL_DATATYPE) :: q(obj%local_nrows,*)
#else
real(kind=REAL_DATATYPE) :: q(obj%local_nrows, obj%local_ncols)
real(kind=REAL_DATATYPE) :: q(obj%local_nrows, obj%local_ncols)
#endif
logical :: wantDebug
logical :: success
logical :: wantDebug
logical :: success
integer :: debug, error
integer :: nrThreads
integer :: debug, error
integer :: nrThreads
call obj%timer%start("elpa_solve_tridi_public_&
&MATH_DATATYPE&
&_&
&PRECISION&
&")
na = obj%na
nev = obj%nev
nblk = obj%nblk
matrixRows = obj%local_nrows
matrixCols = obj%local_ncols
call obj%timer%start("elpa_solve_tridi_public_&
&MATH_DATATYPE&
&_&
&PRECISION&
&")
na = obj%na
nev = obj%nev
nblk = obj%nblk
matrixRows = obj%local_nrows
matrixCols = obj%local_ncols
#ifdef WITH_OPENMP_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
omp_threads_caller = omp_get_max_threads()
#ifdef WITH_OPENMP
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
omp_threads_caller = omp_get_max_threads()
! check the number of threads that ELPA should use internally
! check the number of threads that ELPA should use internally
call obj%get("omp_threads",nrThreads,error)
call obj%get("omp_threads",nrThreads,error)
#else
nrThreads=1
nrThreads=1
#endif
call obj%get("mpi_comm_rows", mpi_comm_rows,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for mpi_comm_rows. Aborting..."
stop
endif
call obj%get("mpi_comm_cols", mpi_comm_cols,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for mpi_comm_cols. Aborting..."
stop
endif
call obj%get("debug",debug,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for debug. Aborting..."
stop
endif
if (debug == 1) then
wantDebug = .true.
else
wantDebug = .false.
endif
success = .false.
call solve_tridi_&
&PRECISION&
&_private_impl(obj, na, nev, d, e, q, matrixRows, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols,.false., wantDebug, success, &
nrThreads)
! restore original OpenMP settings
#ifdef WITH_OPENMP_TRADITIONAL
! store the number of OpenMP threads used in the calling function
! restore this at the end of ELPA 2
call omp_set_num_threads(omp_threads_caller)
call obj%get("mpi_comm_rows", mpi_comm_rows,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for mpi_comm_rows. Aborting..."
stop
endif
call obj%get("mpi_comm_cols", mpi_comm_cols,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for mpi_comm_cols. Aborting..."
stop
endif
call obj%get("mpi_comm_parent", mpi_comm_all,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for mpi_comm_all. Aborting..."
stop
endif
call obj%get("debug",debug,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for debug. Aborting..."
stop
endif
if (debug == 1) then
wantDebug = .true.
else
wantDebug = .false.
endif
success = .false.
call solve_tridi_&
&PRECISION&
&_private_impl(obj, na, nev, d, e, q, matrixRows, nblk, matrixCols, &
mpi_comm_all, mpi_comm_rows, mpi_comm_cols,.false., wantDebug, success, &
nrThreads)