...
 
Commits (2)
......@@ -158,6 +158,7 @@ function elpa_solve_evp_&
integer(kind=ik) :: na, nev, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, check_pd, i, error, matrixRows
real(kind=C_DATATYPE_KIND) :: thres_pd
#ifdef REDISTRIBUTE_MATRIX
integer(kind=ik) :: nblkInternal, matrixOrder
......@@ -180,7 +181,7 @@ function elpa_solve_evp_&
integer(kind=ik) :: global_index
logical :: reDistributeMatrix, doRedistributeMatrix
call obj%timer%start("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
......@@ -191,7 +192,7 @@ function elpa_solve_evp_&
matrixRows = obj%local_nrows
matrixCols = obj%local_ncols
call obj%get("mpi_comm_parent", mpi_comm_all, error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
......@@ -291,15 +292,15 @@ function elpa_solve_evp_&
else
useGPU = .false.
endif
call obj%get("is_skewsymmetric",skewsymmetric,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for skewsymmetric. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
call obj%timer%start("mpi_communication")
call mpi_comm_rank(int(mpi_comm_rows,kind=MPI_KIND), my_prowMPI, mpierr)
......@@ -324,7 +325,7 @@ function elpa_solve_evp_&
wantDebug = debug == 1
do_useGPU = .false.
if (useGPU) then
call obj%timer%start("check_for_gpu")
......@@ -375,7 +376,7 @@ function elpa_solve_evp_&
do_useGPU_trans_ev = (gpu == 1)
endif
! for elpa1 the easy thing is, that the individual phases of the algorithm
! do not share any data on the GPU.
! do not share any data on the GPU.
! allocate a dummy q_intern, if eigenvectors should not be commputed and thus q is NOT present
......@@ -467,9 +468,15 @@ function elpa_solve_evp_&
stop
endif
if (check_pd .eq. 1) then
call obj%get("thres_pd",thres_pd,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for thres_pd. Aborting..."
stop
endif
check_pd = 0
do i = 1, na
if (ev(i) .gt. THRESHOLD) then
if (ev(i) .gt. thres_pd) then
check_pd = check_pd + 1
endif
enddo
......@@ -489,7 +496,7 @@ function elpa_solve_evp_&
#endif
if (isSkewsymmetric) then
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
! This makes the eigenvectors complex.
! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
q(1:matrixRows, matrixCols+1:2*matrixCols) = 0.0
do i = 1, matrixRows
......@@ -509,7 +516,7 @@ function elpa_solve_evp_&
q(i,matrixCols+1:2*matrixCols) = -q(i,1:matrixCols)
q(i,1:matrixCols) = 0
end if
end do
end do
endif
call obj%timer%start("back")
......
......@@ -99,7 +99,7 @@
logical :: useQRActual
#endif
integer(kind=c_int) :: kernel, kernelByUser
#ifdef REDISTRIBUTE_MATRIX
#ifdef REDISTRIBUTE_MATRIX
#ifdef USE_ASSUMED_SIZE
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout), target :: aExtern(obj%local_nrows,*)
......@@ -113,7 +113,7 @@
#endif
#endif
#else /* REDISTRIBUTE_MATRIX */
#else /* REDISTRIBUTE_MATRIX */
#ifdef USE_ASSUMED_SIZE
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,*)
......@@ -127,7 +127,7 @@
#endif
#endif
#endif /* REDISTRIBUTE_MATRIX */
#endif /* REDISTRIBUTE_MATRIX */
#ifdef REDISTRIBUTE_MATRIX
MATH_DATATYPE(kind=rck), pointer :: a(:,:)
......@@ -167,9 +167,11 @@
&PRECISION&
&_&
&MATH_DATATYPE
integer(kind=ik) :: na, nev, nblk, matrixCols, &
integer(kind=ik) :: na, nev, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, check_pd, error, matrixRows
real(kind=C_DATATYPE_KIND) :: thres_pd
#ifdef REDISTRIBUTE_MATRIX
integer(kind=ik) :: nblkInternal, matrixOrder
character(len=1) :: layoutInternal, layoutExternal
......@@ -187,15 +189,15 @@
#endif
logical :: do_bandred, do_tridiag, do_solve_tridi, &
logical :: do_bandred, do_tridiag, do_solve_tridi, &
do_trans_to_band, do_trans_to_full
logical :: good_nblk_gpu
logical :: good_nblk_gpu
integer(kind=ik) :: nrThreads
integer(kind=ik) :: nrThreads
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
integer(kind=c_int) :: simdSetAvailable(NUMBER_OF_INSTR)
integer(kind=c_int) :: simdSetAvailable(NUMBER_OF_INSTR)
#endif
integer(kind=ik) :: global_index
integer(kind=ik) :: global_index
logical :: reDistributeMatrix, doRedistributeMatrix
#if REALCASE == 1
......@@ -329,7 +331,7 @@
print *,"Problem getting option for kernel settings. Aborting..."
stop
endif
call obj%get("is_skewsymmetric",skewsymmetric,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for skewsymmetric settings. Aborting..."
......@@ -425,7 +427,7 @@
stop
endif
do_useGPU_trans_ev_tridi_to_band = (gpu == 1)
call obj%get("gpu_trans_ev_band_to_full", gpu, error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for gpu_trans_ev_band_to_full settings. Aborting..."
......@@ -575,7 +577,7 @@
stop
endif
if (my_pe == 0 ) write(error_unit,*) "ELPA decided to use ",elpa_int_value_to_string(KERNEL_STRING, kernel)
exit
exit
endif
endif
enddo
......@@ -818,9 +820,15 @@
stop
endif
if (check_pd .eq. 1) then
call obj%get("thres_pd",thres_pd,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option for thres_pd. Aborting..."
stop
endif
check_pd = 0
do i = 1, na
if (ev(i) .gt. THRESHOLD) then
if (ev(i) .gt. thres_pd) then
check_pd = check_pd + 1
endif
enddo
......@@ -835,17 +843,20 @@
endif
endif ! eigenvalues only
if (do_trans_to_band) then
#if COMPLEXCASE == 1
if (do_trans_to_band) then
! q must be given thats why from here on we can use q and not q_actual
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
endif
! make sure q_real is deallocated when using check_pd
if (allocated(q_real)) then
deallocate(q_real, stat=istat, errmsg=errorMessage)
check_deallocate("elpa2_template: q_real", istat, errorMessage)
#endif
endif
#endif
if (isSkewsymmetric) then
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
......@@ -891,7 +902,7 @@
call obj%timer%stop("trans_ev_to_band")
if (.not.(success)) return
endif ! do_trans_to_band
! the array q (currently) always resides on host even when using GPU
......@@ -904,7 +915,7 @@
! Backtransform stage 2
! In the skew-symemtric case this transforms the real part
call trans_ev_band_to_full_&
&MATH_DATATYPE&
&_&
......@@ -938,9 +949,9 @@
deallocate(hh_trans, stat=istat, errmsg=errorMessage)
check_deallocate("elpa2_template: hh_trans", istat, errorMessage)
endif
if (do_trans_to_full) then
call obj%timer%start("trans_ev_to_full")
call obj%timer%start("trans_ev_to_full")
if (isSkewsymmetric) then
! Transform imaginary part
! Transformation of real and imaginary part could also be one call of trans_ev_band_to_full_ acting on the n x 2n matrix.
......@@ -958,14 +969,18 @@
)
endif
deallocate(tmat, stat=istat, errmsg=errorMessage)
check_deallocate("elpa2_template: tmat", istat, errorMessage)
#ifdef HAVE_LIKWID
call likwid_markerStopRegion("trans_ev_to_full")
#endif
call obj%timer%stop("trans_ev_to_full")
endif ! do_trans_to_full
! make sure tmat is deallocated when using check_pd
if (allocated(tmat)) then
deallocate(tmat, stat=istat, errmsg=errorMessage)
check_deallocate("elpa2_template: tmat", istat, errorMessage)
endif
if (obj%eigenvalues_only) then
deallocate(q_dummy, stat=istat, errmsg=errorMessage)
check_deallocate("elpa2_template: q_dummy", istat, errorMessage)
......
......@@ -274,9 +274,14 @@ static const elpa_index_int_entry_t int_entries[] = {
BASE_ENTRY(option_name, option_description, 0, 1, 0) \
}
#define DOUBLE_ENTRY(option_name, option_description, default, print_flag) \
{ \
BASE_ENTRY(option_name, option_description, 0, 0, print_flag), \
.default_value = default, \
}
static const elpa_index_double_entry_t double_entries[] = {
/* Empty for now */
READONLY_DOUBLE_ENTRY("dummy", "dummy"),
DOUBLE_ENTRY("thres_pd", "Threshold to define ill-conditioning, default 0.00001", 0.00001, PRINT_YES),
};
void elpa_index_free(elpa_index_t index) {
......
......@@ -64,7 +64,6 @@
#undef MPI_MATH_DATATYPE_PRECISION_C
#undef MPI_MATH_DATATYPE_PRECISION_EXPL
#undef C_DATATYPE_KIND
#undef THRESHOLD
#if 0
......@@ -138,7 +137,6 @@
#define C_PLACPY pdlacpy_
#define C_PTRAN pdtran_
#define THRESHOLD 1e-11_rk8
#endif /* DOUBLE_PRECISION */
#ifdef SINGLE_PRECISION
......@@ -206,7 +204,6 @@
#define C_PLACPY pslacpy_
#define C_PTRAN pstran_
#define THRESHOLD 1e-4_rk4
#endif /* SINGLE_PRECISION */
#endif /* REALCASE */
......@@ -290,7 +287,6 @@
#undef ELPA_PRECISION_SSMV
#undef ELPA_PRECISION_SSR2
#undef THRESHOLD
#if 0
/* General definitions needed in single and double case */
......@@ -369,8 +365,6 @@
#define ELPA_PRECISION_SSMV elpa_zssmv
#define ELPA_PRECISION_SSR2 elpa_zssr2
#define THRESHOLD 1e-11_rk8
#endif /* DOUBLE PRECISION */
#ifdef SINGLE_PRECISION
......@@ -442,7 +436,6 @@
#define ELPA_PRECISION_SSMV elpa_cssmv
#define ELPA_PRECISION_SSR2 elpa_cssr2
#define THRESHOLD 1e-4_rk4
#endif /* SINGLE PRECISION */
#endif /* COMPLEXCASE */