diff --git a/src/elpa1/elpa1_merge_systems_real_template.F90 b/src/elpa1/elpa1_merge_systems_real_template.F90 index b5ec1bb2ca1e2d5f710ada98d3bf178a9b148c74..d5b243679e785baf3a2ea75c19ff042ff385be19 100644 --- a/src/elpa1/elpa1_merge_systems_real_template.F90 +++ b/src/elpa1/elpa1_merge_systems_real_template.F90 @@ -193,7 +193,7 @@ ! Calculations start here beta = abs(e) - sig = sign(CONST_1_0,e) + sig = sign(1.0_rk,e) ! Calculate rank-1 modifier z @@ -218,8 +218,8 @@ &(obj, z, na) ! Normalize z so that norm(z) = 1. Since z is the concatenation of ! two normalized vectors, norm2(z) = sqrt(2). - z = z/sqrt(CONST_2_0) - rho = CONST_2_0*beta + z = z/sqrt(2.0_rk) + rho = 2.0_rk*beta ! Calculate index for merging both systems by ascending eigenvalues call obj%timer%start("blas") call PRECISION_LAMRG( nm, na-nm, d, 1, 1, idx ) @@ -230,7 +230,7 @@ zmax = maxval(abs(z)) dmax = maxval(abs(d)) EPS = PRECISION_LAMCH( 'Epsilon' ) - TOL = CONST_8_0*EPS*MAX(dmax,zmax) + TOL = 8.0_rk*EPS*MAX(dmax,zmax) ! If the rank-1 modifier is small enough, no more needs to be done ! except to reorganize D and Q @@ -733,8 +733,8 @@ ! else call obj%timer%start("blas") if (l_rnm>0 .and. ncnt>0 .and. nnzu>0) & - call PRECISION_GEMM('N', 'N', l_rnm, ncnt, nnzu, CONST_1_0, qtmp1, ubound(qtmp1,dim=1), ev, ubound(ev,dim=1), & - CONST_1_0, qtmp2(1,1), ubound(qtmp2,dim=1)) + call PRECISION_GEMM('N', 'N', l_rnm, ncnt, nnzu, 1.0_rk, qtmp1, ubound(qtmp1,dim=1), ev, ubound(ev,dim=1), & + 1.0_rk, qtmp2(1,1), ubound(qtmp2,dim=1)) call obj%timer%stop("blas") ! endif ! useGPU ! Compute eigenvectors of the rank-1 modified matrix. @@ -760,8 +760,8 @@ ! else call obj%timer%start("blas") if (l_rows-l_rnm>0 .and. ncnt>0 .and. nnzl>0) & - call PRECISION_GEMM('N', 'N', l_rows-l_rnm, ncnt, nnzl, CONST_1_0, qtmp1(l_rnm+1,1), ubound(qtmp1,dim=1), ev, & - ubound(ev,dim=1), CONST_1_0, qtmp2(l_rnm+1,1), ubound(qtmp2,dim=1)) + call PRECISION_GEMM('N', 'N', l_rows-l_rnm, ncnt, nnzl, 1.0_rk, qtmp1(l_rnm+1,1), ubound(qtmp1,dim=1), ev, & + ubound(ev,dim=1), 1.0_rk, qtmp2(l_rnm+1,1), ubound(qtmp2,dim=1)) call obj%timer%stop("blas") ! endif ! useGPU ! Put partial result into (output) Q @@ -817,7 +817,7 @@ &PRECISION& &(obj, tmp(1:na1),na1,ddiff(i)) tmp(1:na1) = z(1:na1) / tmp(1:na1) - ev_scale_value = CONST_1_0/sqrt(dot_product(tmp(1:na1),tmp(1:na1))) + ev_scale_value = 1.0_rk/sqrt(dot_product(tmp(1:na1),tmp(1:na1))) end subroutine add_tmp_& &PRECISION diff --git a/src/elpa1/elpa1_template.F90 b/src/elpa1/elpa1_template.F90 index 52a48877823283858f85a723bdbd686c2417e0ce..caad60e75aaee835a7d4a2edfe08751b5c6069a8 100644 --- a/src/elpa1/elpa1_template.F90 +++ b/src/elpa1/elpa1_template.F90 @@ -56,9 +56,9 @@ function elpa_solve_evp_& &MATH_DATATYPE& - &_1stage_& - &PRECISION& - &_impl (obj, a, ev, q) result(success) + &_1stage_& + &PRECISION& + &_impl (obj, a, ev, q) result(success) use precision use cuda_functions use mod_check_for_gpu @@ -67,31 +67,25 @@ function elpa_solve_evp_& use elpa_mpi use elpa1_compute implicit none - +#include "../general/precision_kinds.F90" class(elpa_abstract_impl_t), intent(inout) :: obj real(kind=REAL_DATATYPE), intent(out) :: ev(obj%na) -#if REALCASE == 1 + #ifdef USE_ASSUMED_SIZE - real(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,*) - real(kind=C_DATATYPE_KIND), optional,target,intent(out) :: q(obj%local_nrows,*) + MATH_DATATYPE(kind=rck), intent(inout) :: a(obj%local_nrows,*) + MATH_DATATYPE(kind=rck), optional,target,intent(out) :: q(obj%local_nrows,*) #else - real(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,obj%local_ncols) - real(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols) + MATH_DATATYPE(kind=rck), intent(inout) :: a(obj%local_nrows,obj%local_ncols) + MATH_DATATYPE(kind=rck), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols) #endif + +#if REALCASE == 1 real(kind=C_DATATYPE_KIND), allocatable :: tau(:) real(kind=C_DATATYPE_KIND), allocatable, target :: q_dummy(:,:) real(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:) #endif /* REALCASE */ #if COMPLEXCASE == 1 -#ifdef USE_ASSUMED_SIZE - complex(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,*) - complex(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,*) -#else - complex(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,obj%local_ncols) - complex(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols) -#endif - real(kind=REAL_DATATYPE), allocatable :: q_real(:,:) complex(kind=C_DATATYPE_KIND), allocatable :: tau(:) complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:) @@ -112,7 +106,7 @@ function elpa_solve_evp_& character(200) :: errorMessage integer(kind=ik) :: na, nev, lda, ldq, nblk, matrixCols, & mpi_comm_rows, mpi_comm_cols, & - mpi_comm_all, check_pd, i + mpi_comm_all, check_pd, i logical :: do_bandred, do_solve, do_trans_ev @@ -141,16 +135,13 @@ function elpa_solve_evp_& if (na .eq. 1) then #if REALCASE == 1 ev(1) = a(1,1) - if (.not.(obj%eigenvalues_only)) then - q(1,1) = CONST_REAL_1_0 - endif #endif #if COMPLEXCASE == 1 ev(1) = real(a(1,1)) +#endif if (.not.(obj%eigenvalues_only)) then - q(1,1) = CONST_COMPLEX_PAIR_1_0 + q(1,1) = ONE endif -#endif call obj%timer%stop("elpa_solve_evp_& &MATH_DATATYPE& &_1stage_& diff --git a/src/elpa1/elpa1_tools_template.F90 b/src/elpa1/elpa1_tools_template.F90 index c147c0d440d6f419398284ff1f7b1724bf713853..513c350599b034da407e2d1f93cdd4a4b9de8a2a 100644 --- a/src/elpa1/elpa1_tools_template.F90 +++ b/src/elpa1/elpa1_tools_template.F90 @@ -181,14 +181,14 @@ delta(:) = d(:) - dshift a = 0.0_rk ! delta(n) - b = rho*SUM(z(:)**2) + CONST_1_0 ! rho*SUM(z(:)**2) is the lower bound for the guess + b = rho*SUM(z(:)**2) + 1.0_rk ! rho*SUM(z(:)**2) is the lower bound for the guess else ! Other eigenvalues: lower bound is d(i), upper bound is d(i+1) ! We check the sign of the function in the midpoint of the interval ! in order to determine if eigenvalue is more close to d(i) or d(i+1) - x = CONST_0_5*(d(i)+d(i+1)) - y = CONST_1_0 + rho*SUM(z(:)**2/(d(:)-x)) + x = 0.5_rk*(d(i)+d(i+1)) + y = 1.0_rk + rho*SUM(z(:)**2/(d(:)-x)) if (y>0) then ! solution is next to d(i) dshift = d(i) @@ -208,7 +208,7 @@ do iter=1,200 ! Interval subdivision - x = CONST_0_5*(a+b) + x = 0.5_rk*(a+b) if (x==a .or. x==b) exit ! No further interval subdivisions possible #ifdef DOUBLE_PRECISION_REAL if (abs(x) < 1.e-200_rk8) exit ! x next to pole diff --git a/src/elpa1/elpa1_tridiag_template.F90 b/src/elpa1/elpa1_tridiag_template.F90 index 8621b1ca70eb030e6bc2c528038f5bd0ddddc941..3f5eace15d37b3ad9055d094d36eb0c5f1335797 100644 --- a/src/elpa1/elpa1_tridiag_template.F90 +++ b/src/elpa1/elpa1_tridiag_template.F90 @@ -103,6 +103,7 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ use elpa_abstract_impl use matrix_plot implicit none +#include "../general/precision_kinds.F90" class(elpa_abstract_impl_t), intent(inout) :: obj integer(kind=ik), intent(in) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols logical, intent(in) :: useGPU, wantDebug @@ -135,20 +136,6 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ ! id in processor row and column and total numbers of processor rows and columns integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, my_rank integer(kind=ik) :: mpierr -#if REALCASE == 1 -#ifdef DOUBLE_PRECISION_REAL - real(kind=rk8), parameter :: ZERO=0.0_rk8, ONE = 1.0_rk8 -#else - real(kind=rk4), parameter :: ZERO=0.0_rk4, ONE = 1.0_rk4 -#endif -#endif -#if COMPLEXCASE == 1 -#ifdef DOUBLE_PRECISION_COMPLEX - complex(kind=ck8), parameter :: ZERO = (0.0_rk8,0.0_rk8), ONE = (1.0_rk8,0.0_rk8) -#else - complex(kind=ck4), parameter :: ZERO = (0.0_rk4,0.0_rk4), ONE = (1.0_rk4,0.0_rk4) -#endif -#endif integer(kind=ik) :: totalblocks, max_loc_block_rows, max_loc_block_cols, max_local_rows, & max_local_cols ! updated after each istep (in the main cycle) to contain number of @@ -880,7 +867,7 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ endif !useGPU call hh_transform_complex_& &PRECISION & - (obj, vrl, CONST_REAL_0_0, xf, tau(2), wantDebug) + (obj, vrl, 0.0_rk, xf, tau(2), wantDebug) e_vec(1) = vrl a_mat(1,l_cols) = 1. ! for consistency only