Commit d042ae10 by Pavel Kus

### introducing single precision variant of the blas kernel

parent f7af9173
 ... ... @@ -69,6 +69,7 @@ use precision use elpa_abstract_impl implicit none #include "../../general/precision_kinds.F90" !class(elpa_abstract_impl_t), intent(inout) :: obj integer(kind=ik), intent(in) :: nb, nq, ldq, ldh ... ... @@ -90,12 +91,12 @@ ! Calculate dot product of the two Householder vectors h_mat(:,:) = 0.0 h_mat(:,:) = 0.0_rk h_mat(1,4) = -1.0 h_mat(2,3) = -1.0 h_mat(3,2) = -1.0 h_mat(4,1) = -1.0 h_mat(1,4) = -1.0_rk h_mat(2,3) = -1.0_rk h_mat(3,2) = -1.0_rk h_mat(4,1) = -1.0_rk h_mat(1,5:nb+3) = -hh(2:nb, 1) h_mat(2,4:nb+2) = -hh(2:nb, 2) ... ... @@ -104,32 +105,32 @@ ! TODO we do not need the diagonal, but how to do it with BLAS? !s_mat = - matmul(h_mat, transpose(h_mat)) call DSYRK('L', 'N', 4, nb+3, & -1.0_rk8, h_mat, 4, & 0.0_rk8, s_mat, 4) call PRECISION_SYRK('L', 'N', 4, nb+3, & -ONE, h_mat, 4, & ZERO, s_mat, 4) ! w_comb = - matmul(q(1:nq, 1:nb+3), transpose(h_mat)) call DGEMM('N', 'T', nq, 4, nb+3, & -1.0_rk8, q, ldq, & h_mat, 4, & 0.0_rk8, w_comb, nq) call PRECISION_GEMM('N', 'T', nq, 4, nb+3, & -ONE, q, ldq, & h_mat, 4, & ZERO, w_comb, nq) ! Rank-1 update !w_comb(1:nq,1) = hh(1,1) * w_comb(1:nq, 1) call DSCAL(nq, hh(1,1), w_comb(1:nq, 1), 1) call PRECISION_SCAL(nq, hh(1,1), w_comb(1:nq, 1), 1) do i = 2, 4 ! w_comb(1:nq,i) = matmul(w_comb(1:nq,1:i-1), hh(1,i) * s_mat(i,1:i-1)) + hh(1,i) * w_comb(1:nq, i) call DGEMV('N', nq, i-1, & hh(1,i), w_comb(1:nq, 1:i-1), nq, & s_mat(i,1:i-1), 1, & hh(1,i), w_comb(1:nq,i), 1) call PRECISION_GEMV('N', nq, i-1, & hh(1,i), w_comb(1:nq, 1:i-1), nq, & s_mat(i,1:i-1), 1, & hh(1,i), w_comb(1:nq,i), 1) enddo !q(1:nq, 1:nb+3) = matmul(w_comb, h_mat) + q(1:nq, 1:nb+3) call DGEMM('N', 'N', nq, nb+3, 4, & 1.0_rk8, w_comb, nq, & h_mat, 4, & 1.0_rk8, q, ldq) call PRECISION_GEMM('N', 'N', nq, nb+3, 4, & ONE, w_comb, nq, & h_mat, 4, & ONE, q, ldq) end subroutine ... ...
 ... ... @@ -16,6 +16,7 @@ #undef C_PLACPY #undef C_PTRAN #undef PRECISION_SCAL #undef PRECISION_TRTRI #undef PRECISION_POTRF #undef PRECISION_TRSM ... ... @@ -75,6 +76,7 @@ #define BLAS_CHAR_AND_SY_OR_HE DSY #define SPECIAL_COMPLEX_DATATYPE ck8 #define PRECISION_SCAL DSCAL #define PRECISION_TRTRI DTRTRI #define PRECISION_POTRF DPOTRF #define PRECISION_TRSM DTRSM ... ... @@ -135,6 +137,7 @@ #define BLAS_CHAR_AND_SY_OR_HE SSY #define SPECIAL_COMPLEX_DATATYPE ck4 #define PRECISION_SCAL SSCAL #define PRECISION_TRTRI STRTRI #define PRECISION_POTRF SPOTRF #define PRECISION_TRSM STRSM ... ... @@ -202,6 +205,7 @@ #undef C_PLACPY #undef C_PTRAN #undef PRECISION_SCAL #undef PRECISION_TRTRI #undef PRECISION_POTRF #undef PRECISION_TRSM ... ... @@ -273,6 +277,7 @@ #define C_PLACPY pzlacpy_ #define C_PTRAN pztranc_ #define PRECISION_SCAL ZSCAL #define PRECISION_TRTRI ZTRTRI #define PRECISION_POTRF ZPOTRF #define PRECISION_TRSM ZTRSM ... ... @@ -337,6 +342,7 @@ #define C_PLACPY pclacpy_ #define C_PTRAN pctranc_ #define PRECISION_SCAL CSCAL #define PRECISION_TRTRI CTRTRI #define PRECISION_POTRF CPOTRF #define PRECISION_TRSM CTRSM ... ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!