Commit d042ae10 authored by Pavel Kus's avatar Pavel Kus

introducing single precision variant of the blas kernel

parent f7af9173
......@@ -69,6 +69,7 @@
use precision
use elpa_abstract_impl
implicit none
#include "../../general/precision_kinds.F90"
!class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
......@@ -90,12 +91,12 @@
! Calculate dot product of the two Householder vectors
h_mat(:,:) = 0.0
h_mat(:,:) = 0.0_rk
h_mat(1,4) = -1.0
h_mat(2,3) = -1.0
h_mat(3,2) = -1.0
h_mat(4,1) = -1.0
h_mat(1,4) = -1.0_rk
h_mat(2,3) = -1.0_rk
h_mat(3,2) = -1.0_rk
h_mat(4,1) = -1.0_rk
h_mat(1,5:nb+3) = -hh(2:nb, 1)
h_mat(2,4:nb+2) = -hh(2:nb, 2)
......@@ -104,32 +105,32 @@
! TODO we do not need the diagonal, but how to do it with BLAS?
!s_mat = - matmul(h_mat, transpose(h_mat))
call DSYRK('L', 'N', 4, nb+3, &
-1.0_rk8, h_mat, 4, &
0.0_rk8, s_mat, 4)
call PRECISION_SYRK('L', 'N', 4, nb+3, &
-ONE, h_mat, 4, &
ZERO, s_mat, 4)
! w_comb = - matmul(q(1:nq, 1:nb+3), transpose(h_mat))
call DGEMM('N', 'T', nq, 4, nb+3, &
-1.0_rk8, q, ldq, &
h_mat, 4, &
0.0_rk8, w_comb, nq)
call PRECISION_GEMM('N', 'T', nq, 4, nb+3, &
-ONE, q, ldq, &
h_mat, 4, &
ZERO, w_comb, nq)
! Rank-1 update
!w_comb(1:nq,1) = hh(1,1) * w_comb(1:nq, 1)
call DSCAL(nq, hh(1,1), w_comb(1:nq, 1), 1)
call PRECISION_SCAL(nq, hh(1,1), w_comb(1:nq, 1), 1)
do i = 2, 4
! w_comb(1:nq,i) = matmul(w_comb(1:nq,1:i-1), hh(1,i) * s_mat(i,1:i-1)) + hh(1,i) * w_comb(1:nq, i)
call DGEMV('N', nq, i-1, &
hh(1,i), w_comb(1:nq, 1:i-1), nq, &
s_mat(i,1:i-1), 1, &
hh(1,i), w_comb(1:nq,i), 1)
call PRECISION_GEMV('N', nq, i-1, &
hh(1,i), w_comb(1:nq, 1:i-1), nq, &
s_mat(i,1:i-1), 1, &
hh(1,i), w_comb(1:nq,i), 1)
enddo
!q(1:nq, 1:nb+3) = matmul(w_comb, h_mat) + q(1:nq, 1:nb+3)
call DGEMM('N', 'N', nq, nb+3, 4, &
1.0_rk8, w_comb, nq, &
h_mat, 4, &
1.0_rk8, q, ldq)
call PRECISION_GEMM('N', 'N', nq, nb+3, 4, &
ONE, w_comb, nq, &
h_mat, 4, &
ONE, q, ldq)
end subroutine
......
......@@ -16,6 +16,7 @@
#undef C_PLACPY
#undef C_PTRAN
#undef PRECISION_SCAL
#undef PRECISION_TRTRI
#undef PRECISION_POTRF
#undef PRECISION_TRSM
......@@ -75,6 +76,7 @@
#define BLAS_CHAR_AND_SY_OR_HE DSY
#define SPECIAL_COMPLEX_DATATYPE ck8
#define PRECISION_SCAL DSCAL
#define PRECISION_TRTRI DTRTRI
#define PRECISION_POTRF DPOTRF
#define PRECISION_TRSM DTRSM
......@@ -135,6 +137,7 @@
#define BLAS_CHAR_AND_SY_OR_HE SSY
#define SPECIAL_COMPLEX_DATATYPE ck4
#define PRECISION_SCAL SSCAL
#define PRECISION_TRTRI STRTRI
#define PRECISION_POTRF SPOTRF
#define PRECISION_TRSM STRSM
......@@ -202,6 +205,7 @@
#undef C_PLACPY
#undef C_PTRAN
#undef PRECISION_SCAL
#undef PRECISION_TRTRI
#undef PRECISION_POTRF
#undef PRECISION_TRSM
......@@ -273,6 +277,7 @@
#define C_PLACPY pzlacpy_
#define C_PTRAN pztranc_
#define PRECISION_SCAL ZSCAL
#define PRECISION_TRTRI ZTRTRI
#define PRECISION_POTRF ZPOTRF
#define PRECISION_TRSM ZTRSM
......@@ -337,6 +342,7 @@
#define C_PLACPY pclacpy_
#define C_PTRAN pctranc_
#define PRECISION_SCAL CSCAL
#define PRECISION_TRTRI CTRTRI
#define PRECISION_POTRF CPOTRF
#define PRECISION_TRSM CTRSM
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment