...

Commits (4)
 ... ... @@ -81,21 +81,11 @@ real(kind=C_DATATYPE_KIND), intent(in) :: hh(1:ldh,1:6) #endif real(kind=C_DATATYPE_KIND) :: h_2_1, h_3_2, h_3_1, h_4_3, h_4_2, h_4_1 real(kind=C_DATATYPE_KIND) :: a_1_1(nq), a_2_1(nq), a_3_1(nq), a_4_1(nq) real(kind=C_DATATYPE_KIND) :: h1, h2, h3, h4 real(kind=C_DATATYPE_KIND) :: w(nq), z(nq), x(nq), y(nq) real(kind=C_DATATYPE_KIND) :: w_orig(nq), z_orig(nq), x_orig(nq), y_orig(nq) real(kind=C_DATATYPE_KIND) :: w_comb(nq, 4) real(kind=C_DATATYPE_KIND) :: h_comb(4) real(kind=C_DATATYPE_KIND) :: h_mat(4, nb+3) real(kind=C_DATATYPE_KIND) :: h4m(4, 4) real(kind=C_DATATYPE_KIND) :: s_mat(4, 4) real(kind=C_DATATYPE_KIND) :: tau1, tau2, tau3, tau4 integer(kind=ik) :: i integer(kind=ik) :: i, j, k ! Calculate dot product of the two Householder vectors ... ... @@ -112,48 +102,33 @@ h_mat(3,3:nb+1) = -hh(2:nb, 3) h_mat(4,2:nb) = -hh(2:nb, 4) ! TODO we actually need just the strictly upper triangle of s_mat ! TODO take care when changing to BLAS ! TODO we do not even need diagonal, which might not be achievable by blas. ! TODO lets see how much does it matter s_mat = matmul(h_mat, transpose(h_mat)) w_comb = matmul(q(1:ldq, 1:nb+3), -transpose(h_mat)) ! Rank-1 update tau1 = hh(1,1) tau2 = hh(1,2) tau3 = hh(1,3) tau4 = hh(1,4) ! x_orig = x ! y_orig = y ! z_orig = z ! w_orig = w h4m = 0.0 h4m(1,1) = tau1 w_comb(1:nq,1) = w_comb(1:nq,1) * h4m(1,1) h4m(2,1) = - tau2 * s_mat(1,2) h4m(2,2) = tau2 w_comb(1:nq,2) = w_comb(1:nq,1) * h4m(2,1) + w_comb(1:nq,2) * h4m(2,2) h4m(3,1) = - tau3 * s_mat(1,3) h4m(3,2) = - tau3 * s_mat(2,3) h4m(3,3) = tau3 w_comb(1:nq,3) = w_comb(1:nq,1) * h4m(3,1) + w_comb(1:nq,2) * h4m(3,2) + w_comb(1:nq,3) * h4m(3,3) h4m(4,1) = - tau4 * s_mat(1,4) h4m(4,2) = - tau4 * s_mat(2,4) h4m(4,3) = - tau4 * s_mat(3,4) h4m(4,4) = tau4 w_comb(1:nq,4) = w_comb(1:nq,1) * h4m(4,1) + w_comb(1:nq,2) * h4m(4,2) + w_comb(1:nq,3) * h4m(4,3) + w_comb(1:nq,4) * h4m(4,4) q(1:nq, 1:nb+3) = matmul(w_comb, h_mat) + q(1:nq, 1:nb+3) ! TODO we do not need the diagonal, but how to do it with BLAS? !s_mat = - matmul(h_mat, transpose(h_mat)) call DSYRK('L', 'N', 4, nb+3, & -1.0_rk8, h_mat, 4, & 0.0_rk8, s_mat, 4) s_mat(1,1) = 1 s_mat(2,2) = 1 s_mat(3,3) = 1 s_mat(4,4) = 1 ! w_comb = - matmul(q(1:nq, 1:nb+3), transpose(h_mat)) call DGEMM('N', 'T', nq, 4, nb+3, & -1.0_rk8, q, ldq, & h_mat, 4, & 0.0_rk8, w_comb, nq) ! Rank-1 update do i = 1, 4 w_comb(1:nq,i) = matmul(w_comb(1:nq,1:i), hh(1,i) * s_mat(i,1:i)) enddo !q(1:nq, 1:nb+3) = matmul(w_comb, h_mat) + q(1:nq, 1:nb+3) call DGEMM('N', 'N', nq, nb+3, 4, & 1.0_rk8, w_comb, nq, & h_mat, 4, & 1.0_rk8, q, ldq) end subroutine ... ...