diff --git a/src/elpa2/kernels/blas_block4_template.F90 b/src/elpa2/kernels/blas_block4_template.F90 index 6fcce811dc80e178fe342f0aca4df27a909fac95..daac43cdf72bbda2ec750f0be668232d2cea5777 100644 --- a/src/elpa2/kernels/blas_block4_template.F90 +++ b/src/elpa2/kernels/blas_block4_template.F90 @@ -81,20 +81,10 @@ real(kind=C_DATATYPE_KIND), intent(in) :: hh(1:ldh,1:6) #endif - real(kind=C_DATATYPE_KIND) :: h_2_1, h_3_2, h_3_1, h_4_3, h_4_2, h_4_1 - real(kind=C_DATATYPE_KIND) :: a_1_1(nq), a_2_1(nq), a_3_1(nq), a_4_1(nq) - real(kind=C_DATATYPE_KIND) :: h1, h2, h3, h4 - real(kind=C_DATATYPE_KIND) :: w(nq), z(nq), x(nq), y(nq) - real(kind=C_DATATYPE_KIND) :: w_orig(nq), z_orig(nq), x_orig(nq), y_orig(nq) - real(kind=C_DATATYPE_KIND) :: w_comb(nq, 4) - real(kind=C_DATATYPE_KIND) :: h_comb(4) real(kind=C_DATATYPE_KIND) :: h_mat(4, nb+3) - real(kind=C_DATATYPE_KIND) :: h4m(4, 4) real(kind=C_DATATYPE_KIND) :: s_mat(4, 4) - real(kind=C_DATATYPE_KIND) :: tau1, tau2, tau3, tau4 - integer(kind=ik) :: i @@ -116,36 +106,19 @@ ! TODO take care when changing to BLAS ! TODO we do not even need diagonal, which might not be achievable by blas. ! TODO lets see how much does it matter - s_mat = matmul(h_mat, transpose(h_mat)) + s_mat = - matmul(h_mat, transpose(h_mat)) + s_mat(1,1) = 1 + s_mat(2,2) = 1 + s_mat(3,3) = 1 + s_mat(4,4) = 1 w_comb = matmul(q(1:ldq, 1:nb+3), -transpose(h_mat)) ! Rank-1 update - tau1 = hh(1,1) - tau2 = hh(1,2) - tau3 = hh(1,3) - tau4 = hh(1,4) - - h4m = 0.0 - - h4m(1,1) = tau1 - - h4m(2,1) = - tau2 * s_mat(1,2) - h4m(2,2) = tau2 - - h4m(3,1) = - tau3 * s_mat(1,3) - h4m(3,2) = - tau3 * s_mat(2,3) - h4m(3,3) = tau3 - - h4m(4,1) = - tau4 * s_mat(1,4) - h4m(4,2) = - tau4 * s_mat(2,4) - h4m(4,3) = - tau4 * s_mat(3,4) - h4m(4,4) = tau4 - - w_comb(1:nq,1) = w_comb(1:nq,1) * h4m(1,1) - w_comb(1:nq,2) = matmul(w_comb(1:nq,1:2),h4m(2,1:2)) - w_comb(1:nq,3) = matmul(w_comb(1:nq,1:3),h4m(3,1:3)) - w_comb(1:nq,4) = matmul(w_comb(1:nq,1:4),h4m(4,1:4)) + w_comb(1:nq,1) = w_comb(1:nq,1) * hh(1,1) * s_mat(1,1) + w_comb(1:nq,2) = matmul(w_comb(1:nq,1:2), hh(1,2) * s_mat(2,1:2)) + w_comb(1:nq,3) = matmul(w_comb(1:nq,1:3), hh(1,3) * s_mat(3,1:3)) + w_comb(1:nq,4) = matmul(w_comb(1:nq,1:4), hh(1,4) * s_mat(4,1:4)) q(1:nq, 1:nb+3) = matmul(w_comb, h_mat) + q(1:nq, 1:nb+3)