blas_block4_template.F90 4.51 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! --------------------------------------------------------------------------------------------------
!
! This file contains the compute intensive kernels for the Householder transformations.
!
! This is the small and simple version (no hand unrolling of loops etc.) but for some
! compilers this performs better than a sophisticated version with transformed and unrolled loops.
!
! It should be compiled with the highest possible optimization level.
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
! --------------------------------------------------------------------------------------------------
#endif


#if REALCASE==1
  subroutine quad_hh_trafo_&
  &MATH_DATATYPE&
  &_generic_blas_4hv_&
  &PRECISION&
  & (q, hh, nb, nq, ldq, ldh)

    use precision
    use elpa_abstract_impl
    implicit none

    !class(elpa_abstract_impl_t), intent(inout) :: obj
    integer(kind=ik), intent(in)    :: nb, nq, ldq, ldh

#ifdef USE_ASSUMED_SIZE
    real(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*)
    real(kind=C_DATATYPE_KIND), intent(in)    :: hh(ldh,*)
#else
    real(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb+3)
    real(kind=C_DATATYPE_KIND), intent(in)    :: hh(1:ldh,1:6)
#endif

    real(kind=C_DATATYPE_KIND)                :: w_comb(nq, 4)
85
    real(kind=C_DATATYPE_KIND)                :: h_mat(4, nb+3)
86
    real(kind=C_DATATYPE_KIND)                :: s_mat(4, 4)
87

88
    integer(kind=ik)                             :: i, j, k
89 90 91 92


    ! Calculate dot product of the two Householder vectors

93 94 95 96 97 98 99 100 101 102 103 104
   h_mat(:,:) = 0.0

   h_mat(1,4) = -1.0
   h_mat(2,3) = -1.0
   h_mat(3,2) = -1.0
   h_mat(4,1) = -1.0

   h_mat(1,5:nb+3) = -hh(2:nb, 1)
   h_mat(2,4:nb+2) = -hh(2:nb, 2)
   h_mat(3,3:nb+1) = -hh(2:nb, 3)
   h_mat(4,2:nb)   = -hh(2:nb, 4)

105 106 107 108 109 110 111 112 113 114 115
   ! TODO we do not need the diagonal, but how to do it with BLAS?
   !s_mat = - matmul(h_mat, transpose(h_mat))
   call DSYRK('L', 'N', 4, nb+3, &
              -1.0_rk8, h_mat, 4, &
              0.0_rk8, s_mat, 4)

!   w_comb = - matmul(q(1:nq, 1:nb+3), transpose(h_mat))
   call DGEMM('N', 'T', nq, 4, nb+3, &
              -1.0_rk8, q, ldq, &
              h_mat, 4, &
              0.0_rk8, w_comb, nq)
116

117
   ! Rank-1 update
Pavel Kus's avatar
Pavel Kus committed
118
   do i = 1, 4
119
     w_comb(1:nq,i) = matmul(w_comb(1:nq,1:i-1), hh(1,i) * s_mat(i,1:i-1)) + hh(1,i) * w_comb(1:nq, i)
Pavel Kus's avatar
Pavel Kus committed
120
   enddo
121

122 123 124 125 126
   !q(1:nq, 1:nb+3) = matmul(w_comb, h_mat) + q(1:nq, 1:nb+3)
   call DGEMM('N', 'N', nq, nb+3, 4, &
              1.0_rk8, w_comb, nq, &
              h_mat, 4, &
              1.0_rk8, q, ldq)
127 128 129 130

  end subroutine

#endif