Commit 04065afc authored by Andreas Marek's avatar Andreas Marek
Browse files

Timing information for complex generic simple kernel

parent 7a564731
...@@ -51,7 +51,7 @@ if WITH_REAL_GENERIC_SIMPLE_KERNEL ...@@ -51,7 +51,7 @@ if WITH_REAL_GENERIC_SIMPLE_KERNEL
endif endif
if WITH_COMPLEX_GENERIC_SIMPLE_KERNEL if WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_simple.f90 libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_simple.F90
endif endif
if WITH_REAL_BGP_KERNEL if WITH_REAL_BGP_KERNEL
......
...@@ -48,13 +48,16 @@ ...@@ -48,13 +48,16 @@
! compilers this performs better than a sophisticated version with transformed and unrolled loops. ! compilers this performs better than a sophisticated version with transformed and unrolled loops.
! !
! It should be compiled with the highest possible optimization level. ! It should be compiled with the highest possible optimization level.
! !
! Copyright of the original code rests with the authors inside the ELPA ! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest ! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms ! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING". ! distributed along with the original code in the file "COPYING".
! !
! -------------------------------------------------------------------------------------------------- ! --------------------------------------------------------------------------------------------------
#include "config-f90.h"
module complex_generic_simple_kernel module complex_generic_simple_kernel
private private
...@@ -62,7 +65,9 @@ module complex_generic_simple_kernel ...@@ -62,7 +65,9 @@ module complex_generic_simple_kernel
contains contains
subroutine single_hh_trafo_complex_generic_simple(q, hh, nb, nq, ldq) subroutine single_hh_trafo_complex_generic_simple(q, hh, nb, nq, ldq)
use precision use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
implicit none implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq integer(kind=ik), intent(in) :: nb, nq, ldq
...@@ -72,6 +77,9 @@ contains ...@@ -72,6 +77,9 @@ contains
integer(kind=ik) :: i integer(kind=ik) :: i
complex(kind=ck) :: h1, tau1, x(nq) complex(kind=ck) :: h1, tau1, x(nq)
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel complex generic simple: single_hh_trafo_complex_generic_simple")
#endif
! Just one Householder transformation ! Just one Householder transformation
x(1:nq) = q(1:nq,1) x(1:nq) = q(1:nq,1)
...@@ -88,12 +96,17 @@ contains ...@@ -88,12 +96,17 @@ contains
do i=2,nb do i=2,nb
q(1:nq,i) = q(1:nq,i) + x(1:nq)*hh(i) q(1:nq,i) = q(1:nq,i) + x(1:nq)*hh(i)
enddo enddo
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel complex generic simple: single_hh_trafo_complex_generic_simple")
#endif
end subroutine single_hh_trafo_complex_generic_simple end subroutine single_hh_trafo_complex_generic_simple
! -------------------------------------------------------------------------------------------------- ! --------------------------------------------------------------------------------------------------
subroutine double_hh_trafo_complex_generic_simple(q, hh, nb, nq, ldq, ldh) subroutine double_hh_trafo_complex_generic_simple(q, hh, nb, nq, ldq, ldh)
use precision use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
implicit none implicit none
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
...@@ -103,6 +116,9 @@ contains ...@@ -103,6 +116,9 @@ contains
complex(kind=ck) :: s, h1, h2, tau1, tau2, x(nq), y(nq) complex(kind=ck) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
integer(kind=ik) :: i integer(kind=ik) :: i
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("kernel complex generic simple: double_hh_trafo_complex_generic_simple")
#endif
! Calculate dot product of the two Householder vectors ! Calculate dot product of the two Householder vectors
s = conjg(hh(2,2))*1 s = conjg(hh(2,2))*1
...@@ -145,6 +161,9 @@ contains ...@@ -145,6 +161,9 @@ contains
q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1) q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1)
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("kernel complex generic simple: double_hh_trafo_complex_generic_simple")
#endif
end subroutine double_hh_trafo_complex_generic_simple end subroutine double_hh_trafo_complex_generic_simple
end module complex_generic_simple_kernel end module complex_generic_simple_kernel
! -------------------------------------------------------------------------------------------------- ! --------------------------------------------------------------------------------------------------
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment