From 04065afc1293cfd4de3489f5463ca5ec3f4d5631 Mon Sep 17 00:00:00 2001 From: Andreas Marek Date: Thu, 28 Jan 2016 14:38:59 +0100 Subject: [PATCH] Timing information for complex generic simple kernel --- Makefile.am | 2 +- ...e.f90 => elpa2_kernels_complex_simple.F90} | 25 ++++++++++++++++--- 2 files changed, 23 insertions(+), 4 deletions(-) rename src/elpa2_kernels/{elpa2_kernels_complex_simple.f90 => elpa2_kernels_complex_simple.F90} (88%) diff --git a/Makefile.am b/Makefile.am index b879d074..e20e5856 100644 --- a/Makefile.am +++ b/Makefile.am @@ -51,7 +51,7 @@ if WITH_REAL_GENERIC_SIMPLE_KERNEL endif if WITH_COMPLEX_GENERIC_SIMPLE_KERNEL - libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_simple.f90 + libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_simple.F90 endif if WITH_REAL_BGP_KERNEL diff --git a/src/elpa2_kernels/elpa2_kernels_complex_simple.f90 b/src/elpa2_kernels/elpa2_kernels_complex_simple.F90 similarity index 88% rename from src/elpa2_kernels/elpa2_kernels_complex_simple.f90 rename to src/elpa2_kernels/elpa2_kernels_complex_simple.F90 index d12a4e20..787a2ec5 100644 --- a/src/elpa2_kernels/elpa2_kernels_complex_simple.f90 +++ b/src/elpa2_kernels/elpa2_kernels_complex_simple.F90 @@ -48,13 +48,16 @@ ! compilers this performs better than a sophisticated version with transformed and unrolled loops. ! ! It should be compiled with the highest possible optimization level. -! +! ! Copyright of the original code rests with the authors inside the ELPA ! consortium. The copyright of any additional modifications shall rest ! with their original authors, but shall adhere to the licensing terms ! distributed along with the original code in the file "COPYING". ! ! -------------------------------------------------------------------------------------------------- + +#include "config-f90.h" + module complex_generic_simple_kernel private @@ -62,7 +65,9 @@ module complex_generic_simple_kernel contains subroutine single_hh_trafo_complex_generic_simple(q, hh, nb, nq, ldq) use precision - +#ifdef HAVE_DETAILED_TIMINGS + use timings +#endif implicit none integer(kind=ik), intent(in) :: nb, nq, ldq @@ -72,6 +77,9 @@ contains integer(kind=ik) :: i complex(kind=ck) :: h1, tau1, x(nq) +#ifdef HAVE_DETAILED_TIMINGS + call timer%start("kernel complex generic simple: single_hh_trafo_complex_generic_simple") +#endif ! Just one Householder transformation x(1:nq) = q(1:nq,1) @@ -88,12 +96,17 @@ contains do i=2,nb q(1:nq,i) = q(1:nq,i) + x(1:nq)*hh(i) enddo - +#ifdef HAVE_DETAILED_TIMINGS + call timer%stop("kernel complex generic simple: single_hh_trafo_complex_generic_simple") +#endif end subroutine single_hh_trafo_complex_generic_simple ! -------------------------------------------------------------------------------------------------- subroutine double_hh_trafo_complex_generic_simple(q, hh, nb, nq, ldq, ldh) use precision +#ifdef HAVE_DETAILED_TIMINGS + use timings +#endif implicit none integer(kind=ik), intent(in) :: nb, nq, ldq, ldh @@ -103,6 +116,9 @@ contains complex(kind=ck) :: s, h1, h2, tau1, tau2, x(nq), y(nq) integer(kind=ik) :: i +#ifdef HAVE_DETAILED_TIMINGS + call timer%start("kernel complex generic simple: double_hh_trafo_complex_generic_simple") +#endif ! Calculate dot product of the two Householder vectors s = conjg(hh(2,2))*1 @@ -145,6 +161,9 @@ contains q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1) +#ifdef HAVE_DETAILED_TIMINGS + call timer%stop("kernel complex generic simple: double_hh_trafo_complex_generic_simple") +#endif end subroutine double_hh_trafo_complex_generic_simple end module complex_generic_simple_kernel ! -------------------------------------------------------------------------------------------------- -- GitLab