! This file is part of ELPA. ! ! The ELPA library was originally created by the ELPA consortium, ! consisting of the following organizations: ! ! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ! Informatik, ! - Technische Universität München, Lehrstuhl für Informatik mit ! Schwerpunkt Wissenschaftliches Rechnen, ! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ! - Max-Planck-Institut für Mathematik in den Naturwissenschaften, ! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition, ! and ! - IBM Deutschland GmbH ! ! ! More information can be found here: ! http://elpa.rzg.mpg.de/ ! ! ELPA is free software: you can redistribute it and/or modify ! it under the terms of the version 3 of the license of the ! GNU Lesser General Public License as published by the Free ! Software Foundation. ! ! ELPA is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU Lesser General Public License for more details. ! ! You should have received a copy of the GNU Lesser General Public License ! along with ELPA. If not, see http://www.gnu.org/licenses/ ! ! ELPA reflects a substantial effort on the part of the original ! ELPA consortium, and we ask you to respect the spirit of the ! license that we chose: i.e., please contribute any changes you ! may have back to the original ELPA library distribution, and keep ! any derivatives of ELPA under the same license that we chose for ! the original distribution, the GNU Lesser General Public License. ! ! This file was written by A. 
Marek, MPCDF module compute_hh_trafo_complex #include "config-f90.h" use elpa_mpi implicit none #ifdef WITH_OPENMP public compute_hh_trafo_complex_cpu_openmp_double #else public compute_hh_trafo_complex_cpu_double #endif #ifdef WANT_SINGLE_PRECISION_COMPLEX #ifdef WITH_OPENMP public compute_hh_trafo_complex_cpu_openmp_single #else public compute_hh_trafo_complex_cpu_single #endif #endif contains #ifdef WITH_OPENMP subroutine compute_hh_trafo_complex_cpu_openmp_double(a, stripe_width, a_dim2, stripe_count, max_threads, l_nev, & a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, & off, ncols, istripe, & my_thread, thread_width, THIS_COMPLEX_ELPA_KERNEL) #else subroutine compute_hh_trafo_complex_cpu_double (a, stripe_width, a_dim2, stripe_count, & a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, & off, ncols, istripe, last_stripe_width, & THIS_COMPLEX_ELPA_KERNEL) #endif use precision use elpa2_utilities #if defined(WITH_COMPLEX_GENERIC_SIMPLE_KERNEL) use complex_generic_simple_kernel !, only : single_hh_trafo_complex_generic_simple #endif #if defined(WITH_COMPLEX_GENERIC_KERNEL) use complex_generic_kernel !, only : single_hh_trafo_complex_generic #endif #ifdef HAVE_DETAILED_TIMINGS use timings #endif use iso_c_binding implicit none real(kind=c_double), intent(inout) :: kernel_time ! MPI_WTIME always needs double integer(kind=lik) :: kernel_flops integer(kind=ik), intent(in) :: nbw, max_blk_size complex(kind=ck8) :: bcast_buffer(nbw,max_blk_size) integer(kind=ik), intent(in) :: a_off integer(kind=ik), intent(in) :: stripe_width, a_dim2, stripe_count #ifndef WITH_OPENMP integer(kind=ik), intent(in) :: last_stripe_width complex(kind=ck8) :: a(stripe_width,a_dim2,stripe_count) #else integer(kind=ik), intent(in) :: max_threads, l_nev, thread_width complex(kind=ck8) :: a(stripe_width,a_dim2,stripe_count,max_threads) #endif integer(kind=ik), intent(in) :: THIS_COMPLEX_ELPA_KERNEL ! 
Private variables in OMP regions (my_thread) should better be in the argument list! integer(kind=ik) :: off, ncols, istripe, j, nl, jj #ifdef WITH_OPENMP integer(kind=ik) :: my_thread, noff #endif real(kind=c_double) :: ttt ! MPI_WTIME always needs double !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Currently (on Sandy Bridge), single is faster than double !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! complex(kind=ck8) :: w(nbw,2) #ifdef HAVE_DETAILED_TIMINGS #ifdef WITH_OPENMP call timer%stop("compute_hh_trafo_complex_cpu_openmp_double") #else call timer%stop("compute_hh_trafo_complex_cpu_double") #endif #endif #ifdef WITH_OPENMP if (istripe