! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.rzg.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF
module compute_hh_trafo_complex
#include "config-f90.h"
use elpa_mpi
implicit none
#ifdef WITH_OPENMP
public compute_hh_trafo_complex_cpu_openmp_double
#else
public compute_hh_trafo_complex_cpu_double
#endif
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#ifdef WITH_OPENMP
public compute_hh_trafo_complex_cpu_openmp_single
#else
public compute_hh_trafo_complex_cpu_single
#endif
#endif
contains
#ifdef WITH_OPENMP
subroutine compute_hh_trafo_complex_cpu_openmp_double(a, stripe_width, a_dim2, stripe_count, max_threads, l_nev, &
a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, &
off, ncols, istripe, &
my_thread, thread_width, THIS_COMPLEX_ELPA_KERNEL)
#else
subroutine compute_hh_trafo_complex_cpu_double (a, stripe_width, a_dim2, stripe_count, &
a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, &
off, ncols, istripe, last_stripe_width, &
THIS_COMPLEX_ELPA_KERNEL)
#endif
use precision
use elpa2_utilities
#if defined(WITH_COMPLEX_GENERIC_SIMPLE_KERNEL)
use complex_generic_simple_kernel !, only : single_hh_trafo_complex_generic_simple
#endif
#if defined(WITH_COMPLEX_GENERIC_KERNEL)
use complex_generic_kernel !, only : single_hh_trafo_complex_generic
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use iso_c_binding
implicit none
real(kind=c_double), intent(inout) :: kernel_time ! MPI_WTIME always needs double
integer(kind=lik) :: kernel_flops
integer(kind=ik), intent(in) :: nbw, max_blk_size
complex(kind=ck8) :: bcast_buffer(nbw,max_blk_size)
integer(kind=ik), intent(in) :: a_off
integer(kind=ik), intent(in) :: stripe_width, a_dim2, stripe_count
#ifndef WITH_OPENMP
integer(kind=ik), intent(in) :: last_stripe_width
complex(kind=ck8) :: a(stripe_width,a_dim2,stripe_count)
#else
integer(kind=ik), intent(in) :: max_threads, l_nev, thread_width
complex(kind=ck8) :: a(stripe_width,a_dim2,stripe_count,max_threads)
#endif
integer(kind=ik), intent(in) :: THIS_COMPLEX_ELPA_KERNEL
! Private variables in OMP regions (my_thread) should better be in the argument list!
integer(kind=ik) :: off, ncols, istripe, j, nl, jj
#ifdef WITH_OPENMP
integer(kind=ik) :: my_thread, noff
#endif
real(kind=c_double) :: ttt ! MPI_WTIME always needs double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
complex(kind=ck8) :: w(nbw,2)
#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
call timer%stop("compute_hh_trafo_complex_cpu_openmp_double")
#else
call timer%stop("compute_hh_trafo_complex_cpu_double")
#endif
#endif
#ifdef WITH_OPENMP
if (istripe