module compute_hh_trafo_complex #include "config-f90.h" use elpa_mpi implicit none #ifdef WITH_OPENMP public compute_hh_trafo_complex_cpu_openmp #else public compute_hh_trafo_complex_cpu #endif contains #ifdef WITH_OPENMP subroutine compute_hh_trafo_complex_cpu_openmp(a, stripe_width, a_dim2, stripe_count, max_threads, l_nev, & a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, & off, ncols, istripe, & my_thread, thread_width, THIS_COMPLEX_ELPA_KERNEL) #else subroutine compute_hh_trafo_complex_cpu (a, stripe_width, a_dim2, stripe_count, & a_off, nbw, max_blk_size, bcast_buffer, kernel_flops, kernel_time, & off, ncols, istripe, last_stripe_width, & THIS_COMPLEX_ELPA_KERNEL) #endif use precision use elpa2_utilities #if defined(WITH_COMPLEX_GENERIC_SIMPLE_KERNEL) use complex_generic_simple_kernel, only : single_hh_trafo_complex_generic_simple #endif #if defined(WITH_COMPLEX_GENERIC_KERNEL) use complex_generic_kernel, only : single_hh_trafo_complex_generic #endif #ifdef HAVE_DETAILED_TIMINGS use timings #endif #if defined(HAVE_AVX) || defined(HAVE_SSE_INTRINSICS) || defined(HAVE_SSE_ASSEMBLY) use kernel_interfaces #endif implicit none real(kind=rk), intent(inout) :: kernel_time integer(kind=lik) :: kernel_flops integer(kind=ik), intent(in) :: nbw, max_blk_size complex(kind=ck) :: bcast_buffer(nbw,max_blk_size) integer(kind=ik), intent(in) :: a_off integer(kind=ik), intent(in) :: stripe_width, a_dim2, stripe_count #ifndef WITH_OPENMP integer(kind=ik), intent(in) :: last_stripe_width complex(kind=ck) :: a(stripe_width,a_dim2,stripe_count) #else integer(kind=ik), intent(in) :: max_threads, l_nev, thread_width complex(kind=ck) :: a(stripe_width,a_dim2,stripe_count,max_threads) #endif integer(kind=ik), intent(in) :: THIS_COMPLEX_ELPA_KERNEL ! Private variables in OMP regions (my_thread) should better be in the argument list! integer(kind=ik) :: off, ncols, istripe, j, nl, jj #ifdef WITH_OPENMP integer(kind=ik) :: my_thread, noff #endif real(kind=rk) :: ttt !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! Currently (on Sandy Bridge), single is faster than double !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! complex(kind=ck) :: w(nbw,2) #ifdef HAVE_DETAILED_TIMINGS #ifdef WITH_OPENMP call timer%stop("compute_hh_trafo_complex_cpu_openmp") #else call timer%stop("compute_hh_trafo_complex_cpu") #endif #endif #ifdef WITH_OPENMP if (istripe