Rename AVX kernel files ; seperate out SSE part

- The SSE part will be available in different files.
- Specify whether AVX or AVX2 was used to build
parent fd2b5206
......@@ -80,28 +80,28 @@ endif
endif
if WITH_REAL_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv.c
endif
if WITH_REAL_AVX_BLOCK4_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_4hv.c
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_4hv.c
endif
if WITH_REAL_AVX_BLOCK6_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_6hv.c
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_6hv.c
endif
if WITH_COMPLEX_AVX_BLOCK1_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.cpp
endif
if WITH_COMPLEX_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_sse-avx_2hv.cpp
libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_2hv.cpp
endif
#if WITH_AVX_SANDYBRIDGE
# libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_sse-avx_2hv.c \
# src/elpa2_kernels/elpa2_kernels_complex_sse-avx_1hv.cpp
# libelpa@SUFFIX@_la_SOURCES += src/elpa2_kernels/elpa2_kernels_real_avx-avx2_2hv.c \
# src/elpa2_kernels/elpa2_kernels_complex_avx-avx2_1hv.cpp
#endif
# install any .mod files in the include/ dir
......
......@@ -221,6 +221,7 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
[can_compile_avx=no]
)
AC_MSG_RESULT([${can_compile_avx}])
if test "${can_compile_avx}" = "yes" ; then
AC_MSG_CHECKING([whether we can compile AVX intrinsics in C++])
AC_LANG_PUSH([C++])
......@@ -278,7 +279,6 @@ if test "${can_compile_avx2}" = "yes" ; then
fi
fi
if test "${can_compile_avx}" = "yes" ; then
install_real_avx_block2=yes
install_real_avx_block4=yes
......@@ -299,6 +299,19 @@ else
want_avx=yes
fi
AM_CONDITIONAL([HAVE_SSE],[test x"$can_compile_sse" = x"yes"])
if test x"${can_compile_sse}" = x"yes" ; then
AC_DEFINE([HAVE_SSE],[1],[SSE is supported on this CPU])
fi
AM_CONDITIONAL([HAVE_AVX],[test x"$can_compile_avx" = x"yes"])
if test x"${can_compile_avx}" = x"yes" ; then
AC_DEFINE([HAVE_AVX],[1],[AVX is supported on this CPU])
fi
AM_CONDITIONAL([HAVE_AVX2],[test x"$can_compile_avx2" = x"yes"])
if test x"${can_compile_avx2}" = x"yes" ; then
AC_DEFINE([HAVE_AVX2],[1],[AVX2 is supported on this CPU])
fi
dnl set the AVX optimization flags if this option is specified
AC_MSG_CHECKING(whether AVX optimization flags should be set automatically)
AC_ARG_WITH([avx-optimization],
......@@ -464,7 +477,7 @@ AC_MSG_CHECKING([whether we can use the intrinsic Fortran function "get_environm
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
program test_get_environment
nn
character(len=256) :: homedir
call get_environment_variable("HOME",homedir)
end program
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -99,18 +99,18 @@ module compute_hh_trafo_complex
w(:,1) = bcast_buffer(1:nbw,j+off)
w(:,2) = bcast_buffer(1:nbw,j+off-1)
#ifdef WITH_OPENMP
call double_hh_trafo_complex_sse_avx_2hv(a(1,j+off+a_off-1,istripe,my_thread), &
call double_hh_trafo_complex_avx_avx2_2hv(a(1,j+off+a_off-1,istripe,my_thread), &
w, nbw, nl, stripe_width, nbw)
#else
call double_hh_trafo_complex_sse_avx_2hv(a(1,j+off+a_off-1,istripe), &
call double_hh_trafo_complex_avx_avx2_2hv(a(1,j+off+a_off-1,istripe), &
w, nbw, nl, stripe_width, nbw)
#endif
enddo
#ifdef WITH_OPENMP
if (j==1) call single_hh_trafo_complex_sse_avx_1hv(a(1,1+off+a_off,istripe,my_thread), &
if (j==1) call single_hh_trafo_complex_avx_avx2_1hv(a(1,1+off+a_off,istripe,my_thread), &
bcast_buffer(1,off+1), nbw, nl, stripe_width)
#else
if (j==1) call single_hh_trafo_complex_sse_avx_1hv(a(1,1+off+a_off,istripe), &
if (j==1) call single_hh_trafo_complex_avx_avx2_1hv(a(1,1+off+a_off,istripe), &
bcast_buffer(1,off+1), nbw, nl, stripe_width)
#endif
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
......@@ -206,11 +206,11 @@ module compute_hh_trafo_complex
!#if defined(WITH_AVX_SANDYBRIDGE)
! call single_hh_trafo_complex_sse_avx_1hv(a(1,j+off+a_off,istripe),bcast_buffer(1,j+off),nbw,nl,stripe_width)
! call single_hh_trafo_complex_avx_avx2_1hv(a(1,j+off+a_off,istripe),bcast_buffer(1,j+off),nbw,nl,stripe_width)
!#endif
!#if defined(WITH_AMD_BULLDOZER)
! call single_hh_trafo_complex_sse_avx_1hv(a(1,j+off+a_off,istripe),bcast_buffer(1,j+off),nbw,nl,stripe_width)
! call single_hh_trafo_complex_avx_avx2_1hv(a(1,j+off+a_off,istripe),bcast_buffer(1,j+off),nbw,nl,stripe_width)
!#endif
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)
......@@ -220,10 +220,10 @@ module compute_hh_trafo_complex
ttt = mpi_wtime()
do j = ncols, 1, -1
#ifdef WITH_OPENMP
call single_hh_trafo_complex_sse_avx_1hv(a(1,j+off+a_off,istripe,my_thread), &
call single_hh_trafo_complex_avx_avx2_1hv(a(1,j+off+a_off,istripe,my_thread), &
bcast_buffer(1,j+off),nbw,nl,stripe_width)
#else
call single_hh_trafo_complex_sse_avx_1hv(a(1,j+off+a_off,istripe), &
call single_hh_trafo_complex_avx_avx2_1hv(a(1,j+off+a_off,istripe), &
bcast_buffer(1,j+off),nbw,nl,stripe_width)
#endif
enddo
......
This diff is collapsed.
......@@ -107,6 +107,9 @@ program print_available_elpa2_kernels
#endif
print *
print *, " Available real kernels are: "
#ifdef HAVE_AVX2
print *, " AVX kernels are optimized for FMA (AVX2)"
#endif
call print_available_real_kernels()
print *
......@@ -121,6 +124,9 @@ program print_available_elpa2_kernels
#endif
print *
print *, " Available complex kernels are: "
#ifdef HAVE_AVX2
print *, " AVX kernels are optimized for FMA (AVX2)"
#endif
call print_available_complex_kernels()
end program print_available_elpa2_kernels
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment