Commit e531f75e authored by Andreas Marek's avatar Andreas Marek

Merge branch 'master' into ELPA_GPU

parents 09d59a44 65c8e0f3
......@@ -2021,7 +2021,6 @@ gfortran-single-precision-mpi-openmp-ftimings-redirect-real-avx_block4-complex-a
- make check TEST_FLAGS='1500 50 16'
# - ./test_scripts/get_coverage_summary.sh
#real avx block4, complex avx block2
intel-single-precision-mpi-noopenmp-ftimings-redirect-real-avx_block4-complex-avx-block2-kernel-jobs:
tags:
......@@ -2043,7 +2042,7 @@ gfortran-single-precision-mpi-noopenmp-ftimings-redirect-real-avx_block4-complex
- export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
- make check TEST_FLAGS='1500 50 16'
gfortran-double-precision-mpi-noopenmp-ftimings-redirect-real-avx_block4-complex-avx_block2-kernel-assumed-size-jobs:
gfortran-double-precision-mpi-noopenmp-ftimings-redirect-real-avx-block4-complex-avx_block2-kernel-assumed-size-jobs:
tags:
- cpu
script:
......
......@@ -123,9 +123,9 @@ fi
dnl check which MPI binary launches an MPI job
if test x"$with_mpi" = x"yes"; then
AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun], [no])
AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun poe runjob srun], [no])
if test x"$MPI_BINARY" = x"no"; then
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun])
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun])
fi
fi
......@@ -286,19 +286,32 @@ fi
dnl check whether one can compile with avx - gcc intrinsics
dnl first pass: try with specified CFLAGS and CXXFLAGS
AC_MSG_CHECKING([whether we can compile AVX intrinsics in C])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m256d a1_1 = _mm256_load_pd(q);
return 0;
}
])],
[can_compile_avx=yes],
[can_compile_avx=no]
)
AC_MSG_RESULT([${can_compile_avx}])
dnl Decide whether the AVX compile test should run at all.
dnl AC_ARG_ENABLE executes its action-if-given branch for --enable-avx as
dnl well as for --disable-avx, so the choice has to be read from enableval.
dnl Hard-coding "no" there would make an explicit --enable-avx switch the
dnl AVX check off. Only an explicit "no" disables the check; the default
dnl when the option is absent stays "yes".
AC_MSG_CHECKING(whether --enable-avx is specified)
AC_ARG_ENABLE([avx],
  AS_HELP_STRING([--enable-avx],
    [check whether AVX kernels can be build, default yes]),
  [if test x"$enableval" = x"no"; then
     check_avx=no
   else
     check_avx=yes
   fi],
  [check_avx=yes])
AC_MSG_RESULT([$check_avx])

dnl can_compile_avx is always set afterwards: to the outcome of the
dnl compile test when the check is requested, and to "no" otherwise.
if test "${check_avx}" = "yes"; then
  AC_MSG_CHECKING([whether we can compile AVX intrinsics in C])
  dnl Compile-only probe: the program is never run, so the uninitialized
  dnl pointer is harmless here.
  AC_COMPILE_IFELSE([AC_LANG_SOURCE([
 #include <x86intrin.h>
 int main(int argc, char **argv){
 double* q;
 __m256d a1_1 = _mm256_load_pd(q);
 return 0;
 }
 ])],
    [can_compile_avx=yes],
    [can_compile_avx=no]
  )
  AC_MSG_RESULT([${can_compile_avx}])
else
  can_compile_avx=no
fi
dnl if test "${can_compile_avx}" = "yes" ; then
dnl AC_MSG_CHECKING([whether we can compile AVX intrinsics in C++])
......@@ -321,20 +334,32 @@ dnl AC_MSG_WARN([Cannot compile C++ with AVX: disabling AVX alltogether])
dnl fi
dnl fi
AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
#include <x86intrin.h>
int main(int argc, char **argv){
double* q;
__m256d q1 = _mm256_load_pd(q);
__m256d y1 = _mm256_fmadd_pd(q1, q1, q1);
return 0;
}
])],
[can_compile_avx2=yes],
[can_compile_avx2=no]
)
AC_MSG_RESULT([${can_compile_avx2}])
dnl Decide whether the AVX2 compile test should run at all.
dnl AC_ARG_ENABLE executes its action-if-given branch for --enable-avx2 as
dnl well as for --disable-avx2, so the choice has to be read from enableval.
dnl Hard-coding "no" there would make an explicit --enable-avx2 switch the
dnl AVX2 check off. Only an explicit "no" disables the check; the default
dnl when the option is absent stays "yes".
AC_MSG_CHECKING(whether --enable-avx2 is specified)
AC_ARG_ENABLE([avx2],
  AS_HELP_STRING([--enable-avx2],
    [check whether AVX2 kernels can be build, default yes]),
  [if test x"$enableval" = x"no"; then
     check_avx2=no
   else
     check_avx2=yes
   fi],
  [check_avx2=yes])
AC_MSG_RESULT([$check_avx2])

dnl can_compile_avx2 is always set afterwards: to the outcome of the
dnl compile test when the check is requested, and to "no" otherwise.
if test "${check_avx2}" = "yes"; then
  AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C])
  dnl Compile-only probe: the fused multiply-add intrinsic is what
  dnl distinguishes this test from the plain AVX one. The program is never
  dnl run, so the uninitialized pointer is harmless here.
  AC_COMPILE_IFELSE([AC_LANG_SOURCE([
 #include <x86intrin.h>
 int main(int argc, char **argv){
 double* q;
 __m256d q1 = _mm256_load_pd(q);
 __m256d y1 = _mm256_fmadd_pd(q1, q1, q1);
 return 0;
 }
 ])],
    [can_compile_avx2=yes],
    [can_compile_avx2=no]
  )
  AC_MSG_RESULT([${can_compile_avx2}])
else
  can_compile_avx2=no
fi
dnl if test "${can_compile_avx2}" = "yes" ; then
dnl AC_MSG_CHECKING([whether we can compile AVX2 intrinsics in C++])
......
......@@ -353,9 +353,11 @@ function solve_evp_real_1stage_double(na, nev, a, lda, ev, q, ldq, nblk, &
implicit none
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
real(kind=REAL_DATATYPE) :: a(lda,*), q(ldq,*)
#else
real(kind=REAL_DATATYPE) :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)
! was
! real a(lda,*), q(ldq,*)
#endif
integer(kind=ik) :: my_prow, my_pcol, mpierr
real(kind=REAL_DATATYPE), allocatable :: e(:), tau(:)
......@@ -481,9 +483,11 @@ function solve_evp_real_1stage_single(na, nev, a, lda, ev, q, ldq, nblk, matrixC
implicit none
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
real(kind=REAL_DATATYPE) :: a(lda,*), q(ldq,*)
#else
real(kind=REAL_DATATYPE) :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)
! was
! real a(lda,*), q(ldq,*)
#endif
integer(kind=ik) :: my_prow, my_pcol, mpierr
real(kind=REAL_DATATYPE), allocatable :: e(:), tau(:)
......@@ -608,9 +612,11 @@ function solve_evp_complex_1stage_double(na, nev, a, lda, ev, q, ldq, nblk, matr
implicit none
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: a(lda,*), q(ldq,*)
#else
complex(kind=COMPLEX_DATATYPE) :: a(lda,matrixCols), q(ldq,matrixCols)
! was
! complex a(lda,*), q(ldq,*)
#endif
real(kind=REAL_DATATYPE) :: ev(na)
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
......@@ -751,9 +757,11 @@ function solve_evp_complex_1stage_single(na, nev, a, lda, ev, q, ldq, nblk, matr
implicit none
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: a(lda,*), q(ldq,*)
#else
complex(kind=COMPLEX_DATATYPE) :: a(lda,matrixCols), q(ldq,matrixCols)
! was
! complex a(lda,*), q(ldq,*)
#endif
real(kind=REAL_DATATYPE) :: ev(na)
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
......
......@@ -295,10 +295,11 @@ module elpa1_auxiliary
implicit none
integer(kind=ik) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
real(kind=REAL_DATATYPE) :: a(lda,*)
#else
real(kind=REAL_DATATYPE) :: a(lda,matrixCols)
! was
! real a(lda, *)
#endif
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
integer(kind=ik) :: n, nc, i, info
......@@ -2722,8 +2723,10 @@ module elpa1_auxiliary
!> \param ncb Number of columns of B and C
!> \param a matrix a
!> \param lda leading dimension of matrix a
!> \param ldaCols columns of matrix a
!> \param b matrix b
!> \param ldb leading dimension of matrix b
!> \param ldbCols columns of matrix b
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
......@@ -2745,8 +2748,11 @@ module elpa1_auxiliary
character*1 :: uplo_a, uplo_c
integer(kind=ik), intent(in) :: lda, ldaCols, ldb, ldbCols, ldc, ldcCols
integer(kind=ik) :: na, ncb, nblk, mpi_comm_rows, mpi_comm_cols
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: a(lda,*), b(ldb,*), c(ldc,*)
#else
complex(kind=COMPLEX_DATATYPE) :: a(lda,ldaCols), b(ldb,ldbCols), c(ldc,ldcCols) ! removed assumed_size
#endif
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: l_cols, l_rows, l_rows_np
integer(kind=ik) :: np, n, nb, nblk_mult, lrs, lre, lcs, lce
......@@ -3036,8 +3042,10 @@ module elpa1_auxiliary
!> \param ncb Number of columns of B and C
!> \param a matrix a
!> \param lda leading dimension of matrix a
!> \param ldaCols columns of matrix a
!> \param b matrix b
!> \param ldb leading dimension of matrix b
!> \param ldbCols columns of matrix b
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
......@@ -3059,7 +3067,11 @@ module elpa1_auxiliary
character*1 :: uplo_a, uplo_c
integer(kind=ik), intent(in) :: lda, ldaCols, ldb, ldbCols, ldc, ldcCols
integer(kind=ik) :: na, ncb, nblk, mpi_comm_rows, mpi_comm_cols
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=COMPLEX_DATATYPE) :: a(lda,*), b(ldb,*), c(ldc,*)
#else
complex(kind=COMPLEX_DATATYPE) :: a(lda,ldaCols), b(ldb,ldbCols), c(ldc,ldcCols) ! removed assumed_size
#endif
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: l_cols, l_rows, l_rows_np
......
......@@ -175,10 +175,13 @@ contains
integer(kind=ik), intent(in) :: na, nev, lda, ldq, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all
integer(kind=ik), intent(in) :: nblk
real(kind=rk8), intent(inout) :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)
! was
! real a(lda,*), q(ldq,*)
real(kind=rk8), allocatable :: hh_trans_real(:,:)
real(kind=rk8), intent(inout) :: ev(na)
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
real(kind=rk8), intent(inout) :: a(lda,*), q(ldq,*)
#else
real(kind=rk8), intent(inout) :: a(lda,matrixCols), q(ldq,matrixCols)
#endif
real(kind=rk8), allocatable :: hh_trans_real(:,:)
integer(kind=ik) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: nbw, num_blocks
......@@ -529,10 +532,14 @@ contains
integer(kind=ik), intent(in) :: na, nev, lda, ldq, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all
integer(kind=ik), intent(in) :: nblk
real(kind=rk4), intent(inout) :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)
! was
! real a(lda,*), q(ldq,*)
real(kind=rk4), allocatable :: hh_trans_real(:,:)
real(kind=rk4), intent(inout) :: ev(na)
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
real(kind=rk4), intent(inout) :: a(lda,*), q(ldq,*)
#else
real(kind=rk4), intent(inout) :: a(lda,matrixCols), q(ldq,matrixCols)
#endif
real(kind=rk4), allocatable :: hh_trans_real(:,:)
integer(kind=ik) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: nbw, num_blocks
......@@ -871,11 +878,13 @@ function solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
integer(kind=ik), intent(in), optional :: THIS_COMPLEX_ELPA_KERNEL_API
integer(kind=ik) :: THIS_COMPLEX_ELPA_KERNEL
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all
complex(kind=ck8), intent(inout) :: a(lda,matrixCols), q(ldq,matrixCols)
! was
! complex a(lda,*), q(ldq,*)
real(kind=rk8), intent(inout) :: ev(na)
complex(kind=ck8), allocatable :: hh_trans_complex(:,:)
real(kind=rk8), intent(inout) :: ev(na)
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck8), intent(inout) :: a(lda,*), q(ldq,*)
#else
complex(kind=ck8), intent(inout) :: a(lda,matrixCols), q(ldq,matrixCols)
#endif
complex(kind=ck8), allocatable :: hh_trans_complex(:,:)
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr, my_pe, n_pes
integer(kind=ik) :: l_cols, l_rows, l_cols_nev, nbw, num_blocks
......@@ -1190,9 +1199,11 @@ function solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
integer(kind=ik), intent(in), optional :: THIS_COMPLEX_ELPA_KERNEL_API
integer(kind=ik) :: THIS_COMPLEX_ELPA_KERNEL
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all
#ifdef DESPERATELY_WANT_ASSUMED_SIZE
complex(kind=ck4), intent(inout) :: a(lda,*), q(ldq,*)
#else
complex(kind=ck4), intent(inout) :: a(lda,matrixCols), q(ldq,matrixCols)
! was
! complex a(lda,*), q(ldq,*)
#endif
real(kind=rk4), intent(inout) :: ev(na)
complex(kind=ck4), allocatable :: hh_trans_complex(:,:)
......
......@@ -113,10 +113,12 @@ void single_hh_trafo_complex_avx_avx2_1hv_double(double complex* q, double compl
{
hh_trafo_complex_kernel_12_AVX_1hv_double(&q[i], hh, nb, ldq);
}
if (nq-i == 0)
if (nq == i)
{
return;
}
if (nq-i == 8)
{
hh_trafo_complex_kernel_8_AVX_1hv_double(&q[i], hh, nb, ldq);
......
......@@ -121,7 +121,9 @@ void double_hh_trafo_complex_avx_avx2_2hv_double(double complex* q, double compl
{
hh_trafo_complex_kernel_8_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
if (nq-i > 0)
if (nq-i == 0) {
return;
} else
{
hh_trafo_complex_kernel_4_AVX_2hv_double(&q[i], hh, nb, ldq, ldh, s);
}
......@@ -444,7 +446,7 @@ static __forceinline void hh_trafo_complex_kernel_8_AVX_2hv_double(double comple
q1 = _mm256_add_pd(q1, _mm256_addsub_pd( _mm256_mul_pd(h2_real, y1), _mm256_shuffle_pd(tmp1, tmp1, 0x5)));
#endif
tmp2 = _mm256_mul_pd(h2_imag, y2);
#ifdef __FMA4_
#ifdef __ELPA_USE_FMA_
q2 = _mm256_add_pd(q2, _mm256_FMADDSUB_pd(h2_real, y2, _mm256_shuffle_pd(tmp2, tmp2, 0x5)));
#else
q2 = _mm256_add_pd(q2, _mm256_addsub_pd( _mm256_mul_pd(h2_real, y2), _mm256_shuffle_pd(tmp2, tmp2, 0x5)));
......
......@@ -103,13 +103,18 @@ void single_hh_trafo_complex_sse_1hv_double(double complex* q, double complex* h
{
hh_trafo_complex_kernel_6_SSE_1hv_double(&q[i], hh, nb, ldq);
}
if (nq-i == 0) {
return;
} else {
if (nq-i > 2)
{
hh_trafo_complex_kernel_4_SSE_1hv_double(&q[i], hh, nb, ldq);
}
else if (nq-i > 0)
else
{
hh_trafo_complex_kernel_2_SSE_1hv_double(&q[i], hh, nb, ldq);
}
}
}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment