Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
d4c65f38
Commit
d4c65f38
authored
Apr 25, 2016
by
Andreas Marek
Browse files
Merge branch 'master' of gitlab.mpcdf.mpg.de:elpa/elpa
parents
cd033b34
281e2be8
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
d4c65f38
...
...
@@ -105,7 +105,7 @@ mpi-openmp-ftimings-redirect-real-generic-complex-generic-kernel-jobs:
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-generic-kernel-only --with-complex-generic-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
@@ -116,7 +116,7 @@ mpi-openmp-ftimings-redirect-real-generic-simple-complex-generic-simple-kernel-j
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-generic-kernel-only --with-complex-generic-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
@@ -127,7 +127,7 @@ mpi-openmp-ftimings-redirect-real-sse_assembly-complex-sse_assembly-kernel-jobs:
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-generic-kernel-only --with-complex-generic-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
@@ -138,7 +138,7 @@ mpi-openmp-ftimings-redirect-real-sse_block2-complex-sse_block1-kernel-jobs:
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-sse_block2-kernel-only --with-complex-sse_block1-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
@@ -149,7 +149,7 @@ mpi-openmp-ftimings-redirect-real-sse_block4-complex-sse_block2-kernel-jobs:
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-sse_block4-kernel-only --with-complex-sse_block2-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
@@ -160,7 +160,7 @@ mpi-openmp-ftimings-redirect-real-sse_block6-complex-avx_block1-kernel-jobs:
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-sse_block6-kernel-only --with-complex-avx_block1-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
@@ -171,7 +171,7 @@ mpi-openmp-ftimings-redirect-real-avx_block2-complex-avx_block2-kernel-jobs:
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-avx_block2-kernel-only --with-complex-avx_block2-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
@@ -182,7 +182,7 @@ mpi-openmp-ftimings-redirect-real-avx_block4-jobs:
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-avx_block4-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
@@ -193,7 +193,7 @@ mpi-openmp-ftimings-redirect-real-avx_block6-jobs:
-
./autogen.sh
-
./configure CFLAGS="-O3 -mavx" CXXFLAGS="-O3 -mavx" FCFLAGS="-O3 -mavx" SCALAPACK_LDFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -Wl,-rpath,$MKL_HOME/lib/intel64" SCALAPACK_FCFLAGS="-L/afs/@cell/common/soft/intel/ics2015/15.0/mkl/lib/intel64 -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lmkl_blacs_intelmpi_lp64 -lpthread -lm -I$MKLROOT/include/intel64/lp64" --enable-openmp --with-ftimings --with-redirect --with-real-avx_block6-kernel-only
-
make -j
8
-
export OMP_NU
J
M_THREADS=2
-
export OMP_NUM_THREADS=2
-
export LD_LIBRARY_PATH=$MKL_HOME/lib/intel64:$LD_LIBRARY_PATH
-
make check TEST_FLAGS='1500 50 16'
...
...
Makefile.am
View file @
d4c65f38
...
...
@@ -256,80 +256,81 @@ elpa2_print_kernels@SUFFIX@_SOURCES = src/print_available_elpa2_kernels.F90 $(sh
elpa2_print_kernels@SUFFIX@
_LDADD
=
$(build_lib)
check_SCRIPTS
=
\
elpa1_test_real.sh
\
elpa1_test_real_with_c.sh
\
elpa2_test_real.sh
\
elpa2_test_real_default_kernel.sh
\
elpa1_test_complex.sh
\
elpa2_test_complex.sh
\
elpa2_test_complex_default_kernel.sh
\
elpa2_test_real_default_kernel_qr_decomposition.sh
\
elpa2_test_real_choose_kernel_with_api.sh
\
elpa2_test_complex_choose_kernel_with_api.sh
\
elpa1_test_real
@SUFFIX@
.sh
\
elpa1_test_real_with_c
@SUFFIX@
.sh
\
elpa2_test_real
@SUFFIX@
.sh
\
elpa2_test_real_default_kernel
@SUFFIX@
.sh
\
elpa1_test_complex
@SUFFIX@
.sh
\
elpa2_test_complex
@SUFFIX@
.sh
\
elpa2_test_complex_default_kernel
@SUFFIX@
.sh
\
elpa2_test_real_default_kernel_qr_decomposition
@SUFFIX@
.sh
\
elpa2_test_real_choose_kernel_with_api
@SUFFIX@
.sh
\
elpa2_test_complex_choose_kernel_with_api
@SUFFIX@
.sh
\
elpa2_print_kernels@SUFFIX@
if
!WITH_OPENMP
check_SCRIPTS
+=
\
elpa1_test_real_c_version.sh
\
elpa1_test_complex_c_version.sh
\
elpa2_test_real_c_version.sh
\
elpa2_test_complex_c_version.sh
elpa1_test_real_c_version
@SUFFIX@
.sh
\
elpa1_test_complex_c_version
@SUFFIX@
.sh
\
elpa2_test_real_c_version
@SUFFIX@
.sh
\
elpa2_test_complex_c_version
@SUFFIX@
.sh
endif
TESTS
=
$(check_SCRIPTS)
elpa1_test_real.sh
:
echo
'mpiexec -n 2 ./elpa1_test_real@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_real.sh
chmod
+x elpa1_test_real.sh
elpa1_test_real
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa1_test_real@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_real
@SUFFIX@
.sh
chmod
+x elpa1_test_real
@SUFFIX@
.sh
elpa1_test_real_with_c.sh
:
echo
'mpiexec -n 2 ./elpa1_test_real_with_c@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_real_with_c.sh
chmod
+x elpa1_test_real_with_c.sh
elpa1_test_real_with_c
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa1_test_real_with_c@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_real_with_c
@SUFFIX@
.sh
chmod
+x elpa1_test_real_with_c
@SUFFIX@
.sh
elpa2_test_real_c_version.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real_c_version@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_real_c_version.sh
chmod
+x elpa2_test_real_c_version.sh
elpa2_test_real_c_version
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real_c_version@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_real_c_version
@SUFFIX@
.sh
chmod
+x elpa2_test_real_c_version
@SUFFIX@
.sh
elpa2_test_complex_c_version.sh
:
echo
'mpiexec -n 2 ./elpa2_test_complex_c_version@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_complex_c_version.sh
chmod
+x elpa2_test_complex_c_version.sh
elpa2_test_complex_c_version
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_complex_c_version@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_complex_c_version
@SUFFIX@
.sh
chmod
+x elpa2_test_complex_c_version
@SUFFIX@
.sh
elpa1_test_real_c_version.sh
:
echo
'mpiexec -n 2 ./elpa1_test_real_c_version@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_real_c_version.sh
chmod
+x elpa1_test_real_c_version.sh
elpa1_test_real_c_version
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa1_test_real_c_version@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_real_c_version
@SUFFIX@
.sh
chmod
+x elpa1_test_real_c_version
@SUFFIX@
.sh
elpa1_test_complex_c_version.sh
:
echo
'mpiexec -n 2 ./elpa1_test_complex_c_version@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_complex_c_version.sh
chmod
+x elpa1_test_complex_c_version.sh
elpa2_test_real.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_real.sh
chmod
+x elpa2_test_real.sh
elpa1_test_complex_c_version
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa1_test_complex_c_version@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_complex_c_version
@SUFFIX@
.sh
chmod
+x elpa1_test_complex_c_version
@SUFFIX@
.sh
elpa2_test_real
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_real
@SUFFIX@
.sh
chmod
+x elpa2_test_real
@SUFFIX@
.sh
elpa2_test_real_default_kernel.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real_default_kernel@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_real_default_kernel.sh
chmod
+x elpa2_test_real_default_kernel.sh
elpa2_test_real_default_kernel
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real_default_kernel@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_real_default_kernel
@SUFFIX@
.sh
chmod
+x elpa2_test_real_default_kernel
@SUFFIX@
.sh
elpa2_test_real_default_kernel_qr_decomposition.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@'
>
elpa2_test_real_default_kernel_qr_decomposition.sh
chmod
+x elpa2_test_real_default_kernel_qr_decomposition.sh
elpa2_test_real_default_kernel_qr_decomposition
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@'
>
elpa2_test_real_default_kernel_qr_decomposition
@SUFFIX@
.sh
chmod
+x elpa2_test_real_default_kernel_qr_decomposition
@SUFFIX@
.sh
elpa2_test_real_choose_kernel_with_api.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_real_choose_kernel_with_api.sh
chmod
+x elpa2_test_real_choose_kernel_with_api.sh
elpa2_test_real_choose_kernel_with_api
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_real_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_real_choose_kernel_with_api
@SUFFIX@
.sh
chmod
+x elpa2_test_real_choose_kernel_with_api
@SUFFIX@
.sh
elpa1_test_complex.sh
:
echo
'mpiexec -n 2 ./elpa1_test_complex@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_complex.sh
chmod
+x elpa1_test_complex.sh
elpa1_test_complex
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa1_test_complex@SUFFIX@ $$TEST_FLAGS'
>
elpa1_test_complex
@SUFFIX@
.sh
chmod
+x elpa1_test_complex
@SUFFIX@
.sh
elpa2_test_complex.sh
:
echo
'mpiexec -n 2 ./elpa2_test_complex@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_complex.sh
chmod
+x elpa2_test_complex.sh
elpa2_test_complex
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_complex@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_complex
@SUFFIX@
.sh
chmod
+x elpa2_test_complex
@SUFFIX@
.sh
elpa2_test_complex_default_kernel.sh
:
echo
'mpiexec -n 2 ./elpa2_test_complex_default_kernel@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_complex_default_kernel.sh
chmod
+x elpa2_test_complex_default_kernel.sh
elpa2_test_complex_default_kernel
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_complex_default_kernel@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_complex_default_kernel
@SUFFIX@
.sh
chmod
+x elpa2_test_complex_default_kernel
@SUFFIX@
.sh
elpa2_test_complex_choose_kernel_with_api.sh
:
echo
'mpiexec -n 2 ./elpa2_test_complex_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_complex_choose_kernel_with_api.sh
chmod
+x elpa2_test_complex_choose_kernel_with_api.sh
elpa2_test_complex_choose_kernel_with_api
@SUFFIX@
.sh
:
echo
'mpiexec -n 2 ./elpa2_test_complex_choose_kernel_with_api@SUFFIX@ $$TEST_FLAGS'
>
elpa2_test_complex_choose_kernel_with_api
@SUFFIX@
.sh
chmod
+x elpa2_test_complex_choose_kernel_with_api
@SUFFIX@
.sh
elpa2_utilities.i
:
$(top_srcdir)/src/elpa2_utilities.F90
$(CPP)
$(CPPFLAGS)
-I
$(top_builddir)
/
-c
$(top_srcdir)
/src/elpa2_utilities.F90
-o
$@
...
...
@@ -346,24 +347,16 @@ elpa2_kernels_real.i: $(top_srcdir)/src/elpa2_kernels/elpa2_kernels_real.F90
mod_compute_hh_trafo_real.i
:
$(top_srcdir)/src/mod_compute_hh_trafo_real.F90
$(CPP)
$(CPPFLAGS)
-I
$(top_builddir)
/
-c
$(top_srcdir)
/src/mod_compute_hh_trafo_real.F90
-o
$@
mod_compute_hh_trafo_complex.i
:
$(top_srcdir)/src/mod_compute_hh_trafo_complex.F90
$(CPP)
$(CPPFLAGS)
-I
$(top_builddir)
/
-c
$(top_srcdir)
/src/mod_compute_hh_trafo_complex.F90
-o
$@
include
doxygen.am
CLEANFILES
=
\
elpa-generated.h
\
elpa1_test_real.sh
\
elpa1_test_complex.sh
\
elpa2_test_real.sh
\
elpa2_test_real_default_kernel.sh
\
elpa2_test_real_default_kernel_qr_decomposition.sh
\
elpa2_test_complex.sh
\
elpa2_test_complex_default_kernel.sh
\
elpa2_test_real_choose_kernel_with_api.sh
\
elpa2_test_complex_choose_kernel_with_api.sh
\
elpa1_test_real_with_c.sh
\
elpa1_test_real_c_version.sh
\
elpa1_test_complex_c_version.sh
\
elpa2_test_real_c_version.sh
\
elpa2_test_complex_c_version.sh
\
elpa1_test
*
\
elpa2_test
*
\
*
.i
clean-local
:
...
...
configure.ac
View file @
d4c65f38
...
...
@@ -800,10 +800,14 @@ fi
if test x"${use_specific_complex_kernel}" = x"no" ; then
AC_DEFINE([WITH_NO_SPECIFIC_COMPLEX_KERNEL],[1],[do not use only one specific complex kernel (set at compile time)])
else
AC_DEFINE([WITH_ONE_SPECIFIC_COMPLEX_KERNEL],[1],[use only one specific complex kernel (set at compile time)])
fi
if test x"${use_specific_real_kernel}" = x"no" ; then
AC_DEFINE([WITH_NO_SPECIFIC_REAL_KERNEL],[1],[do not use only one specific real kernel (set at compile time)])
else
AC_DEFINE([WITH_ONE_SPECIFIC_REAL_KERNEL],[1],[use only one specific real kernel (set at compile time)])
fi
LT_INIT
...
...
src/elpa2.F90
View file @
d4c65f38
...
...
@@ -214,7 +214,7 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
THIS_REAL_ELPA_KERNEL
=
get_actual_real_kernel
()
endif
! check whether choosen kernel is allowed
! check whether choosen kernel is allowed
: function returns true if NOT allowed! change this
if
(
check_allowed_real_kernels
(
THIS_REAL_ELPA_KERNEL
))
then
if
(
my_pe
==
0
)
then
...
...
@@ -230,10 +230,18 @@ function solve_evp_real_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
enddo
write
(
error_unit
,
*
)
" "
write
(
error_unit
,
*
)
"The defaul kernel REAL_ELPA_KERNEL_GENERIC will be used !"
! check whether generic kernel is defined
if
(
AVAILABLE_REAL_ELPA_KERNELS
(
REAL_ELPA_KERNEL_GENERIC
)
.eq.
1
)
then
write
(
error_unit
,
*
)
"The default kernel REAL_ELPA_KERNEL_GENERIC will be used !"
else
write
(
error_unit
,
*
)
"As default kernel "
,
REAL_ELPA_KERNEL_NAMES
(
DEFAULT_REAL_ELPA_KERNEL
),
" will be used"
endif
endif
! my_pe == 0
if
(
AVAILABLE_REAL_ELPA_KERNELS
(
REAL_ELPA_KERNEL_GENERIC
)
.eq.
1
)
then
THIS_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC
else
THIS_REAL_ELPA_KERNEL
=
DEFAULT_REAL_ELPA_KERNEL
endif
THIS_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC
endif
! Choose bandwidth, must be a multiple of nblk, set to a value >= 32
...
...
@@ -433,9 +441,18 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
enddo
write
(
error_unit
,
*
)
" "
write
(
error_unit
,
*
)
"The defaul kernel COMPLEX_ELPA_KERNEL_GENERIC will be used !"
! check whether generic kernel is defined
if
(
AVAILABLE_COMPLEX_ELPA_KERNELS
(
COMPLEX_ELPA_KERNEL_GENERIC
)
.eq.
1
)
then
write
(
error_unit
,
*
)
"The default kernel COMPLEX_ELPA_KERNEL_GENERIC will be used !"
else
write
(
error_unit
,
*
)
"As default kernel "
,
COMPLEX_ELPA_KERNEL_NAMES
(
DEFAULT_COMPLEX_ELPA_KERNEL
),
" will be used"
endif
endif
! my_pe == 0
if
(
AVAILABLE_COMPLEX_ELPA_KERNELS
(
COMPLEX_ELPA_KERNEL_GENERIC
)
.eq.
1
)
then
THIS_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC
else
THIS_COMPLEX_ELPA_KERNEL
=
DEFAULT_COMPLEX_ELPA_KERNEL
endif
THIS_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC
endif
! Choose bandwidth, must be a multiple of nblk, set to a value >= 32
...
...
src/elpa2_utilities.F90
View file @
d4c65f38
...
...
@@ -76,14 +76,16 @@ module ELPA2_utilities
REAL_ELPA_KERNEL_AVX_BLOCK2
,
&
REAL_ELPA_KERNEL_AVX_BLOCK4
,
REAL_ELPA_KERNEL_AVX_BLOCK6
,
&
REAL_ELPA_KERNEL_AVX2_BLOCK2
,
&
REAL_ELPA_KERNEL_AVX2_BLOCK4
,
REAL_ELPA_KERNEL_AVX2_BLOCK6
REAL_ELPA_KERNEL_AVX2_BLOCK4
,
REAL_ELPA_KERNEL_AVX2_BLOCK6
,&
DEFAULT_REAL_ELPA_KERNEL
public
::
COMPLEX_ELPA_KERNEL_GENERIC
,
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
,
&
COMPLEX_ELPA_KERNEL_BGP
,
COMPLEX_ELPA_KERNEL_BGQ
,
&
COMPLEX_ELPA_KERNEL_SSE
,
COMPLEX_ELPA_KERNEL_SSE_BLOCK1
,
&
COMPLEX_ELPA_KERNEL_SSE_BLOCK2
,
&
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
,
COMPLEX_ELPA_KERNEL_AVX_BLOCK2
,
&
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1
,
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1
,
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2
,
&
DEFAULT_COMPLEX_ELPA_KERNEL
public
::
REAL_ELPA_KERNEL_NAMES
,
COMPLEX_ELPA_KERNEL_NAMES
...
...
@@ -115,10 +117,114 @@ module ELPA2_utilities
integer
,
parameter
::
REAL_ELPA_KERNEL_AVX2_BLOCK6
=
ELPA2_REAL_KERNEL_AVX2_BLOCK6
#if defined(WITH_REAL_AVX_BLOCK2_KERNEL)
#ifndef WITH_ONE_SPECIFIC_REAL_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_GENERIC_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC
#endif
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE
#endif
#if defined(WITH_REAL_SSE_BLOCK2_KERNEL) || defined(WITH_REAL_SSE_BLOCK4_KERNEL) || defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#ifdef WITH_REAL_SSE_BLOCK6_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE_BLOCK6
#else
#ifdef WITH_REAL_SSE_BLOCK4_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE_BLOCK4
#else
#ifdef WITH_REAL_SSE_BLOCK2_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_SSE_BLOCK2_KERNEL) || defined(WITH_REAL_SSE_BLOCK4_KERNEL) || defined(WITH_REAL_SSE_BLOCK6_KERNEL) */
#if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL)
#ifdef WITH_REAL_AVX_BLOCK6_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BLOCK6
#else
#ifdef WITH_REAL_AVX_BLOCK4_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BLOCK4
#else
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL) */
#ifdef WITH_REAL_BGP_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BGP
#endif
#ifdef WITH_REAL_BGQ_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BGQ
#endif
#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#else /* WITH_REAL_AVX_BLOCK2_KERNEL */
#ifndef WITH_ONE_SPECIFIC_REAL_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_GENERIC_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC
#endif
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_REAL_SSE_ASSEMBLY_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE
#endif
#if defined(WITH_REAL_SSE_BLOCK2_KERNEL) || defined(WITH_REAL_SSE_BLOCK4_KERNEL) || defined(WITH_REAL_SSE_BLOCK6_KERNEL)
#ifdef WITH_REAL_SSE_BLOCK6_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE_BLOCK6
#else
#ifdef WITH_REAL_SSE_BLOCK4_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE_BLOCK4
#else
#ifdef WITH_REAL_SSE_BLOCK2_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_SSE_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_SSE_BLOCK2_KERNEL) || defined(WITH_REAL_SSE_BLOCK4_KERNEL) || defined(WITH_REAL_SSE_BLOCK6_KERNEL) */
#if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL)
#ifdef WITH_REAL_AVX_BLOCK6_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BLOCK6
#else
#ifdef WITH_REAL_AVX_BLOCK4_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BLOCK4
#else
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BLOCK2
#endif
#endif
#endif
#endif /* #if defined(WITH_REAL_AVX_BLOCK2_KERNEL) || defined(WITH_REAL_AVX_BLOCK4_KERNEL) || defined(WITH_REAL_AVX_BLOCK6_KERNEL) */
#ifdef WITH_REAL_BGP_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BGP
#endif
#ifdef WITH_REAL_BGQ_KERNEL
integer
,
parameter
::
DEFAULT_REAL_ELPA_KERNEL
=
REAL_ELPA_KERNEL_AVX_BGQ
#endif
#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#endif /* WITH_REAL_AVX_BLOCK2_KERNEL */
character
(
35
),
parameter
,
dimension
(
number_of_real_kernels
)
::
&
REAL_ELPA_KERNEL_NAMES
=
(/
"REAL_ELPA_KERNEL_GENERIC "
,
&
"REAL_ELPA_KERNEL_GENERIC_SIMPLE "
,
&
...
...
@@ -149,10 +255,86 @@ module ELPA2_utilities
integer
,
parameter
::
COMPLEX_ELPA_KERNEL_AVX2_BLOCK2
=
ELPA2_COMPLEX_KERNEL_AVX2_BLOCK2
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)
#ifndef WITH_ONE_SPECIFIC_COMPLEX_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC
#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
! go through all kernels and set them
#ifdef WITH_COMPLEX_GENERIC_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC
#endif
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE
#endif
#if defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL) || defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL)
#ifdef WITH_COMPLEX_SSE_BLOCK2_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE_BLOCK2
#else
#ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE_BLOCK1
#endif
#endif
#endif /* defined(WITH_COMPLEXL_SSE_BLOCK1_KERNEL) || defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL) */
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL)
#ifdef WITH_COMPLEX_AVX_BLOCK2_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_AVX_BLOCK2
#else
#ifdef WITH_COMPLEX_AVX_BLOCK1_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
#endif
#endif
#endif /* defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) */
#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
#else /* WITH_COMPLEX_AVX_BLOCK1_KERNEL */
#ifndef WITH_ONE_SPECIFIC_COMPLEX_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC
#else /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
! go through all kernels and set them
#ifdef WITH_COMPLEX_GENERIC_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC
#endif
#ifdef WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
#endif
#ifdef WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE
#endif
#if defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL) || defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL)
#ifdef WITH_COMPLEX_SSE_BLOCK2_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE_BLOCK2
#else
#ifdef WITH_COMPLEX_SSE_BLOCK1_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_SSE_BLOCK1
#endif
#endif
#endif /* defined(WITH_COMPLEXL_SSE_BLOCK1_KERNEL) || defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL) */
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL)
#ifdef WITH_COMPLEX_AVX_BLOCK2_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_AVX_BLOCK2
#else
#ifdef WITH_COMPLEX_AVX_BLOCK1_KERNEL
integer
,
parameter
::
DEFAULT_COMPLEX_ELPA_KERNEL
=
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
#endif
#endif
#endif /* defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) */
#endif /* WITH_ONE_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK1_KERNEL */
character
(
35
),
parameter
,
dimension
(
number_of_complex_kernels
)
::
&
COMPLEX_ELPA_KERNEL_NAMES
=
(/
"COMPLEX_ELPA_KERNEL_GENERIC "
,
&
"COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE "
,
&
...
...
src/mod_compute_hh_trafo_complex.F90
View file @
d4c65f38
...
...
@@ -71,9 +71,9 @@ module compute_hh_trafo_complex
#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
call
timer
%
st
op
(
"compute_hh_trafo_complex_cpu_openmp"
)
call
timer
%
st
art
(
"compute_hh_trafo_complex_cpu_openmp"
)
#else
call
timer
%
st
op
(
"compute_hh_trafo_complex_cpu"
)
call
timer
%
st
art
(
"compute_hh_trafo_complex_cpu"
)
#endif
#endif
...
...
@@ -250,6 +250,8 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE_BLOCK1
)
then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL))
ttt
=
mpi_wtime
()
do
j
=
ncols
,
1
,
-1
#ifdef WITH_OPENMP
...
...
@@ -260,16 +262,20 @@ module compute_hh_trafo_complex
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
#endif
enddo
#endif /* defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL)) */
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_BLOCK1_KERNE */
#endif /* WITH_COMPLEX_SSE_BLOCK1_KERNE
L
*/
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if
((
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX2_BLOCK1
))
then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) && !defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL))
ttt
=
mpi_wtime
()
do
j
=
ncols
,
1
,
-1
#ifdef WITH_OPENMP
...
...
@@ -280,6 +286,8 @@ module compute_hh_trafo_complex
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
#endif
enddo
#endif /* defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL) || (defined(WITH_ONE_SPECIFIC_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL) && !defined(WITH_COMPLEX_AVX2_BLOCK2_KERNEL)) */
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
...
...
src/mod_compute_hh_trafo_real.F90
View file @
d4c65f38
...
...
@@ -218,6 +218,8 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if
(
THIS_REAL_ELPA_KERNEL
.eq.
REAL_ELPA_KERNEL_SSE_BLOCK2
)
then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL) && !defined(WITH_REAL_SSE_BLOCK4_KERNEL))
do
j
=
ncols
,
2
,
-2
w
(:,
1
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
)
w
(:,
2
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-1
)
...
...
@@ -229,6 +231,8 @@ module compute_hh_trafo_real
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#endif
enddo
#endif /* defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL) && !defined(WITH_REAL_SSE_BLOCK4_KERNEL)) */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
...
...
@@ -239,6 +243,8 @@ module compute_hh_trafo_real
if
((
THIS_REAL_ELPA_KERNEL
.eq.
REAL_ELPA_KERNEL_AVX_BLOCK2
)
.or.
&
(
THIS_REAL_ELPA_KERNEL
.eq.
REAL_ELPA_KERNEL_AVX2_BLOCK2
))
then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_AVX_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX_BLOCK4_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK6_KERNEL) && !defined(WITH_REAL_AVX2_BLOCK4_KERNEL))
do
j
=
ncols
,
2
,
-2
w
(:,
1
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
)
w
(:,
2
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-1
)
...
...
@@ -250,6 +256,8 @@ module compute_hh_trafo_real
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#endif
enddo
#endif /* defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) ... */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
...
...
@@ -322,6 +330,8 @@ module compute_hh_trafo_real
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL)
if
(
THIS_REAL_ELPA_KERNEL
.eq.
REAL_ELPA_KERNEL_SSE_BLOCK4
)
then
#endif /* WITH_NO_SPECIFIC_REAL_KERNEL */
#if defined(WITH_NO_SPECIFIC_REAL_KERNEL) || (defined(WITH_ONE_SPECIFIC_REAL_KERNEL) && !defined(WITH_REAL_SSE_BLOCK6_KERNEL))