Commit 0bb7c35f authored by Andreas Marek

Rename the kernel files

parent e25616cf
......@@ -930,47 +930,47 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \
@top_srcdir@/src/elpa2/elpa2_trans_ev_band_to_full_template.X90 \
@top_srcdir@/src/elpa2/elpa2_trans_ev_tridi_to_band_template.X90 \
@top_srcdir@/src/elpa2/elpa2_tridiag_band_template.X90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex.F90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_simple.F90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_template.X90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real.F90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_bgp.f90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_bgq.f90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_simple.F90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_template.X90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_simple_template.X90 \
@top_srcdir@/src/elpa2/kernels/asm_x86_64_double_precision.s \
@top_srcdir@/src/elpa2/kernels/asm_x86_64_single_precision.s \
@top_srcdir@/src/elpa2/kernels/complex.F90 \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_simple.F90 \
@top_srcdir@/src/elpa2/kernels/complex_sse_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/complex_template.X90 \
@top_srcdir@/src/elpa2/kernels/real.F90 \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_avx512_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_bgp.f90 \
@top_srcdir@/src/elpa2/kernels/real_bgq.f90 \
@top_srcdir@/src/elpa2/kernels/real_simple.F90 \
@top_srcdir@/src/elpa2/kernels/real_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/real_sse_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/real_template.X90 \
@top_srcdir@/src/elpa2/kernels/simple_template.X90 \
@top_srcdir@/src/elpa2/kernels/mod_single_hh_trafo_real.F90 \
@top_srcdir@/src/elpa2/legacy_interface/elpa2.F90 \
@top_srcdir@/src/elpa2/legacy_interface/elpa2_c_interface_template.X90 \
......
......@@ -88,9 +88,9 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/elpa2_tridiag_band_template.X90 \
src/elpa2/elpa2_trans_ev_tridi_to_band_template.X90 \
src/elpa2/elpa2_herm_matrix_allreduce_complex_template.X90 \
src/elpa2/kernels/elpa2_kernels_real_template.X90 \
src/elpa2/kernels/elpa2_kernels_complex_template.X90 \
src/elpa2/kernels/elpa2_kernels_simple_template.X90 \
src/elpa2/kernels/real_template.X90 \
src/elpa2/kernels/complex_template.X90 \
src/elpa2/kernels/simple_template.X90 \
src/elpa2/pack_unpack_cpu.X90 \
src/elpa2/pack_unpack_gpu.X90 \
src/elpa2/compute_hh_trafo.X90 \
......@@ -154,186 +154,186 @@ endif
endif
if WITH_REAL_GENERIC_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real.F90
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real.F90
endif
if WITH_COMPLEX_GENERIC_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex.F90
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex.F90
endif
if WITH_REAL_GENERIC_SIMPLE_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_simple.F90
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple.F90
endif
if WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_simple.F90
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_simple.F90
endif
if WITH_REAL_BGP_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_bgp.f90
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_bgp.f90
endif
if WITH_REAL_BGQ_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_bgq.f90
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_bgq.f90
endif
if WITH_REAL_SSE_ASSEMBLY_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/asm_x86_64_double_precision.s
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/asm_x86_64_single_precision.s
endif
else
if WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/asm_x86_64_double_precision.s
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/asm_x86_64_single_precision.s
endif
endif
endif
if WITH_REAL_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_2hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_2hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_2hv_single_precision.c
endif
endif
if WITH_REAL_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_2hv_single_precision.c
endif
else
if WITH_REAL_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_2hv_single_precision.c
endif
endif
endif
if WITH_REAL_AVX512_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_2hv_single_precision.c
endif
endif
if WITH_REAL_SSE_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_4hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_4hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_4hv_single_precision.c
endif
endif
if WITH_REAL_AVX_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_4hv_single_precision.c
endif
else
if WITH_REAL_AVX2_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_4hv_single_precision.c
endif
endif
endif
if WITH_REAL_AVX512_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_4hv_single_precision.c
endif
endif
if WITH_REAL_SSE_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_6hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_6hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_6hv_single_precision.c
endif
endif
if WITH_REAL_AVX_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_6hv_single_precision.c
endif
else
if WITH_REAL_AVX2_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_6hv_single_precision.c
endif
endif
endif
if WITH_REAL_AVX512_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_6hv_single_precision.c
endif
endif
if WITH_COMPLEX_SSE_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_1hv_single_precision.c
endif
endif
if WITH_COMPLEX_AVX_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_1hv_single_precision.c
endif
else
if WITH_COMPLEX_AVX2_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_1hv_single_precision.c
endif
endif
endif
if WITH_COMPLEX_AVX512_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_1hv_single_precision.c
endif
endif
if WITH_COMPLEX_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_2hv_single_precision.c
endif
endif
if WITH_COMPLEX_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_2hv_single_precision.c
endif
else
if WITH_COMPLEX_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_2hv_single_precision.c
endif
endif
endif
if WITH_COMPLEX_AVX512_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_double_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_single_precision.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_2hv_single_precision.c
endif
endif
......@@ -587,24 +587,24 @@ EXTRA_DIST = \
src/general/precision_macros.h \
src/elpa_index.h \
src/fortran_constants.h \
src/elpa2/kernels/elpa2_kernels_real_template.X90 \
src/elpa2/kernels/elpa2_kernels_complex_template.X90 \
src/elpa2/kernels/elpa2_kernels_simple_template.X90 \
src/elpa2/kernels/elpa2_kernels_real_sse_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_sse_4hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_sse_6hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_template.Xc \
src/elpa2/kernels/real_template.X90 \
src/elpa2/kernels/complex_template.X90 \
src/elpa2/kernels/simple_template.X90 \
src/elpa2/kernels/real_sse_2hv_template.Xc \
src/elpa2/kernels/real_sse_4hv_template.Xc \
src/elpa2/kernels/real_sse_6hv_template.Xc \
src/elpa2/kernels/real_avx-avx2_2hv_template.Xc \
src/elpa2/kernels/real_avx-avx2_4hv_template.Xc \
src/elpa2/kernels/real_avx-avx2_6hv_template.Xc \
src/elpa2/kernels/real_avx512_2hv_template.Xc \
src/elpa2/kernels/real_avx512_4hv_template.Xc \
src/elpa2/kernels/real_avx512_6hv_template.Xc \
src/elpa2/kernels/complex_sse_1hv_template.Xc \
src/elpa2/kernels/complex_sse_2hv_template.Xc \
src/elpa2/kernels/complex_avx-avx2_1hv_template.Xc \
src/elpa2/kernels/complex_avx-avx2_2hv_template.Xc \
src/elpa2/kernels/complex_avx512_1hv_template.Xc \
src/elpa2/kernels/complex_avx512_2hv_template.Xc \
src/elpa2/redist_band.X90 \
src/elpa2/pack_unpack_cpu.X90 \
src/elpa2/pack_unpack_gpu.X90 \
......
......@@ -670,7 +670,7 @@ fi
if test x"${need_sse_assembly}" = x"yes"; then
AC_MSG_CHECKING(whether double-precision SSE assembly kernels can be compiled)
$CC $CFLAGS -c $srcdir/src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s -o conftest.o 2>&5
$CC $CFLAGS -c $srcdir/src/elpa2/kernels/asm_x86_64_double_precision.s -o conftest.o 2>&5
if test "$?" == 0; then
can_compile_sse_asm_double=yes
......@@ -686,7 +686,7 @@ if test x"${need_sse_assembly}" = x"yes"; then
if test x"${want_single_precision}" = x"yes" ; then
AC_MSG_CHECKING(whether single-precision SSE assembly kernels can be compiled)
$CC $CFLAGS -c $srcdir/src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s -o conftest.o 2>&5
$CC $CFLAGS -c $srcdir/src/elpa2/kernels/asm_x86_64_single_precision.s -o conftest.o 2>&5
if test "$?" == 0; then
can_compile_sse_asm_single=yes
......
......@@ -70,14 +70,14 @@ module complex_generic_kernel
#define DOUBLE_PRECISION_COMPLEX 1
#define COMPLEX_DATATYPE ck8
#include "elpa2_kernels_complex_template.X90"
#include "complex_template.X90"
#undef DOUBLE_PRECISION_COMPLEX
#undef COMPLEX_DATATYPE
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#undef DOUBLE_PRECISION_COMPLEX
#define COMPLEX_DATATYPE ck4
#include "elpa2_kernels_complex_template.X90"
#include "complex_template.X90"
#undef DOUBLE_PRECISION_COMPLEX
#undef COMPLEX_DATATYPE
#endif
......
......@@ -49,7 +49,7 @@
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_sse_1hv_template.Xc"
#include "complex_avx-avx2_1hv_template.Xc"
#undef DOUBLE_PRECISION
#undef COMPLEXCASE
......@@ -49,7 +49,7 @@
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_sse_1hv_template.Xc"
#undef COMPLEXCASE
#include "complex_avx-avx2_1hv_template.Xc"
#undef SINGLE_PRECISION
#undef COMPLEXCASE
......@@ -49,7 +49,7 @@
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_sse_2hv_template.Xc"
#include "complex_avx-avx2_2hv_template.Xc"
#undef DOUBLE_PRECISION
#undef COMPLEXCASE
......@@ -49,7 +49,7 @@
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_sse_2hv_template.Xc"
#include "complex_avx-avx2_2hv_template.Xc"
#undef SINGLE_PRECISION
#undef COMPLEXCASE
......@@ -49,7 +49,7 @@
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_avx512_1hv_template.Xc"
#include "complex_avx512_1hv_template.Xc"
#undef DOUBLE_PRECISION
#undef COMPLEXCASE
......@@ -49,7 +49,7 @@
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_avx512_2hv_template.Xc"
#include "complex_avx512_1hv_template.Xc"
#undef SINGLE_PRECISION
#undef COMPLEXCASE
......@@ -49,7 +49,7 @@
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_avx512_2hv_template.Xc"
#include "complex_avx512_2hv_template.Xc"
#undef DOUBLE_PRECISION
#undef COMPLEXCASE
......@@ -49,7 +49,7 @@
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_avx512_1hv_template.Xc"
#include "complex_avx512_2hv_template.Xc"
#undef SINGLE_PRECISION
#undef COMPLEXCASE
......@@ -73,7 +73,7 @@ module complex_generic_simple_kernel
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_simple_template.X90"
#include "simple_template.X90"
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
......@@ -81,7 +81,7 @@ module complex_generic_simple_kernel
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "../../general/precision_macros.h"
#include "elpa2_kernels_simple_template.X90"
#include "simple_template.X90"
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#endif
......
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
// Instantiates the complex SSE 1hv template for the double-precision case:
// the selector macros are defined, the shared template is textually included,
// and the selectors are undefined again afterwards.
#include "config-f90.h"
// Selector macros; they must be defined before the two includes below so
// that the included files can see them.
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
// NOTE(review): presumably translates the selectors above into concrete
// type/name macros used by the template -- confirm in precision_macros.h.
#include "../../general/precision_macros.h"
#include "complex_sse_1hv_template.Xc"
// Remove the selectors so they do not remain defined after this point.
#undef DOUBLE_PRECISION
#undef COMPLEXCASE
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here: