Commit 0bb7c35f authored by Andreas Marek
Browse files

Rename the kernel files

parent e25616cf
...@@ -930,47 +930,47 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \ ...@@ -930,47 +930,47 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \
@top_srcdir@/src/elpa2/elpa2_trans_ev_band_to_full_template.X90 \ @top_srcdir@/src/elpa2/elpa2_trans_ev_band_to_full_template.X90 \
@top_srcdir@/src/elpa2/elpa2_trans_ev_tridi_to_band_template.X90 \ @top_srcdir@/src/elpa2/elpa2_trans_ev_tridi_to_band_template.X90 \
@top_srcdir@/src/elpa2/elpa2_tridiag_band_template.X90 \ @top_srcdir@/src/elpa2/elpa2_tridiag_band_template.X90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s \ @top_srcdir@/src/elpa2/kernels/asm_x86_64_double_precision.s \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s \ @top_srcdir@/src/elpa2/kernels/asm_x86_64_single_precision.s \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex.F90 \ @top_srcdir@/src/elpa2/kernels/complex.F90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_avx-avx2_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_avx512_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_simple.F90 \ @top_srcdir@/src/elpa2/kernels/complex_simple.F90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_sse_1hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_sse_1hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/complex_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_complex_template.X90 \ @top_srcdir@/src/elpa2/kernels/complex_template.X90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real.F90 \ @top_srcdir@/src/elpa2/kernels/real.F90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx-avx2_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx-avx2_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx-avx2_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx512_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx512_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx512_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx512_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx512_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_avx512_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_bgp.f90 \ @top_srcdir@/src/elpa2/kernels/real_bgp.f90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_bgq.f90 \ @top_srcdir@/src/elpa2/kernels/real_bgq.f90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_simple.F90 \ @top_srcdir@/src/elpa2/kernels/real_simple.F90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_2hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_sse_2hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_2hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_sse_2hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_4hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_sse_4hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_4hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_sse_4hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_6hv_double_precision.c \ @top_srcdir@/src/elpa2/kernels/real_sse_6hv_double_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_sse_6hv_single_precision.c \ @top_srcdir@/src/elpa2/kernels/real_sse_6hv_single_precision.c \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_real_template.X90 \ @top_srcdir@/src/elpa2/kernels/real_template.X90 \
@top_srcdir@/src/elpa2/kernels/elpa2_kernels_simple_template.X90 \ @top_srcdir@/src/elpa2/kernels/simple_template.X90 \
@top_srcdir@/src/elpa2/kernels/mod_single_hh_trafo_real.F90 \ @top_srcdir@/src/elpa2/kernels/mod_single_hh_trafo_real.F90 \
@top_srcdir@/src/elpa2/legacy_interface/elpa2.F90 \ @top_srcdir@/src/elpa2/legacy_interface/elpa2.F90 \
@top_srcdir@/src/elpa2/legacy_interface/elpa2_c_interface_template.X90 \ @top_srcdir@/src/elpa2/legacy_interface/elpa2_c_interface_template.X90 \
......
...@@ -88,9 +88,9 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \ ...@@ -88,9 +88,9 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/elpa2_tridiag_band_template.X90 \ src/elpa2/elpa2_tridiag_band_template.X90 \
src/elpa2/elpa2_trans_ev_tridi_to_band_template.X90 \ src/elpa2/elpa2_trans_ev_tridi_to_band_template.X90 \
src/elpa2/elpa2_herm_matrix_allreduce_complex_template.X90 \ src/elpa2/elpa2_herm_matrix_allreduce_complex_template.X90 \
src/elpa2/kernels/elpa2_kernels_real_template.X90 \ src/elpa2/kernels/real_template.X90 \
src/elpa2/kernels/elpa2_kernels_complex_template.X90 \ src/elpa2/kernels/complex_template.X90 \
src/elpa2/kernels/elpa2_kernels_simple_template.X90 \ src/elpa2/kernels/simple_template.X90 \
src/elpa2/pack_unpack_cpu.X90 \ src/elpa2/pack_unpack_cpu.X90 \
src/elpa2/pack_unpack_gpu.X90 \ src/elpa2/pack_unpack_gpu.X90 \
src/elpa2/compute_hh_trafo.X90 \ src/elpa2/compute_hh_trafo.X90 \
...@@ -154,186 +154,186 @@ endif ...@@ -154,186 +154,186 @@ endif
endif endif
if WITH_REAL_GENERIC_KERNEL if WITH_REAL_GENERIC_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real.F90 libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real.F90
endif endif
if WITH_COMPLEX_GENERIC_KERNEL if WITH_COMPLEX_GENERIC_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex.F90 libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex.F90
endif endif
if WITH_REAL_GENERIC_SIMPLE_KERNEL if WITH_REAL_GENERIC_SIMPLE_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_simple.F90 libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_simple.F90
endif endif
if WITH_COMPLEX_GENERIC_SIMPLE_KERNEL if WITH_COMPLEX_GENERIC_SIMPLE_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_simple.F90 libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_simple.F90
endif endif
if WITH_REAL_BGP_KERNEL if WITH_REAL_BGP_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_bgp.f90 libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_bgp.f90
endif endif
if WITH_REAL_BGQ_KERNEL if WITH_REAL_BGQ_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_bgq.f90 libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_bgq.f90
endif endif
if WITH_REAL_SSE_ASSEMBLY_KERNEL if WITH_REAL_SSE_ASSEMBLY_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/asm_x86_64_double_precision.s
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/asm_x86_64_single_precision.s
endif endif
else else
if WITH_COMPLEX_SSE_ASSEMBLY_KERNEL if WITH_COMPLEX_SSE_ASSEMBLY_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/asm_x86_64_double_precision.s
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/asm_x86_64_single_precision.s
endif endif
endif endif
endif endif
if WITH_REAL_SSE_BLOCK2_KERNEL if WITH_REAL_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_2hv_single_precision.c
endif endif
endif endif
if WITH_REAL_AVX_BLOCK2_KERNEL if WITH_REAL_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_2hv_single_precision.c
endif endif
else else
if WITH_REAL_AVX2_BLOCK2_KERNEL if WITH_REAL_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_2hv_single_precision.c
endif endif
endif endif
endif endif
if WITH_REAL_AVX512_BLOCK2_KERNEL if WITH_REAL_AVX512_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_2hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_2hv_single_precision.c
endif endif
endif endif
if WITH_REAL_SSE_BLOCK4_KERNEL if WITH_REAL_SSE_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_4hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_4hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_4hv_single_precision.c
endif endif
endif endif
if WITH_REAL_AVX_BLOCK4_KERNEL if WITH_REAL_AVX_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_4hv_single_precision.c
endif endif
else else
if WITH_REAL_AVX2_BLOCK4_KERNEL if WITH_REAL_AVX2_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_4hv_single_precision.c
endif endif
endif endif
endif endif
if WITH_REAL_AVX512_BLOCK4_KERNEL if WITH_REAL_AVX512_BLOCK4_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_4hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_4hv_single_precision.c
endif endif
endif endif
if WITH_REAL_SSE_BLOCK6_KERNEL if WITH_REAL_SSE_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_6hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_sse_6hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sse_6hv_single_precision.c
endif endif
endif endif
if WITH_REAL_AVX_BLOCK6_KERNEL if WITH_REAL_AVX_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_6hv_single_precision.c
endif endif
else else
if WITH_REAL_AVX2_BLOCK6_KERNEL if WITH_REAL_AVX2_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx-avx2_6hv_single_precision.c
endif endif
endif endif
endif endif
if WITH_REAL_AVX512_BLOCK6_KERNEL if WITH_REAL_AVX512_BLOCK6_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_6hv_double_precision.c
if WANT_SINGLE_PRECISION_REAL if WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_avx512_6hv_single_precision.c
endif endif
endif endif
if WITH_COMPLEX_SSE_BLOCK1_KERNEL if WITH_COMPLEX_SSE_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_1hv_single_precision.c
endif endif
endif endif
if WITH_COMPLEX_AVX_BLOCK1_KERNEL if WITH_COMPLEX_AVX_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_1hv_single_precision.c
endif endif
else else
if WITH_COMPLEX_AVX2_BLOCK1_KERNEL if WITH_COMPLEX_AVX2_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_1hv_single_precision.c
endif endif
endif endif
endif endif
if WITH_COMPLEX_AVX512_BLOCK1_KERNEL if WITH_COMPLEX_AVX512_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_1hv_single_precision.c
endif endif
endif endif
if WITH_COMPLEX_SSE_BLOCK2_KERNEL if WITH_COMPLEX_SSE_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sse_2hv_single_precision.c
endif endif
endif endif
if WITH_COMPLEX_AVX_BLOCK2_KERNEL if WITH_COMPLEX_AVX_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_2hv_single_precision.c
endif endif
else else
if WITH_COMPLEX_AVX2_BLOCK2_KERNEL if WITH_COMPLEX_AVX2_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx-avx2_2hv_single_precision.c
endif endif
endif endif
endif endif
if WITH_COMPLEX_AVX512_BLOCK2_KERNEL if WITH_COMPLEX_AVX512_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_double_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_single_precision.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_2hv_single_precision.c
endif endif
endif endif
...@@ -587,24 +587,24 @@ EXTRA_DIST = \ ...@@ -587,24 +587,24 @@ EXTRA_DIST = \
src/general/precision_macros.h \ src/general/precision_macros.h \
src/elpa_index.h \ src/elpa_index.h \
src/fortran_constants.h \ src/fortran_constants.h \
src/elpa2/kernels/elpa2_kernels_real_template.X90 \ src/elpa2/kernels/real_template.X90 \
src/elpa2/kernels/elpa2_kernels_complex_template.X90 \ src/elpa2/kernels/complex_template.X90 \
src/elpa2/kernels/elpa2_kernels_simple_template.X90 \ src/elpa2/kernels/simple_template.X90 \
src/elpa2/kernels/elpa2_kernels_real_sse_2hv_template.Xc \ src/elpa2/kernels/real_sse_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_sse_4hv_template.Xc \ src/elpa2/kernels/real_sse_4hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_sse_6hv_template.Xc \ src/elpa2/kernels/real_sse_6hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx-avx2_2hv_template.Xc \ src/elpa2/kernels/real_avx-avx2_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx-avx2_4hv_template.Xc \ src/elpa2/kernels/real_avx-avx2_4hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx-avx2_6hv_template.Xc \ src/elpa2/kernels/real_avx-avx2_6hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx512_2hv_template.Xc \ src/elpa2/kernels/real_avx512_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx512_4hv_template.Xc \ src/elpa2/kernels/real_avx512_4hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_real_avx512_6hv_template.Xc \ src/elpa2/kernels/real_avx512_6hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_sse_1hv_template.Xc \ src/elpa2/kernels/complex_sse_1hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_sse_2hv_template.Xc \ src/elpa2/kernels/complex_sse_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_1hv_template.Xc \ src/elpa2/kernels/complex_avx-avx2_1hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_avx-avx2_2hv_template.Xc \ src/elpa2/kernels/complex_avx-avx2_2hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_avx512_1hv_template.Xc \ src/elpa2/kernels/complex_avx512_1hv_template.Xc \
src/elpa2/kernels/elpa2_kernels_complex_avx512_2hv_template.Xc \ src/elpa2/kernels/complex_avx512_2hv_template.Xc \
src/elpa2/redist_band.X90 \ src/elpa2/redist_band.X90 \
src/elpa2/pack_unpack_cpu.X90 \ src/elpa2/pack_unpack_cpu.X90 \
src/elpa2/pack_unpack_gpu.X90 \ src/elpa2/pack_unpack_gpu.X90 \
......
...@@ -670,7 +670,7 @@ fi ...@@ -670,7 +670,7 @@ fi
if test x"${need_sse_assembly}" = x"yes"; then if test x"${need_sse_assembly}" = x"yes"; then
AC_MSG_CHECKING(whether double-precision SSE assembly kernels can be compiled) AC_MSG_CHECKING(whether double-precision SSE assembly kernels can be compiled)
$CC $CFLAGS -c $srcdir/src/elpa2/kernels/elpa2_kernels_asm_x86_64_double_precision.s -o conftest.o 2>&5 $CC $CFLAGS -c $srcdir/src/elpa2/kernels/asm_x86_64_double_precision.s -o conftest.o 2>&5
if test "$?" == 0; then if test "$?" == 0; then
can_compile_sse_asm_double=yes can_compile_sse_asm_double=yes
...@@ -686,7 +686,7 @@ if test x"${need_sse_assembly}" = x"yes"; then ...@@ -686,7 +686,7 @@ if test x"${need_sse_assembly}" = x"yes"; then
if test x"${want_single_precision}" = x"yes" ; then if test x"${want_single_precision}" = x"yes" ; then
AC_MSG_CHECKING(whether single-precision SSE assembly kernels can be compiled) AC_MSG_CHECKING(whether single-precision SSE assembly kernels can be compiled)
$CC $CFLAGS -c $srcdir/src/elpa2/kernels/elpa2_kernels_asm_x86_64_single_precision.s -o conftest.o 2>&5 $CC $CFLAGS -c $srcdir/src/elpa2/kernels/asm_x86_64_single_precision.s -o conftest.o 2>&5
if test "$?" == 0; then if test "$?" == 0; then
can_compile_sse_asm_single=yes can_compile_sse_asm_single=yes
......
...@@ -70,14 +70,14 @@ module complex_generic_kernel ...@@ -70,14 +70,14 @@ module complex_generic_kernel
#define DOUBLE_PRECISION_COMPLEX 1 #define DOUBLE_PRECISION_COMPLEX 1
#define COMPLEX_DATATYPE ck8 #define COMPLEX_DATATYPE ck8
#include "elpa2_kernels_complex_template.X90" #include "complex_template.X90"
#undef DOUBLE_PRECISION_COMPLEX #undef DOUBLE_PRECISION_COMPLEX
#undef COMPLEX_DATATYPE #undef COMPLEX_DATATYPE
#ifdef WANT_SINGLE_PRECISION_COMPLEX #ifdef WANT_SINGLE_PRECISION_COMPLEX
#undef DOUBLE_PRECISION_COMPLEX #undef DOUBLE_PRECISION_COMPLEX
#define COMPLEX_DATATYPE ck4 #define COMPLEX_DATATYPE ck4
#include "elpa2_kernels_complex_template.X90" #include "complex_template.X90"
#undef DOUBLE_PRECISION_COMPLEX #undef DOUBLE_PRECISION_COMPLEX
#undef COMPLEX_DATATYPE #undef COMPLEX_DATATYPE
#endif #endif
......
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h" #include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_sse_1hv_template.Xc" #include "complex_avx-avx2_1hv_template.Xc"
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
#undef COMPLEXCASE #undef COMPLEXCASE
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define SINGLE_PRECISION 1 #define SINGLE_PRECISION 1
#include "../../general/precision_macros.h" #include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_sse_1hv_template.Xc" #include "complex_avx-avx2_1hv_template.Xc"
#undef COMPLEXCASE
#undef SINGLE_PRECISION #undef SINGLE_PRECISION
#undef COMPLEXCASE
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h" #include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_sse_2hv_template.Xc" #include "complex_avx-avx2_2hv_template.Xc"
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
#undef COMPLEXCASE #undef COMPLEXCASE
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define SINGLE_PRECISION 1 #define SINGLE_PRECISION 1
#include "../../general/precision_macros.h" #include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_sse_2hv_template.Xc" #include "complex_avx-avx2_2hv_template.Xc"
#undef SINGLE_PRECISION #undef SINGLE_PRECISION
#undef COMPLEXCASE #undef COMPLEXCASE
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#include "../../general/precision_macros.h" #include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_avx512_1hv_template.Xc" #include "complex_avx512_1hv_template.Xc"
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
#undef COMPLEXCASE #undef COMPLEXCASE
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define SINGLE_PRECISION 1 #define SINGLE_PRECISION 1
#include "../../general/precision_macros.h" #include "../../general/precision_macros.h"
#include "elpa2_kernels_complex_avx512_2hv_template.Xc" #include "complex_avx512_1hv_template.Xc"
#undef SINGLE_PRECISION #undef SINGLE_PRECISION