Commit 6e388bcb authored by Andreas Marek's avatar Andreas Marek
Browse files

Complex kernels

parent 0868886f
......@@ -401,7 +401,6 @@ if WANT_SINGLE_PRECISION_COMPLEX
endif
endif
if WITH_COMPLEX_AVX512_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_avx512_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
......@@ -409,6 +408,13 @@ if WANT_SINGLE_PRECISION_COMPLEX
endif
endif
# SVE512 complex BLOCK1 kernel: the double-precision source is added whenever
# WITH_COMPLEX_SVE512_BLOCK1_KERNEL is set; the single-precision source is
# added only when WANT_SINGLE_PRECISION_COMPLEX is set as well.
if WITH_COMPLEX_SVE512_BLOCK1_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sve512_1hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sve512_1hv_single_precision.c
endif
endif
#if WITH_COMPLEX_SPARC64_BLOCK2_KERNEL
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sparc64_2hv_double_precision.c
#if WANT_SINGLE_PRECISION_COMPLEX
......@@ -451,6 +457,13 @@ if WANT_SINGLE_PRECISION_COMPLEX
endif
endif
# SVE512 complex BLOCK2 kernel: the double-precision source is added whenever
# WITH_COMPLEX_SVE512_BLOCK2_KERNEL is set; the single-precision source is
# added only when WANT_SINGLE_PRECISION_COMPLEX is set as well.
if WITH_COMPLEX_SVE512_BLOCK2_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sve512_2hv_double_precision.c
if WANT_SINGLE_PRECISION_COMPLEX
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/complex_sve512_2hv_single_precision.c
endif
endif
if STORE_BUILD_CONFIG
libelpa@SUFFIX@_private_la_SOURCES += src/helpers/print_build_config.c
......
......@@ -70,6 +70,7 @@
#define AVX_256 256
#define AVX2_256 2562
#define AVX_512 512
#define SVE_512 5121
#define NEON_ARCH64_128 1285
#if VEC_SET == SSE_128 || VEC_SET == AVX_256 || VEC_SET == AVX2_256 || VEC_SET == AVX_512
......@@ -88,6 +89,10 @@
#include <arm_neon.h>
#endif
/* Pull in the Arm SVE intrinsics header only when this translation unit
 * was configured with VEC_SET == SVE_512. */
#if VEC_SET == SVE_512
#include <arm_sve.h>
#endif
#include <complex.h>
#include <stdio.h>
......@@ -123,8 +128,13 @@
#define SIMD_SET AVX512
#endif
/* For the SVE_512 configuration SIMD_SET expands to the token SVE512.
 * NOTE(review): presumably pasted into generated kernel names further down
 * in the template -- confirm where SIMD_SET is consumed. */
#if VEC_SET == SVE_512
#define SIMD_SET SVE512
#endif
#if VEC_SET == SSE_128
#define ADDITIONAL_ARGUMENT
#ifdef DOUBLE_PRECISION_COMPLEX
#define offset 2
......@@ -168,6 +178,7 @@
#endif /* VEC_SET == SSE_128 */
#if VEC_SET == NEON_128
#define ADDITIONAL_ARGUMENT
#ifdef DOUBLE_PRECISION_COMPLEX
#define offset 2
......@@ -211,6 +222,7 @@
#endif /* VEC_SET == NEON_128 */
#if VEC_SET == AVX_256 || VEC_SET == AVX2_256
#define ADDITIONAL_ARGUMENT
#ifdef DOUBLE_PRECISION_COMPLEX
#define offset 4
......@@ -290,6 +302,7 @@
#endif /* VEC_SET == AVX_256 || VEC_SET == AVX2_256 */
#if VEC_SET == AVX_512
#define ADDITIONAL_ARGUMENT
#ifdef DOUBLE_PRECISION_COMPLEX
#define offset 8
......@@ -358,6 +371,65 @@
#endif /* VEC_SET == AVX_512 */
/*
 * Macro bindings for the VEC_SET == SVE_512 configuration.
 *
 * Maps the generic _SIMD_* names onto Arm SVE intrinsics for double- and
 * single-precision complex builds.
 *
 * NOTE(review): this section is only partially ported. Several names are
 * bound to the literal 1, and several still carry AVX-512 (_mm512_*)
 * intrinsic names that <arm_sve.h> does not declare. Any template code that
 * expands those for SVE will not compile until real SVE equivalents are
 * supplied -- confirm which of them the template actually uses.
 */
#if VEC_SET == SVE_512

/*
 * Governing predicate, including the trailing comma; the other instruction
 * sets define ADDITIONAL_ARGUMENT as empty.
 * NOTE(review): presumably prepended to the argument list of every SVE
 * intrinsic call in the template -- confirm at the call sites.
 *
 * The predicate granule has to match the element width of the intrinsics it
 * governs: svptrue_b64() activates one lane per 64-bit element, so with the
 * _f32 intrinsics it would enable only every other 32-bit lane (and the _z
 * forms would zero the rest). Single precision therefore needs
 * svptrue_b32(). Every other configuration keeps svptrue_b64() as before.
 */
#ifdef SINGLE_PRECISION_COMPLEX
#define ADDITIONAL_ARGUMENT svptrue_b32(),
#else
#define ADDITIONAL_ARGUMENT svptrue_b64(),
#endif

#ifdef DOUBLE_PRECISION_COMPLEX
/* NOTE(review): presumably the number of doubles per 512-bit vector
 * (512 / 64 = 8), matching the AVX_512 section -- confirm. */
#define offset 8
#define __SIMD_DATATYPE svfloat64_t
#define _SIMD_LOAD svld1_f64
#define _SIMD_LOADU 1     /* NOTE(review): literal 1, looks like a placeholder */
#define _SIMD_STORE svst1_f64
#define _SIMD_STOREU 1    /* NOTE(review): literal 1, looks like a placeholder */
#define _SIMD_MUL svmul_f64_z
#define _SIMD_ADD svadd_f64_z
#define _SIMD_BROADCAST 1 /* NOTE(review): literal 1, looks like a placeholder */
#define _SIMD_SET1 svdup_f64
#define _SIMD_SET 1       /* NOTE(review): literal 1, looks like a placeholder */
/* NOTE(review): AVX-512 intrinsic name left in the SVE section. */
#define _SIMD_XOR_EPI _mm512_xor_epi64
#define _SIMD_ADDSUB 1    /* NOTE(review): literal 1, looks like a placeholder */
/* NOTE(review): AVX-512 intrinsic names left in the SVE section. */
#define _SIMD_SHUFFLE _mm512_shuffle_pd
#define _SIMD_MASK_STOREU _mm512_mask_storeu_pd
#define _SHUFFLE 0x55

#define __ELPA_USE_FMA__
/* NOTE(review): the FMA wrappers below still forward to AVX-512 intrinsics. */
#define _mm512_FMADDSUB_pd(a,b,c) _mm512_fmaddsub_pd(a,b,c)
#define _mm512_FMSUBADD_pd(a,b,c) _mm512_fmsubadd_pd(a,b,c)

#define _SIMD_FMADDSUB _mm512_FMADDSUB_pd
#define _SIMD_FMSUBADD _mm512_FMSUBADD_pd
#endif /* DOUBLE_PRECISION_COMPLEX */

#ifdef SINGLE_PRECISION_COMPLEX
/* NOTE(review): presumably the number of floats per 512-bit vector
 * (512 / 32 = 16) -- confirm. */
#define offset 16
#define __SIMD_DATATYPE svfloat32_t
#define _SIMD_LOAD svld1_f32
#define _SIMD_LOADU 1     /* NOTE(review): literal 1, looks like a placeholder */
#define _SIMD_STORE svst1_f32
#define _SIMD_STOREU 1    /* NOTE(review): literal 1, looks like a placeholder */
#define _SIMD_MUL svmul_f32_z
#define _SIMD_ADD svadd_f32_z
#define _SIMD_BROADCAST 1 /* NOTE(review): literal 1, looks like a placeholder */
#define _SIMD_SET1 svdup_f32
#define _SIMD_SET 1       /* NOTE(review): literal 1, looks like a placeholder */
#define _SIMD_ADDSUB 1    /* NOTE(review): literal 1, looks like a placeholder */
/* NOTE(review): AVX-512 intrinsic names left in the SVE section. */
#define _SIMD_SHUFFLE _mm512_shuffle_ps
#define _SIMD_MASK_STOREU _mm512_mask_storeu_ps
#define _SIMD_XOR_EPI _mm512_xor_epi32
#define _SHUFFLE 0xb1

#define __ELPA_USE_FMA__
/* NOTE(review): the FMA wrappers below still forward to AVX-512 intrinsics. */
#define _mm512_FMADDSUB_ps(a,b,c) _mm512_fmaddsub_ps(a,b,c)
#define _mm512_FMSUBADD_ps(a,b,c) _mm512_fmsubadd_ps(a,b,c)

#define _SIMD_FMADDSUB _mm512_FMADDSUB_ps
#define _SIMD_FMSUBADD _mm512_FMSUBADD_ps
#endif /* SINGLE_PRECISION_COMPLEX */

#endif /* VEC_SET == SVE_512 */
......
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
// Wrapper translation unit: defines the configuration macros below, includes
// the shared complex BLOCK template so it is compiled under that
// configuration, then removes the macros again.
#include "config-f90.h"
// Configuration: complex case, double precision, BLOCK1 variant.
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#define BLOCK1 1
// Instruction-set selector for this build.
// NOTE(review): SVE_512 is not defined in this file; presumably the included
// template defines it before testing VEC_SET -- confirm.
#define VEC_SET SVE_512
// NOTE(review): presumably derives the precision-specific macros from
// COMPLEXCASE and DOUBLE_PRECISION -- confirm in the header.
#include "../../general/precision_macros.h"
#include "complex_128bit_256bit_512bit_BLOCK_template.c"
// Undefine the configuration macros in reverse order of definition.
#undef VEC_SET
#undef BLOCK1
#undef DOUBLE_PRECISION
#undef COMPLEXCASE
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
// Wrapper translation unit: defines the configuration macros below, includes
// the shared complex BLOCK template so it is compiled under that
// configuration, then removes the macros again.
#include "config-f90.h"
// Configuration: complex case, single precision, BLOCK1 variant.
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#define BLOCK1 1
// Instruction-set selector for this build.
// NOTE(review): SVE_512 is not defined in this file; presumably the included
// template defines it before testing VEC_SET -- confirm.
#define VEC_SET SVE_512
// NOTE(review): presumably derives the precision-specific macros from
// COMPLEXCASE and SINGLE_PRECISION -- confirm in the header.
#include "../../general/precision_macros.h"
#include "complex_128bit_256bit_512bit_BLOCK_template.c"
// Undefine the configuration macros in reverse order of definition.
#undef VEC_SET
#undef BLOCK1
#undef SINGLE_PRECISION
#undef COMPLEXCASE
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
// Wrapper translation unit: defines the configuration macros below, includes
// the shared complex BLOCK template so it is compiled under that
// configuration, then removes the macros again.
#include "config-f90.h"
// Configuration: complex case, double precision, BLOCK2 variant.
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#define BLOCK2 1
// Instruction-set selector for this build.
// NOTE(review): SVE_512 is not defined in this file; presumably the included
// template defines it before testing VEC_SET -- confirm.
#define VEC_SET SVE_512
// NOTE(review): presumably derives the precision-specific macros from
// COMPLEXCASE and DOUBLE_PRECISION -- confirm in the header.
#include "../../general/precision_macros.h"
#include "complex_128bit_256bit_512bit_BLOCK_template.c"
// Undefine the configuration macros in reverse order of definition.
#undef VEC_SET
#undef BLOCK2
#undef DOUBLE_PRECISION
#undef COMPLEXCASE
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
// Wrapper translation unit: defines the configuration macros below, includes
// the shared complex BLOCK template so it is compiled under that
// configuration, then removes the macros again.
#include "config-f90.h"
// Configuration: complex case, single precision, BLOCK2 variant.
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#define BLOCK2 1
// Instruction-set selector for this build.
// NOTE(review): SVE_512 is not defined in this file; presumably the included
// template defines it before testing VEC_SET -- confirm.
#define VEC_SET SVE_512
// NOTE(review): presumably derives the precision-specific macros from
// COMPLEXCASE and SINGLE_PRECISION -- confirm in the header.
#include "../../general/precision_macros.h"
#include "complex_128bit_256bit_512bit_BLOCK_template.c"
// Undefine the configuration macros in reverse order of definition.
#undef VEC_SET
#undef BLOCK2
#undef SINGLE_PRECISION
#undef COMPLEXCASE
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment