Commit 65e33ecf authored by Andreas Marek's avatar Andreas Marek

Merge branch 'auto_detect' into 'master_pre_stage'

Auto detect

See merge request !20
parents 6412b4f7 dd47b584
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -177,6 +177,11 @@ if !HAVE_DETAILED_TIMINGS
endif
endif
if HAVE_HETEROGENOUS_CLUSTER_SUPPORT
libelpa@SUFFIX@_private_la_SOURCES += src/helpers/get_cpuid_set.c src/helpers/mod_simd_kernel.F90
endif
if WITH_REAL_GENERIC_KERNEL
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real.F90
endif
......@@ -480,6 +485,7 @@ nobase_elpa_include_HEADERS = \
$(wildcard modules/*) \
src/helpers/lapack_interfaces.h \
src/helpers/scalapack_interfaces.h \
elpa/elpa_simd_constants.h \
elpa/elpa.h \
elpa/elpa_generic.h \
elpa/elpa_legacy.h
......
This diff is collapsed.
......@@ -187,6 +187,27 @@ if test x"$c11_standard" = x"no"; then
fi
fi
AX_EXT
dnl heterogenous-cluster-support
dnl
dnl Optional, experimental feature selected with
dnl   --enable-heterogenous-cluster-support
dnl When enabled, HAVE_HETEROGENOUS_CLUSTER_SUPPORT is defined in the config
dnl header and the Automake conditional of the same name is set to true.
dnl Any value other than "yes" (including omitting the option) disables it.
AC_MSG_CHECKING([whether heterogenous-cluster-support should be enabled])
AC_ARG_ENABLE([heterogenous-cluster-support],
  [AS_HELP_STRING([--enable-heterogenous-cluster-support],
    [allow ELPA to automatically switch to a kernel supported on all CPUs of a cluster (only works for Intel CPUs at the moment), default no. Activate only if necessary, since it has a performance penalty! This feature is experimental!])],
  [
    if test x"$enableval" = x"yes"; then
      enable_heterogenous_cluster_support=yes
    else
      enable_heterogenous_cluster_support=no
    fi
  ],
  [enable_heterogenous_cluster_support="no"])
AC_MSG_RESULT([$enable_heterogenous_cluster_support])
if test x"${enable_heterogenous_cluster_support}" = x"yes"; then
  AC_DEFINE([HAVE_HETEROGENOUS_CLUSTER_SUPPORT], [1], [automatically support clusters with different Intel CPUs])
fi
AM_CONDITIONAL([HAVE_HETEROGENOUS_CLUSTER_SUPPORT],[test x"$enable_heterogenous_cluster_support" = x"yes"])
AC_MSG_CHECKING(whether C compiler can use _Generic )
AC_COMPILE_IFELSE([AC_LANG_SOURCE([
int main(int argc, char **argv) {
......@@ -745,7 +766,7 @@ ELPA_SELECT_KERNELS([sse],[enable])
ELPA_SELECT_KERNELS([sse_assembly],[enable])
ELPA_SELECT_KERNELS([avx],[enable])
ELPA_SELECT_KERNELS([avx2],[enable])
ELPA_SELECT_KERNELS([avx512],[disable])
ELPA_SELECT_KERNELS([avx512],[enable])
ELPA_SELECT_KERNELS([gpu],[disable])
ELPA_SELECT_KERNELS([bgp],[disable])
ELPA_SELECT_KERNELS([bgq],[disable])
......@@ -996,7 +1017,7 @@ if test x"${need_sse_assembly}" = x"yes"; then
rm -f ./conftest.o
AC_MSG_RESULT([${can_compile_sse_asm_double}])
if test x"$can_compile_sse_asm_double" != x"yes"; then
AC_MSG_ERROR([Could not compile test program, try with --disable-sse-assembly, or adjust the C compiler or CFLAGS])
AC_MSG_ERROR([Could not compile test program, try with --disable-sse-assembly, or adjust the C compiler or CFLAGS. Possibly (some of) the flags " $SIMD_FLAGS " solve this issue])
fi
if test x"${want_single_precision}" = x"yes" ; then
......@@ -1012,7 +1033,7 @@ if test x"${need_sse_assembly}" = x"yes"; then
rm -f ./conftest.o
AC_MSG_RESULT([${can_compile_sse_asm_single}])
if test x"$can_compile_sse_asm_single" != x"yes"; then
AC_MSG_ERROR([Could not compile test program, try with --disable-sse-assembly, or adjust the C compiler or CFLAGS])
AC_MSG_ERROR([Could not compile test program, try with --disable-sse-assembly, or adjust the C compiler or CFLAGS. Possibly (some of) the flags " $SIMD_FLAGS " solve this issue])
fi
fi
fi
......@@ -1035,7 +1056,7 @@ if test x"${need_avx}" = x"yes"; then
)
AC_MSG_RESULT([${can_compile_avx}])
if test x"$can_compile_avx" != x"yes"; then
AC_MSG_ERROR([Could not compile a test program with AVX, try with --disable-avx, or adjust the C compiler or CFLAGS])
AC_MSG_ERROR([Could not compile a test program with AVX, try with --disable-avx, or adjust the C compiler or CFLAGS. Possibly (some of) the flags " $SIMD_FLAGS " solve this issue])
fi
AC_DEFINE([HAVE_AVX],[1],[AVX is supported on this CPU])
fi
......@@ -1057,7 +1078,7 @@ if test x"${need_avx2}" = x"yes"; then
)
AC_MSG_RESULT([${can_compile_avx2}])
if test x"$can_compile_avx2" != x"yes"; then
AC_MSG_ERROR([Could not compile a test program with AVX2, try with --disable-avx2, or adjust the C compiler or CFLAGS])
AC_MSG_ERROR([Could not compile a test program with AVX2, try with --disable-avx2, or adjust the C compiler or CFLAGS. Possibly (some of) the flags " $SIMD_FLAGS " solve this issue])
fi
AC_DEFINE([HAVE_AVX2],[1],[AVX2 is supported on this CPU])
fi
......@@ -1079,7 +1100,7 @@ if test x"${need_avx512}" = x"yes"; then
)
AC_MSG_RESULT([${can_compile_avx512}])
if test x"$can_compile_avx512" != x"yes"; then
AC_MSG_ERROR([Could not compile a test program with AVX512, adjust the C compiler or CFLAGS])
AC_MSG_ERROR([Could not compile a test program with AVX512, adjust the C compiler or CFLAGS. Possibly (some of) the flags " $SIMD_FLAGS " solve this issue])
fi
AC_DEFINE([HAVE_AVX512],[1],[AVX512 is supported on this CPU])
......@@ -1584,6 +1605,41 @@ m4_foreach_w([elpa_m4_kind],[real complex],[
])
])
dnl Warn when a SIMD instruction set is reported as available on the build
dnl host (ax_cv_have_*_cpu_ext, presumably filled in by the AX_EXT call earlier
dnl in this file) but the corresponding ELPA kernels were not requested.
dnl The two conditions are joined with "&&" between separate test invocations
dnl because the "-a" operator of test is not portable.
if test x"${ax_cv_have_sse3_cpu_ext}" = x"yes" && test x"${need_sse}" = x"no"; then
  echo " "
  AC_MSG_WARN([You did not request SSE support (--enable-sse), but your local CPU supports it.])
  AC_MSG_WARN([You might want to re-configure, except you are cross-compiling])
  echo " "
fi
if test x"${ax_cv_have_sse3_cpu_ext}" = x"yes" && test x"${need_sse_assembly}" = x"no"; then
  echo " "
  AC_MSG_WARN([You did not request SSE-ASSEMBLY support (--enable-sse-assembly), but your local CPU supports it.])
  AC_MSG_WARN([You might want to re-configure, except you are cross-compiling])
  echo " "
fi
if test x"${ax_cv_have_avx_cpu_ext}" = x"yes" && test x"${need_avx}" = x"no"; then
  echo " "
  AC_MSG_WARN([You did not request AVX support (--enable-avx), but your local CPU supports it.])
  AC_MSG_WARN([You might want to re-configure, except you are cross-compiling])
  echo " "
fi
if test x"${ax_cv_have_avx2_cpu_ext}" = x"yes" && test x"${need_avx2}" = x"no"; then
  echo " "
  AC_MSG_WARN([You did not request AVX2 support (--enable-avx2), but your local CPU supports it.])
  AC_MSG_WARN([You might want to re-configure, except you are cross-compiling])
  echo " "
fi
if test x"${ax_cv_have_avx512f_cpu_ext}" = x"yes" && test x"${need_avx512}" = x"no"; then
  echo " "
  AC_MSG_WARN([You did not request AVX512 support (--enable-avx512), but your local CPU supports it.])
  AC_MSG_WARN([You might want to re-configure, except you are cross-compiling])
  echo " "
fi
#echo " "
#echo "***********************************************************************"
#echo "* ELPA 2018.11.001 will be the last release where the old, obsolete, *"
......
#define CPU_MANUFACTURER 1
#define GENERIC_INSTR 2
#define BLUEGENE_INSTR 3
#define SSE_INSTR 4
#define AVX_INSTR 5
#define AVX2_INSTR 6
#define AVX512_INSTR 7
#define NVIDIA_INSTR 8
#define VSX_INSTR 9
#define ARCH64_INSTR 10
#define SPARC_INSTR 11
#define NUMBER_OF_INSTR 12
This diff is collapsed.
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_AVX_XGETBV(OP)
#
# DESCRIPTION
#
# On later x86 processors with AVX SIMD support, with gcc or a compiler
# that has a compatible syntax for inline assembly instructions, run a
# small program that executes the xgetbv instruction with input OP. This
# can be used to detect if the OS supports AVX instruction usage.
#
# On output, the values of the eax and edx registers are stored as
# hexadecimal strings as "eax:edx" in the cache variable
# ax_cv_gcc_x86_avx_xgetbv_OP.
#
# If the xgetbv instruction fails (because you are running a
# cross-compiler, or because you are not using gcc, or because you are on
# a processor that doesn't have this instruction),
# ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
#
# This macro mainly exists to be used in AX_EXT.
#
# LICENSE
#
# Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 3
# AX_GCC_X86_AVX_XGETBV(OP)
# -------------------------
# Compiles and runs a small C program that places OP in ecx, executes the
# xgetbv instruction (emitted as raw opcode bytes) and writes the resulting
# eax and edx values as "eax:edx" in hexadecimal to the file conftest_xgetbv.
# The content of that file becomes the value of the cache variable
# ax_cv_gcc_x86_avx_xgetbv_OP. If the program cannot be run successfully, or
# when cross-compiling, the cache variable is set to "unknown" instead.
# The temporary file conftest_xgetbv is removed after a run attempt.
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, eax, edx;
FILE *f;
/* Opcodes for xgetbv */
__asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0"
: "=a" (eax), "=d" (edx)
: "c" (op));
f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
fprintf(f, "%x:%x\n", eax, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
[ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
AC_LANG_POP([C])
])
# ===========================================================================
# https://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_CPUID(OP)
# AX_GCC_X86_CPUID_COUNT(OP, COUNT)
#
# DESCRIPTION
#
# On Pentium and later x86 processors, with gcc or a compiler that has a
# compatible syntax for inline assembly instructions, run a small program
# that executes the cpuid instruction with input OP. This can be used to
# detect the CPU type. AX_GCC_X86_CPUID_COUNT takes an additional COUNT
# parameter that gets passed into register ECX before calling cpuid.
#
# On output, the values of the eax, ebx, ecx, and edx registers are stored
# as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
# ax_cv_gcc_x86_cpuid_OP.
#
# If the cpuid instruction fails (because you are running a
# cross-compiler, or because you are not using gcc, or because you are on
# a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
# is set to the string "unknown".
#
# This macro mainly exists to be used in AX_GCC_ARCHFLAG.
#
# LICENSE
#
# Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
# Copyright (c) 2008 Matteo Frigo
# Copyright (c) 2015 Michael Petch <mpetch@capp-sysware.com>
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program. If not, see <https://www.gnu.org/licenses/>.
#
# As a special exception, the respective Autoconf Macro's copyright owner
# gives unlimited permission to copy, distribute and modify the configure
# scripts that are the output of Autoconf when processing the Macro. You
# need not follow the terms of the GNU General Public License when using
# or distributing such scripts, even though portions of the text of the
# Macro appear in them. The GNU General Public License (GPL) does govern
# all other use of the material that constitutes the Autoconf Macro.
#
# This special exception to the GPL applies to versions of the Autoconf
# Macro released by the Autoconf Archive. When you make and distribute a
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
#serial 10
# AX_GCC_X86_CPUID(OP)
# --------------------
# Convenience wrapper: forwards OP to AX_GCC_X86_CPUID_COUNT with COUNT fixed
# to 0, so the result ends up in the cache variable ax_cv_gcc_x86_cpuid_OP.
AC_DEFUN([AX_GCC_X86_CPUID],
[AX_GCC_X86_CPUID_COUNT($1, 0)
])
# AX_GCC_X86_CPUID_COUNT(OP, COUNT)
# ---------------------------------
# Compiles and runs a small C program that executes cpuid with eax = OP and
# ecx = COUNT and writes the resulting registers as "eax:ebx:ecx:edx" in
# hexadecimal to the file conftest_cpuid. The content of that file becomes the
# value of the cache variable ax_cv_gcc_x86_cpuid_OP. If the program cannot be
# run successfully, or when cross-compiling, the cache variable is set to
# "unknown" instead. The temporary file conftest_cpuid is removed after a run
# attempt.
#
# The inline assembly swaps ebx with a compiler-chosen register before and
# after cpuid, so ebx is never named as a direct output operand (presumably
# because ebx can be reserved by the compiler, e.g. for PIC code -- confirm
# against upstream autoconf-archive notes).
#
# NOTE: the cache variable name is built from OP only; two calls with the same
# OP but different COUNT values therefore share one cache entry.
AC_DEFUN([AX_GCC_X86_CPUID_COUNT],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
[AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
int op = $1, level = $2, eax, ebx, ecx, edx;
FILE *f;
__asm__ __volatile__ ("xchg %%ebx, %1\n"
"cpuid\n"
"xchg %%ebx, %1\n"
: "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
: "a" (op), "2" (level));
f = fopen("conftest_cpuid", "w"); if (!f) return 1;
fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
fclose(f);
return 0;
])],
[ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
[ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])
......@@ -80,7 +80,7 @@ module elpa2_impl
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpasolve_evp_real_2stage_double_impl: Fortran function to solve the double-precision real eigenvalue problem with a 2 stage approach
!> \brief elpa_solve_evp_real_2stage_double_impl: Fortran function to solve the double-precision real eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
......
......@@ -49,6 +49,9 @@
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#include "elpa/elpa_simd_constants.h"
function elpa_solve_evp_&
&MATH_DATATYPE&
&_&
......@@ -64,7 +67,9 @@
use cuda_functions
use mod_check_for_gpu
use elpa_omp
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
use simd_kernel
#endif
use iso_c_binding
implicit none
#include "../general/precision_kinds.F90"
......@@ -74,14 +79,14 @@
logical :: useQR
logical :: useQRActual
#endif
integer(kind=c_int) :: kernel
integer(kind=c_int) :: kernel, kernelByUser
#ifdef USE_ASSUMED_SIZE
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,*)
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, intent(out), target :: q(obj%local_nrows,*)
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, intent(out), target :: q(obj%local_nrows,*)
#else
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,obj%local_ncols)
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, intent(out), target :: q(obj%local_nrows,obj%local_ncols)
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, intent(out), target :: q(obj%local_nrows,obj%local_ncols)
#endif
real(kind=C_DATATYPE_KIND), intent(inout) :: ev(obj%na)
MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable :: hh_trans(:,:)
......@@ -124,6 +129,12 @@
do_trans_to_band, do_trans_to_full
integer(kind=ik) :: nrThreads
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
integer(kind=c_int) :: simdSetAvailable(NUMBER_OF_INSTR)
#endif
#if REALCASE == 1
#undef GPU_KERNEL
#undef GENERIC_KERNEL
......@@ -377,6 +388,88 @@
#endif
! consistency check: is user set kernel still identical with "kernel" or did
! we change it above? This is a mess and should be cleaned up
call obj%get(KERNEL_STRING,kernelByUser,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
if (kernelByUser .ne. kernel) then
call obj%set(KERNEL_STRING, kernel, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
endif
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
! find a kernel which is supported on all used CPUs
! at the moment this works only on Intel CPUs
simdSetAvailable(:) = 0
call get_cpuid_set(simdSetAvailable, NUMBER_OF_INSTR)
#ifdef WITH_MPI
call MPI_ALLREDUCE(mpi_in_place, simdSetAvailable, NUMBER_OF_INSTR, MPI_INTEGER, MPI_BAND, mpi_comm_all, mpierr)
#endif
! compare user chosen kernel with possible kernels
call obj%get(KERNEL_STRING,kernelByUser,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
! map kernel to SIMD set, and check whether this set is available on all cores
#if REALCASE == 1
if (simdSetAvailable(map_real_kernel_to_simd_instruction(kernelByUser)) /= 1) then
#endif
#if COMPLEXCASE == 1
if (simdSetAvailable(map_complex_kernel_to_simd_instruction(kernelByUser)) /=1) then
#endif
! if we are not purely running on Intel CPUs, this feature does not work at the moment
! this restriction should be lifted step by step
if (simdSetAvailable(CPU_MANUFACTURER) /= 1) then
if (my_pe == 0 ) then
write(error_unit,*) "You enabled the experimental feature of an heterogenous cluster support."
write(error_unit,*) "However, this works at the moment only if ELPA is run on (different) Intel CPUs!"
write(error_unit,*) "ELPA detected also non Intel-CPUs, and will this abort now"
stop
endif
else
if (my_pe == 0 ) then
write(error_unit,*) "The ELPA 2stage kernel of your choice, cannot be run on all CPUs"
write(error_unit,*) "ELPA will use another kernel..."
endif
! find best kernel available for supported instruction sets
do i = NUMBER_OF_INSTR, 2, -1
if (simdSetAvailable(i) == 1) then
! map to "best" kernel with this instruction set
! this can be only done for kernels that ELPA has been configured to use
#if REALCASE == 1
kernel = map_simd_instruction_to_real_kernel(i)
#endif
#if COMPLEXCASE == 1
kernel = map_simd_instruction_to_complex_kernel(i)
#endif
if (obj%can_set(KERNEL_STRING, kernel) == ELPA_OK) then
call obj%set(KERNEL_STRING, kernel, error)
if (error .ne. ELPA_OK) then
print *,"Problem setting option. Aborting..."
stop
endif
if (my_pe == 0 ) write(error_unit,*) "ELPA decided to use ",elpa_int_value_to_string(KERNEL_STRING, kernel)
exit
endif
endif
enddo
endif
endif
#endif /* HAVE_HETEROGENOUS_CLUSTER_SUPPORT */
#if REALCASE == 1
call obj%get("qr",qr,error)
......
......@@ -1799,7 +1799,6 @@ module elpa_impl
#endif
end select
!print *, "testing, before C call, ts_impl%current is ", ts_impl%current
if (elpa_index_load_autotune_state_c(self%index, ts_impl%level, ts_impl%domain, ts_impl%min_loc, &
ts_impl%min_val, ts_impl%current, ts_impl%cardinality, file_name // c_null_char) /= 1) then
......@@ -1812,7 +1811,6 @@ module elpa_impl
error = ELPA_ERROR_CANNOT_OPEN_FILE
#endif
endif
!print *, "testing, after C call, ts_impl%current is ", ts_impl%current
end subroutine
......
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen ,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
#include "config.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include "elpa/elpa_simd_constants.h"
/*
 * Determine whether the executing CPU identifies itself as an Intel CPU.
 *
 * Executes CPUID leaf 0 and compares the 12-byte vendor identification
 * string against "GenuineIntel".
 *
 * set: result array; only the entry set[CPU_MANUFACTURER - 1] is written
 *      (1 if the vendor string is "GenuineIntel", 0 otherwise).
 */
static inline void get_cpu_manufacturer(int *set)
{
  /* registers[0..3] receive eax, ebx, edx, ecx. CPUID leaf 0 reports the
     vendor string in the order ebx, edx, ecx, so registers[1..3] hold the
     12 characters contiguously. Plain unsigned int is used instead of the
     non-standard u_int32_t typedef. */
  unsigned int registers[4] = {0, 0, 0, 0};
  const char intel_id[] = "GenuineIntel";
  char manufacturer[13];

  /* eax = 0 selects leaf 0; ecx is tied to registers[3], which is zero on
     input, so no uninitialised value is passed to the instruction. */
  asm volatile("cpuid"
               : "=a" (registers[0]), "=b" (registers[1]), "=c" (registers[3]), "=d" (registers[2])
               : "0" (registers[0]), "2" (registers[3])
               : "memory");

  /* copy from the ADDRESS of registers[1], not from its value */
  memcpy(manufacturer, &registers[1], 12);
  manufacturer[12] = '\0';

  set[CPU_MANUFACTURER - 1] = (strcmp(manufacturer, intel_id) == 0) ? 1 : 0;
}
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
#include <cpuid.h>
/*
 * Query CPUID leaf InfoType with sub-leaf 0 and store the resulting
 * eax, ebx, ecx and edx register values in info[0], info[1], info[2] and
 * info[3], respectively.
 */
void cpuid(int info[4], int InfoType){
  int eax, ebx, ecx, edx;

  __cpuid_count(InfoType, 0, eax, ebx, ecx, edx);

  info[0] = eax;
  info[1] = ebx;
  info[2] = ecx;
  info[3] = edx;
}
#endif
/*
!f>#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
!f> interface
!f> subroutine get_cpuid_set(simdSet, n) &
!f> bind(C, name="get_cpuid_set")
!f> use, intrinsic :: iso_c_binding
!f> integer(kind=c_int), value :: n
!f> integer(kind=c_int) :: simdSet(n)
!f> end subroutine
!f> end interface
!f>#endif
*/
void get_cpuid_set(int *set, int nlength){
get_cpu_manufacturer(set);
// Code below taken from http://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set/7495023#7495023
// Misc.
bool HW_MMX;
bool HW_x64;
bool HW_ABM; // Advanced Bit Manipulation
bool HW_RDRAND;
bool HW_BMI1;
bool HW_BMI2;
bool HW_ADX;
bool HW_PREFETCHWT1;
// SIMD: 128-bit
bool HW_SSE;
bool HW_SSE2;
bool HW_SSE3;
bool HW_SSSE3;
bool HW_SSE41;
bool HW_SSE42;
bool HW_SSE4a;
bool HW_AES;
bool HW_SHA;
// SIMD: 256-bit
bool HW_AVX;
bool HW_XOP;
bool HW_FMA3;
bool HW_FMA4;
bool HW_AVX2;
// SIMD: 512-bit
bool HW_AVX512F; // AVX512 Foundation
bool HW_AVX512CD; // AVX512 Conflict Detection
bool HW_AVX512PF; // AVX512 Prefetch
bool HW_AVX512ER; // AVX512 Exponential + Reciprocal
bool HW_AVX512VL; // AVX512 Vector Length Extensions
bool HW_AVX512BW; // AVX512 Byte + Word
bool HW_AVX512DQ; // AVX512 Doubleword + Quadword
bool HW_AVX512IFMA; // AVX512 Integer 52-bit Fused Multiply-Add
bool HW_AVX512VBMI; // AVX512 Vector Byte Manipulation Instructions
int info[4];
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
cpuid(info, 0);
int nIds = info[0];
cpuid(info, 0x80000000);
unsigned nExIds = info[0];
#endif
// Detect Features
if (nIds >= 0x00000001){
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
cpuid(info,0x00000001);
#endif
HW_MMX = (info[3] & ((int)1 << 23)) != 0;
HW_SSE = (info[3] & ((int)1 << 25)) != 0;
HW_SSE2 = (info[3] & ((int)1 << 26)) != 0;
HW_SSE3 = (info[2] & ((int)1 << 0)) != 0;
HW_SSSE3 = (info[2] & ((int)1 << 9)) != 0;
HW_SSE41 = (info[2] & ((int)1 << 19)) != 0;
HW_SSE42 = (info[2] & ((int)1 << 20)) != 0;
HW_AES = (info[2] & ((int)1 << 25)) != 0;
HW_AVX = (info[2] & ((int)1 << 28)) != 0;
HW_FMA3 = (info[2] & ((int)1 << 12)) != 0;
HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
}
if (nIds >= 0x00000007){
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
cpuid(info,0x00000007);
#endif
HW_AVX2 = (info[1] & ((int)1 << 5)) != 0;
HW_BMI1 = (info[1] & ((int)1 << 3)) != 0;
HW_BMI2 = (info[1] & ((int)1 << 8)) != 0;
HW_ADX = (info[1] & ((int)1 << 19)) != 0;
HW_SHA = (info[1] & ((int)1 << 29)) != 0;
HW_PREFETCHWT1 = (info[2] & ((int)1 << 0)) != 0;
HW_AVX512F = (info[1] & ((int)1 << 16)) != 0;
HW_AVX512CD = (info[1] & ((int)1 << 28)) != 0;
HW_AVX512PF = (info[1] & ((int)1 << 26)) != 0;
HW_AVX512ER = (info[1] & ((int)1 << 27)) != 0;
HW_AVX512VL = (info[1] & ((int)1 << 31)) != 0;
HW_AVX512BW = (info[1] & ((int)1 << 30)) != 0;
HW_AVX512DQ = (info[1] & ((int)1 << 17)) != 0;
HW_AVX512IFMA = (info[1] & ((int)1 << 21)) != 0;
HW_AVX512VBMI = (info[2] & ((int)1 << 1)) != 0;
}
if (nExIds >= 0x80000001){
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
cpuid(info,0x80000001);
#endif
HW_x64 = (info[3] & ((int)1 << 29)) != 0;
HW_ABM = (info[2] & ((int)1 << 5)) != 0;
HW_SSE4a = (info[2] & ((int)1 << 6)) != 0;
HW_FMA4 = (info[2] & ((int)1 << 16)) != 0;
HW_XOP = (info[2] & ((int)1 << 11)) != 0;
}