get_cpuid_set.c 6.67 KB
Newer Older
Andreas Marek's avatar
Andreas Marek committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
//    This file is part of ELPA.
//
//    The ELPA library was originally created by the ELPA consortium,
//    consisting of the following organizations:
//
//    - Max Planck Computing and Data Facility (MPCDF), formerly known as
//      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
//    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
//      Informatik,
//    - Technische Universität München, Lehrstuhl für Informatik mit
//      Schwerpunkt Wissenschaftliches Rechnen ,
//    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
//    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
//      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
//      and
//    - IBM Deutschland GmbH
//
//
//    This particular source code file contains additions, changes and
//    enhancements authored by Intel Corporation which is not part of
//    the ELPA consortium.
//
//    More information can be found here:
//    http://elpa.mpcdf.mpg.de/
//
//    ELPA is free software: you can redistribute it and/or modify
//    it under the terms of the version 3 of the license of the
//    GNU Lesser General Public License as published by the Free
//    Software Foundation.
//
//    ELPA is distributed in the hope that it will be useful,
//    but WITHOUT ANY WARRANTY; without even the implied warranty of
//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//    GNU Lesser General Public License for more details.
//
//    You should have received a copy of the GNU Lesser General Public License
//    along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
//    ELPA reflects a substantial effort on the part of the original
//    ELPA consortium, and we ask you to respect the spirit of the
//    license that we chose: i.e., please contribute any changes you
//    may have back to the original ELPA library distribution, and keep
//    any derivatives of ELPA under the same license that we chose for
//    the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF

#include "config.h"

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>

#include "elpa/elpa_simd_constants.h"

static inline void get_cpu_manufacturer(int *set)
{
  u_int32_t registers[4];
  registers[0] = 0;
  asm volatile("cpuid": "=a" (registers[0]),"=b" (registers[1]),"=c" (registrers[3]),"=d" (registers[2]): "0" (registers[0]), "2" (registers[2]): "memory");

  char str[13]="GenuineIntel\0";
  char manufacturer[13];

  memcpy(manufacturer, registers[1], 12); 
  manufacturer[12] = '\0';

  if (strcmp(manufacturer, str) == 0) {
    set[CPU_MANUFACTURER - 1] = 1;
  } else { 
    set[CPU_MANUFACTURER - 1] = 0;
  }
}

#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
#include <cpuid.h>
void cpuid(int info[4], int InfoType){
    __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
}
#endif

/*
!f>#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
!f> interface
!f>   subroutine get_cpuid_set(simdSet, n) &
!f>              bind(C, name="get_cpuid_set")
!f>     use, intrinsic :: iso_c_binding
!f>     integer(kind=c_int), value :: n
!f>     integer(kind=c_int)        :: simdSet(n)
!f>   end subroutine
!f> end interface
!f>#endif
*/
void get_cpuid_set(int *set, int nlength){

  get_cpu_manufacturer(set);


  // Code below taken from http://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set/7495023#7495023

  //  Misc.
  bool HW_MMX;
  bool HW_x64;
  bool HW_ABM;      // Advanced Bit Manipulation
  bool HW_RDRAND;
  bool HW_BMI1;
  bool HW_BMI2;
  bool HW_ADX;
  bool HW_PREFETCHWT1;
  
  //  SIMD: 128-bit
  bool HW_SSE;
  bool HW_SSE2;
  bool HW_SSE3;
  bool HW_SSSE3;
  bool HW_SSE41;
  bool HW_SSE42;
  bool HW_SSE4a;
  bool HW_AES;
  bool HW_SHA;
  
  //  SIMD: 256-bit
  bool HW_AVX;
  bool HW_XOP;
  bool HW_FMA3;
  bool HW_FMA4;
  bool HW_AVX2;
  //  SIMD: 512-bit
  bool HW_AVX512F;    //  AVX512 Foundation
  bool HW_AVX512CD;   //  AVX512 Conflict Detection
  bool HW_AVX512PF;   //  AVX512 Prefetch
  bool HW_AVX512ER;   //  AVX512 Exponential + Reciprocal
  bool HW_AVX512VL;   //  AVX512 Vector Length Extensions
  bool HW_AVX512BW;   //  AVX512 Byte + Word
  bool HW_AVX512DQ;   //  AVX512 Doubleword + Quadword
  bool HW_AVX512IFMA; //  AVX512 Integer 52-bit Fused Multiply-Add
  bool HW_AVX512VBMI; //  AVX512 Vector Byte Manipulation Instructions
  
  int info[4];
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT

  cpuid(info, 0);
  int nIds = info[0];
  
  cpuid(info, 0x80000000);
  unsigned nExIds = info[0];
#endif  
  //  Detect Features
  if (nIds >= 0x00000001){
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
    cpuid(info,0x00000001);
#endif
    HW_MMX    = (info[3] & ((int)1 << 23)) != 0;
    HW_SSE    = (info[3] & ((int)1 << 25)) != 0;
    HW_SSE2   = (info[3] & ((int)1 << 26)) != 0;
    HW_SSE3   = (info[2] & ((int)1 <<  0)) != 0;

    HW_SSSE3  = (info[2] & ((int)1 <<  9)) != 0;
    HW_SSE41  = (info[2] & ((int)1 << 19)) != 0;
    HW_SSE42  = (info[2] & ((int)1 << 20)) != 0;
    HW_AES    = (info[2] & ((int)1 << 25)) != 0;

    HW_AVX    = (info[2] & ((int)1 << 28)) != 0;
    HW_FMA3   = (info[2] & ((int)1 << 12)) != 0;
    HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
  }
  if (nIds >= 0x00000007){
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
    cpuid(info,0x00000007);
#endif
    HW_AVX2   = (info[1] & ((int)1 <<  5)) != 0;

    HW_BMI1        = (info[1] & ((int)1 <<  3)) != 0;
    HW_BMI2        = (info[1] & ((int)1 <<  8)) != 0;
    HW_ADX         = (info[1] & ((int)1 << 19)) != 0;
    HW_SHA         = (info[1] & ((int)1 << 29)) != 0;
    HW_PREFETCHWT1 = (info[2] & ((int)1 <<  0)) != 0;

    HW_AVX512F     = (info[1] & ((int)1 << 16)) != 0;
    HW_AVX512CD    = (info[1] & ((int)1 << 28)) != 0;
    HW_AVX512PF    = (info[1] & ((int)1 << 26)) != 0;
    HW_AVX512ER    = (info[1] & ((int)1 << 27)) != 0;
    HW_AVX512VL    = (info[1] & ((int)1 << 31)) != 0;
    HW_AVX512BW    = (info[1] & ((int)1 << 30)) != 0;
    HW_AVX512DQ    = (info[1] & ((int)1 << 17)) != 0;
    HW_AVX512IFMA  = (info[1] & ((int)1 << 21)) != 0;
    HW_AVX512VBMI  = (info[2] & ((int)1 <<  1)) != 0;
  }

  if (nExIds >= 0x80000001){
#ifdef HAVE_HETEROGENOUS_CLUSTER_SUPPORT
    cpuid(info,0x80000001);
#endif
    HW_x64   = (info[3] & ((int)1 << 29)) != 0;
    HW_ABM   = (info[2] & ((int)1 <<  5)) != 0;
    HW_SSE4a = (info[2] & ((int)1 <<  6)) != 0;
    HW_FMA4  = (info[2] & ((int)1 << 16)) != 0;
    HW_XOP   = (info[2] & ((int)1 << 11)) != 0;
  }

  //allways allow GENERIC
  set[GENERIC_INSTR -1] =1;

  // the rest depends on the CPU
  if (HW_SSE42) {
    set[SSE_INSTR - 1] = 1;
  }
  if (HW_AVX) {
    set[AVX_INSTR - 1] = 1;
  }
  if (HW_AVX2) {
    set[AVX2_INSTR - 1] = 1;
  }
  if (HW_AVX512F) {
    set[AVX512_INSTR -1] = 1;
  }

}