Planned maintenance on Wednesday, 2021-01-20, 17:00-18:00. Expect some interruptions during that time

mod_simd_kernel.F90 8.11 KB
Newer Older
Andreas Marek's avatar
Andreas Marek committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.rzg.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF

#include "config-f90.h"
#include "elpa/elpa_simd_constants.h"

module simd_kernel
  use elpa_constants
  use iso_c_binding

  integer(kind=c_int) :: realKernels_to_simdTable(ELPA_2STAGE_NUMBER_OF_REAL_KERNELS)
  integer(kind=c_int) :: simdTable_to_realKernels(NUMBER_OF_INSTR)
  integer(kind=c_int) :: complexKernels_to_simdTable(ELPA_2STAGE_NUMBER_OF_COMPLEX_KERNELS)
  integer(kind=c_int) :: simdTable_to_complexKernels(NUMBER_OF_INSTR)

  contains

  function map_real_kernel_to_simd_instruction(kernel) result(simd_set_index)
    
    use iso_c_binding
    implicit none

    integer(kind=c_int), intent(in) :: kernel
    integer(kind=c_int)             :: simd_set_index

    realKernels_to_simdTable(ELPA_2STAGE_REAL_GENERIC)               = GENERIC_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_GENERIC_SIMPLE)        = GENERIC_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_BGP)                   = BLUEGENE_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_BGQ)                   = BLUEGENE_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_SSE_ASSEMBLY)          = SSE_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_SSE_BLOCK2)            = SSE_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_SSE_BLOCK4)            = SSE_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_SSE_BLOCK6)            = SSE_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX_BLOCK2)            = AVX_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX_BLOCK4)            = AVX_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX_BLOCK6)            = AVX_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX2_BLOCK2)           = AVX2_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX2_BLOCK4)           = AVX2_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX2_BLOCK6)           = AVX2_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX512_BLOCK2)         = AVX2_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX512_BLOCK4)         = AVX2_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_AVX512_BLOCK6)         = AVX2_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_GPU)                   = NVIDIA_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_SPARC64_BLOCK2)        = SPARC_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_SPARC64_BLOCK4)        = SPARC_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_SPARC64_BLOCK6)        = SPARC_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2)    = ARCH64_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK4)    = ARCH64_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK6)    = ARCH64_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_VSX_BLOCK2)            = VSX_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_VSX_BLOCK4)            = VSX_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_VSX_BLOCK6)            = VSX_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK4) = GENERIC_INSTR
    realKernels_to_simdTable(ELPA_2STAGE_REAL_GENERIC_SIMPLE_BLOCK6) = GENERIC_INSTR

    simd_set_index = realKernels_to_simdTable(kernel)


  end

  function map_simd_instruction_to_real_kernel(simd_set_index) result(kernel)
    
    use iso_c_binding
    implicit none


    integer(kind=c_int)                        :: kernel
    integer(kind=c_int), intent(in)            :: simd_set_index

    simdTable_to_realKernels(GENERIC_INSTR)  = ELPA_2STAGE_REAL_GENERIC
    simdTable_to_realKernels(BLUEGENE_INSTR) = ELPA_2STAGE_REAL_BGP
    simdTable_to_realKernels(SSE_INSTR)      = ELPA_2STAGE_REAL_SSE_BLOCK2
    simdTable_to_realKernels(AVX_INSTR)      = ELPA_2STAGE_REAL_AVX_BLOCK2
    simdTable_to_realKernels(AVX2_INSTR)     = ELPA_2STAGE_REAL_AVX2_BLOCK2
    simdTable_to_realKernels(AVX512_INSTR)   = ELPA_2STAGE_REAL_AVX512_BLOCK2
    simdTable_to_realKernels(NVIDIA_INSTR)   = ELPA_2STAGE_REAL_GPU
    simdTable_to_realKernels(SPARC_INSTR)    = ELPA_2STAGE_REAL_SPARC64_BLOCK2
    simdTable_to_realKernels(ARCH64_INSTR)   = ELPA_2STAGE_REAL_NEON_ARCH64_BLOCK2
    simdTable_to_realKernels(VSX_INSTR)      = ELPA_2STAGE_REAL_VSX_BLOCK2

    kernel = simdTable_to_realKernels(simd_set_index)

  end

  function map_complex_kernel_to_simd_instruction(kernel) result(simd_set_index)
    
    use iso_c_binding
    implicit none
    integer(kind=c_int), intent(in)  :: kernel
    integer(kind=c_int)              :: simd_set_index

    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_GENERIC)        = GENERIC_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_GENERIC_SIMPLE) = GENERIC_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_BGP)            = BLUEGENE_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_BGQ)            = BLUEGENE_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_SSE_ASSEMBLY)   = SSE_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_SSE_BLOCK1)     = SSE_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_SSE_BLOCK2)     = SSE_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_AVX_BLOCK1)     = AVX_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_AVX_BLOCK2)     = AVX_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_AVX2_BLOCK1)    = AVX2_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_AVX2_BLOCK2)    = AVX2_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_AVX512_BLOCK1)  = AVX512_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_AVX512_BLOCK2)  = AVX512_INSTR
    complexKernels_to_simdTable(ELPA_2STAGE_COMPLEX_GPU)            = NVIDIA_INSTR
    

    simd_set_index = complexKernels_to_simdTable(kernel)

  end

  function map_simd_instruction_to_complex_kernel(simd_set_index) result(kernel)
    
    use iso_c_binding
    implicit none
    integer(kind=c_int)              :: kernel
    integer(kind=c_int), intent(in)  :: simd_set_index

    simdTable_to_complexKernels(GENERIC_INSTR) = ELPA_2STAGE_COMPLEX_GENERIC
    simdTable_to_complexKernels(BLUEGENE_INSTR) = ELPA_2STAGE_COMPLEX_BGP
    simdTable_to_complexKernels(SSE_INSTR) = ELPA_2STAGE_COMPLEX_SSE_BLOCK1
    simdTable_to_complexKernels(AVX_INSTR) = ELPA_2STAGE_COMPLEX_AVX_BLOCK1
    simdTable_to_complexKernels(AVX2_INSTR) = ELPA_2STAGE_COMPLEX_AVX2_BLOCK1
    simdTable_to_complexKernels(AVX512_INSTR) = ELPA_2STAGE_COMPLEX_AVX512_BLOCK1
    simdTable_to_complexKernels(NVIDIA_INSTR) = ELPA_2STAGE_COMPLEX_GPU

    kernel = simdTable_to_complexKernels(simd_set_index)

  end

end module