Commit b7eff19f authored by Andreas Marek
Browse files

Implement FMA for Neon

parent f859cc58
...@@ -193,6 +193,7 @@ ...@@ -193,6 +193,7 @@
#endif /* VEC_SET == 1281 */ #endif /* VEC_SET == 1281 */
   
#if VEC_SET == NEON_ARCH64_128 #if VEC_SET == NEON_ARCH64_128
#define __ELPA_USE_FMA__
#ifdef DOUBLE_PRECISION_REAL #ifdef DOUBLE_PRECISION_REAL
#define offset 2 #define offset 2
#define __SIMD_DATATYPE __Float64x2_t #define __SIMD_DATATYPE __Float64x2_t
...@@ -202,7 +203,9 @@ ...@@ -202,7 +203,9 @@
#define _SIMD_MUL vmulq_f64 #define _SIMD_MUL vmulq_f64
#define _SIMD_SUB vsubq_f64 #define _SIMD_SUB vsubq_f64
#define _SIMD_NEG vnegq_f64 #define _SIMD_NEG vnegq_f64
// FMA #define _SIMD_FMA(a, b, c) vfmaq_f64(c ,b, a)
#define _SIMD_NFMA(a, b, c) vnegq_f64(vfmaq_f64(c ,b, a))
#define _SIMD_FMSUB(a, b, c) vfmsq_f64(c, b, a)
//#define _SIMD_XOR _mm_xor_pd //#define _SIMD_XOR _mm_xor_pd
#define _SIMD_SET1 vdupq_n_f64 #define _SIMD_SET1 vdupq_n_f64
#endif /* DOUBLE_PRECISION_REAL */ #endif /* DOUBLE_PRECISION_REAL */
...@@ -215,7 +218,9 @@ ...@@ -215,7 +218,9 @@
#define _SIMD_MUL vmulq_f32 #define _SIMD_MUL vmulq_f32
#define _SIMD_SUB vsubq_f32 #define _SIMD_SUB vsubq_f32
#define _SIMD_NEG vnegq_f32 #define _SIMD_NEG vnegq_f32
//FMA #define _SIMD_FMA(a, b, c) vfmaq_f32(c ,b, a)
#define _SIMD_NFMA(a, b, c) vnegq_f32(vfmaq_f32(c ,b, a))
#define _SIMD_FMSUB(a, b, c) vfmsq_f32(c, b, a)
//#define _SIMD_XOR _mm_xor_ps //#define _SIMD_XOR _mm_xor_ps
#define _SIMD_SET1 vdupq_n_f32 #define _SIMD_SET1 vdupq_n_f32
#endif /* SINGLE_PRECISION_REAL */ #endif /* SINGLE_PRECISION_REAL */
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment