Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
13
Issues
13
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
5822cb18
Commit
5822cb18
authored
Jul 12, 2019
by
Andreas Marek
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
More work on complex NEON kernels
parent
f1c0480f
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
21 additions
and
9 deletions
+21
-9
src/elpa2/kernels/complex_128bit_256bit_512bit_BLOCK_template.c
...pa2/kernels/complex_128bit_256bit_512bit_BLOCK_template.c
+21
-9
No files found.
src/elpa2/kernels/complex_128bit_256bit_512bit_BLOCK_template.c
View file @
5822cb18
...
...
@@ -165,6 +165,11 @@
#if VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_COMPLEX
__Float64x2_t
vaddsubq_f64
(
__Float64x2_t
a
,
__Float64x2_t
b
){
return
vaddq_f64
(
a
,
vsetq_lane_f64
(
-
vgetq_lane_f64
(
b
,
1
),
b
,
1
));
}
#define offset 2
#define __SIMD_DATATYPE __Float64x2_t
#define _SIMD_LOAD vld1q_f64
...
...
@@ -175,7 +180,7 @@
#define _SIMD_ADD vaddq_f64
//#define _SIMD_XOR _mm_xor_pd
#define _SIMD_NEG vnegq_f64
#define _SIMD_ADDSUB
_mm_addsub_pd
#define _SIMD_ADDSUB
vaddsubq_f64
#define _SIMD_SHUFFLE _mm_shuffle_pd
#define _SHUFFLE _MM_SHUFFLE2(0,1)
...
...
@@ -185,16 +190,23 @@
#endif
/* DOUBLE_PRECISION_COMPLEX */
#ifdef SINGLE_PRECISION_COMPLEX
__Float32x4_t
vaddsubq_f32
(
__Float32x4_t
a
,
__Float32x4_t
b
){
b
=
vsetq_lane_f32
(
-
vgetq_lane_f32
(
b
,
1
),
b
,
1
);
b
=
vsetq_lane_f32
(
-
vgetq_lane_f32
(
b
,
3
),
b
,
3
);
return
vaddq_f32
(
a
,
b
);
}
#define offset 4
#define __SIMD_DATATYPE __
m128
#define _SIMD_LOAD _
mm_load_ps
#define _SIMD_LOADU _
mm_loadu_ps
#define _SIMD_STORE _
mm_store_ps
#define _SIMD_STOREU _
mm_storeu_ps
#define _SIMD_MUL _
mm_mul_ps
#define _SIMD_ADD _
mm_add_ps
/*
 * Single-precision NEON mappings for the generic SIMD macros.
 *
 * The intrinsic names carry no leading underscore (vld1q_f32, not
 * _vld1q_f32), matching the double-precision mappings in this file
 * (vld1q_f64, vaddq_f64). Only the vector type name starts with "__".
 * The aligned and unaligned load/store macros map to the same intrinsic here.
 */
#define __SIMD_DATATYPE __Float32x4_t
#define _SIMD_LOAD vld1q_f32
#define _SIMD_LOADU vld1q_f32
#define _SIMD_STORE vst1q_f32
#define _SIMD_STOREU vst1q_f32
#define _SIMD_MUL vmulq_f32
#define _SIMD_ADD vaddq_f32
//#define _SIMD_XOR _mm_xor_ps
#define _SIMD_ADDSUB
_mm_addsub_ps
#define _SIMD_ADDSUB
vaddsubq_f32
#define _SIMD_SHUFFLE _mm_shuffle_ps
#define _SHUFFLE 0xb1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment