Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
ab215a89
Commit
ab215a89
authored
May 29, 2019
by
Andreas Marek
Browse files
Fix some errors in real Neon
parent
4d0b0ab1
Changes
3
Hide whitespace changes
Inline
Side-by-side
configure.ac
View file @
ab215a89
...
...
@@ -227,9 +227,9 @@ fi
dnl check which MPI binary invokes an MPI job
if test x"$with_mpi" = x"yes"; then
AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun poe runjob srun], [no])
AC_CHECK_PROGS([MPI_BINARY], [mpiexec.hydra mpiexec mpirun poe runjob srun aprun], [no])
if test x"$MPI_BINARY" = x"no"; then
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun])
AC_MSG_ERROR([Could not find either of the MPI binaries: mpiexec.hydra, mpiexec, mpirun, poe, runjob, srun, aprun])
fi
fi
...
...
@@ -929,7 +929,7 @@ if test x"${need_neon_arch64}" = x"yes"; then
#include <arm_neon.h>
int main(int argc, char **argv) {
__Float64x2_t x1, x2, x3, x4;
x4 = vfmaq_64(x1, x2, x3);
x4 = vfmaq_f64(x1, x2, x3);
return 0;
}
])],
...
...
src/elpa2/compute_hh_trafo.F90
View file @
ab215a89
...
...
@@ -2192,7 +2192,7 @@
#else
call quad_hh_trafo_&
&MATH_DATATYPE&
&neon_arch64_4hv_&
&_neon_arch64_4hv_&
&PRECISION&
& (c_loc(a(1,jj+off+a_off-3,istripe)), w, &
nbw, nl, stripe_width, nbw)
...
...
src/elpa2/kernels/real_128bit_256bit_512bit_BLOCK_template.c
View file @
ab215a89
...
...
@@ -381,7 +381,7 @@
#endif
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
//Forward declaration
#ifdef DOUBLE_PRECISION_REAL
#undef ROW_LENGTH
...
...
@@ -391,7 +391,7 @@
#undef ROW_LENGTH
#define ROW_LENGTH 4
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -425,7 +425,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
DATA_TYPE_PTR scalarprods);
#endif
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#undef ROW_LENGTH
#define ROW_LENGTH 4
...
...
@@ -434,7 +434,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#define ROW_LENGTH 8
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -468,7 +468,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
DATA_TYPE_PTR scalarprods);
#endif
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#undef ROW_LENGTH
#define ROW_LENGTH 6
...
...
@@ -477,7 +477,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#define ROW_LENGTH 12
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -512,7 +512,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
DATA_TYPE_PTR scalarprods);
#endif
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#undef ROW_LENGTH
#define ROW_LENGTH 8
...
...
@@ -521,7 +521,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#define ROW_LENGTH 16
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -556,7 +556,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
DATA_TYPE_PTR scalarprods);
#endif
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#undef ROW_LENGTH
#define ROW_LENGTH 10
...
...
@@ -565,7 +565,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#define ROW_LENGTH 20
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -600,7 +600,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
DATA_TYPE_PTR scalarprods);
#endif
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#undef ROW_LENGTH
#define ROW_LENGTH 12
...
...
@@ -609,7 +609,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#define ROW_LENGTH 24
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -982,7 +982,7 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (DATA
/*
!f>#ifdef HAVE_NEON_ARCH64_SSE
!f> interface
!f> subroutine hexa_hh_trafo_real_NEON64_6hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> subroutine hexa_hh_trafo_real_NEON_ARCH64_6hv_double(q, hh, pnb, pnq, pldq, pldh) &
!f> bind(C, name="hexa_hh_trafo_real_NEON_ARCH64_6hv_double")
!f> use, intrinsic :: iso_c_binding
!f> integer(kind=c_int) :: pnb, pnq, pldq, pldh
...
...
@@ -1744,14 +1744,14 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_real_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (DATA
}
#undef ROW_LENGTH
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#define ROW_LENGTH 12
#endif
#ifdef SINGLE_PRECISION_REAL
#define ROW_LENGTH 24
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -3312,7 +3312,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#endif
#if VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
h1 = SIMD_NEG(tau1);
h1 = _SIMD_NEG(tau1);
#endif
#if VEC_SET == 512
...
...
@@ -5040,14 +5040,14 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
}
#undef ROW_LENGTH
#if VEC_SET == 128 || VEC_SET == 1281
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#define ROW_LENGTH 10
#endif
#ifdef SINGLE_PRECISION_REAL
#define ROW_LENGTH 20
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -8057,14 +8057,14 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#if VEC_SET == 128 || VEC_SET == 1281
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#define ROW_LENGTH 8
#endif
#ifdef SINGLE_PRECISION_REAL
#define ROW_LENGTH 16
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -10781,14 +10781,14 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
}
#undef ROW_LENGTH
#if VEC_SET == 128 || VEC_SET == 1281
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#define ROW_LENGTH 6
#endif
#ifdef SINGLE_PRECISION_REAL
#define ROW_LENGTH 12
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -11956,7 +11956,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#endif
#if VEC_SET == 1281 || VEC_SET == NEON_ARCH64_128
h1 = _(tau1);
h1 = _SIMD_NEG(tau1);
#endif
#if VEC_SET == 512
...
...
@@ -13238,7 +13238,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#if VEC_SET == 128 || VEC_SET == 1281
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#undef ROW_LENGTH
#define ROW_LENGTH 4
...
...
@@ -13247,7 +13247,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#define ROW_LENGTH 8
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
@@ -15409,7 +15409,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
}
#undef ROW_LENGTH
#if VEC_SET == 128 || VEC_SET == 1281
#if VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128
#ifdef DOUBLE_PRECISION_REAL
#undef ROW_LENGTH
#define ROW_LENGTH 2
...
...
@@ -15418,7 +15418,7 @@ __forceinline void CONCAT_8ARGS(hh_trafo_kernel_,ROW_LENGTH,_,SIMD_SET,_,BLOCK,h
#undef ROW_LENGTH
#define ROW_LENGTH 4
#endif
#endif /* VEC_SET == 128 || VEC_SET == 1281 */
#endif /* VEC_SET == 128 || VEC_SET == 1281 || VEC_SET == 1282 || VEC_SET == NEON_ARCH64_128 */
#if VEC_SET == 256
#ifdef DOUBLE_PRECISION_REAL
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment