Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Sebastian Ohlmann
elpa
Commits
e892f315
Commit
e892f315
authored
Jun 19, 2019
by
Andreas Marek
Browse files
Start to cleanup
parent
fc609543
Changes
1
Hide whitespace changes
Inline
Side-by-side
src/elpa2/kernels/complex_128bit_256bit_512bit_BLOCK_template.c
View file @
e892f315
...
...
@@ -826,37 +826,12 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
for
(
i
=
0
;
i
<
nq
-
UPPER_BOUND
;
i
+=
STEP_SIZE
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
);
worked_on
+=
ROW_LENGTH
;
}
//#else
// for (i = 0; i < nq - UPPER_BOUND; i+= STEP_SIZE)
// {
//
//#ifdef DOUBLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+4], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+8], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+12], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+16], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+20], hh, nb, ldq);
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+8], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+16], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+24], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+32], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+40], hh, nb, ldq);
//#endif
// worked_on += ROW_LENGTH;
// }
//#endif
if
(
nq
==
i
)
{
return
;
...
...
@@ -892,33 +867,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
if
(
nq
-
i
==
ROW_LENGTH
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
);
worked_on
+=
ROW_LENGTH
;
}
//#else
// if (nq-i == ROW_LENGTH)
// {
//#ifdef DOUBLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+4], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+8], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+12], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+16], hh, nb, ldq);
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+8], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+16], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+24], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+32], hh, nb, ldq);
//#endif
// worked_on += ROW_LENGTH;
// }
//#endif
#if VEC_SET == SSE_128
#undef ROW_LENGTH
...
...
@@ -950,32 +903,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
if
(
nq
-
i
==
ROW_LENGTH
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
);
worked_on
+=
ROW_LENGTH
;
}
//#else
// if (nq-i == ROW_LENGTH)
// {
//#ifdef DOUBLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+4], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+8], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+12], hh, nb, ldq);
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+8], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+16], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+24], hh, nb, ldq);
//#endif
//
// worked_on += ROW_LENGTH;
// }
//#endif
#if VEC_SET == SSE_128
#undef ROW_LENGTH
...
...
@@ -1007,28 +939,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
if
(
nq
-
i
==
ROW_LENGTH
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
);
worked_on
+=
ROW_LENGTH
;
}
//#else
// if (nq-i == ROW_LENGTH)
// {
//#ifdef DOUBLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+4], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+8], hh, nb, ldq);
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+8], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+16], hh, nb, ldq);
//#endif
// worked_on += ROW_LENGTH;
// }
//#endif
#if VEC_SET == SSE_128
#undef ROW_LENGTH
...
...
@@ -1060,26 +975,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
if
(
nq
-
i
==
ROW_LENGTH
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
);
worked_on
+=
ROW_LENGTH
;
}
//#else
// if (nq-i == ROW_LENGTH)
// {
//#ifdef DOUBLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i+4], hh, nb, ldq);
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i], hh, nb, ldq);
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i+8], hh, nb, ldq);
//#endif
// worked_on += ROW_LENGTH;
// }
//#endif
#if VEC_SET == SSE_128
#undef ROW_LENGTH
...
...
@@ -1111,24 +1011,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
if
(
nq
-
i
==
ROW_LENGTH
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
);
worked_on
+=
ROW_LENGTH
;
}
//#else
// if (nq-i == ROW_LENGTH)
// {
//#ifdef DOUBLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_4_AVX512_1hv_double (&q[i], hh, nb, ldq);
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// hh_trafo_complex_kernel_8_AVX512_1hv_single (&q[i], hh, nb, ldq);
//#endif
// worked_on += ROW_LENGTH;
// }
//#endif
#endif
/* BLOCK1 */
...
...
@@ -1176,36 +1063,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
for
(
i
=
0
;
i
<
nq
-
UPPER_BOUND
;
i
+=
STEP_SIZE
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
,
ldh
,
s
);
worked_on
+=
ROW_LENGTH
;
}
//#endif
//#if VEC_SET == AVX_512
//#ifdef DOUBLE_PRECISION_COMPLEX
// for (i = 0; i < nq - 12; i+=16)
// {
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i+4], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i+8], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i+12], hh, nb, ldq, ldh, s);
// worked_on +=16;
// }
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// for (i = 0; i < nq - 24; i+=32)
// {
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i+8], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i+16], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i+24], hh, nb, ldq, ldh, s);
// worked_on +=32;
// }
//#endif
//#endif
if
(
nq
==
i
)
{
...
...
@@ -1242,34 +1104,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
if
(
nq
-
i
==
ROW_LENGTH
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
,
ldh
,
s
);
worked_on
+=
ROW_LENGTH
;
}
//#endif
//#if VEC_SET == AVX_512
//#ifdef DOUBLE_PRECISION_COMPLEX
// if (nq-i == 12)
// {
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i+4], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i+8], hh, nb, ldq, ldh, s);
// worked_on +=12;
// }
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// if (nq-i == 24)
// {
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i+8], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i+16], hh, nb, ldq, ldh, s);
// worked_on +=24;
// }
//#endif
//
//#endif
#if VEC_SET == SSE_128
#undef ROW_LENGTH
...
...
@@ -1301,32 +1140,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
if
(
nq
-
i
==
ROW_LENGTH
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
,
ldh
,
s
);
worked_on
+=
ROW_LENGTH
;
}
//#endif
//#if VEC_SET == AVX_512
//#ifdef DOUBLE_PRECISION_COMPLEX
// if (nq-i == 8)
// {
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i+4], hh, nb, ldq, ldh, s);
// worked_on +=8;
// }
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// if (nq-i == 16)
// {
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i], hh, nb, ldq, ldh, s);
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i+8], hh, nb, ldq, ldh, s);
// worked_on +=16;
// }
//#endif
//
//#endif
#if VEC_SET == SSE_128
#undef ROW_LENGTH
...
...
@@ -1358,30 +1176,11 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
#endif
#endif
/* VEC_SET == AVX_512 */
//#if VEC_SET != AVX_512
if
(
nq
-
i
==
ROW_LENGTH
)
{
CONCAT_8ARGS
(
hh_trafo_complex_kernel_
,
ROW_LENGTH
,
_
,
SIMD_SET
,
_
,
BLOCK
,
hv_
,
WORD_LENGTH
)
(
&
q
[
i
],
hh
,
nb
,
ldq
,
ldh
,
s
);
worked_on
+=
ROW_LENGTH
;
}
//#endif
//#if VEC_SET == AVX_512
//#ifdef DOUBLE_PRECISION_COMPLEX
// if (nq-i == 4)
// {
// hh_trafo_complex_kernel_4_AVX512_2hv_double (&q[i], hh, nb, ldq, ldh, s);
// worked_on +=4;
// }
//#endif
//#ifdef SINGLE_PRECISION_COMPLEX
// if (nq-i == 8)
// {
// hh_trafo_complex_kernel_8_AVX512_2hv_single (&q[i], hh, nb, ldq, ldh, s);
// worked_on +=8;
// }
//#endif
//
//#endif
#endif
/* BLOCK2 */
...
...
@@ -1393,7 +1192,6 @@ void CONCAT_7ARGS(PREFIX,_hh_trafo_complex_,SIMD_SET,_,BLOCK,hv_,WORD_LENGTH) (D
}
#endif
}
#if VEC_SET == SSE_128
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment