Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
9b5a1df8
Unverified
Commit
9b5a1df8
authored
Apr 18, 2016
by
Andreas Marek
Browse files
Single precision SSE BLOCK1 complex kernel
parent
cb3da78c
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
src/elpa2.F90
View file @
9b5a1df8
...
@@ -930,6 +930,8 @@ function solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
...
@@ -930,6 +930,8 @@ function solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
if
(
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_GENERIC
)
.or.
&
if
(
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_GENERIC
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK2
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE_BLOCK1
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE
)
)
then
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE
)
)
then
else
else
print
*
,
"At the moment single precision only works with the generic kernels"
print
*
,
"At the moment single precision only works with the generic kernels"
...
@@ -1255,6 +1257,8 @@ function solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
...
@@ -1255,6 +1257,8 @@ function solve_evp_complex_2stage_single(na, nev, a, lda, ev, q, ldq, nblk, &
if
(
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_GENERIC
)
.or.
&
if
(
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_GENERIC
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_GENERIC_SIMPLE
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK2
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE_BLOCK1
)
.or.
&
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE
)
)
then
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE
)
)
then
else
else
print
*
,
"At the moment single precision only works with the generic kernels"
print
*
,
"At the moment single precision only works with the generic kernels"
...
...
src/elpa2_kernels/elpa2_kernels_complex_sse_1hv_single_precision.cpp
View file @
9b5a1df8
This diff is collapsed.
Click to expand it.
src/mod_compute_hh_trafo_complex.F90
View file @
9b5a1df8
...
@@ -312,7 +312,7 @@ module compute_hh_trafo_complex
...
@@ -312,7 +312,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_BLOCK1_KERNE */
#endif /* WITH_COMPLEX_SSE_BLOCK1_KERNEL */
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL) || defined(WITH_COMPLEX_AVX2_BLOCK1_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
...
@@ -437,6 +437,33 @@ module compute_hh_trafo_complex
...
@@ -437,6 +437,33 @@ module compute_hh_trafo_complex
#else
#else
nl
=
merge
(
stripe_width
,
last_stripe_width
,
istripe
<
stripe_count
)
nl
=
merge
(
stripe_width
,
last_stripe_width
,
istripe
<
stripe_count
)
#endif
#endif
#if defined(WITH_COMPLEX_SSE_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE_BLOCK2
)
then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
ttt
=
mpi_wtime
()
do
j
=
ncols
,
2
,
-2
w
(:,
1
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
)
w
(:,
2
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-1
)
#ifdef WITH_OPENMP
call
double_hh_trafo_complex_sse_2hv_single
(
a
(
1
,
j
+
off
+
a_off
-1
,
istripe
,
my_thread
),
&
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#else
call
double_hh_trafo_complex_sse_2hv_single
(
a
(
1
,
j
+
off
+
a_off
-1
,
istripe
),
&
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#endif
enddo
#ifdef WITH_OPENMP
if
(
j
==
1
)
call
single_hh_trafo_complex_sse_1hv_single
(
a
(
1
,
1
+
off
+
a_off
,
istripe
,
my_thread
),
&
bcast_buffer
(
1
,
off
+1
),
nbw
,
nl
,
stripe_width
)
#else
if
(
j
==
1
)
call
single_hh_trafo_complex_sse_1hv_single
(
a
(
1
,
1
+
off
+
a_off
,
istripe
),
&
bcast_buffer
(
1
,
off
+1
),
nbw
,
nl
,
stripe_width
)
#endif
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_BLOCK2_KERNEL */
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL)
#if defined(WITH_COMPLEX_AVX_BLOCK2_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
...
@@ -562,6 +589,25 @@ module compute_hh_trafo_complex
...
@@ -562,6 +589,25 @@ module compute_hh_trafo_complex
! call single_hh_trafo_complex_avx_avx2_1hv_single(a(1,j+off+a_off,istripe),bcast_buffer(1,j+off),nbw,nl,stripe_width)
! call single_hh_trafo_complex_avx_avx2_1hv_single(a(1,j+off+a_off,istripe),bcast_buffer(1,j+off),nbw,nl,stripe_width)
!#endif
!#endif
#if defined(WITH_COMPLEX_SSE_BLOCK1_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_SSE_BLOCK1
)
then
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
ttt
=
mpi_wtime
()
do
j
=
ncols
,
1
,
-1
#ifdef WITH_OPENMP
call
single_hh_trafo_complex_sse_1hv_double_single
(
a
(
1
,
j
+
off
+
a_off
,
istripe
,
my_thread
),
&
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
#else
call
single_hh_trafo_complex_sse_1hv_single
(
a
(
1
,
j
+
off
+
a_off
,
istripe
),
&
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
#endif
enddo
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_SSE_BLOCK1_KERNEL */
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)
#if defined(WITH_COMPLEX_AVX_BLOCK1_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
if
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
)
then
if
(
THIS_COMPLEX_ELPA_KERNEL
.eq.
COMPLEX_ELPA_KERNEL_AVX_BLOCK1
)
then
...
@@ -579,7 +625,7 @@ module compute_hh_trafo_complex
...
@@ -579,7 +625,7 @@ module compute_hh_trafo_complex
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
#if defined(WITH_NO_SPECIFIC_COMPLEX_KERNEL)
endif
endif
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_NO_SPECIFIC_COMPLEX_KERNEL */
#endif /* WITH_COMPLEX_AVX_BLOCK1_KERNE */
#endif /* WITH_COMPLEX_AVX_BLOCK1_KERNEL */
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
if
(
my_thread
==
1
)
then
if
(
my_thread
==
1
)
then
...
...
Andreas Marek
@amarek
mentioned in issue
#17 (closed)
·
Apr 24, 2016
mentioned in issue
#17 (closed)
mentioned in issue #17
Toggle commit list
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment