Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
26401f3b
Commit
26401f3b
authored
Nov 20, 2017
by
Andreas Marek
Browse files
Fix real block6 kernel on power8
parent
e13b6e7f
Changes
6
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
26401f3b
...
...
@@ -250,13 +250,13 @@ endif
# libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_4hv_single_precision.c
#endif
#endif
#
#
if WITH_REAL_VSX_BLOCK4_KERNEL
#
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_4hv_double_precision.c
#
if WANT_SINGLE_PRECISION_REAL
#
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_4hv_single_precision.c
#
endif
#
endif
if
WITH_REAL_VSX_BLOCK4_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_4hv_double_precision.c
if
WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_4hv_single_precision.c
endif
endif
if
WITH_REAL_SSE_BLOCK4_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sse_4hv_double_precision.c
...
...
@@ -286,19 +286,19 @@ if WANT_SINGLE_PRECISION_REAL
endif
endif
#
if WITH_REAL_SPARC64_BLOCK6_KERNEL
#
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_6hv_double_precision.c
#
if WANT_SINGLE_PRECISION_REAL
#
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_sparc64_6hv_single_precision.c
#
endif
#
endif
#
#
if WITH_REAL_VSX_BLOCK6_KERNEL
#
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_6hv_double_precision.c
#
if WANT_SINGLE_PRECISION_REAL
#
libelpa@SUFFIX@_private_la_SOURCES += src/elpa2/kernels/real_vsx_6hv_single_precision.c
#
endif
#
endif
if
WITH_REAL_SPARC64_BLOCK6_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sparc64_6hv_double_precision.c
if
WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sparc64_6hv_single_precision.c
endif
endif
if
WITH_REAL_VSX_BLOCK6_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_6hv_double_precision.c
if
WANT_SINGLE_PRECISION_REAL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_vsx_6hv_single_precision.c
endif
endif
if
WITH_REAL_SSE_BLOCK6_KERNEL
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa2/kernels/real_sse_6hv_double_precision.c
...
...
src/elpa2/compute_hh_trafo.F90
View file @
26401f3b
...
...
@@ -670,19 +670,19 @@
#if (!defined(WITH_FIXED_COMPLEX_KERNEL)) || (defined(WITH_FIXED_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_VSX_BLOCK2_KERNEL))
ttt
=
mpi_wtime
()
do
j
=
ncols
,
1
,
-1
#ifdef WITH_OPENMP
call
single_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
vsx_1hv_
&
&
PRECISION
&
&
(
c_loc
(
a
(
1
,
j
+
off
+
a_off
,
istripe
,
my_thread
)),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
#else
call
single_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
vsx_1hv_
&
&
PRECISION
&
&
(
c_loc
(
a
(
1
,
j
+
off
+
a_off
,
istripe
)),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
#endif
!
#ifdef WITH_OPENMP
!
call single_hh_trafo_&
!
&MATH_DATATYPE&
!
&_vsx_1hv_&
!
&PRECISION&
!
& (c_loc(a(1,j+off+a_off,istripe,my_thread)), bcast_buffer(1,j+off),nbw,nl,stripe_width)
!
#else
!
call single_hh_trafo_&
!
&MATH_DATATYPE&
!
&_vsx_1hv_&
!
&PRECISION&
!
& (c_loc(a(1,j+off+a_off,istripe)), bcast_buffer(1,j+off),nbw,nl,stripe_width)
!
#endif
enddo
#endif /* (!defined(WITH_FIXED_COMPLEX_KERNEL)) || (defined(WITH_FIXED_COMPLEX_KERNEL) && !defined(WITH_COMPLEX_VSX_BLOCK2_KERNEL)) */
...
...
@@ -973,33 +973,33 @@
do
j
=
ncols
,
2
,
-2
w
(:,
1
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
)
w
(:,
2
)
=
bcast_buffer
(
1
:
nbw
,
j
+
off
-1
)
#ifdef WITH_OPENMP
call
double_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
vsx_2hv_
&
&
PRECISION
&
&
(
c_loc
(
a
(
1
,
j
+
off
+
a_off
-1
,
istripe
,
my_thread
)),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#else
call
double_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
vsx_2hv_
&
&
PRECISION
&
&
(
c_loc
(
a
(
1
,
j
+
off
+
a_off
-1
,
istripe
)),
w
,
nbw
,
nl
,
stripe_width
,
nbw
)
#endif
!
#ifdef WITH_OPENMP
!
call double_hh_trafo_&
!
&MATH_DATATYPE&
!
&_vsx_2hv_&
!
&PRECISION&
!
& (c_loc(a(1,j+off+a_off-1,istripe,my_thread)), w, nbw, nl, stripe_width, nbw)
!
#else
!
call double_hh_trafo_&
!
&MATH_DATATYPE&
!
&_vsx_2hv_&
!
&PRECISION&
!
& (c_loc(a(1,j+off+a_off-1,istripe)), w, nbw, nl, stripe_width, nbw)
!
#endif
enddo
#ifdef WITH_OPENMP
if
(
j
==
1
)
call
single_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
vsx_1hv_
&
&
PRECISION
&
&
(
c_loc
(
a
(
1
,
1
+
off
+
a_off
,
istripe
,
my_thread
)),
bcast_buffer
(
1
,
off
+1
),
nbw
,
nl
,
stripe_width
)
#else
if
(
j
==
1
)
call
single_hh_trafo_
&
&
MATH_DATATYPE
&
&
_
vsx_1hv_
&
&
PRECISION
&
&
(
c_loc
(
a
(
1
,
1
+
off
+
a_off
,
istripe
)),
bcast_buffer
(
1
,
off
+1
),
nbw
,
nl
,
stripe_width
)
#endif
!
#ifdef WITH_OPENMP
!
if (j==1) call single_hh_trafo_&
!
&MATH_DATATYPE&
!
&_vsx_1hv_&
!
&PRECISION&
!
& (c_loc(a(1,1+off+a_off,istripe,my_thread)), bcast_buffer(1,off+1), nbw, nl, stripe_width)
!
#else
!
if (j==1) call single_hh_trafo_&
!
&MATH_DATATYPE&
!
&_vsx_1hv_&
!
&PRECISION&
!
& (c_loc(a(1,1+off+a_off,istripe)), bcast_buffer(1,off+1), nbw, nl, stripe_width)
!
#endif
#ifndef WITH_FIXED_COMPLEX_KERNEL
endif
! (kernel .eq. ELPA_2STAGE_COMPLEX_VSX_BLOCK2)
...
...
src/elpa2/kernels/real_vsx_2hv_template.c
View file @
26401f3b
...
...
@@ -823,7 +823,7 @@ void double_hh_trafo_real_vsx_2hv_single(float* q, float* hh, int* pnb, int* pnq
_SSE_STORE
((
__vector
unsigned
int
)
q2
,
0
,
(
unsigned
int
*
)
&
q
[
offset
]);
q3
=
_SSE_LOAD
(
0
,
(
unsigned
long
int
*
)
&
q
[
2
*
offset
]);
q3
=
_SSE_ADD
(
q3
,
y3
);
_SSE_STORE
((
__vector
unsigned
int
)
q3
,
0
,
(
unsigned
int
*
)
&
q
[
2
*
offset
]);
_SSE_STORE
((
__vector
unsigned
int
)
q3
,
0
,
(
unsigned
int
*
)
&
q
[
2
*
offset
]);
q4
=
_SSE_LOAD
(
0
,
(
unsigned
long
int
*
)
&
q
[
3
*
offset
]);
q4
=
_SSE_ADD
(
q4
,
y4
);
_SSE_STORE
((
__vector
unsigned
int
)
q4
,
0
,
(
unsigned
int
*
)
&
q
[
3
*
offset
]);
...
...
src/elpa2/kernels/real_vsx_6hv_double_precision.c
0 → 100644
View file @
26401f3b
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
#include
"config-f90.h"
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include
"../../general/precision_macros.h"
#include
"real_vsx_6hv_template.c"
#undef REALCASE
#undef DOUBLE_PRECISION
src/elpa2/kernels/real_vsx_6hv_single_precision.c
0 → 100644
View file @
26401f3b
// This file is part of ELPA.
//
// The ELPA library was originally created by the ELPA consortium,
// consisting of the following organizations:
//
// - Max Planck Computing and Data Facility (MPCDF), formerly known as
// Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
// - Bergische Universität Wuppertal, Lehrstuhl für angewandte
// Informatik,
// - Technische Universität München, Lehrstuhl für Informatik mit
// Schwerpunkt Wissenschaftliches Rechnen,
// - Fritz-Haber-Institut, Berlin, Abt. Theorie,
// - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
// Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
// and
// - IBM Deutschland GmbH
//
// This particular source code file contains additions, changes and
// enhancements authored by Intel Corporation which is not part of
// the ELPA consortium.
//
// More information can be found here:
// http://elpa.mpcdf.mpg.de/
//
// ELPA is free software: you can redistribute it and/or modify
// it under the terms of the version 3 of the license of the
// GNU Lesser General Public License as published by the Free
// Software Foundation.
//
// ELPA is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with ELPA. If not, see <http://www.gnu.org/licenses/>
//
// ELPA reflects a substantial effort on the part of the original
// ELPA consortium, and we ask you to respect the spirit of the
// license that we chose: i.e., please contribute any changes you
// may have back to the original ELPA library distribution, and keep
// any derivatives of ELPA under the same license that we chose for
// the original distribution, the GNU Lesser General Public License.
//
// Author: Andreas Marek, MPCDF
#include
"config-f90.h"
#define REALCASE 1
#define SINGLE_PRECISION 1
#include
"../../general/precision_macros.h"
#include
"real_vsx_6hv_template.c"
#undef REALCASE
#undef SINGLE_PRECISION
src/elpa2/kernels/real_vsx_6hv_template.c
0 → 100644
View file @
26401f3b
This diff is collapsed.
Click to expand it.
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment