Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
14
Issues
14
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
b15d27f2
Commit
b15d27f2
authored
Mar 21, 2017
by
Andreas Marek
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Unify pack_unpack_cpu
parent
9f7c384a
Changes
6
Pipelines
1
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
352 additions
and
314 deletions
+352
-314
Makefile.am
Makefile.am
+3
-2
src/elpa2_trans_ev_tridi_to_band_template.X90
src/elpa2_trans_ev_tridi_to_band_template.X90
+1
-2
src/mod_pack_unpack_complex.F90
src/mod_pack_unpack_complex.F90
+0
-309
src/mod_pack_unpack_cpu.F90
src/mod_pack_unpack_cpu.F90
+109
-0
src/pack_unpack_cpu.X90
src/pack_unpack_cpu.X90
+238
-0
src/precision_macros.h
src/precision_macros.h
+1
-1
No files found.
Makefile.am
View file @
b15d27f2
...
...
@@ -27,10 +27,9 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/mod_mpi.F90
\
src/mod_mpi_stubs.F90
\
src/mod_redist_band.F90
\
src/mod_pack_unpack_
real
.F90
\
src/mod_pack_unpack_
cpu
.F90
\
src/mod_compute_hh_trafo_real.F90
\
src/mod_compute_hh_trafo_complex.F90
\
src/mod_pack_unpack_complex.F90
\
src/aligned_mem.F90
\
src/elpa1_compute_private.F90
\
src/elpa2_determine_workload.F90
\
...
...
@@ -63,6 +62,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2_kernels/elpa2_kernels_real_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/pack_unpack_cpu.X90
\
src/redist_band.X90
\
src/sanity.X90
\
src/elpa_cholesky_template.X90
\
...
...
@@ -981,6 +981,7 @@ EXTRA_DIST = \
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/redist_band.X90
\
src/pack_unpack_cpu.X90
\
src/sanity.X90
\
src/elpa_cholesky_template.X90
\
src/elpa_invert_trm.X90
\
...
...
src/elpa2_trans_ev_tridi_to_band_template.X90
View file @
b15d27f2
...
...
@@ -45,14 +45,13 @@
use timings_dummy
#endif
use elpa2_workload
use pack_unpack_cpu
#if REALCASE == 1
use pack_unpack_real
use pack_unpack_real_gpu
use compute_hh_trafo_real
#endif
#if COMPLEXCASE == 1
use pack_unpack_complex
use compute_hh_trafo_complex
#endif
use cuda_functions
...
...
src/mod_pack_unpack_complex.F90
deleted
100644 → 0
View file @
9f7c384a
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF
module pack_unpack_complex
#include "config-f90.h"
  ! CPU helpers that copy one row between a striped complex work array (a)
  ! and a flat row buffer (row).
  !
  ! Which procedures exist depends on the build configuration:
  !   WITH_OPENMP defined            : the *_cpu_openmp_* variants, a is 4-dimensional
  !   WITH_OPENMP not defined        : the *_cpu_* variants, a is 3-dimensional
  !   WANT_SINGLE_PRECISION_COMPLEX  : additionally the *_single variants (kind ck4)
  implicit none

  ! The module sets no default private accessibility, so all procedures are
  ! reachable anyway; the lists below just name the complete pack/unpack API.
#ifdef WITH_OPENMP
  public :: pack_row_complex_cpu_openmp_double, unpack_row_complex_cpu_openmp_double
#else
  public :: pack_row_complex_cpu_double, unpack_row_complex_cpu_double
#endif

#ifdef WANT_SINGLE_PRECISION_COMPLEX
#ifdef WITH_OPENMP
  public :: pack_row_complex_cpu_openmp_single, unpack_row_complex_cpu_openmp_single
#else
  public :: pack_row_complex_cpu_single, unpack_row_complex_cpu_single
#endif
#endif /* WANT_SINGLE_PRECISION_COMPLEX */

  contains

    ! ------------------------------------------------------------------
    ! pack_row (double precision)
    !
    ! Gathers row number n of the striped array a into the flat buffer row.
    !
    ! Non-OpenMP build:
    !   stripe i supplies a(1:nl, n, i) and lands at row(noff+1:noff+nl) with
    !   noff = (i-1)*stripe_width; nl is stripe_width for every stripe except
    !   the last one, which uses last_stripe_width.
    ! OpenMP build:
    !   the same is done per thread slab nt, with
    !   noff = (nt-1)*thread_width + (i-1)*stripe_width and nl clipped to the
    !   end of the slab of thread nt and to l_nev; the stripe loop of a thread
    !   ends as soon as nl drops to zero or below.
    !
    ! Arguments:
    !   a                  (in)    striped source array
    !   row                (inout) destination buffer; only the packed range is written
    !   n                  (in)    index into the second dimension of a
    !   stripe_width       (in)    number of elements in a full stripe
    !   stripe_count       (in)    number of stripes to visit
    !   last_stripe_width  (in)    elements in the final stripe (non-OpenMP only)
    !   max_threads, thread_width, l_nev (in) slab layout (OpenMP only)
    ! ------------------------------------------------------------------
#ifdef WITH_OPENMP
    subroutine pack_row_complex_cpu_openmp_double(a, row, n, stripe_width, stripe_count, &
                                                  max_threads, thread_width, l_nev)
#else
    subroutine pack_row_complex_cpu_double(a, row, n, stripe_width, last_stripe_width, stripe_count)
#endif

#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      integer(kind=ik), intent(in)     :: n, stripe_width, stripe_count
#ifdef WITH_OPENMP
      integer(kind=ik), intent(in)     :: max_threads, thread_width, l_nev
      complex(kind=ck8), intent(in)    :: a(:,:,:,:)
#else
      integer(kind=ik), intent(in)     :: last_stripe_width
      complex(kind=ck8), intent(in)    :: a(:,:,:)
#endif
      complex(kind=ck8), intent(inout) :: row(:)

      integer(kind=ik)                 :: i, noff, nl
#ifdef WITH_OPENMP
      ! thread-slab index, only needed by the OpenMP variant
      integer(kind=ik)                 :: nt
#endif

#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
      call timer%start("pack_row_complex_cpu_openmp_double")
#else
      call timer%start("pack_row_complex_cpu_double")
#endif
#endif

#ifdef WITH_OPENMP
      do nt = 1, max_threads
        do i = 1, stripe_count
          noff = (nt-1)*thread_width + (i-1)*stripe_width
          nl   = min(stripe_width, nt*thread_width-noff, l_nev-noff)
          ! nothing left for this thread slab
          if (nl <= 0) exit
          row(noff+1:noff+nl) = a(1:nl,n,i,nt)
        enddo
      enddo
#else
      do i = 1, stripe_count
        ! every stripe is full except the last one
        nl   = merge(stripe_width, last_stripe_width, i < stripe_count)
        noff = (i-1)*stripe_width
        row(noff+1:noff+nl) = a(1:nl,n,i)
      enddo
#endif

#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
      call timer%stop("pack_row_complex_cpu_openmp_double")
#else
      call timer%stop("pack_row_complex_cpu_double")
#endif
#endif

#ifdef WITH_OPENMP
    end subroutine pack_row_complex_cpu_openmp_double
#else
    end subroutine pack_row_complex_cpu_double
#endif

    ! ------------------------------------------------------------------
    ! unpack_row (double precision)
    !
    ! Inverse of pack_row: scatters the flat buffer row into row number n of
    ! the striped array a, using the same noff/nl arithmetic as pack_row.
    ! The OpenMP variant handles only the slab of the thread given by
    ! my_thread.
    ! ------------------------------------------------------------------
#ifdef WITH_OPENMP
    subroutine unpack_row_complex_cpu_openmp_double(a, row, n, my_thread, stripe_count, &
                                                    thread_width, stripe_width, l_nev)

#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      ! Private variables in OMP regions (my_thread) should better be in the argument list!
      integer(kind=ik), intent(in)     :: n, my_thread
      integer(kind=ik), intent(in)     :: stripe_count, thread_width, stripe_width, l_nev
      complex(kind=ck8), intent(in)    :: row(:)
      complex(kind=ck8), intent(inout) :: a(:,:,:,:)

      integer(kind=ik)                 :: i, noff, nl

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("unpack_row_complex_cpu_openmp_double")
#endif

      do i = 1, stripe_count
        noff = (my_thread-1)*thread_width + (i-1)*stripe_width
        nl   = min(stripe_width, my_thread*thread_width-noff, l_nev-noff)
        ! nothing left for this thread slab
        if (nl <= 0) exit
        a(1:nl,n,i,my_thread) = row(noff+1:noff+nl)
      enddo

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("unpack_row_complex_cpu_openmp_double")
#endif

    end subroutine unpack_row_complex_cpu_openmp_double

#else /* WITH_OPENMP */

    subroutine unpack_row_complex_cpu_double(a, row, n, stripe_count, stripe_width, last_stripe_width)

#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      integer(kind=ik), intent(in)     :: stripe_count, stripe_width, last_stripe_width, n
      complex(kind=ck8), intent(in)    :: row(:)
      complex(kind=ck8), intent(inout) :: a(:,:,:)

      integer(kind=ik)                 :: i, noff, nl

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("unpack_row_complex_cpu_double")
#endif

      do i = 1, stripe_count
        ! every stripe is full except the last one
        nl   = merge(stripe_width, last_stripe_width, i < stripe_count)
        noff = (i-1)*stripe_width
        a(1:nl,n,i) = row(noff+1:noff+nl)
      enddo

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("unpack_row_complex_cpu_double")
#endif

    end subroutine unpack_row_complex_cpu_double
#endif /* WITH_OPENMP */

#ifdef WANT_SINGLE_PRECISION_COMPLEX
    ! single precision implementation, at the moment duplicated !!
    ! The routines below mirror the double precision ones statement by
    ! statement; only the complex kind (ck4 instead of ck8) and the timer
    ! labels differ.

#ifdef WITH_OPENMP
    subroutine pack_row_complex_cpu_openmp_single(a, row, n, stripe_width, stripe_count, &
                                                  max_threads, thread_width, l_nev)
#else
    subroutine pack_row_complex_cpu_single(a, row, n, stripe_width, last_stripe_width, stripe_count)
#endif

#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      integer(kind=ik), intent(in)     :: n, stripe_width, stripe_count
#ifdef WITH_OPENMP
      integer(kind=ik), intent(in)     :: max_threads, thread_width, l_nev
      complex(kind=ck4), intent(in)    :: a(:,:,:,:)
#else
      integer(kind=ik), intent(in)     :: last_stripe_width
      complex(kind=ck4), intent(in)    :: a(:,:,:)
#endif
      complex(kind=ck4), intent(inout) :: row(:)

      integer(kind=ik)                 :: i, noff, nl
#ifdef WITH_OPENMP
      ! thread-slab index, only needed by the OpenMP variant
      integer(kind=ik)                 :: nt
#endif

#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
      call timer%start("pack_row_complex_cpu_openmp_single")
#else
      call timer%start("pack_row_complex_cpu_single")
#endif
#endif

#ifdef WITH_OPENMP
      do nt = 1, max_threads
        do i = 1, stripe_count
          noff = (nt-1)*thread_width + (i-1)*stripe_width
          nl   = min(stripe_width, nt*thread_width-noff, l_nev-noff)
          ! nothing left for this thread slab
          if (nl <= 0) exit
          row(noff+1:noff+nl) = a(1:nl,n,i,nt)
        enddo
      enddo
#else
      do i = 1, stripe_count
        ! every stripe is full except the last one
        nl   = merge(stripe_width, last_stripe_width, i < stripe_count)
        noff = (i-1)*stripe_width
        row(noff+1:noff+nl) = a(1:nl,n,i)
      enddo
#endif

#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
      call timer%stop("pack_row_complex_cpu_openmp_single")
#else
      call timer%stop("pack_row_complex_cpu_single")
#endif
#endif

#ifdef WITH_OPENMP
    end subroutine pack_row_complex_cpu_openmp_single
#else
    end subroutine pack_row_complex_cpu_single
#endif

#ifdef WITH_OPENMP
    subroutine unpack_row_complex_cpu_openmp_single(a, row, n, my_thread, stripe_count, &
                                                    thread_width, stripe_width, l_nev)

#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      ! Private variables in OMP regions (my_thread) should better be in the argument list!
      integer(kind=ik), intent(in)     :: n, my_thread
      integer(kind=ik), intent(in)     :: stripe_count, thread_width, stripe_width, l_nev
      complex(kind=ck4), intent(in)    :: row(:)
      complex(kind=ck4), intent(inout) :: a(:,:,:,:)

      integer(kind=ik)                 :: i, noff, nl

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("unpack_row_complex_cpu_openmp_single")
#endif

      do i = 1, stripe_count
        noff = (my_thread-1)*thread_width + (i-1)*stripe_width
        nl   = min(stripe_width, my_thread*thread_width-noff, l_nev-noff)
        ! nothing left for this thread slab
        if (nl <= 0) exit
        a(1:nl,n,i,my_thread) = row(noff+1:noff+nl)
      enddo

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("unpack_row_complex_cpu_openmp_single")
#endif

    end subroutine unpack_row_complex_cpu_openmp_single

#else /* WITH_OPENMP */

    subroutine unpack_row_complex_cpu_single(a, row, n, stripe_count, stripe_width, last_stripe_width)

#ifdef HAVE_DETAILED_TIMINGS
      use timings
#endif
      use precision
      implicit none

      integer(kind=ik), intent(in)     :: stripe_count, stripe_width, last_stripe_width, n
      complex(kind=ck4), intent(in)    :: row(:)
      complex(kind=ck4), intent(inout) :: a(:,:,:)

      integer(kind=ik)                 :: i, noff, nl

#ifdef HAVE_DETAILED_TIMINGS
      call timer%start("unpack_row_complex_cpu_single")
#endif

      do i = 1, stripe_count
        ! every stripe is full except the last one
        nl   = merge(stripe_width, last_stripe_width, i < stripe_count)
        noff = (i-1)*stripe_width
        a(1:nl,n,i) = row(noff+1:noff+nl)
      enddo

#ifdef HAVE_DETAILED_TIMINGS
      call timer%stop("unpack_row_complex_cpu_single")
#endif

    end subroutine unpack_row_complex_cpu_single
#endif /* WITH_OPENMP */

#endif /* WANT_SINGLE_PRECISION_COMPLEX */

end module pack_unpack_complex
src/mod_pack_unpack_cpu.F90
0 → 100644
View file @
b15d27f2
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF
module pack_unpack_cpu
#include "config-f90.h"
  ! Unified CPU pack/unpack module. The procedure bodies are not written
  ! here: the template pack_unpack_cpu.X90 is included once per combination
  ! of math datatype (real / complex) and precision (double / single), with
  ! REALCASE or COMPLEXCASE and DOUBLE_PRECISION or SINGLE_PRECISION defined
  ! around each inclusion.
  implicit none

  ! Everything is private unless exported below. Each export is guarded by
  ! exactly the same macro that guards the matching template instantiation
  ! further down, so a name is exported if and only if it is generated.
  private

  ! double precision: always built
#ifdef WITH_OPENMP
  public pack_row_real_cpu_openmp_double, unpack_row_real_cpu_openmp_double
  public pack_row_complex_cpu_openmp_double, unpack_row_complex_cpu_openmp_double
#else
  public pack_row_real_cpu_double, unpack_row_real_cpu_double
  public pack_row_complex_cpu_double, unpack_row_complex_cpu_double
#endif

  ! real single precision: only instantiated with WANT_SINGLE_PRECISION_REAL
#ifdef WANT_SINGLE_PRECISION_REAL
#ifdef WITH_OPENMP
  public pack_row_real_cpu_openmp_single, unpack_row_real_cpu_openmp_single
#else
  public pack_row_real_cpu_single, unpack_row_real_cpu_single
#endif
#endif /* WANT_SINGLE_PRECISION_REAL */

  ! complex single precision: only instantiated with WANT_SINGLE_PRECISION_COMPLEX
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#ifdef WITH_OPENMP
  public pack_row_complex_cpu_openmp_single, unpack_row_complex_cpu_openmp_single
#else
  public pack_row_complex_cpu_single, unpack_row_complex_cpu_single
#endif
#endif /* WANT_SINGLE_PRECISION_COMPLEX */

  contains

  !real double precision
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "precision_macros.h"
#include "pack_unpack_cpu.X90"
#undef REALCASE
#undef DOUBLE_PRECISION

  ! real single precision
#if defined(WANT_SINGLE_PRECISION_REAL)
#define REALCASE 1
#define SINGLE_PRECISION 1
#include "precision_macros.h"
#include "pack_unpack_cpu.X90"
#undef REALCASE
#undef SINGLE_PRECISION
#endif

  !complex double precision
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "precision_macros.h"
#include "pack_unpack_cpu.X90"
#undef COMPLEXCASE
#undef DOUBLE_PRECISION

  ! complex single precision
#if defined(WANT_SINGLE_PRECISION_COMPLEX)
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "precision_macros.h"
#include "pack_unpack_cpu.X90"
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#endif

end module
src/
mod_pack_unpack_real.F
90
→
src/
pack_unpack_cpu.X
90
View file @
b15d27f2
#if 0
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
...
...
@@ -40,52 +41,73 @@
! the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF
#endif
module
pack_unpack_real
#include "config-f90.h"
implicit
none
subroutine pack_row_&
&MATH_DATATYPE&
#ifdef WITH_OPENMP
public
pack_row_real_cpu_openmp_double
,
unpack_row_real_cpu_openmp_double
&_cpu_openmp_&
#else
public
pack_row_real_cpu_double
,
unpack_row_real_cpu_double
&_cpu_&
#endif
contains
&PRECISION &
(a, row, n, stripe_width, &
#ifdef WITH_OPENMP
subroutine
pack_row_real_cpu_openmp_double
(
a
,
row
,
n
,
stripe_width
,
stripe_count
,
max_threads
,
thread_width
,
l_nev
)
stripe_count, max_threads, thread_width, l_nev)
#else
subroutine
pack_row_real_cpu_double
(
a
,
row
,
n
,
stripe_width
,
last_stripe_width
,
stripe_count
)
last_stripe_width, stripe_count)
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
integer(kind=ik), intent(in) :: n, stripe_count, stripe_width
#ifdef WITH_OPENMP
integer(kind=ik), intent(in) :: max_threads, thread_width, l_nev
real
(
kind
=
rk8
),
intent
(
in
)
::
a
(:,:,:,:)
#else
#if REALCASE == 1
real(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:,:)
#endif
#if COMPLEXCASE == 1
complex(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:,:)
#endif
#else /* WITH_OPENMP */
integer(kind=ik), intent(in) :: last_stripe_width
real
(
kind
=
rk8
),
intent
(
in
)
::
a
(:,:,:)
#if REALCASE == 1
real(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:)
#endif
#if COMPLEXCASE == 1
complex(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:)
#endif
#endif /* WITH_OPENMP */
#if REALCASE == 1
real(kind=C_DATATYPE_KIND) :: row(:)
#endif
#if COMPLEXCASE == 1
complex(kind=C_DATATYPE_KIND) :: row(:)
#endif
real
(
kind
=
rk8
)
::
row
(:)
integer(kind=ik) :: i, noff, nl
#ifdef WITH_OPENMP
integer(kind=ik) :: nt
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("pack_row_&
&MATH_DATATPE&
#ifdef WITH_OPENMP
call
timer
%
start
(
"pack_row_real_cpu_openmp_double"
)
&_cpu_openmp" // &
#else
call
timer
%
start
(
"pack_row_real_cpu_double"
)
#endif
&_cpu" // &
#endif
&PRECISION_SUFFIX &
)
#ifdef WITH_OPENMP
do nt = 1, max_threads
...
...
@@ -104,212 +126,113 @@ module pack_unpack_real
enddo
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("pack_row_&
&MATH_DATATPE&
#ifdef WITH_OPENMP
call
timer
%
stop
(
"pack_row_real_cpu_openmp_double"
)
&_cpu_openmp" // &
#else
call
timer
%
stop
(
"pack_row_real_cpu_double"
)
#endif
&_cpu" // &
#endif
&PRECISION_SUFFIX &
)
end subroutine
subroutine unpack_row_&
&MATH_DATATYPE&
#ifdef WITH_OPENMP
end
subroutine
pack_row_real_cpu_openmp_double
&_cpu_openmp_&
#else
end
subroutine
pack_row_real_cpu_double
&_cpu_&
#endif
&PRECISION &
(a, row, n, &
#ifdef WITH_OPENMP
subroutine
unpack_row_real_cpu_openmp_double
(
a
,
row
,
n
,
my_thread
,
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
! Private variables in OMP regions (my_thread) should better be in the argument list!
integer
(
kind
=
ik
),
intent
(
in
)
::
stripe_count
,
thread_width
,
stripe_width
,
l_nev
real
(
kind
=
rk8
)
::
a
(:,:,:,:)
integer
(
kind
=
ik
),
intent
(
in
)
::
n
,
my_thread
real
(
kind
=
rk8
),
intent
(
in
)
::
row
(:)
integer
(
kind
=
ik
)
::
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row_real_cpu_openmp_double"
)
#endif
do
i
=
1
,
stripe_count
noff
=
(
my_thread
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
my_thread
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
a
(
1
:
nl
,
n
,
i
,
my_thread
)
=
row
(
noff
+1
:
noff
+
nl
)
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"unpack_row_real_cpu_openmp_double"
)
#endif
end
subroutine
unpack_row_real_cpu_openmp_double
#else /* WITH_OPENMP */
subroutine
unpack_row_real_cpu_double
(
a
,
row
,
n
,
stripe_count
,
stripe_width
,
last_stripe_width
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
integer
(
kind
=
ik
),
intent
(
in
)
::
n
,
stripe_count
,
stripe_width
,
last_stripe_width
real
(
kind
=
rk8
)
::
row
(:)
real
(
kind
=
rk8
)
::
a
(:,:,:)
integer
(
kind
=
ik
)
::
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row_real_cpu_double"
)
my_thread, &
#endif