Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
b15d27f2
Commit
b15d27f2
authored
Mar 21, 2017
by
Andreas Marek
Browse files
Unify pack_unpack_cpu
parent
9f7c384a
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
b15d27f2
...
...
@@ -27,10 +27,9 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/mod_mpi.F90
\
src/mod_mpi_stubs.F90
\
src/mod_redist_band.F90
\
src/mod_pack_unpack_
real
.F90
\
src/mod_pack_unpack_
cpu
.F90
\
src/mod_compute_hh_trafo_real.F90
\
src/mod_compute_hh_trafo_complex.F90
\
src/mod_pack_unpack_complex.F90
\
src/aligned_mem.F90
\
src/elpa1_compute_private.F90
\
src/elpa2_determine_workload.F90
\
...
...
@@ -63,6 +62,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2_kernels/elpa2_kernels_real_template.X90
\
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/pack_unpack_cpu.X90
\
src/redist_band.X90
\
src/sanity.X90
\
src/elpa_cholesky_template.X90
\
...
...
@@ -981,6 +981,7 @@ EXTRA_DIST = \
src/elpa2_kernels/elpa2_kernels_complex_template.X90
\
src/elpa2_kernels/elpa2_kernels_simple_template.X90
\
src/redist_band.X90
\
src/pack_unpack_cpu.X90
\
src/sanity.X90
\
src/elpa_cholesky_template.X90
\
src/elpa_invert_trm.X90
\
...
...
src/elpa2_trans_ev_tridi_to_band_template.X90
View file @
b15d27f2
...
...
@@ -45,14 +45,13 @@
use timings_dummy
#endif
use elpa2_workload
use pack_unpack_cpu
#if REALCASE == 1
use pack_unpack_real
use pack_unpack_real_gpu
use compute_hh_trafo_real
#endif
#if COMPLEXCASE == 1
use pack_unpack_complex
use compute_hh_trafo_complex
#endif
use cuda_functions
...
...
src/mod_pack_unpack_complex.F90
deleted
100644 → 0
View file @
9f7c384a
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF
!> Copy helpers between a striped work array and a contiguous row buffer,
!> for complex data on the CPU code path.
!>
!> Each "pack" routine gathers the entries stored at second index n of the
!> striped array a into the contiguous vector row; each "unpack" routine
!> scatters row back into a at second index n.
!>
!> Two layouts exist, selected at compile time:
!>  - with OpenMP:    a(:, :, stripe, thread), row split into per-thread
!>                    ranges of thread_width entries, limited by l_nev;
!>  - without OpenMP: a(:, :, stripe), every stripe holds stripe_width
!>                    entries except the last one (last_stripe_width).
!>
!> The single precision variants are only compiled when single precision
!> complex support was requested at configure time.
module pack_unpack_complex
#include "config-f90.h"
  implicit none
  private

#ifdef WITH_OPENMP
  public :: pack_row_complex_cpu_openmp_double, unpack_row_complex_cpu_openmp_double
#else
  public :: pack_row_complex_cpu_double, unpack_row_complex_cpu_double
#endif

#ifdef WANT_SINGLE_PRECISION_COMPLEX
#ifdef WITH_OPENMP
  public :: pack_row_complex_cpu_openmp_single, unpack_row_complex_cpu_openmp_single
#else
  public :: pack_row_complex_cpu_single, unpack_row_complex_cpu_single
#endif
#endif /* WANT_SINGLE_PRECISION_COMPLEX */

contains

  !> Gather the entries of a at second index n into row (double precision).
  !>
  !> a      : striped source array (read only)
  !> row    : destination buffer; only the sections that are covered by a
  !>          stripe are overwritten, hence intent(inout)
  !> n      : second index of a that is copied
  !> stripe_width, stripe_count : stripe geometry of a
  !> OpenMP build:     max_threads, thread_width, l_nev describe the
  !>                   per-thread partitioning of row
  !> non-OpenMP build: last_stripe_width is the length of the final stripe
#ifdef WITH_OPENMP
  subroutine pack_row_complex_cpu_openmp_double(a, row, n, stripe_width, stripe_count, &
                                                max_threads, thread_width, l_nev)
#else
  subroutine pack_row_complex_cpu_double(a, row, n, stripe_width, last_stripe_width, stripe_count)
#endif
#ifdef HAVE_DETAILED_TIMINGS
    use timings
#endif
    use precision, only : ik, ck8
    implicit none

    integer(kind=ik), intent(in)     :: n, stripe_width, stripe_count
#ifdef WITH_OPENMP
    integer(kind=ik), intent(in)     :: max_threads, thread_width, l_nev
    complex(kind=ck8), intent(in)    :: a(:,:,:,:)
#else
    integer(kind=ik), intent(in)     :: last_stripe_width
    complex(kind=ck8), intent(in)    :: a(:,:,:)
#endif
    complex(kind=ck8), intent(inout) :: row(:)

    integer(kind=ik)                 :: i, noff, nl
#ifdef WITH_OPENMP
    integer(kind=ik)                 :: nt
#endif

#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
    call timer%start("pack_row_complex_cpu_openmp_double")
#else
    call timer%start("pack_row_complex_cpu_double")
#endif
#endif

#ifdef WITH_OPENMP
    do nt = 1, max_threads
      do i = 1, stripe_count
        ! offset of stripe i of thread nt inside row
        noff = (nt-1)*thread_width + (i-1)*stripe_width
        ! number of entries to copy: bounded by the stripe width, by the end
        ! of the range of thread nt, and by l_nev
        nl   = min(stripe_width, nt*thread_width-noff, l_nev-noff)
        ! nothing left for this thread: continue with the next thread
        if (nl <= 0) exit
        row(noff+1:noff+nl) = a(1:nl,n,i,nt)
      enddo
    enddo
#else
    do i = 1, stripe_count
      ! all stripes are full except the last one
      nl   = merge(stripe_width, last_stripe_width, i<stripe_count)
      noff = (i-1)*stripe_width
      row(noff+1:noff+nl) = a(1:nl,n,i)
    enddo
#endif

#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
    call timer%stop("pack_row_complex_cpu_openmp_double")
#else
    call timer%stop("pack_row_complex_cpu_double")
#endif
#endif

#ifdef WITH_OPENMP
  end subroutine pack_row_complex_cpu_openmp_double
#else
  end subroutine pack_row_complex_cpu_double
#endif

#ifdef WITH_OPENMP
  !> Scatter the part of row that belongs to thread my_thread into a at
  !> second index n (double precision, OpenMP layout).
  !>
  !> a         : striped destination array; only a(1:nl, n, i, my_thread)
  !>             is written, hence intent(inout)
  !> row       : contiguous source buffer (read only)
  !> my_thread : thread index selecting the fourth dimension of a and the
  !>             range of row that is copied
  subroutine unpack_row_complex_cpu_openmp_double(a, row, n, my_thread, stripe_count, &
                                                  thread_width, stripe_width, l_nev)
#ifdef HAVE_DETAILED_TIMINGS
    use timings
#endif
    use precision, only : ik, ck8
    implicit none

    ! Private variables in OMP regions (my_thread) should better be in the argument list!
    integer(kind=ik), intent(in)     :: n, my_thread
    integer(kind=ik), intent(in)     :: stripe_count, thread_width, stripe_width, l_nev
    complex(kind=ck8), intent(in)    :: row(:)
    complex(kind=ck8), intent(inout) :: a(:,:,:,:)

    integer(kind=ik)                 :: i, noff, nl

#ifdef HAVE_DETAILED_TIMINGS
    call timer%start("unpack_row_complex_cpu_openmp_double")
#endif

    do i = 1, stripe_count
      ! offset of stripe i of this thread inside row
      noff = (my_thread-1)*thread_width + (i-1)*stripe_width
      ! same bound as in the pack routine: stripe width, end of the thread
      ! range, and l_nev
      nl   = min(stripe_width, my_thread*thread_width-noff, l_nev-noff)
      if (nl <= 0) exit
      a(1:nl,n,i,my_thread) = row(noff+1:noff+nl)
    enddo

#ifdef HAVE_DETAILED_TIMINGS
    call timer%stop("unpack_row_complex_cpu_openmp_double")
#endif

  end subroutine unpack_row_complex_cpu_openmp_double

#else /* WITH_OPENMP */

  !> Scatter row into a at second index n (double precision, layout without
  !> threads).
  !>
  !> a   : striped destination array; only a(1:nl, n, i) is written, hence
  !>       intent(inout)
  !> row : contiguous source buffer (read only)
  subroutine unpack_row_complex_cpu_double(a, row, n, stripe_count, stripe_width, last_stripe_width)
#ifdef HAVE_DETAILED_TIMINGS
    use timings
#endif
    use precision, only : ik, ck8
    implicit none

    integer(kind=ik), intent(in)     :: stripe_count, stripe_width, last_stripe_width, n
    complex(kind=ck8), intent(in)    :: row(:)
    complex(kind=ck8), intent(inout) :: a(:,:,:)

    integer(kind=ik)                 :: i, noff, nl

#ifdef HAVE_DETAILED_TIMINGS
    call timer%start("unpack_row_complex_cpu_double")
#endif

    do i = 1, stripe_count
      ! all stripes are full except the last one
      nl   = merge(stripe_width, last_stripe_width, i<stripe_count)
      noff = (i-1)*stripe_width
      a(1:nl,n,i) = row(noff+1:noff+nl)
    enddo

#ifdef HAVE_DETAILED_TIMINGS
    call timer%stop("unpack_row_complex_cpu_double")
#endif

  end subroutine unpack_row_complex_cpu_double

#endif /* WITH_OPENMP */

#ifdef WANT_SINGLE_PRECISION_COMPLEX
  ! single precision implementation, at the moment duplicated !!

  !> Single precision counterpart of the double precision pack routine;
  !> identical logic, only the complex kind differs.
#ifdef WITH_OPENMP
  subroutine pack_row_complex_cpu_openmp_single(a, row, n, stripe_width, stripe_count, &
                                                max_threads, thread_width, l_nev)
#else
  subroutine pack_row_complex_cpu_single(a, row, n, stripe_width, last_stripe_width, stripe_count)
#endif
#ifdef HAVE_DETAILED_TIMINGS
    use timings
#endif
    use precision, only : ik, ck4
    implicit none

    integer(kind=ik), intent(in)     :: n, stripe_width, stripe_count
#ifdef WITH_OPENMP
    integer(kind=ik), intent(in)     :: max_threads, thread_width, l_nev
    complex(kind=ck4), intent(in)    :: a(:,:,:,:)
#else
    integer(kind=ik), intent(in)     :: last_stripe_width
    complex(kind=ck4), intent(in)    :: a(:,:,:)
#endif
    complex(kind=ck4), intent(inout) :: row(:)

    integer(kind=ik)                 :: i, noff, nl
#ifdef WITH_OPENMP
    integer(kind=ik)                 :: nt
#endif

#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
    call timer%start("pack_row_complex_cpu_openmp_single")
#else
    call timer%start("pack_row_complex_cpu_single")
#endif
#endif

#ifdef WITH_OPENMP
    do nt = 1, max_threads
      do i = 1, stripe_count
        ! offset of stripe i of thread nt inside row
        noff = (nt-1)*thread_width + (i-1)*stripe_width
        ! bounded by the stripe width, the end of the thread range, and l_nev
        nl   = min(stripe_width, nt*thread_width-noff, l_nev-noff)
        ! nothing left for this thread: continue with the next thread
        if (nl <= 0) exit
        row(noff+1:noff+nl) = a(1:nl,n,i,nt)
      enddo
    enddo
#else
    do i = 1, stripe_count
      ! all stripes are full except the last one
      nl   = merge(stripe_width, last_stripe_width, i<stripe_count)
      noff = (i-1)*stripe_width
      row(noff+1:noff+nl) = a(1:nl,n,i)
    enddo
#endif

#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
    call timer%stop("pack_row_complex_cpu_openmp_single")
#else
    call timer%stop("pack_row_complex_cpu_single")
#endif
#endif

#ifdef WITH_OPENMP
  end subroutine pack_row_complex_cpu_openmp_single
#else
  end subroutine pack_row_complex_cpu_single
#endif

#ifdef WITH_OPENMP
  !> Single precision counterpart of the OpenMP unpack routine; identical
  !> logic, only the complex kind differs.
  subroutine unpack_row_complex_cpu_openmp_single(a, row, n, my_thread, stripe_count, &
                                                  thread_width, stripe_width, l_nev)
#ifdef HAVE_DETAILED_TIMINGS
    use timings
#endif
    use precision, only : ik, ck4
    implicit none

    ! Private variables in OMP regions (my_thread) should better be in the argument list!
    integer(kind=ik), intent(in)     :: n, my_thread
    integer(kind=ik), intent(in)     :: stripe_count, thread_width, stripe_width, l_nev
    complex(kind=ck4), intent(in)    :: row(:)
    complex(kind=ck4), intent(inout) :: a(:,:,:,:)

    integer(kind=ik)                 :: i, noff, nl

#ifdef HAVE_DETAILED_TIMINGS
    call timer%start("unpack_row_complex_cpu_openmp_single")
#endif

    do i = 1, stripe_count
      ! offset of stripe i of this thread inside row
      noff = (my_thread-1)*thread_width + (i-1)*stripe_width
      ! bounded by the stripe width, the end of the thread range, and l_nev
      nl   = min(stripe_width, my_thread*thread_width-noff, l_nev-noff)
      if (nl <= 0) exit
      a(1:nl,n,i,my_thread) = row(noff+1:noff+nl)
    enddo

#ifdef HAVE_DETAILED_TIMINGS
    call timer%stop("unpack_row_complex_cpu_openmp_single")
#endif

  end subroutine unpack_row_complex_cpu_openmp_single

#else /* WITH_OPENMP */

  !> Single precision counterpart of the unpack routine without threads;
  !> identical logic, only the complex kind differs.
  subroutine unpack_row_complex_cpu_single(a, row, n, stripe_count, stripe_width, last_stripe_width)
#ifdef HAVE_DETAILED_TIMINGS
    use timings
#endif
    use precision, only : ik, ck4
    implicit none

    integer(kind=ik), intent(in)     :: stripe_count, stripe_width, last_stripe_width, n
    complex(kind=ck4), intent(in)    :: row(:)
    complex(kind=ck4), intent(inout) :: a(:,:,:)

    integer(kind=ik)                 :: i, noff, nl

#ifdef HAVE_DETAILED_TIMINGS
    call timer%start("unpack_row_complex_cpu_single")
#endif

    do i = 1, stripe_count
      ! all stripes are full except the last one
      nl   = merge(stripe_width, last_stripe_width, i<stripe_count)
      noff = (i-1)*stripe_width
      a(1:nl,n,i) = row(noff+1:noff+nl)
    enddo

#ifdef HAVE_DETAILED_TIMINGS
    call timer%stop("unpack_row_complex_cpu_single")
#endif

  end subroutine unpack_row_complex_cpu_single

#endif /* WITH_OPENMP */

#endif /* WANT_SINGLE_PRECISION_COMPLEX */

end module pack_unpack_complex
src/mod_pack_unpack_cpu.F90
0 → 100644
View file @
b15d27f2
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF
!> Unified CPU pack/unpack module for real and complex data in double and
!> (optionally) single precision.
!>
!> The module body is produced by including the template file
!> pack_unpack_cpu.X90 once per (data type, precision) combination. Before
!> every inclusion the selecting macros are defined and precision_macros.h
!> is included again; afterwards the selecting macros are undefined so that
!> the next instantiation starts clean.
!>
!> NOTE(review): the exported names below assume that the template builds
!> routine names of the form pack_row_<type>_cpu[_openmp]_<precision> from
!> the macros set up by precision_macros.h - confirm against the template.
!>
!> Because the module is private by default, each public list must be
!> guarded by exactly the same macro that guards the instantiation of the
!> corresponding routines further down; otherwise a routine is either
!> generated but unreachable, or declared public without existing.
module pack_unpack_cpu
#include "config-f90.h"
  implicit none
  private

  ! double precision routines are always built
#ifdef WITH_OPENMP
  public :: pack_row_real_cpu_openmp_double, unpack_row_real_cpu_openmp_double
  public :: pack_row_complex_cpu_openmp_double, unpack_row_complex_cpu_openmp_double
#else
  public :: pack_row_real_cpu_double, unpack_row_real_cpu_double
  public :: pack_row_complex_cpu_double, unpack_row_complex_cpu_double
#endif

  ! real single precision routines: same guard as their instantiation below
#ifdef WANT_SINGLE_PRECISION_REAL
#ifdef WITH_OPENMP
  public :: pack_row_real_cpu_openmp_single, unpack_row_real_cpu_openmp_single
#else
  public :: pack_row_real_cpu_single, unpack_row_real_cpu_single
#endif
#endif /* WANT_SINGLE_PRECISION_REAL */

  ! complex single precision routines: same guard as their instantiation below
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#ifdef WITH_OPENMP
  public :: pack_row_complex_cpu_openmp_single, unpack_row_complex_cpu_openmp_single
#else
  public :: pack_row_complex_cpu_single, unpack_row_complex_cpu_single
#endif
#endif /* WANT_SINGLE_PRECISION_COMPLEX */

contains

! real double precision
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "precision_macros.h"
#include "pack_unpack_cpu.X90"
#undef REALCASE
#undef DOUBLE_PRECISION

! real single precision
#if defined(WANT_SINGLE_PRECISION_REAL)
#define REALCASE 1
#define SINGLE_PRECISION 1
#include "precision_macros.h"
#include "pack_unpack_cpu.X90"
#undef REALCASE
#undef SINGLE_PRECISION
#endif

! complex double precision
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "precision_macros.h"
#include "pack_unpack_cpu.X90"
#undef COMPLEXCASE
#undef DOUBLE_PRECISION

! complex single precision
#if defined(WANT_SINGLE_PRECISION_COMPLEX)
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "precision_macros.h"
#include "pack_unpack_cpu.X90"
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#endif

end module pack_unpack_cpu
src/
mod_
pack_unpack_
real.F
90
→
src/pack_unpack_
cpu.X
90
View file @
b15d27f2
#if 0
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
...
...
@@ -40,52 +41,73 @@
! the original distribution, the GNU Lesser General Public License.
!
! This file was written by A. Marek, MPCDF
#endif
module
pack_unpack_real
#include "config-f90.h"
implicit
none
subroutine pack_row_&
&MATH_DATATYPE&
#ifdef WITH_OPENMP
public
pack_row_real_cpu_openmp_double
,
unpack_row_real
_cpu_openmp_
double
&
_cpu_openmp_
&
#else
public
pack_row_real_cpu_double
,
unpack_row_real_cpu_double
&_cpu_&
#endif
contains
&PRECISION &
(a, row, n, stripe_width, &
#ifdef WITH_OPENMP
subroutine
pack_row_real_cpu_openmp_double
(
a
,
row
,
n
,
stripe_width
,
stripe_count
,
max_threads
,
thread_width
,
l_nev
)
stripe_count, max_threads, thread_width, l_nev)
#else
subroutine
pack_row_real_cpu_double
(
a
,
row
,
n
,
stripe_width
,
last_stripe_width
,
stripe_count
)
last_stripe_width, stripe_count)
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use precision
implicit none
integer(kind=ik), intent(in) :: n, stripe_count, stripe_width
#ifdef WITH_OPENMP
integer(kind=ik), intent(in) :: max_threads, thread_width, l_nev
real
(
kind
=
rk8
),
intent
(
in
)
::
a
(:,:,:,:)
#else
#if REALCASE == 1
real(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:,:)
#endif
#if COMPLEXCASE == 1
complex(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:,:)
#endif
#else /* WITH_OPENMP */
integer(kind=ik), intent(in) :: last_stripe_width
real
(
kind
=
rk8
),
intent
(
in
)
::
a
(:,:,:)
#if REALCASE == 1
real(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:)
#endif
#if COMPLEXCASE == 1
complex(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:)
#endif
#endif /* WITH_OPENMP */
#if REALCASE == 1
real(kind=C_DATATYPE_KIND) :: row(:)
#endif
#if COMPLEXCASE == 1
complex(kind=C_DATATYPE_KIND) :: row(:)
#endif
real
(
kind
=
rk8
)
::
row
(:)
integer(kind=ik) :: i, noff, nl
#ifdef WITH_OPENMP
integer(kind=ik) :: nt
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("pack_row_&
&MATH_DATATPE&
#ifdef WITH_OPENMP
call
timer
%
start
(
"pack_row_real_cpu_openmp_double"
)
&_cpu_openmp" // &
#else
call
timer
%
start
(
"pack_row_real_cpu_double"
)
#endif
&_cpu" // &
#endif
&PRECISION_SUFFIX &
)
#ifdef WITH_OPENMP
do nt = 1, max_threads
...
...
@@ -104,212 +126,113 @@ module pack_unpack_real
enddo
#endif
#ifdef HAVE_DETAILED_TIMINGS
call timer%stop("pack_row_&
&MATH_DATATPE&
#ifdef WITH_OPENMP
call
timer
%
stop
(
"pack_row_real_cpu_openmp_double"
)
&_cpu_openmp" // &
#else
call
timer
%
stop
(
"pack_row_real_cpu_double"
)
#endif
&_cpu" // &
#endif
&PRECISION_SUFFIX &
)
end subroutine
subroutine unpack_row_&
&MATH_DATATYPE&
#ifdef WITH_OPENMP
end
subroutine
pack_row_real
_cpu_openmp_
double
&
_cpu_openmp_
&
#else
end
subroutine
pack_row_real_cpu_double
&_cpu_&
#endif
&PRECISION &
(a, row, n, &
#ifdef WITH_OPENMP
subroutine
unpack_row_real_cpu_openmp_double
(
a
,
row
,
n
,
my_thread
,
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
! Private variables in OMP regions (my_thread) should better be in the argument list!
integer
(
kind
=
ik
),
intent
(
in
)
::
stripe_count
,
thread_width
,
stripe_width
,
l_nev
real
(
kind
=
rk8
)
::
a
(:,:,:,:)
integer
(
kind
=
ik
),
intent
(
in
)
::
n
,
my_thread
real
(
kind
=
rk8
),
intent
(
in
)
::
row
(:)
integer
(
kind
=
ik
)
::
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row_real_cpu_openmp_double"
)
#endif
do
i
=
1
,
stripe_count
noff
=
(
my_thread
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
my_thread
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
a
(
1
:
nl
,
n
,
i
,
my_thread
)
=
row
(
noff
+1
:
noff
+
nl
)