Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
E
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Angerer, Christoph (cangerer)
elpa
Commits
a17ade0d
Unverified
Commit
a17ade0d
authored
Jan 11, 2016
by
Andreas Marek
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Cleanup variable definitions by removing contains functions
parent
62a29931
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
319 additions
and
219 deletions
+319
-219
Makefile.am
Makefile.am
+2
-0
src/elpa2_compute.F90
src/elpa2_compute.F90
+48
-219
src/mod_pack_unpack_complex.F90
src/mod_pack_unpack_complex.F90
+130
-0
src/mod_pack_unpack_real.F90
src/mod_pack_unpack_real.F90
+139
-0
No files found.
Makefile.am
View file @
a17ade0d
...
...
@@ -14,6 +14,8 @@ libelpa@SUFFIX@_la_SOURCES = src/mod_precision.f90 \
src/elpa1_compute.F90
\
src/elpa1.F90
\
src/elpa2_utilities.F90
\
src/mod_pack_unpack_real.F90
\
src/mod_pack_unpack_complex.F90
\
src/elpa2_compute.F90
\
src/elpa2.F90
\
src/elpa_c_interface.F90
\
...
...
src/elpa2_compute.F90
View file @
a17ade0d
...
...
@@ -1599,6 +1599,7 @@ module ELPA2_compute
use
timings
#endif
use
precision
use
pack_unpack_real
implicit
none
integer
(
kind
=
ik
),
intent
(
in
)
::
THIS_REAL_ELPA_KERNEL
...
...
@@ -1789,7 +1790,8 @@ module ELPA2_compute
!$omp parallel do private(my_thread), schedule(static, 1)
do
my_thread
=
1
,
max_threads
call
unpack_row
(
row
,
i
-
limits
(
ip
),
my_thread
)
call
unpack_row_real_cpu_openmp
(
a
,
row
,
i
-
limits
(
ip
),
my_thread
,
stripe_count
,
&
thread_width
,
stripe_width
,
l_nev
)
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
...
...
@@ -1798,7 +1800,7 @@ module ELPA2_compute
#else
call
MPI_Recv
(
row
,
l_nev
,
MPI_REAL8
,
src
,
0
,
mpi_comm_rows
,
MPI_STATUS_IGNORE
,
mpierr
)
call
unpack_row
(
row
,
i
-
limits
(
ip
)
)
call
unpack_row
_real_cpu
(
a
,
row
,
i
-
limits
(
ip
),
stripe_count
,
stripe_width
,
last_stripe_width
)
#endif
elseif
(
src
==
my_prow
)
then
src_offset
=
src_offset
+1
...
...
@@ -1810,7 +1812,8 @@ module ELPA2_compute
!$omp parallel do private(my_thread), schedule(static, 1)
do
my_thread
=
1
,
max_threads
call
unpack_row
(
row
,
i
-
limits
(
ip
),
my_thread
)
call
unpack_row_real_cpu_openmp
(
a
,
row
,
i
-
limits
(
ip
),
my_thread
,
&
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
...
...
@@ -1818,7 +1821,7 @@ module ELPA2_compute
#endif
#else
call
unpack_row
(
row
,
i
-
limits
(
ip
)
)
call
unpack_row
_real_cpu
(
a
,
row
,
i
-
limits
(
ip
),
stripe_count
,
stripe_width
,
last_stripe_width
)
#endif
endif
enddo
...
...
@@ -1856,7 +1859,8 @@ module ELPA2_compute
!$omp parallel do private(my_thread), schedule(static, 1)
do
my_thread
=
1
,
max_threads
call
unpack_row
(
row
,
i
-
limits
(
my_prow
),
my_thread
)
call
unpack_row_real_cpu_openmp
(
a
,
row
,
i
-
limits
(
my_prow
),
my_thread
,
&
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
...
...
@@ -1865,7 +1869,7 @@ module ELPA2_compute
#else
call
MPI_Recv
(
row
,
l_nev
,
MPI_REAL8
,
src
,
0
,
mpi_comm_rows
,
MPI_STATUS_IGNORE
,
mpierr
)
call
unpack_row
(
row
,
i
-
limits
(
my_prow
)
)
call
unpack_row
_real_cpu
(
a
,
row
,
i
-
limits
(
my_prow
),
stripe_count
,
stripe_width
,
last_stripe_width
)
#endif
endif
enddo
...
...
@@ -2288,12 +2292,24 @@ module ELPA2_compute
if
(
dst
==
0
)
then
do
i
=
1
,
min
(
na
-
num_blk
*
nblk
,
nblk
)
call
pack_row
(
row
,
j
*
nblk
+
i
+
a_off
)
#ifdef WITH_OPENMP
call
pack_row_real_cpu_openmp
(
a
,
row
,
j
*
nblk
+
i
+
a_off
,
stripe_width
,
&
stripe_count
,
max_threads
,
thread_width
,
l_nev
)
#else
call
pack_row_real_cpu
(
a
,
row
,
j
*
nblk
+
i
+
a_off
,
stripe_width
,
last_stripe_width
,
stripe_count
)
#endif
q
((
num_blk
/
np_rows
)
*
nblk
+
i
,
1
:
l_nev
)
=
row
(:)
enddo
else
do
i
=
1
,
nblk
call
pack_row
(
result_buffer
(:,
i
,
nbuf
),
j
*
nblk
+
i
+
a_off
)
#ifdef WITH_OPENMP
call
pack_row_real_cpu_openmp
(
a
,
result_buffer
(:,
i
,
nbuf
),
j
*
nblk
+
i
+
a_off
,
&
stripe_width
,
stripe_count
,
max_threads
,
thread_width
,
l_nev
)
#else
call
pack_row_real_cpu
(
a
,
result_buffer
(:,
i
,
nbuf
),
j
*
nblk
+
i
+
a_off
,
stripe_width
,
last_stripe_width
,
stripe_count
)
#endif
enddo
call
MPI_Isend
(
result_buffer
(
1
,
1
,
nbuf
),
l_nev
*
nblk
,
MPI_REAL8
,
dst
,
&
result_recv_tag
,
mpi_comm_rows
,
result_send_request
(
nbuf
),
mpierr
)
...
...
@@ -2430,100 +2446,6 @@ module ELPA2_compute
contains
subroutine
pack_row
(
row
,
n
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
real
(
kind
=
rk
)
::
row
(:)
integer
(
kind
=
ik
)
::
n
,
i
,
noff
,
nl
#ifdef WITH_OPENMP
integer
(
kind
=
ik
)
::
nt
#endif
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"pack_row"
)
#endif
#ifdef WITH_OPENMP
do
nt
=
1
,
max_threads
do
i
=
1
,
stripe_count
noff
=
(
nt
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
nt
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
row
(
noff
+1
:
noff
+
nl
)
=
a
(
1
:
nl
,
n
,
i
,
nt
)
enddo
enddo
#else
do
i
=
1
,
stripe_count
nl
=
merge
(
stripe_width
,
last_stripe_width
,
i
<
stripe_count
)
noff
=
(
i
-1
)
*
stripe_width
row
(
noff
+1
:
noff
+
nl
)
=
a
(
1
:
nl
,
n
,
i
)
enddo
#endif
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"pack_row"
)
#endif
end
subroutine
pack_row
#ifdef WITH_OPENMP
subroutine
unpack_row
(
row
,
n
,
my_thread
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
! Private variables in OMP regions (my_thread) should better be in the argument list!
integer
(
kind
=
ik
),
intent
(
in
)
::
n
,
my_thread
real
(
kind
=
rk
),
intent
(
in
)
::
row
(:)
integer
(
kind
=
ik
)
::
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row"
)
#endif
do
i
=
1
,
stripe_count
noff
=
(
my_thread
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
my_thread
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
a
(
1
:
nl
,
n
,
i
,
my_thread
)
=
row
(
noff
+1
:
noff
+
nl
)
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"unpack_row"
)
#endif
end
subroutine
unpack_row
#else
subroutine
unpack_row
(
row
,
n
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
real
(
kind
=
rk
)
::
row
(:)
integer
(
kind
=
ik
)
::
n
,
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row"
)
#endif
do
i
=
1
,
stripe_count
nl
=
merge
(
stripe_width
,
last_stripe_width
,
i
<
stripe_count
)
noff
=
(
i
-1
)
*
stripe_width
a
(
1
:
nl
,
n
,
i
)
=
row
(
noff
+1
:
noff
+
nl
)
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"unpack_row"
)
#endif
end
subroutine
unpack_row
#endif
#ifdef WITH_OPENMP
subroutine
compute_hh_trafo
(
off
,
ncols
,
istripe
,
my_thread
,
THIS_REAL_ELPA_KERNEL
)
#else
...
...
@@ -4115,6 +4037,7 @@ module ELPA2_compute
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
pack_unpack_complex
use
precision
implicit
none
...
...
@@ -4315,7 +4238,8 @@ module ELPA2_compute
!$omp parallel do private(my_thread), schedule(static, 1)
do
my_thread
=
1
,
max_threads
call
unpack_row
(
row
,
i
-
limits
(
ip
),
my_thread
)
call
unpack_row_complex_cpu_openmp
(
a
,
row
,
i
-
limits
(
ip
),
my_thread
,
&
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
...
...
@@ -4323,7 +4247,7 @@ module ELPA2_compute
#endif
#else
call
unpack_row
(
row
,
i
-
limits
(
ip
)
)
call
unpack_row
_complex_cpu
(
a
,
row
,
i
-
limits
(
ip
),
stripe_count
,
stripe_width
,
last_stripe_width
)
#endif
elseif
(
src
==
my_prow
)
then
src_offset
=
src_offset
+1
...
...
@@ -4335,7 +4259,8 @@ module ELPA2_compute
!$omp parallel do private(my_thread), schedule(static, 1)
do
my_thread
=
1
,
max_threads
call
unpack_row
(
row
,
i
-
limits
(
ip
),
my_thread
)
call
unpack_row_complex_cpu_openmp
(
a
,
row
,
i
-
limits
(
ip
),
my_thread
,
&
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
...
...
@@ -4343,7 +4268,7 @@ module ELPA2_compute
#endif
#else
call
unpack_row
(
row
,
i
-
limits
(
ip
)
)
call
unpack_row
_complex_cpu
(
a
,
row
,
i
-
limits
(
ip
),
stripe_count
,
stripe_width
,
last_stripe_width
)
#endif
endif
enddo
...
...
@@ -4385,7 +4310,8 @@ module ELPA2_compute
#endif
!$omp parallel do private(my_thread), schedule(static, 1)
do
my_thread
=
1
,
max_threads
call
unpack_row
(
row
,
i
-
limits
(
my_prow
),
my_thread
)
call
unpack_row_complex_cpu_openmp
(
a
,
row
,
i
-
limits
(
my_prow
),
my_thread
,
&
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
enddo
!$omp end parallel do
#ifdef HAVE_DETAILED_TIMINGS
...
...
@@ -4393,7 +4319,7 @@ module ELPA2_compute
#endif
#else
call
unpack_row
(
row
,
i
-
limits
(
my_prow
)
)
call
unpack_row
_complex_cpu
(
a
,
row
,
i
-
limits
(
my_prow
),
stripe_count
,
stripe_width
,
last_stripe_width
)
#endif
endif
enddo
...
...
@@ -4838,12 +4764,24 @@ module ELPA2_compute
if
(
dst
==
0
)
then
do
i
=
1
,
min
(
na
-
num_blk
*
nblk
,
nblk
)
call
pack_row
(
row
,
j
*
nblk
+
i
+
a_off
)
#ifdef WITH_OPENMP
call
pack_row_complex_cpu_openmp
(
a
,
row
,
j
*
nblk
+
i
+
a_off
,
&
stripe_width
,
stripe_count
,
max_threads
,
thread_width
,
l_nev
)
#else
call
pack_row_complex_cpu
(
a
,
row
,
j
*
nblk
+
i
+
a_off
,
stripe_width
,
last_stripe_width
,
stripe_count
)
#endif
q
((
num_blk
/
np_rows
)
*
nblk
+
i
,
1
:
l_nev
)
=
row
(:)
enddo
else
do
i
=
1
,
nblk
call
pack_row
(
result_buffer
(:,
i
,
nbuf
),
j
*
nblk
+
i
+
a_off
)
#ifdef WITH_OPENMP
call
pack_row_complex_cpu_openmp
(
a
,
result_buffer
(:,
i
,
nbuf
),
j
*
nblk
+
i
+
a_off
,
&
stripe_width
,
stripe_count
,
max_threads
,
thread_width
,
l_nev
)
#else
call
pack_row_complex_cpu
(
a
,
result_buffer
(:,
i
,
nbuf
),
j
*
nblk
+
i
+
a_off
,
stripe_width
,
last_stripe_width
,
stripe_count
)
#endif
enddo
call
MPI_Isend
(
result_buffer
(
1
,
1
,
nbuf
),
l_nev
*
nblk
,
MPI_COMPLEX16
,
dst
,
&
result_recv_tag
,
mpi_comm_rows
,
result_send_request
(
nbuf
),
mpierr
)
...
...
@@ -4983,115 +4921,6 @@ module ELPA2_compute
return
contains
#ifdef WITH_OPENMP
subroutine
pack_row
(
row
,
n
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
complex
(
kind
=
ck
)
::
row
(:)
integer
(
kind
=
ik
)
::
n
,
i
,
noff
,
nl
,
nt
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"pack_row"
)
#endif
do
nt
=
1
,
max_threads
do
i
=
1
,
stripe_count
noff
=
(
nt
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
nt
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
row
(
noff
+1
:
noff
+
nl
)
=
a
(
1
:
nl
,
n
,
i
,
nt
)
enddo
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"pack_row"
)
#endif
end
subroutine
pack_row
#else
subroutine
pack_row
(
row
,
n
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
implicit
none
complex
(
kind
=
ck
)
::
row
(:)
integer
(
kind
=
ik
)
::
n
,
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row"
)
#endif
do
i
=
1
,
stripe_count
nl
=
merge
(
stripe_width
,
last_stripe_width
,
i
<
stripe_count
)
noff
=
(
i
-1
)
*
stripe_width
row
(
noff
+1
:
noff
+
nl
)
=
a
(
1
:
nl
,
n
,
i
)
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"unpack_row"
)
#endif
end
subroutine
pack_row
#endif
#ifdef WITH_OPENMP
subroutine
unpack_row
(
row
,
n
,
my_thread
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
! Private variables in OMP regions (my_thread) should better be in the argument list!
integer
(
kind
=
ik
),
intent
(
in
)
::
n
,
my_thread
complex
(
kind
=
ck
),
intent
(
in
)
::
row
(:)
integer
(
kind
=
ik
)
::
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row"
)
#endif
do
i
=
1
,
stripe_count
noff
=
(
my_thread
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
my_thread
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
a
(
1
:
nl
,
n
,
i
,
my_thread
)
=
row
(
noff
+1
:
noff
+
nl
)
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"unpack_row"
)
#endif
end
subroutine
unpack_row
#else
subroutine
unpack_row
(
row
,
n
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
complex
(
kind
=
ck
)
::
row
(:)
integer
(
kind
=
ik
)
::
n
,
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row"
)
#endif
do
i
=
1
,
stripe_count
nl
=
merge
(
stripe_width
,
last_stripe_width
,
i
<
stripe_count
)
noff
=
(
i
-1
)
*
stripe_width
a
(
1
:
nl
,
n
,
i
)
=
row
(
noff
+1
:
noff
+
nl
)
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"unpack_row"
)
#endif
end
subroutine
unpack_row
#endif
#ifdef WITH_OPENMP
subroutine
compute_hh_trafo_complex
(
off
,
ncols
,
istripe
,
my_thread
,
THIS_COMPLEX_ELPA_KERNEL
)
#else
...
...
src/mod_pack_unpack_complex.F90
0 → 100644
View file @
a17ade0d
module
pack_unpack_complex
implicit
none
#ifdef WITH_OPENMP
public
pack_row_complex_cpu_openmp
#else
public
pack_row_complex_cpu
#endif
contains
#ifdef WITH_OPENMP
subroutine
pack_row_complex_cpu_openmp
(
a
,
row
,
n
,
stripe_width
,
stripe_count
,
max_threads
,
thread_width
,
l_nev
)
#else
subroutine
pack_row_complex_cpu
(
a
,
row
,
n
,
stripe_width
,
last_stripe_width
,
stripe_count
)
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
#ifdef WITH_OPENMP
integer
(
kind
=
ik
),
intent
(
in
)
::
stripe_width
,
stripe_count
,
max_threads
,
thread_width
,
l_nev
complex
(
kind
=
ck
),
intent
(
in
)
::
a
(:,:,:,:)
#else
integer
(
kind
=
ik
),
intent
(
in
)
::
stripe_width
,
last_stripe_width
,
stripe_count
complex
(
kind
=
ck
),
intent
(
in
)
::
a
(:,:,:)
#endif
complex
(
kind
=
ck
)
::
row
(:)
integer
(
kind
=
ik
)
::
n
,
i
,
noff
,
nl
,
nt
#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
call
timer
%
start
(
"pack_row_complex_cpu_openmp"
)
#else
call
timer
%
start
(
"pack_row_complex_cpu"
)
#endif
#endif
#ifdef WITH_OPENMP
do
nt
=
1
,
max_threads
do
i
=
1
,
stripe_count
noff
=
(
nt
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
nt
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
row
(
noff
+1
:
noff
+
nl
)
=
a
(
1
:
nl
,
n
,
i
,
nt
)
enddo
enddo
#else
do
i
=
1
,
stripe_count
nl
=
merge
(
stripe_width
,
last_stripe_width
,
i
<
stripe_count
)
noff
=
(
i
-1
)
*
stripe_width
row
(
noff
+1
:
noff
+
nl
)
=
a
(
1
:
nl
,
n
,
i
)
enddo
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
call
timer
%
stop
(
"pack_row_complex_cpu_openmp"
)
#else
call
timer
%
stop
(
"pack_row_complex_cpu"
)
#endif
#endif
#ifdef WITH_OPENMP
end
subroutine
pack_row_complex_cpu_openmp
#else
end
subroutine
pack_row_complex_cpu
#endif
#ifdef WITH_OPENMP
subroutine
unpack_row_complex_cpu_openmp
(
a
,
row
,
n
,
my_thread
,
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
! Private variables in OMP regions (my_thread) should better be in the argument list!
integer
(
kind
=
ik
),
intent
(
in
)
::
n
,
my_thread
integer
(
kind
=
ik
),
intent
(
in
)
::
stripe_count
,
thread_width
,
stripe_width
,
l_nev
complex
(
kind
=
ck
),
intent
(
in
)
::
row
(:)
complex
(
kind
=
ck
)
::
a
(:,:,:,:)
integer
(
kind
=
ik
)
::
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row_complex_cpu_openmp"
)
#endif
do
i
=
1
,
stripe_count
noff
=
(
my_thread
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
my_thread
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
a
(
1
:
nl
,
n
,
i
,
my_thread
)
=
row
(
noff
+1
:
noff
+
nl
)
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"unpack_row_complex_cpu_openmp"
)
#endif
end
subroutine
unpack_row_complex_cpu_openmp
#else /* WITH_OPENMP */
subroutine
unpack_row_complex_cpu
(
a
,
row
,
n
,
stripe_count
,
stripe_width
,
last_stripe_width
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
integer
(
kind
=
ik
),
intent
(
in
)
::
stripe_count
,
stripe_width
,
last_stripe_width
,
n
complex
(
kind
=
ck
),
intent
(
in
)
::
row
(:)
complex
(
kind
=
ck
)
::
a
(:,:,:)
integer
(
kind
=
ik
)
::
i
,
noff
,
nl
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"unpack_row_complex_cpu"
)
#endif
do
i
=
1
,
stripe_count
nl
=
merge
(
stripe_width
,
last_stripe_width
,
i
<
stripe_count
)
noff
=
(
i
-1
)
*
stripe_width
a
(
1
:
nl
,
n
,
i
)
=
row
(
noff
+1
:
noff
+
nl
)
enddo
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"unpack_row_complex_cpu"
)
#endif
end
subroutine
unpack_row_complex_cpu
#endif /* WITH_OPENMP */
end
module
src/mod_pack_unpack_real.F90
0 → 100644
View file @
a17ade0d
module
pack_unpack_real
implicit
none
#ifdef WITH_OPENMP
public
pack_row_real_cpu_openmp
,
unpack_row_real_cpu_openmp
#else
public
pack_row_real_cpu
,
unpack_row_real_cpu
#endif
contains
#ifdef WITH_OPENMP
subroutine
pack_row_real_cpu_openmp
(
a
,
row
,
n
,
stripe_width
,
stripe_count
,
max_threads
,
thread_width
,
l_nev
)
#else
subroutine
pack_row_real_cpu
(
a
,
row
,
n
,
stripe_width
,
last_stripe_width
,
stripe_count
)
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision
implicit
none
integer
(
kind
=
ik
),
intent
(
in
)
::
n
,
stripe_count
,
stripe_width
#ifdef WITH_OPENMP
integer
(
kind
=
ik
),
intent
(
in
)
::
max_threads
,
thread_width
,
l_nev
real
(
kind
=
rk
),
intent
(
in
)
::
a
(:,:,:,:)
#else
integer
(
kind
=
ik
),
intent
(
in
)
::
last_stripe_width
real
(
kind
=
rk
),
intent
(
in
)
::
a
(:,:,:)
#endif
real
(
kind
=
rk
)
::
row
(:)
integer
(
kind
=
ik
)
::
i
,
noff
,
nl
#ifdef WITH_OPENMP
integer
(
kind
=
ik
)
::
nt
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
call
timer
%
start
(
"pack_row_real_cpu_openmp"
)
#else
call
timer
%
start
(
"pack_row_real_cpu"
)
#endif
#endif
#ifdef WITH_OPENMP
do
nt
=
1
,
max_threads
do
i
=
1
,
stripe_count
noff
=
(
nt
-1
)
*
thread_width
+
(
i
-1
)
*
stripe_width
nl
=
min
(
stripe_width
,
nt
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
exit
row
(
noff
+1
:
noff
+
nl
)
=
a
(
1
:
nl
,
n
,
i
,
nt
)
enddo
enddo
#else
do
i
=
1
,
stripe_count
nl
=
merge
(
stripe_width
,
last_stripe_width
,
i
<
stripe_count
)
noff
=
(
i
-1
)
*
stripe_width
row
(
noff
+1
:
noff
+
nl
)
=
a
(
1
:
nl
,
n
,
i
)
enddo
#endif
#ifdef HAVE_DETAILED_TIMINGS
#ifdef WITH_OPENMP
call
timer
%
stop
(
"pack_row_real_cpu_openmp"
)
#else
call
timer
%
stop
(
"pack_row_real_cpu"
)
#endif
#endif
#ifdef WITH_OPENMP
end
subroutine
pack_row_real_cpu_openmp
#else
end
subroutine
pack_row_real_cpu
#endif
#ifdef WITH_OPENMP
subroutine
unpack_row_real_cpu_openmp
(
a
,
row
,
n
,
my_thread
,
stripe_count
,
thread_width
,
stripe_width
,
l_nev
)
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
precision