Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
elpa
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
14
Issues
14
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Environments
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
elpa
elpa
Commits
ddba9c0f
Commit
ddba9c0f
authored
Aug 29, 2017
by
Pavel Kus
Committed by
Andreas Marek
Sep 01, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
real/complex unifications in initializations
in elpa2_trans_ev_tridi_to_band_template
parent
ea606c3b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
80 deletions
+14
-80
src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90
src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90
+14
-80
No files found.
src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90
View file @
ddba9c0f
...
@@ -525,13 +525,7 @@
...
@@ -525,13 +525,7 @@
stop
1
stop
1
endif
endif
#if REALCASE == 1
row_group
(:,
:)
=
0.0_rck
row_group
(:,
:)
=
CONST_0_0
#endif
#if COMPLEXCASE == 1
row_group
(:,
:)
=
CONST_COMPLEX_0_0
#endif
num
=
(
l_nev
*
nblk
)
*
size_of_datatype
num
=
(
l_nev
*
nblk
)
*
size_of_datatype
successCUDA
=
cuda_malloc
(
row_group_dev
,
num
)
successCUDA
=
cuda_malloc
(
row_group_dev
,
num
)
if
(
.not.
(
successCUDA
))
then
if
(
.not.
(
successCUDA
))
then
...
@@ -558,12 +552,7 @@
...
@@ -558,12 +552,7 @@
#ifdef WITH_OPENMP
#ifdef WITH_OPENMP
if
(
posix_memalign
(
aIntern_ptr
,
64_c_intptr_t
,
stripe_width
*
a_dim2
*
stripe_count
*
max_threads
*
&
if
(
posix_memalign
(
aIntern_ptr
,
64_c_intptr_t
,
stripe_width
*
a_dim2
*
stripe_count
*
max_threads
*
&
#if REALCASE == 1
C_SIZEOF
(
a_var
))
/
=
0
)
then
C_SIZEOF
(
a_var
))
/
=
0
)
then
#endif
#if COMPLEXCASE == 1
C_SIZEOF
(
a_var
))
/
=
0
)
then
#endif
print
*
,
"trans_ev_tridi_to_band_&
print
*
,
"trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&MATH_DATATYPE&
&: error when allocating aIntern"
//
errorMessage
&: error when allocating aIntern"
//
errorMessage
...
@@ -578,12 +567,7 @@
...
@@ -578,12 +567,7 @@
#else /* WITH_OPENMP */
#else /* WITH_OPENMP */
if
(
posix_memalign
(
aIntern_ptr
,
64_c_intptr_t
,
stripe_width
*
a_dim2
*
stripe_count
*
&
if
(
posix_memalign
(
aIntern_ptr
,
64_c_intptr_t
,
stripe_width
*
a_dim2
*
stripe_count
*
&
#if REALCASE == 1
C_SIZEOF
(
a_var
))
/
=
0
)
then
#endif
#if COMPLEXCASE == 1
C_SIZEOF
(
a_var
))
/
=
0
)
then
C_SIZEOF
(
a_var
))
/
=
0
)
then
#endif
print
*
,
"trans_ev_tridi_to_band_real: error when allocating aIntern"
//
errorMessage
print
*
,
"trans_ev_tridi_to_band_real: error when allocating aIntern"
//
errorMessage
stop
1
stop
1
endif
endif
...
@@ -591,12 +575,7 @@
...
@@ -591,12 +575,7 @@
call
c_f_pointer
(
aIntern_ptr
,
aIntern
,[
stripe_width
,
a_dim2
,
stripe_count
]
)
call
c_f_pointer
(
aIntern_ptr
,
aIntern
,[
stripe_width
,
a_dim2
,
stripe_count
]
)
!allocate(aIntern(stripe_width,a_dim2,stripe_count), stat=istat, errmsg=errorMessage)
!allocate(aIntern(stripe_width,a_dim2,stripe_count), stat=istat, errmsg=errorMessage)
#if REALCASE == 1
aIntern
(:,:,:)
=
0.0_rck
aIntern
(:,:,:)
=
CONST_0_0
#endif
#if COMPLEXCASE == 1
aIntern
(:,:,:)
=
0
#endif
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP */
endif
!useGPU
endif
!useGPU
...
@@ -608,13 +587,7 @@
...
@@ -608,13 +587,7 @@
stop
1
stop
1
endif
endif
#if REALCASE == 1
row
(:)
=
0.0_rck
row
(:)
=
CONST_0_0
#endif
#if COMPLEXCASE == 1
row
(:)
=
0
#endif
! Copy q from a block cyclic distribution into a distribution with contiguous rows,
! Copy q from a block cyclic distribution into a distribution with contiguous rows,
! and transpose the matrix using stripes of given stripe_width for cache blocking.
! and transpose the matrix using stripes of given stripe_width for cache blocking.
...
@@ -630,12 +603,7 @@
...
@@ -630,12 +603,7 @@
call
obj
%
timer
%
start
(
"OpenMP parallel"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"OpenMP parallel"
//
PRECISION_SUFFIX
)
!$omp parallel do private(my_thread), schedule(static, 1)
!$omp parallel do private(my_thread), schedule(static, 1)
do
my_thread
=
1
,
max_threads
do
my_thread
=
1
,
max_threads
#if REALCASE == 1
aIntern
(:,:,:,
my_thread
)
=
0.0_rck
! if possible, do first touch allocation!
aIntern
(:,:,:,
my_thread
)
=
CONST_0_0
! if possible, do first touch allocation!
#endif
#if COMPLEXCASE == 1
aIntern
(:,:,:,
my_thread
)
=
CONST_COMPLEX_0_0
! if possible, do first touch allocation!
#endif
enddo
enddo
!$omp end parallel do
!$omp end parallel do
...
@@ -759,14 +727,7 @@
...
@@ -759,14 +727,7 @@
row_group_size
,
nblk
,
unpack_idx
,
&
row_group_size
,
nblk
,
unpack_idx
,
&
i
-
limits
(
ip
),
.false.
)
i
-
limits
(
ip
),
.false.
)
#if REALCASE == 1
row_group
(:,
row_group_size
)
=
q
(
src_offset
,
1
:
l_nev
)
#endif
#if COMPLEXCASE == 1
row_group
(:,
row_group_size
)
=
q
(
src_offset
,
1
:
l_nev
)
row_group
(:,
row_group_size
)
=
q
(
src_offset
,
1
:
l_nev
)
#endif
#else /* WITH_OPENMP */
#else /* WITH_OPENMP */
#if COMPLEXCASE == 1
#if COMPLEXCASE == 1
...
@@ -1126,18 +1087,10 @@
...
@@ -1126,18 +1087,10 @@
stop
1
stop
1
endif
endif
#if REALCASE == 1
top_border_send_buffer
(:,:)
=
0.0_rck
top_border_send_buffer
(:,:)
=
CONST_0_0
top_border_recv_buffer
(:,:)
=
0.0_rck
top_border_recv_buffer
(:,:)
=
CONST_0_0
bottom_border_send_buffer
(:,:)
=
0.0_rck
bottom_border_send_buffer
(:,:)
=
CONST_0_0
bottom_border_recv_buffer
(:,:)
=
0.0_rck
bottom_border_recv_buffer
(:,:)
=
CONST_0_0
#endif
#if COMPLEXCASE == 1
top_border_send_buffer
(:,:)
=
CONST_COMPLEX_0_0
top_border_recv_buffer
(:,:)
=
CONST_COMPLEX_0_0
bottom_border_send_buffer
(:,:)
=
CONST_COMPLEX_0_0
bottom_border_recv_buffer
(:,:)
=
CONST_COMPLEX_0_0
#endif
! Initialize broadcast buffer
! Initialize broadcast buffer
#else /* WITH_OPENMP */
#else /* WITH_OPENMP */
...
@@ -1174,19 +1127,10 @@
...
@@ -1174,19 +1127,10 @@
stop
1
stop
1
endif
endif
#if REALCASE == 1
top_border_send_buffer
(:,:,:)
=
0.0_rck
top_border_send_buffer
(:,:,:)
=
CONST_0_0
top_border_recv_buffer
(:,:,:)
=
0.0_rck
top_border_recv_buffer
(:,:,:)
=
CONST_0_0
bottom_border_send_buffer
(:,:,:)
=
0.0_rck
bottom_border_send_buffer
(:,:,:)
=
CONST_0_0
bottom_border_recv_buffer
(:,:,:)
=
0.0_rck
bottom_border_recv_buffer
(:,:,:)
=
CONST_0_0
#endif
#if COMPLEXCASE == 1
top_border_send_buffer
(:,:,:)
=
CONST_COMPLEX_0_0
top_border_recv_buffer
(:,:,:)
=
CONST_COMPLEX_0_0
bottom_border_send_buffer
(:,:,:)
=
CONST_COMPLEX_0_0
bottom_border_recv_buffer
(:,:,:)
=
CONST_COMPLEX_0_0
#endif
#endif /* WITH_OPENMP */
#endif /* WITH_OPENMP */
! Initialize broadcast buffer
! Initialize broadcast buffer
...
@@ -1199,12 +1143,7 @@
...
@@ -1199,12 +1143,7 @@
stop
1
stop
1
endif
endif
#if REALCASE == 1
bcast_buffer
=
0.0_rck
bcast_buffer
=
CONST_0_0
#endif
#if COMPLEXCASE == 1
bcast_buffer
=
0
#endif
if
(
useGPU
)
then
if
(
useGPU
)
then
num
=
(
nbw
*
max_blk_size
)
*
size_of_datatype
num
=
(
nbw
*
max_blk_size
)
*
size_of_datatype
successCUDA
=
cuda_malloc
(
bcast_buffer_dev
,
num
)
successCUDA
=
cuda_malloc
(
bcast_buffer_dev
,
num
)
...
@@ -1404,12 +1343,7 @@
...
@@ -1404,12 +1343,7 @@
else
! (current_local_n > 1) then
else
! (current_local_n > 1) then
! for current_local_n == 1 the one and only HH Vector is 0 and not stored in hh_trans_real/complex
! for current_local_n == 1 the one and only HH Vector is 0 and not stored in hh_trans_real/complex
#if REALCASE == 1
bcast_buffer
(:,
1
)
=
0.0_rck
bcast_buffer
(:,
1
)
=
CONST_0_0
#endif
#if COMPLEXCASE == 1
bcast_buffer
(:,
1
)
=
CONST_COMPLEX_0_0
#endif
if
(
useGPU
)
then
if
(
useGPU
)
then
successCUDA
=
cuda_memset
(
bcast_buffer_dev
,
0
,
nbw
*
size_of_datatype
)
successCUDA
=
cuda_memset
(
bcast_buffer_dev
,
0
,
nbw
*
size_of_datatype
)
if
(
.not.
(
successCUDA
))
then
if
(
.not.
(
successCUDA
))
then
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment