Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
952b8091
Commit
952b8091
authored
Jan 06, 2013
by
Alexander Heinecke
Browse files
fixed some bugs in integration of new kernels in development branch
parent
16114466
Changes
2
Hide whitespace changes
Inline
Side-by-side
ELPA_development_version/src/elpa2.f90
View file @
952b8091
...
@@ -3341,6 +3341,7 @@ subroutine trans_ev_tridi_to_band_complex(na, nev, nblk, nbw, q, ldq, mpi_comm_r
...
@@ -3341,6 +3341,7 @@ subroutine trans_ev_tridi_to_band_complex(na, nev, nblk, nbw, q, ldq, mpi_comm_r
a_dim2
=
max_blk_size
+
nbw
a_dim2
=
max_blk_size
+
nbw
!DEC$ ATTRIBUTES ALIGN: 64:: a
allocate
(
a
(
stripe_width
,
a_dim2
,
stripe_count
,
max_threads
))
allocate
(
a
(
stripe_width
,
a_dim2
,
stripe_count
,
max_threads
))
! a(:,:,:,:) should be set to 0 in a parallel region, not here!
! a(:,:,:,:) should be set to 0 in a parallel region, not here!
...
@@ -3830,6 +3831,13 @@ contains
...
@@ -3830,6 +3831,13 @@ contains
integer
,
intent
(
in
)
::
off
,
ncols
,
istripe
,
my_thread
integer
,
intent
(
in
)
::
off
,
ncols
,
istripe
,
my_thread
integer
j
,
nl
,
noff
integer
j
,
nl
,
noff
real
*
8
ttt
real
*
8
ttt
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! complex*16 w(nbw,2)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ttt
=
mpi_wtime
()
ttt
=
mpi_wtime
()
if
(
istripe
<
stripe_count
)
then
if
(
istripe
<
stripe_count
)
then
...
@@ -3839,6 +3847,18 @@ contains
...
@@ -3839,6 +3847,18 @@ contains
nl
=
min
(
my_thread
*
thread_width
-
noff
,
l_nev
-
noff
)
nl
=
min
(
my_thread
*
thread_width
-
noff
,
l_nev
-
noff
)
if
(
nl
<=
0
)
return
if
(
nl
<=
0
)
return
endif
endif
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! do j = ncols, 2, -2
! w(:,1) = bcast_buffer(1:nbw,j+off)
! w(:,2) = bcast_buffer(1:nbw,j+off-1)
! call double_hh_trafo_complex(a(1,j+off+a_off-1,istripe, my_thread), w, nbw, nl, stripe_width, nbw)
! enddo
! if(j==1) call single_hh_trafo_complex(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
do
j
=
ncols
,
1
,
-1
do
j
=
ncols
,
1
,
-1
call
single_hh_trafo_complex
(
a
(
1
,
j
+
off
+
a_off
,
istripe
,
my_thread
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
call
single_hh_trafo_complex
(
a
(
1
,
j
+
off
+
a_off
,
istripe
,
my_thread
),
bcast_buffer
(
1
,
j
+
off
),
nbw
,
nl
,
stripe_width
)
enddo
enddo
...
...
ELPA_development_version/test/test_complex2.f90
View file @
952b8091
...
@@ -25,7 +25,8 @@ program test_complex2
...
@@ -25,7 +25,8 @@ program test_complex2
! nblk: Blocking factor in block cyclic distribution
! nblk: Blocking factor in block cyclic distribution
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
integer
::
na
=
4000
,
nev
=
1500
,
nblk
=
16
integer
,
parameter
::
nblk
=
16
integer
na
,
nev
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
! Local Variables
! Local Variables
...
@@ -46,6 +47,21 @@ program test_complex2
...
@@ -46,6 +47,21 @@ program test_complex2
integer
::
iseed
(
4096
)
! Random seed, size should be sufficient for every generator
integer
::
iseed
(
4096
)
! Random seed, size should be sufficient for every generator
!-------------------------------------------------------------------------------
! Parse command line arguments, if given
character
*
16
arg1
character
*
16
arg2
na
=
4000
nev
=
1500
if
(
iargc
()
==
2
)
then
call
getarg
(
1
,
arg1
)
call
getarg
(
2
,
arg2
)
read
(
arg1
,
*
)
na
read
(
arg2
,
*
)
nev
endif
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
! MPI Initialization
! MPI Initialization
...
@@ -59,7 +75,7 @@ program test_complex2
...
@@ -59,7 +75,7 @@ program test_complex2
! We only read on mpi task number myid = 0 to avoid any possible confusion.
! We only read on mpi task number myid = 0 to avoid any possible confusion.
! The parameters of interest are subsequently broadcast to all other mpi tasks.
! The parameters of interest are subsequently broadcast to all other mpi tasks.
call
read_test_parameters
(
na
,
nev
,
nblk
,
myid
,
mpi_comm_world
)
! call read_test_parameters (na,nev,nblk,myid,mpi_comm_world)
!-------------------------------------------------------------------------------
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! Selection of number of processor rows/columns
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment