Commit 952b8091 authored by Alexander Heinecke
Browse files

fixed some bugs in integration of new kernels in development branch

parent 16114466
...@@ -3341,6 +3341,7 @@ subroutine trans_ev_tridi_to_band_complex(na, nev, nblk, nbw, q, ldq, mpi_comm_r ...@@ -3341,6 +3341,7 @@ subroutine trans_ev_tridi_to_band_complex(na, nev, nblk, nbw, q, ldq, mpi_comm_r
a_dim2 = max_blk_size + nbw a_dim2 = max_blk_size + nbw
!DEC$ ATTRIBUTES ALIGN: 64:: a
allocate(a(stripe_width,a_dim2,stripe_count,max_threads)) allocate(a(stripe_width,a_dim2,stripe_count,max_threads))
! a(:,:,:,:) should be set to 0 in a parallel region, not here! ! a(:,:,:,:) should be set to 0 in a parallel region, not here!
...@@ -3830,6 +3831,13 @@ contains ...@@ -3830,6 +3831,13 @@ contains
integer, intent(in) :: off, ncols, istripe, my_thread integer, intent(in) :: off, ncols, istripe, my_thread
integer j, nl, noff integer j, nl, noff
real*8 ttt real*8 ttt
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! complex*16 w(nbw,2)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
ttt = mpi_wtime() ttt = mpi_wtime()
if(istripe<stripe_count) then if(istripe<stripe_count) then
...@@ -3839,6 +3847,18 @@ contains ...@@ -3839,6 +3847,18 @@ contains
nl = min(my_thread*thread_width-noff, l_nev-noff) nl = min(my_thread*thread_width-noff, l_nev-noff)
if(nl<=0) return if(nl<=0) return
endif endif
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! do j = ncols, 2, -2
! w(:,1) = bcast_buffer(1:nbw,j+off)
! w(:,2) = bcast_buffer(1:nbw,j+off-1)
! call double_hh_trafo_complex(a(1,j+off+a_off-1,istripe, my_thread), w, nbw, nl, stripe_width, nbw)
! enddo
! if(j==1) call single_hh_trafo_complex(a(1,1+off+a_off,istripe,my_thread),bcast_buffer(1,off+1), nbw, nl, stripe_width)
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! Currently (on Sandy Bridge), single is faster than double
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
do j = ncols, 1, -1 do j = ncols, 1, -1
call single_hh_trafo_complex(a(1,j+off+a_off,istripe,my_thread),bcast_buffer(1,j+off),nbw,nl,stripe_width) call single_hh_trafo_complex(a(1,j+off+a_off,istripe,my_thread),bcast_buffer(1,j+off),nbw,nl,stripe_width)
enddo enddo
......
...@@ -25,7 +25,8 @@ program test_complex2 ...@@ -25,7 +25,8 @@ program test_complex2
! nblk: Blocking factor in block cyclic distribution ! nblk: Blocking factor in block cyclic distribution
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
integer :: na = 4000, nev = 1500, nblk = 16 integer, parameter :: nblk = 16
integer na, nev
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Local Variables ! Local Variables
...@@ -46,6 +47,21 @@ program test_complex2 ...@@ -46,6 +47,21 @@ program test_complex2
integer :: iseed(4096) ! Random seed, size should be sufficient for every generator integer :: iseed(4096) ! Random seed, size should be sufficient for every generator
!-------------------------------------------------------------------------------
! Parse command line arguments, if given
character*16 arg1
character*16 arg2
na = 4000
nev = 1500
if (iargc() == 2) then
call getarg(1, arg1)
call getarg(2, arg2)
read(arg1, *) na
read(arg2, *) nev
endif
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! MPI Initialization ! MPI Initialization
...@@ -59,7 +75,7 @@ program test_complex2 ...@@ -59,7 +75,7 @@ program test_complex2
! We only read on mpi task number myid = 0 to avoid any possible confusion. ! We only read on mpi task number myid = 0 to avoid any possible confusion.
! The parameters of interest are subsequently broadcast to all other mpi tasks. ! The parameters of interest are subsequently broadcast to all other mpi tasks.
call read_test_parameters (na,nev,nblk,myid,mpi_comm_world) !call read_test_parameters (na,nev,nblk,myid,mpi_comm_world)
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
! Selection of number of processor rows/columns ! Selection of number of processor rows/columns
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment