Commit 8c10c2dc authored by Andreas Marek

Replace tabs with spaces

parent 0b85f610
...@@ -109,7 +109,7 @@ ...@@ -109,7 +109,7 @@
implicit none implicit none
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
logical, intent(in) :: useGPU, wantDebug logical, intent(in) :: useGPU, wantDebug
real(kind=c_double), intent(inout) :: kernel_time ! MPI_WTIME always needs double real(kind=c_double), intent(inout) :: kernel_time ! MPI_WTIME always needs double
integer(kind=lik) :: kernel_flops integer(kind=lik) :: kernel_flops
integer(kind=ik), intent(in) :: nbw, max_blk_size integer(kind=ik), intent(in) :: nbw, max_blk_size
...@@ -182,8 +182,8 @@ ...@@ -182,8 +182,8 @@
( kernel .ne. ELPA_2STAGE_COMPLEX_GPU)) then ( kernel .ne. ELPA_2STAGE_COMPLEX_GPU)) then
#endif #endif
print *,"ERROR: useGPU is set in conpute_hh_trafo but not GPU kernel!" print *,"ERROR: useGPU is set in conpute_hh_trafo but not GPU kernel!"
stop stop
endif endif
endif endif
#if REALCASE == 1 #if REALCASE == 1
...@@ -194,11 +194,11 @@ ...@@ -194,11 +194,11 @@
#endif #endif
! ncols - indicates the number of HH reflectors to apply; at least 1 must be available ! ncols - indicates the number of HH reflectors to apply; at least 1 must be available
if (ncols < 1) then if (ncols < 1) then
if (wantDebug) then if (wantDebug) then
print *, "Returning early from compute_hh_trafo" print *, "Returning early from compute_hh_trafo"
endif endif
return return
endif endif
endif endif
if (wantDebug) call obj%timer%start("compute_hh_trafo_& if (wantDebug) call obj%timer%start("compute_hh_trafo_&
...@@ -268,9 +268,9 @@ ...@@ -268,9 +268,9 @@
#if REALCASE == 1 #if REALCASE == 1
! GPU kernel real ! GPU kernel real
if (kernel .eq. ELPA_2STAGE_REAL_GPU) then if (kernel .eq. ELPA_2STAGE_REAL_GPU) then
if (wantDebug) then if (wantDebug) then
call obj%timer%start("compute_hh_trafo: GPU") call obj%timer%start("compute_hh_trafo: GPU")
endif endif
dev_offset = (0 + (a_off * stripe_width) + ( (istripe - 1) * stripe_width *a_dim2 )) *size_of_& dev_offset = (0 + (a_off * stripe_width) + ( (istripe - 1) * stripe_width *a_dim2 )) *size_of_&
&PRECISION& &PRECISION&
&_& &_&
...@@ -285,9 +285,9 @@ ...@@ -285,9 +285,9 @@
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
! GPU kernel complex ! GPU kernel complex
if (kernel .eq. ELPA_2STAGE_COMPLEX_GPU) then if (kernel .eq. ELPA_2STAGE_COMPLEX_GPU) then
if (wantDebug) then if (wantDebug) then
call obj%timer%start("compute_hh_trafo: GPU") call obj%timer%start("compute_hh_trafo: GPU")
endif endif
dev_offset = (0 + ( ( a_off + off-1 )* stripe_width) + ( (istripe - 1)*stripe_width*a_dim2 )) * size_of_& dev_offset = (0 + ( ( a_off + off-1 )* stripe_width) + ( (istripe - 1)*stripe_width*a_dim2 )) * size_of_&
&PRECISION& &PRECISION&
...@@ -421,7 +421,7 @@ ...@@ -421,7 +421,7 @@
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), & & (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), &
bcast_buffer(1:nbw,j+off), nbw, nl, stripe_width) bcast_buffer(1:nbw,j+off), nbw, nl, stripe_width)
#endif #endif
#else /* WITH_OPENMP */ #else /* WITH_OPENMP */
...@@ -438,7 +438,7 @@ ...@@ -438,7 +438,7 @@
&_generic_& &_generic_&
&PRECISION& &PRECISION&
& (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off), & & (a(1:stripe_width,j+off+a_off:j+off+a_off+nbw-1,istripe), bcast_buffer(1:nbw,j+off), &
nbw, nl, stripe_width) nbw, nl, stripe_width)
#endif #endif
#endif /* WITH_OPENMP */ #endif /* WITH_OPENMP */
...@@ -527,7 +527,7 @@ ...@@ -527,7 +527,7 @@
&_generic_simple_& &_generic_simple_&
&PRECISION& &PRECISION&
& (a(1:stripe_width, j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off), & & (a(1:stripe_width, j+off+a_off:j+off+a_off+nbw-1,istripe,my_thread), bcast_buffer(1:nbw,j+off), &
nbw, nl, stripe_width) nbw, nl, stripe_width)
#endif #endif
#else /* WITH_OPENMP */ #else /* WITH_OPENMP */
...@@ -2120,9 +2120,9 @@ ...@@ -2120,9 +2120,9 @@
!no avx512 block6 complex kernel !no avx512 block6 complex kernel
#endif /* COMPLEXCASE */ #endif /* COMPLEXCASE */
if (wantDebug) then if (wantDebug) then
call obj%timer%stop("compute_hh_trafo: CPU") call obj%timer%stop("compute_hh_trafo: CPU")
endif endif
endif ! GPU_KERNEL endif ! GPU_KERNEL
#ifdef WITH_OPENMP #ifdef WITH_OPENMP
......
...@@ -368,9 +368,9 @@ ...@@ -368,9 +368,9 @@
if (kernel .eq. ELPA_2STAGE_COMPLEX_GPU) then if (kernel .eq. ELPA_2STAGE_COMPLEX_GPU) then
#endif #endif
do_useGPU_trans_ev_tridi = .true. do_useGPU_trans_ev_tridi = .true.
else else
do_useGPU_trans_ev_tridi = .false. do_useGPU_trans_ev_tridi = .false.
endif endif
endif endif
endif endif
...@@ -401,31 +401,31 @@ ...@@ -401,31 +401,31 @@
if (nbw == 0) then if (nbw == 0) then
if (wantDebug) then if (wantDebug) then
write(error_unit,*) "Specified bandwidth = 0; ELPA refuses to solve the eigenvalue problem ", & write(error_unit,*) "Specified bandwidth = 0; ELPA refuses to solve the eigenvalue problem ", &
"for a diagonal matrix! This is too simple" "for a diagonal matrix! This is too simple"
endif endif
print *, "Specified bandwidth = 0; ELPA refuses to solve the eigenvalue problem ", & print *, "Specified bandwidth = 0; ELPA refuses to solve the eigenvalue problem ", &
"for a diagonal matrix! This is too simple" "for a diagonal matrix! This is too simple"
success = .false. success = .false.
return return
endif endif
if (mod(nbw, nblk) .ne. 0) then if (mod(nbw, nblk) .ne. 0) then
! treat matrix with an effective bandwidth slightly bigger than specified bandwidth ! treat matrix with an effective bandwidth slightly bigger than specified bandwidth
! such that effective bandwidth is a multiple of nblk, which is a prerequisite for ELPA ! such that effective bandwidth is a multiple of nblk, which is a prerequisite for ELPA
nbw = nblk * ceiling(real(nbw,kind=c_double)/real(nblk,kind=c_double)) nbw = nblk * ceiling(real(nbw,kind=c_double)/real(nblk,kind=c_double))
! just check that effective bandwidth is NOT larger than matrix size ! just check that effective bandwidth is NOT larger than matrix size
if (nbw .gt. na) then if (nbw .gt. na) then
if (wantDebug) then if (wantDebug) then
write(error_unit,*) "Specified bandwidth ",nbw," leads internaly to a computed bandwidth ", & write(error_unit,*) "Specified bandwidth ",nbw," leads internaly to a computed bandwidth ", &
"which is larger than the matrix size ",na," ! ELPA will abort! Try to", & "which is larger than the matrix size ",na," ! ELPA will abort! Try to", &
"solve your problem by not specifing a bandwidth" "solve your problem by not specifing a bandwidth"
endif endif
print *, "Specified bandwidth ",nbw," leads internaly to a computed bandwidth ", & print *, "Specified bandwidth ",nbw," leads internaly to a computed bandwidth ", &
"which is larger than the matrix size ",na," ! ELPA will abort! Try to", & "which is larger than the matrix size ",na," ! ELPA will abort! Try to", &
"solve your problem by not specifing a bandwidth" "solve your problem by not specifing a bandwidth"
success = .false. success = .false.
return return
endif endif
endif endif
do_bandred = .false. ! we already have a banded matrix do_bandred = .false. ! we already have a banded matrix
do_solve_tridi = .true. ! we also have to solve something :-) do_solve_tridi = .true. ! we also have to solve something :-)
...@@ -575,7 +575,7 @@ ...@@ -575,7 +575,7 @@
! not positive definite => eigenvectors needed ! not positive definite => eigenvectors needed
do_trans_to_band = .true. do_trans_to_band = .true.
do_trans_to_full = .true. do_trans_to_full = .true.
else else
do_trans_to_band = .false. do_trans_to_band = .false.
do_trans_to_full = .false. do_trans_to_full = .false.
endif endif
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -336,14 +336,14 @@ module ftimings ...@@ -336,14 +336,14 @@ module ftimings
!> \param bytes_per_ldst For calculating the AI, assume this number !> \param bytes_per_ldst For calculating the AI, assume this number
!> of bytes per load or store (default: 8) !> of bytes per load or store (default: 8)
subroutine timer_set_print_options(self, & subroutine timer_set_print_options(self, &
print_allocated_memory, & print_allocated_memory, &
print_virtual_memory, & print_virtual_memory, &
print_max_allocated_memory, & print_max_allocated_memory, &
print_flop_count, & print_flop_count, &
print_flop_rate, & print_flop_rate, &
print_ldst, & print_ldst, &
print_memory_bandwidth, & print_memory_bandwidth, &
print_ai, & print_ai, &
bytes_per_ldst) bytes_per_ldst)
class(timer_t), intent(inout) :: self class(timer_t), intent(inout) :: self
logical, intent(in), optional :: & logical, intent(in), optional :: &
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment