Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
74a07ae8
Commit
74a07ae8
authored
Jan 26, 2017
by
Andreas Marek
Browse files
Unify real/complex versions of elpa2_template
Due to a bug in the Intel compiler two stages could not be unified
parent
289c35ac
Changes
1
Pipelines
1
Show whitespace changes
Inline
Side-by-side
src/elpa2_template.X90
View file @
74a07ae8
function solve_evp_&
&MATH_DATATYPE&
&_&
&2stage_&
&PRECISION &
(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
#if REALCASE == 1
#if REALCASE == 1
function solve_evp_real_2stage_PRECISION (na, nev, a, lda, ev, q, ldq, nblk, &
THIS_ELPA_KERNEL_API, useQR, &
matrixCols, mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, THIS_ELPA_KERNEL_API, useQR, useGPU, bandwidth) result(success)
#elif COMPLEXCASE == 1
function solve_evp_complex_2stage_PRECISION(na, nev, a, lda, ev, q, ldq, nblk, &
matrixCols, mpi_comm_rows, mpi_comm_cols, &
mpi_comm_all, THIS_ELPA_KERNEL_API, useGPU, bandwidth) result(success)
#endif
#endif
#if COMPLEXCASE == 1
THIS_ELPA_KERNEL_API, &
#endif
useGPU, bandwidth) result(success)
#ifdef HAVE_DETAILED_TIMINGS
#ifdef HAVE_DETAILED_TIMINGS
use timings
use timings
#else
#else
...
@@ -62,7 +67,11 @@
...
@@ -62,7 +67,11 @@
logical :: do_useGPU
logical :: do_useGPU
integer(kind=c_int) :: numberOfGPUDevices
integer(kind=c_int) :: numberOfGPUDevices
call timer%start(solve_evp_NUMBER_2stage_PRECISION_STR)
call timer%start("solve_evp_&
&MATH_DATATYPE&
&_2stage_" // &
&PRECISION_SUFFIX &
)
call timer%start("mpi_communication")
call timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
...
@@ -108,7 +117,7 @@
...
@@ -108,7 +117,7 @@
return
return
endif
endif
endif
endif
#endif
#endif
/* REALCASE */
if (present(useGPU)) then
if (present(useGPU)) then
if (useGPU) then
if (useGPU) then
...
@@ -151,34 +160,72 @@
...
@@ -151,34 +160,72 @@
if (my_pe == 0) then
if (my_pe == 0) then
write(error_unit,*) " "
write(error_unit,*) " "
write(error_unit,*) "The choosen kernel ",UPCASENUMBER_ELPA_KERNEL_NAMES(THIS_ELPA_KERNEL)
write(error_unit,*) "The choosen kernel ", &
&MATH_DATATYPE&
&_ELPA_KERNEL_NAMES(THIS_ELPA_KERNEL)
write(error_unit,*) "is not in the list of the allowed kernels!"
write(error_unit,*) "is not in the list of the allowed kernels!"
write(error_unit,*) " "
write(error_unit,*) " "
write(error_unit,*) "Allowed kernels are:"
write(error_unit,*) "Allowed kernels are:"
do i=1,size(UPCASENUMBER_ELPA_KERNEL_NAMES(:))
do i=1,size( &
if (AVAILABLE_UPCASENUMBER_ELPA_KERNELS(i) .ne. 0) then
&MATH_DATATYPE&
write(error_unit,*) UPCASENUMBER_ELPA_KERNEL_NAMES(i)
&_ELPA_KERNEL_NAMES(:))
if (AVAILABLE_&
&MATH_DATATYPE&
&_ELPA_KERNELS(i) .ne. 0) then
write(error_unit,*) &
#if REALCASE == 1
REAL_ELPA_KERNEL_NAMES(i)
#endif
#if COMPLEXCASE == 1
COMPLEX_ELPA_KERNEL_NAMES(i)
#endif
endif
endif
enddo
enddo
write(error_unit,*) " "
write(error_unit,*) " "
! check whether generic kernel is defined
! check whether generic kernel is defined
if (AVAILABLE_UPCASENUMBER_ELPA_KERNELS(UPCASENUMBER_ELPA_KERNEL_GENERIC) .eq. 1) then
if (AVAILABLE_&
write(error_unit,*) "The default kernel NUMBER_ELPA_KERNEL_GENERIC will be used !"
&MATH_DATATYPE&
&_ELPA_KERNELS( &
#if REALCASE == 1
REAL_ELPA_KERNEL_GENERIC ) .eq. 1) then
#endif
#if COMPLEXCASE == 1
COMPLEX_ELPA_KERNEL_GENERIC ) .eq. 1) then
#endif
write(error_unit,*) "The default kernel &
&MATH_DATATYPE&
&( &
&_ELPA_KERNEL_GENERIC will be used !"
else
else
write(error_unit,*) "As default kernel ",UPCASENUMBER_ELPA_KERNEL_NAMES(DEFAULT_UPCASENUMBER_ELPA_KERNEL)," will be used"
write(error_unit,*) "As default kernel ", &
&MATH_DATATYPE&
&_ELPA_KERNEL_NAMES(DEFAULT_&
&MATH_DATATYPE&
&_ELPA_KERNEL)," will be used"
endif
endif
endif ! my_pe == 0
endif ! my_pe == 0
if (AVAILABLE_UPCASENUMBER_ELPA_KERNELS(UPCASENUMBER_ELPA_KERNEL_GENERIC) .eq. 1) then
if (AVAILABLE_&
THIS_ELPA_KERNEL = UPCASENUMBER_ELPA_KERNEL_GENERIC
&MATH_DATATYPE&
&_ELPA_KERNELS( &
&MATH_DATATYPE&
&_ELPA_KERNEL_GENERIC) .eq. 1) then
THIS_ELPA_KERNEL = &
&MATH_DATATYPE&
&_ELPA_KERNEL_GENERIC
else
else
THIS_ELPA_KERNEL = DEFAULT_UPCASENUMBER_ELPA_KERNEL
THIS_ELPA_KERNEL = DEFAULT_&
&MATH_DATATYPE&
&_ELPA_KERNEL
endif
endif
endif
endif
! check consistency between request for GPUs and defined kernel
! check consistency between request for GPUs and defined kernel
if (do_useGPU) then
if (do_useGPU) then
if (THIS_ELPA_KERNEL .ne. UPCASENUMBER_ELPA_KERNEL_GPU) then
if (THIS_ELPA_KERNEL .ne. &
&MATH_DATATYPE&
&_ELPA_KERNEL_GPU) then
write(error_unit,*) "GPU usage has been requested but compute kernel is defined as non-GPU! Aborting..."
write(error_unit,*) "GPU usage has been requested but compute kernel is defined as non-GPU! Aborting..."
success = .false.
success = .false.
return
return
...
@@ -227,7 +274,9 @@
...
@@ -227,7 +274,9 @@
allocate(tmat(nbw,nbw,num_blocks), stat=istat, errmsg=errorMessage)
allocate(tmat(nbw,nbw,num_blocks), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
if (istat .ne. 0) then
print *,solve_evp_NUMBER_2stage_PRECISION_STR//": error when allocating tmat "//errorMessage
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage_PRECISION " // ": error when allocating tmat "//errorMessage
stop
stop
endif
endif
...
@@ -235,17 +284,29 @@
...
@@ -235,17 +284,29 @@
ttt0 = MPI_Wtime()
ttt0 = MPI_Wtime()
ttts = ttt0
ttts = ttt0
call bandred_&
&MATH_DATATYPE&
&_&
&PRECISION &
(na, a, &
#if REALCASE == 1
#if REALCASE == 1
call bandred_real_PRECISION(na, a, a_dev, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
a_dev, &
tmat, tmat_dev, wantDebug, do_useGPU, success, useQRActual)
#endif
#elif COMPLEXCASE == 1
lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, tmat, &
call bandred_complex_PRECISION(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
#if REALCASE == 1
tmat, wantDebug, do_useGPU, success)
tmat_dev, &
#endif
wantDebug, do_useGPU, success &
#if REALCASE == 1
, useQRActual &
#endif
#endif
)
if (.not.(success)) return
if (.not.(success)) return
ttt1 = MPI_Wtime()
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time '//bandred_NUMBER_PRECISION_STR//' :',ttt1-ttt0
write(error_unit,*) "Time " // "bandred_&
&MATH_DATATYPE&
&PRECISION " // " :",ttt1-ttt0
end if ! matrix not already banded on input
end if ! matrix not already banded on input
...
@@ -253,18 +314,27 @@
...
@@ -253,18 +314,27 @@
allocate(e(na), stat=istat, errmsg=errorMessage)
allocate(e(na), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
if (istat .ne. 0) then
print *,solve_evp_NUMBER_2stage_PRECISION_STR//": error when allocating e "//errorMessage
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION " // ": error when allocating e "//errorMessage
stop
stop
endif
endif
ttt0 = MPI_Wtime()
ttt0 = MPI_Wtime()
call tridiag_band_NUMBER_PRECISION(na, nbw, nblk, a, lda, ev, e, matrixCols, &
call tridiag_band_&
hh_trans, mpi_comm_rows, mpi_comm_cols, mpi_comm_all)
&MATH_DATATYPE&
&_&
&PRECISION&
(na, nbw, nblk, a, lda, ev, e, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, mpi_comm_all)
ttt1 = MPI_Wtime()
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time '//tridiag_band_NUMBER_PRECISION_STR//' :',ttt1-ttt0
write(error_unit,*) "Time " // "tridiag_band_&
&MATH_DATATYPE&
&_&
&PRECISION " // " :",ttt1-ttt0
#ifdef WITH_MPI
#ifdef WITH_MPI
call timer%start("mpi_communication")
call timer%start("mpi_communication")
...
@@ -282,7 +352,9 @@
...
@@ -282,7 +352,9 @@
allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
allocate(q_real(l_rows,l_cols), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
if (istat .ne. 0) then
print *,"solve_evp_complex_2stage: error when allocating q_real"//errorMessage
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage: error when allocating q_real"//errorMessage
stop
stop
endif
endif
#endif
#endif
...
@@ -290,13 +362,17 @@
...
@@ -290,13 +362,17 @@
! Solve tridiagonal system
! Solve tridiagonal system
ttt0 = MPI_Wtime()
ttt0 = MPI_Wtime()
call solve_tridi_&
&PRECISION &
(na, nev, ev, e, &
#if REALCASE == 1
#if REALCASE == 1
call solve_tridi_PRECISION(na, nev, ev, e, q, ldq, nblk, matrixCols, &
q, ldq, &
mpi_comm_rows, mpi_comm_cols, wantDebug, success)
#endif
#elif COMPLEXCASE == 1
#if COMPLEXCASE == 1
call solve_tridi_PRECISION(na, nev, ev, e, q_real, ubound(q_real,dim=1), nblk, matrixCols, &
q_real, ubound(q_real,dim=1), &
mpi_comm_rows, mpi_comm_cols, wantDebug, success)
#endif
#endif
nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, wantDebug, success)
if (.not.(success)) return
if (.not.(success)) return
ttt1 = MPI_Wtime()
ttt1 = MPI_Wtime()
...
@@ -305,45 +381,55 @@
...
@@ -305,45 +381,55 @@
time_evp_solve = ttt1-ttt0
time_evp_solve = ttt1-ttt0
ttts = ttt1
ttts = ttt1
#if REALCASE == 1
deallocate(e, stat=istat, errmsg=errorMessage)
deallocate(e, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
if (istat .ne. 0) then
print *,"solve_evp_real_2stage: error when deallocating e "//errorMessage
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage: error when deallocating e "//errorMessage
stop
stop
endif
endif
#elif COMPLEXCASE == 1
#if COMPLEXCASE == 1
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
deallocate(
e,
q_real, stat=istat, errmsg=errorMessage)
deallocate(q_real, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
if (istat .ne. 0) then
print *,"solve_evp_complex_2stage: error when deallocating e, q_real"//errorMessage
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage: error when deallocating q_real"//errorMessage
stop
stop
endif
endif
#endif
#endif
! Backtransform stage 1
! Backtransform stage 1
ttt0 = MPI_Wtime()
ttt0 = MPI_Wtime()
call trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&_&
&PRECISION &
(na, nev, nblk, nbw, q, &
#if REALCASE == 1
#if REALCASE == 1
call trans_ev_tridi_to_band_real_PRECISION(na, nev, nblk, nbw, q, q_dev, ldq, &
q_dev, &
matrixCols, hh_trans, &
mpi_comm_rows, mpi_comm_cols, &
wantDebug, do_useGPU, success, THIS_ELPA_KERNEL)
#elif COMPLEXCASE == 1
call trans_ev_tridi_to_band_complex_PRECISION(na, nev, nblk, nbw, q, ldq, &
matrixCols, hh_trans, &
mpi_comm_rows, mpi_comm_cols, &
wantDebug, do_useGPU, success,THIS_ELPA_KERNEL)
#endif
#endif
ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU, &
success, THIS_ELPA_KERNEL)
if (.not.(success)) return
if (.not.(success)) return
ttt1 = MPI_Wtime()
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time '//trans_ev_tridi_to_band_NUMBER_PRECISION_STR//' :',ttt1-ttt0
write(error_unit,*) "Time " // "trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&_&
&PRECISION " // " :",ttt1-ttt0
! We can now deallocate the stored householder vectors
! We can now deallocate the stored householder vectors
deallocate(hh_trans, stat=istat, errmsg=errorMessage)
deallocate(hh_trans, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
if (istat .ne. 0) then
print *, solve_evp_NUMBER_2stage_PRECISION_STR//": error when deallocating hh_trans "//errorMessage
print *, "solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION " // ": error when deallocating hh_trans "//errorMessage
stop
stop
endif
endif
...
@@ -352,31 +438,55 @@
...
@@ -352,31 +438,55 @@
else
else
! Backtransform stage 2
! Backtransform stage 2
ttt0 = MPI_Wtime()
ttt0 = MPI_Wtime()
call trans_ev_band_to_full_&
&MATH_DATATYPE&
&_&
&PRECISION &
(na, nev, nblk, nbw, a, &
#if REALCASE == 1
#if REALCASE == 1
call trans_ev_band_to_full_real_PRECISION(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, &
a_dev, &
q, q_dev, ldq, matrixCols, num_blocks, &
#endif
mpi_comm_rows, mpi_comm_cols, do_useGPU, useQRActual)
lda, tmat, &
#elif COMPLEXCASE == 1
#if REALCASE == 1
call trans_ev_band_to_full_complex_PRECISION(na, nev, nblk, nbw, a, lda, tmat, &
tmat_dev, &
q, ldq, matrixCols, num_blocks, &
mpi_comm_rows, mpi_comm_cols, do_useGPU)
#endif
#endif
q, &
#if REALCASE == 1
q_dev, &
#endif
ldq, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, do_useGPU &
#if REALCASE == 1
, useQRActual &
#endif
)
ttt1 = MPI_Wtime()
ttt1 = MPI_Wtime()
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
write(error_unit,*) 'Time '//trans_ev_band_to_full_NUMBER_PRECISION_STR//' :',ttt1-ttt0
write(error_unit,*) "Time " // "trans_ev_band_to_full_&
&MATH_DATATYPE&
&_&
&PRECISION " // " :",ttt1-ttt0
time_evp_back = ttt1-ttts
time_evp_back = ttt1-ttts
deallocate(tmat, stat=istat, errmsg=errorMessage)
deallocate(tmat, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
if (istat .ne. 0) then
print *,solve_evp_NUMBER_2stage_PRECISION_STR//": error when deallocating tmat"//errorMessage
print *,"solve_evp_&
&MATH_DATATYPE&
_2stage_&
&PRECISION " // ": error when deallocating tmat"//errorMessage
stop
stop
endif
endif
endif ! not present(bandwidth)
endif ! not present(bandwidth)
call timer%stop(solve_evp_NUMBER_2stage_PRECISION_STR)
call timer%stop("solve_evp_&
&MATH_DATATYPE&
&_2stage_" // &
&PRECISION_SUFFIX &
)
1 format(a,f10.3)
1 format(a,f10.3)
end function solve_evp_NUMBER_2stage_PRECISION
end function solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment