Commit 692d8cd8 authored by Andreas Marek

Implement method eigenvalues

parent f9940297
......@@ -60,6 +60,30 @@
elpa_eigenvectors_fc \
)(handle, a, ev, q, error)
/*! \brief generic C method for elpa_eigenvalues
*
* \details
* \param handle handle of the ELPA object, which defines the problem
* \param a float/double or float complex/double complex pointer to matrix a
* \param ev on return: float/double pointer to eigenvalues
* \param error on return the error code, which can be queried with elpa_strerr()
* \result void
*/
#define elpa_eigenvalues(handle, a, ev, error) _Generic((a), \
double*: \
elpa_eigenvalues_d, \
\
float*: \
elpa_eigenvalues_f, \
\
double complex*: \
elpa_eigenvalues_dc, \
\
float complex*: \
elpa_eigenvalues_fc \
)(handle, a, ev, error)
/*! \brief generic C method for elpa_cholesky
*
* \details
......
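The _Generic macro above gives C callers a single spelling, elpa_eigenvalues(handle, a, ev, error), which the compiler resolves to the type-specific entry point based on the pointer type of a. The Fortran side of this commit achieves the same compile-time dispatch with a generic type-bound procedure (see the elpa_api hunk further down). A minimal, self-contained sketch of that mechanism, with toy names that are not part of ELPA:

module dispatch_demo
  implicit none
  type :: solver_t
  contains
    procedure, private :: eigenvalues_d
    procedure, private :: eigenvalues_f
    generic, public :: eigenvalues => eigenvalues_d, eigenvalues_f
  end type solver_t
contains
  subroutine eigenvalues_d(self, a, ev)
    class(solver_t), intent(in) :: self
    real(kind=8), intent(inout) :: a(:,:)
    real(kind=8), intent(out)   :: ev(:)
    ev = 0.0d0   ! double-precision path, selected at compile time
  end subroutine
  subroutine eigenvalues_f(self, a, ev)
    class(solver_t), intent(in) :: self
    real(kind=4), intent(inout) :: a(:,:)
    real(kind=4), intent(out)   :: ev(:)
    ev = 0.0     ! single-precision path, selected at compile time
  end subroutine
end module dispatch_demo

program demo
  use dispatch_demo
  implicit none
  type(solver_t) :: s
  real(kind=8) :: ad(2,2), evd(2)
  real(kind=4) :: af(2,2), evf(2)
  ad = 0.0d0; af = 0.0
  call s%eigenvalues(ad, evd)   ! resolves to eigenvalues_d
  call s%eigenvalues(af, evf)   ! resolves to eigenvalues_f
end program demo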
......@@ -73,31 +73,37 @@ function elpa_solve_evp_&
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
real(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,*)
real(kind=C_DATATYPE_KIND), intent(out) :: q(obj%local_nrows,*)
real(kind=C_DATATYPE_KIND), optional,target,intent(out) :: q(obj%local_nrows,*)
#else
real(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,obj%local_ncols)
real(kind=C_DATATYPE_KIND), intent(out) :: q(obj%local_nrows,obj%local_ncols)
real(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#endif
real(kind=C_DATATYPE_KIND), allocatable :: tau(:)
real(kind=C_DATATYPE_KIND), allocatable, target :: q_dummy(:,:)
real(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:)
#endif /* REALCASE */
#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
complex(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,*)
complex(kind=C_DATATYPE_KIND), intent(out) :: q(obj%local_nrows,*)
complex(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,*)
#else
complex(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,obj%local_ncols)
complex(kind=C_DATATYPE_KIND), intent(out) :: q(obj%local_nrows,obj%local_ncols)
complex(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#endif
real(kind=REAL_DATATYPE), allocatable :: q_real(:,:)
complex(kind=C_DATATYPE_KIND), allocatable :: tau(:)
complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
complex(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:)
integer(kind=c_int) :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */
logical :: useGPU
logical :: success
logical :: solve_eigenvectors
logical :: do_useGPU
integer(kind=ik) :: numberOfGPUDevices
......@@ -116,6 +122,13 @@ function elpa_solve_evp_&
&PRECISION&
&")
if (present(q)) then
solve_eigenvectors = .true.
else
solve_eigenvectors = .false.
endif
na = obj%na
nev = obj%nev
lda = obj%local_nrows
......@@ -191,6 +204,15 @@ function elpa_solve_evp_&
endif
endif
endif
! allocate a dummy q_dummy, if eigenvectors should not be computed and thus q is NOT present
if (solve_eigenvectors) then
q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
else
allocate(q_dummy(obj%local_nrows,obj%local_ncols))
q_actual => q_dummy
endif
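This q_dummy/q_actual pattern is the heart of the change: q is now optional, but the tridiagonal solver still needs an array to write intermediate eigenvectors into, so the routine allocates private scratch storage when the caller omits q and points q_actual at whichever array applies. A self-contained sketch of the pattern, with hypothetical names rather than ELPA code:

program scratch_pattern_demo
  implicit none
  real(kind=8), target :: q_caller(4,4)

  call solve(q_caller)   ! eigenvectors wanted: results land in q_caller
  call solve()           ! eigenvalues only: routine uses its own scratch

contains

  subroutine solve(q)
    real(kind=8), optional, target, intent(out) :: q(:,:)
    real(kind=8), allocatable, target :: q_dummy(:,:)
    real(kind=8), pointer :: q_actual(:,:)

    if (present(q)) then
      q_actual => q            ! write through to the caller's array
    else
      allocate(q_dummy(4,4))   ! no q passed: private workspace
      q_actual => q_dummy
    end if

    q_actual = 0.0d0           ! stands in for the actual computation
    ! a local allocatable is freed automatically on return; the ELPA
    ! templates deallocate explicitly so they can report istat/errorMessage
  end subroutine solve

end program scratch_pattern_demo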
#if COMPLEXCASE == 1
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a and q
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local columns of q
......@@ -230,7 +252,7 @@ function elpa_solve_evp_&
&PRECISION&
& (obj, na, nev, ev, e, &
#if REALCASE == 1
q, ldq, &
q_actual, ldq, &
#endif
#if COMPLEXCASE == 1
q_real, l_rows, &
......@@ -241,32 +263,36 @@ function elpa_solve_evp_&
call obj%get("eigenvalues_only",eigenvalues_only)
if (eigenvalues_only .eq. 1) then
q(obj%local_nrows,obj%local_ncols) = 0.0
return
endif
if (solve_eigenvectors) then
! q must be given, that's why from here on we can use q and not q_actual
#if COMPLEXCASE == 1
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
#endif
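For the complex case this copy is necessary because the Householder reduction takes the Hermitian matrix to a real symmetric tridiagonal matrix; its eigenvectors are computed in the real workspace q_real and only afterwards promoted to the complex array q and back-transformed. Schematically, in generic notation rather than the variables of the sources:

  A = Q_H T Q_H^H,   T s_i = \lambda_i s_i   (T and s_i real),   x_i = Q_H s_i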
call obj%timer%start("back")
call trans_ev_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
call obj%timer%stop("back")
#if COMPLEXCASE == 1
deallocate(q_real, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
&" // ": error when deallocating q_real "//errorMessage
stop 1
endif
#endif
call obj%timer%start("back")
call trans_ev_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU)
call obj%timer%stop("back")
endif ! solve_eigenvectors
#if COMPLEXCASE == 1
deallocate(q_real, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
&" // ": error when deallocating q_real "//errorMessage
stop 1
endif
#endif
deallocate(e, tau, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
......@@ -277,6 +303,19 @@ function elpa_solve_evp_&
stop 1
endif
if (.not.(solve_eigenvectors)) then
deallocate(q_dummy, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
&" // ": error when deallocating q_dummy "//errorMessage
stop 1
endif
endif
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
......
......@@ -77,10 +77,11 @@
integer(kind=c_int) :: kernel
#ifdef USE_ASSUMED_SIZE
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,*), q(obj%local_nrows,*)
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,*)
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,*)
#else
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,obj%local_ncols), &
q(obj%local_nrows,obj%local_ncols)
MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout) :: a(obj%local_nrows,obj%local_ncols)
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#endif
real(kind=C_DATATYPE_KIND), intent(inout) :: ev(obj%na)
MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable :: hh_trans(:,:)
......@@ -92,10 +93,14 @@
#if COMPLEXCASE == 1
real(kind=C_DATATYPE_KIND), allocatable :: q_real(:,:)
#endif
MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable, target :: q_dummy(:,:)
MATH_DATATYPE(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:)
integer(kind=c_intptr_t) :: tmat_dev, q_dev, a_dev
integer(kind=c_int) :: i
logical :: success, successCUDA
logical :: success, successCUDA, solve_eigenvectors
logical :: wantDebug
integer(kind=c_int) :: istat, gpu, debug, qr
character(200) :: errorMessage
......@@ -115,6 +120,12 @@
&PRECISION&
&")
if (present(q)) then
solve_eigenvectors = .true.
else
solve_eigenvectors = .false.
endif
na = obj%na
nev = obj%nev
lda = obj%local_nrows
......@@ -252,6 +263,14 @@
endif
call obj%timer%start("bandred")
if (solve_eigenvectors) then
q_actual => q(1:obj%local_nrows,1:obj%local_ncols)
else
allocate(q_dummy(1:obj%local_nrows,1:obj%local_ncols))
q_actual => q_dummy(1:obj%local_nrows,1:obj%local_ncols)
endif
if (obj%is_set("bandwidth") == 1) then
call obj%get("bandwidth",nbw)
if ((nbw == 0) .or. (mod(nbw, nblk) .ne. 0)) then
......@@ -356,7 +375,7 @@
&PRECISION &
(obj, na, nev, ev, e, &
#if REALCASE == 1
q, ldq, &
q_actual, ldq, &
#endif
#if COMPLEXCASE == 1
q_real, ubound(q_real,dim=1), &
......@@ -375,81 +394,96 @@
call obj%get("eigenvalues_only",eigenvalues_only)
if (eigenvalues_only .eq. 1) then
q(obj%local_nrows,obj%local_ncols) = 0.0
return
endif
if (solve_eigenvectors) then
#if COMPLEXCASE == 1
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
deallocate(q_real, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage: error when deallocating q_real"//errorMessage
stop 1
endif
#endif
! Backtransform stage 1
call obj%timer%start("trans_ev_to_band")
call trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nev, nblk, nbw, q, &
q_dev, &
ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU_trans_ev_tridi, &
success=success, kernel=kernel)
call obj%timer%stop("trans_ev_to_band")
if (.not.(success)) return
! We can now deallocate the stored householder vectors
deallocate(hh_trans, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *, "solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION " // ": error when deallocating hh_trans "//errorMessage
stop 1
endif
call obj%timer%start("trans_ev_to_full")
if(obj%is_set("bandwidth") .ne. 1) then
if ( (do_useGPU) .and. .not.(do_useGPU_trans_ev_tridi) ) then
! copy to device if we want to continue on GPU
successCUDA = cuda_malloc(q_dev, ldq*matrixCols*size_of_datatype)
successCUDA = cuda_memcpy(q_dev, loc(q), ldq*matrixCols* size_of_datatype, cudaMemcpyHostToDevice)
endif
! q must be given, that's why from here on we can use q and not q_actual
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
deallocate(q_real, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage: error when deallocating q_real"//errorMessage
stop 1
endif
#endif
! Backtransform stage 1
call obj%timer%start("trans_ev_to_band")
call trans_ev_tridi_to_band_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nev, nblk, nbw, q, &
q_dev, &
ldq, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, wantDebug, do_useGPU_trans_ev_tridi, &
success=success, kernel=kernel)
call obj%timer%stop("trans_ev_to_band")
if (.not.(success)) return
! We can now deallocate the stored householder vectors
deallocate(hh_trans, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *, "solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION " // ": error when deallocating hh_trans "//errorMessage
stop 1
endif
! Backtransform stage 2
call trans_ev_band_to_full_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nev, nblk, nbw, a, &
a_dev, lda, tmat, tmat_dev, q, &
q_dev, &
ldq, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, do_useGPU &
call obj%timer%start("trans_ev_to_full")
if(obj%is_set("bandwidth") .ne. 1) then
if ( (do_useGPU) .and. .not.(do_useGPU_trans_ev_tridi) ) then
! copy to device if we want to continue on GPU
successCUDA = cuda_malloc(q_dev, ldq*matrixCols*size_of_datatype)
successCUDA = cuda_memcpy(q_dev, c_loc(q), ldq*matrixCols* size_of_datatype, cudaMemcpyHostToDevice)
endif
! Backtransform stage 2
call trans_ev_band_to_full_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, na, nev, nblk, nbw, a, &
a_dev, lda, tmat, tmat_dev, q, &
q_dev, &
ldq, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, do_useGPU &
#if REALCASE == 1
, useQRActual &
, useQRActual &
#endif
)
)
deallocate(tmat, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION " // ": error when deallocating tmat"//errorMessage
stop 1
endif
endif
call obj%timer%stop("trans_ev_to_full")
deallocate(tmat, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION " // ": error when deallocating tmat"//errorMessage
stop 1
endif
endif
call obj%timer%stop("trans_ev_to_full")
endif ! solve_eigenvectors
if (.not.(solve_eigenvectors)) then
deallocate(q_dummy, stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"solve_evp_&
&MATH_DATATYPE&
&_2stage_&
&PRECISION&
&" // ": error when deallocating q_dummy "//errorMessage
stop 1
endif
endif
call obj%timer%stop("elpa_solve_evp_&
&MATH_DATATYPE&
......
......@@ -98,12 +98,18 @@ module elpa_api
procedure(elpa_print_times_i), deferred, public :: print_times
! Actual math routines
generic, public :: eigenvectors => & !< method solve for solving the full eigenvalue problem
elpa_eigenvectors_d, & !< for symmetric real valued / hermitian complex valued
elpa_eigenvectors_f, & !< matrices
generic, public :: eigenvectors => & !< method eigenvectors for solving the full eigenvalue problem
elpa_eigenvectors_d, & !< the eigenvalues and (parts of) the eigenvectors are computed
elpa_eigenvectors_f, & !< for symmetric real valued / hermitian complex valued matrices
elpa_eigenvectors_dc, &
elpa_eigenvectors_fc
generic, public :: eigenvalues => & !< method eigenvalues for solving the eigenvalue problem
elpa_eigenvalues_d, & !< only the eigenvalues are computed
elpa_eigenvalues_f, & !< for symmetric real valued / hermitian complex valued matrices
elpa_eigenvalues_dc, &
elpa_eigenvalues_fc
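Taken together with the C macro above, user code can now request eigenvalues without supplying an eigenvector matrix. A hedged usage sketch following the documented ELPA object workflow; the concrete values (single MPI task, local extents equal to global ones, API version 20170403) are illustrative assumptions, not part of this commit:

program eigenvalues_usage
  use elpa
  implicit none
  class(elpa_t), pointer :: e
  integer, parameter :: na = 100, nev = 100, nblk = 16
  real(kind=8), allocatable :: a(:,:), ev(:)
  integer :: error

  allocate(a(na,na), ev(na))
  call random_number(a)
  a = 0.5d0 * (a + transpose(a))           ! the problem must be symmetric

  if (elpa_init(20170403) /= ELPA_OK) stop "ELPA API version not supported"
  e => elpa_allocate()
  call e%set("na", na, error)              ! global matrix size
  call e%set("nev", nev, error)            ! number of eigenvalues wanted
  call e%set("local_nrows", na, error)     ! single task: local = global
  call e%set("local_ncols", na, error)
  call e%set("nblk", nblk, error)
  call e%set("mpi_comm_parent", 0, error)  ! placeholder; use MPI_COMM_WORLD in real code
  call e%set("process_row", 0, error)
  call e%set("process_col", 0, error)
  if (e%setup() /= ELPA_OK) stop "setup() failed"

  call e%eigenvalues(a, ev, error)         ! no q argument: eigenvalues only
  if (error /= ELPA_OK) stop "eigenvalues() failed"

  call elpa_deallocate(e)
  call elpa_uninit()
end program eigenvalues_usage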
generic, public :: hermitian_multiply => & !< method for a "hermitian" multiplication of matrices a and b
elpa_hermitian_multiply_d, & !< for real valued matrices: a**T * b
elpa_hermitian_multiply_dc, & !< for complex valued matrices a**H * b
......@@ -140,6 +146,11 @@ module elpa_api
procedure(elpa_eigenvectors_dc_i), deferred, private :: elpa_eigenvectors_dc
procedure(elpa_eigenvectors_fc_i), deferred, private :: elpa_eigenvectors_fc
procedure(elpa_eigenvalues_d_i), deferred, private :: elpa_eigenvalues_d
procedure(elpa_eigenvalues_f_i), deferred, private :: elpa_eigenvalues_f
procedure(elpa_eigenvalues_dc_i), deferred, private :: elpa_eigenvalues_dc
procedure(elpa_eigenvalues_fc_i), deferred, private :: elpa_eigenvalues_fc
procedure(elpa_hermitian_multiply_d_i), deferred, private :: elpa_hermitian_multiply_d
procedure(elpa_hermitian_multiply_f_i), deferred, private :: elpa_hermitian_multiply_f
procedure(elpa_hermitian_multiply_dc_i), deferred, private :: elpa_hermitian_multiply_dc
......@@ -485,6 +496,138 @@ module elpa_api
end subroutine
end interface
!> \brief abstract definition of interface to solve double real eigenvalue problem
!>
!> The dimensions of the matrix a (locally distributed and global), the block-cyclic distribution
!> blocksize, the number of eigenvectors
!> to be computed and the MPI communicators are already known to the object and MUST be set
!> BEFOREHAND with the class method "setup"
!>
!> It is possible to change the behaviour of the method by setting tunable parameters with the
!> class method "set"
!> Parameters
!> \details
!> \param self class(elpa_t), the ELPA object
!> \param a double real matrix a: defines the problem to solve
!> \param ev double real: on output stores the eigenvalues
!> \result error integer, optional : error code, which can be queried with elpa_strerr
abstract interface
subroutine elpa_eigenvalues_d_i(self, a, ev, error)
use iso_c_binding
import elpa_t
implicit none
class(elpa_t) :: self
#ifdef USE_ASSUMED_SIZE
real(kind=c_double) :: a(self%local_nrows, *)
#else
real(kind=c_double) :: a(self%local_nrows, self%local_ncols)
#endif
real(kind=c_double) :: ev(self%na)
integer, optional :: error
end subroutine
end interface
!> \brief abstract definition of interface to solve single real eigenvalue problem
!>
!> The dimensions of the matrix a (locally distributed and global), the block-cyclic distribution
!> blocksize, the number of eigenvectors
!> to be computed and the MPI communicators are already known to the object and MUST be set
!> BEFOREHAND with the class method "setup"
!>
!> It is possible to change the behaviour of the method by setting tunable parameters with the
!> class method "set"
!> Parameters
!> \details
!> \param self class(elpa_t), the ELPA object
!> \param a single real matrix a: defines the problem to solve
!> \param ev single real: on output stores the eigenvalues
!> \result error integer, optional : error code, which can be queried with elpa_strerr
abstract interface
subroutine elpa_eigenvalues_f_i(self, a, ev, error)
use iso_c_binding
import elpa_t
implicit none
class(elpa_t) :: self
#ifdef USE_ASSUMED_SIZE
real(kind=c_float) :: a(self%local_nrows, *)
#else
real(kind=c_float) :: a(self%local_nrows, self%local_ncols)
#endif
real(kind=c_float) :: ev(self%na)
integer, optional :: error
end subroutine
end interface
!> \brief abstract definition of interface to solve double complex eigenvalue problem
!>
!> The dimensions of the matrix a (locally distributed and global), the block-cyclic distribution
!> blocksize, the number of eigenvectors
!> to be computed and the MPI communicators are already known to the object and MUST be set
!> BEFOREHAND with the class method "setup"
!>
!> It is possible to change the behaviour of the method by setting tunable parameters with the
!> class method "set"
!> Parameters
!> \details
!> \param self class(elpa_t), the ELPA object
!> \param a double complex matrix a: defines the problem to solve
!> \param ev double real: on output stores the eigenvalues
!> \result error integer, optional : error code, which can be queried with elpa_strerr
abstract interface
subroutine elpa_eigenvalues_dc_i(self, a, ev, error)
use iso_c_binding
import elpa_t
implicit none
class(elpa_t) :: self
#ifdef USE_ASSUMED_SIZE
complex(kind=c_double_complex) :: a(self%local_nrows, *)
#else
complex(kind=c_double_complex) :: a(self%local_nrows, self%local_ncols)
#endif
real(kind=c_double) :: ev(self%na)
integer, optional :: error
end subroutine
end interface
!> \brief abstract definition of interface to solve single complex eigenvalue problem
!>
!> The dimensions of the matrix a (locally distributed and global), the block-cyclic distribution
!> blocksize, the number of eigenvectors
!> to be computed and the MPI communicators are already known to the object and MUST be set
!> BEFOREHAND with the class method "setup"
!>
!> It is possible to change the behaviour of the method by setting tunable parameters with the
!> class method "set"
!> Parameters
!> \details
!> \param self class(elpa_t), the ELPA object
!> \param a single complex matrix a: defines the problem to solve
!> \param ev single real: on output stores the eigenvalues
!> \result error integer, optional : error code, which can be queried with elpa_strerr
abstract interface
subroutine elpa_eigenvalues_fc_i(self, a, ev, error)
use iso_c_binding
import elpa_t
implicit none
class(elpa_t) :: self
#ifdef USE_ASSUMED_SIZE
complex(kind=c_float_complex) :: a(self%local_nrows, *)
#else
complex(kind=c_float_complex) :: a(self%local_nrows, self%local_ncols)
#endif
real(kind=c_float) :: ev(self%na)
integer, optional :: error
end subroutine
end interface
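Each of the four interfaces above is declared twice, switched by USE_ASSUMED_SIZE: with an assumed-size dummy only the leading dimension of a is known inside the routine, while the explicit-shape variant exposes both local extents to the compiler. A minimal illustration of the difference (toy routines, not the ELPA interfaces):

subroutine demo_assumed_size(a, lda)
  implicit none
  integer, intent(in) :: lda
  real(kind=8), intent(inout) :: a(lda,*)   ! assumed size: last extent unknown,
  a(1,1) = 0.0d0                            ! so no whole-array operations
end subroutine demo_assumed_size

subroutine demo_explicit_shape(a, nrows, ncols)
  implicit none
  integer, intent(in) :: nrows, ncols
  real(kind=8), intent(inout) :: a(nrows,ncols)  ! explicit shape: both extents
  a = 0.0d0                                      ! known, whole-array ops allowed
end subroutine demo_explicit_shape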
!> \brief abstract definition of interface to compute C : = A**T * B for double real matrices
!> where A is a square matrix (self%a,self%na) which is optionally upper or lower triangular
!> B is a (self%na,ncb) matrix
......
......@@ -87,6 +87,12 @@ module elpa_impl
procedure, private :: elpa_eigenvectors_dc
procedure, private :: elpa_eigenvectors_fc
procedure, private :: elpa_eigenvalues_d !< private methods to implement the solve step for real/complex
!< double/single matrices; only the eigenvalues are computed
procedure, private :: elpa_eigenvalues_f
procedure, private :: elpa_eigenvalues_dc
procedure, private :: elpa_eigenvalues_fc
procedure, private :: elpa_hermitian_multiply_d !< private methods to implement a "hermitian" multiplication of matrices a and b
procedure, private :: elpa_hermitian_multiply_f !< for real valued matrices: a**T * b
procedure, private :: elpa_hermitian_multiply_dc !< for complex valued matrices: a**H * b
......@@ -789,6 +795,331 @@ module elpa_impl
call elpa_eigenvectors_fc(self, a, ev, q, error)
end subroutine
!> \brief elpa_eigenvalues_d: class method to solve the eigenvalue problem for double real matrices
!>
!> The dimensions of the matrix a (locally distributed and global), the block-cyclic distribution
!> blocksize, the number of eigenvectors
!> to be computed and the MPI communicators are already known to the object and MUST be set
!> BEFOREHAND with the class method "setup"
!>
!> It is possible to change the behaviour of the method by setting tunable parameters with the
!> class method "set"
!>
!> Parameters
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in ScaLAPACK.
!> The full matrix must be set (not only one half like in ScaLAPACK).
!> Destroyed on exit (upper and lower half).
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param error integer, optional: returns an error code, which can be queried with elpa_strerr
subroutine elpa_eigenvalues_d(self, a, ev, error)
use elpa2_impl
use elpa1_impl
use elpa_utilities, only : error_unit