Commit c058a0ba authored by Andreas Marek's avatar Andreas Marek
Browse files

Rename ELPA2 driver function

parent 7123a5c9
...@@ -88,9 +88,10 @@ module elpa2_impl ...@@ -88,9 +88,10 @@ module elpa2_impl
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW #undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
!> \brief elpa_solve_evp_real_2stage_double_impl: Fortran function to solve the double-precision real eigenvalue problem with a 2 stage approach !> \brief elpa_solve_evp_real_2stage_all_host_arrays_double_impl: Fortran function to solve the double-precision real eigenvalue problem with a 2 stage approach
!> !>
!> Parameters !> Parameters
!> !>
...@@ -133,13 +134,64 @@ module elpa2_impl ...@@ -133,13 +134,64 @@ module elpa2_impl
#undef REALCASE #undef REALCASE
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
#define REALCASE 1
#define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_evp_real_2stage_device_pointer_double_impl: Fortran function to solve the double-precision real eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!>
!> \param useQR (optional) use QR decomposition
!> \param useGPU (optional) decide whether to use GPUs or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef DEVICE_POINTER
#undef REALCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_REAL #ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1 #define REALCASE 1
#define SINGLE_PRECISION 1 #define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW #undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
!> \brief elpa_solve_evp_real_2stage_single_impl: Fortran function to solve the single-precision real eigenvalue problem with a 2 stage approach !> \brief elpa_solve_evp_real_2stage_all_host_arrays_single_impl: Fortran function to solve the single-precision real eigenvalue problem with a 2 stage approach
!> !>
!> Parameters !> Parameters
!> !>
...@@ -182,13 +234,64 @@ module elpa2_impl ...@@ -182,13 +234,64 @@ module elpa2_impl
#undef REALCASE #undef REALCASE
#undef SINGLE_PRECISION #undef SINGLE_PRECISION
#define REALCASE 1
#define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_evp_real_2stage_device_pointer_single_impl: Fortran function to solve the single-precision real eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!>
!> \param useQR (optional) use QR decomposition
!> \param useGPU (optional) decide whether GPUs should be used or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef DEVICE_POINTER
#undef REALCASE
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_REAL */ #endif /* WANT_SINGLE_PRECISION_REAL */
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW #undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
!> \brief elpa_solve_evp_complex_2stage_double_impl: Fortran function to solve the double-precision complex eigenvalue problem with a 2 stage approach !> \brief elpa_solve_evp_complex_2stage_all_host_arrays_double_impl: Fortran function to solve the double-precision complex eigenvalue problem with a 2 stage approach
!> !>
!> Parameters !> Parameters
!> !>
...@@ -229,14 +332,62 @@ module elpa2_impl ...@@ -229,14 +332,62 @@ module elpa2_impl
#undef COMPLEXCASE #undef COMPLEXCASE
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!> \brief elpa_solve_evp_complex_2stage_device_pointer_double_impl: Fortran function to solve the double-precision complex eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!> \param useGPU (optional) decide whether GPUs should be used or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef DEVICE_POINTER
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_COMPLEX #ifdef WANT_SINGLE_PRECISION_COMPLEX
#define COMPLEXCASE 1 #define COMPLEXCASE 1
#define SINGLE_PRECISION 1 #define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW #undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
!> \brief elpa_solve_evp_complex_2stage_single_impl: Fortran function to solve the single-precision complex eigenvalue problem with a 2 stage approach !> \brief elpa_solve_evp_complex_2stage_all_host_arrays_single_impl: Fortran function to solve the single-precision complex eigenvalue problem with a 2 stage approach
!> !>
!> Parameters !> Parameters
!> !>
...@@ -277,17 +428,64 @@ module elpa2_impl ...@@ -277,17 +428,64 @@ module elpa2_impl
#undef COMPLEXCASE #undef COMPLEXCASE
#undef SINGLE_PRECISION #undef SINGLE_PRECISION
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!> \brief elpa_solve_evp_complex_2stage_device_pointer_single_impl: Fortran function to solve the single-precision complex eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!> \param useGPU (optional) decide whether GPUs should be used or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef DEVICE_POINTER
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_COMPLEX */ #endif /* WANT_SINGLE_PRECISION_COMPLEX */
#ifdef HAVE_SKEWSYMMETRIC #ifdef HAVE_SKEWSYMMETRIC
#define REALCASE 1 #define REALCASE 1
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#define ACTIVATE_SKEW #define ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
!> \brief elpa_solve_skew_evp_real_2stage_double_impl: Fortran function to solve the double-precision real skew-symmetric eigenvalue problem with a 2 stage approach !> \brief elpa_solve_skew_evp_real_2stage_all_host_arrays_double_impl: Fortran function to solve the double-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!> !>
!> Parameters !> Parameters
!> !>
...@@ -331,13 +529,65 @@ module elpa2_impl ...@@ -331,13 +529,65 @@ module elpa2_impl
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
#undef ACTIVATE_SKEW #undef ACTIVATE_SKEW
#define REALCASE 1
#define DOUBLE_PRECISION 1
#define ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_skew_evp_real_2stage_device_pointer_double_impl: Fortran function to solve the double-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!>
!> \param useQR (optional) use QR decomposition
!> \param useGPU (optional) decide whether to use GPUs or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef REALCASE
#undef DOUBLE_PRECISION
#undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#ifdef WANT_SINGLE_PRECISION_REAL #ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1 #define REALCASE 1
#define SINGLE_PRECISION 1 #define SINGLE_PRECISION 1
#define ACTIVATE_SKEW #define ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
!------------------------------------------------------------------------------- !-------------------------------------------------------------------------------
!> \brief elpa_solve_skew_evp_real_2stage_single_impl: Fortran function to solve the single-precision real skew-symmetric eigenvalue problem with a 2 stage approach !> \brief elpa_solve_skew_evp_real_2stage_all_host_arrays_single_impl: Fortran function to solve the single-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!> !>
!> Parameters !> Parameters
!> !>
...@@ -381,6 +631,57 @@ module elpa2_impl ...@@ -381,6 +631,57 @@ module elpa2_impl
#undef SINGLE_PRECISION #undef SINGLE_PRECISION
#undef ACTIVATE_SKEW #undef ACTIVATE_SKEW
#define REALCASE 1
#define SINGLE_PRECISION 1
#define ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_skew_evp_real_2stage_device_pointer_single_impl: Fortran function to solve the single-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!>
!> \param useQR (optional) use QR decomposition
!> \param useGPU (optional) decide whether GPUs should be used or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef REALCASE
#undef SINGLE_PRECISION
#undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#endif /* WANT_SINGLE_PRECISION_REAL */ #endif /* WANT_SINGLE_PRECISION_REAL */
#endif /* HAVE_SKEWSYMMETRIC */ #endif /* HAVE_SKEWSYMMETRIC */
......
...@@ -55,11 +55,34 @@ ...@@ -55,11 +55,34 @@
#include "elpa/elpa_simd_constants.h" #include "elpa/elpa_simd_constants.h"
#include "../general/error_checking.inc" #include "../general/error_checking.inc"
#ifdef DEVICE_POINTER
#ifdef ACTIVATE_SKEW #ifdef ACTIVATE_SKEW
function elpa_solve_skew_evp_& function elpa_solve_skew_evp_&
#else #else
function elpa_solve_evp_& function elpa_solve_evp_&
#endif #endif /* ACTIVATE_SKEW */
&MATH_DATATYPE&
&_&
&2stage_device_pointer_&
&PRECISION&
&_impl (obj, &
#ifdef REDISTRIBUTE_MATRIX
aExtern, &
#else
a, &
#endif /* REDISTRIBUTE_MATRIX */
ev, &
#ifdef REDISTRIBUTE_MATRIX
qExtern) result(success)
#else
q) result(success)
#endif /* REDISTRIBUTE_MATRIX */
#else /* DEVICE_POINTER */
#ifdef ACTIVATE_SKEW
function elpa_solve_skew_evp_&
#else
function elpa_solve_evp_&
#endif /* ACTIVATE_SKEW */
&MATH_DATATYPE& &MATH_DATATYPE&
&_& &_&
&2stage_all_host_arrays_& &2stage_all_host_arrays_&
...@@ -69,13 +92,14 @@ ...@@ -69,13 +92,14 @@
aExtern, & aExtern, &
#else #else
a, & a, &
#endif #endif /* REDISTRIBUTE_MATRIX */
ev, & ev, &
#ifdef REDISTRIBUTE_MATRIX #ifdef REDISTRIBUTE_MATRIX
qExtern) result(success) qExtern) result(success)
#else #else
q) result(success) q) result(success)
#endif #endif /* REDISTRIBUTE_MATRIX */
#endif /* DEVICE_POINTER */
!use matrix_plot !use matrix_plot
use elpa_abstract_impl use elpa_abstract_impl
...@@ -107,6 +131,17 @@ ...@@ -107,6 +131,17 @@
logical :: useQRActual logical :: useQRActual
#endif #endif
integer(kind=c_int) :: kernel, kernelByUser integer(kind=c_int) :: kernel, kernelByUser
#ifdef DEVICE_POINTER
#ifdef REDISTRIBUTE_MATRIX
type(c_ptr) :: aExtern, qExtern
#else /* REDISTRIBUTE_MATRIX */
type(c_ptr) :: a, q
#endif /* REDISTRIBUTE_MATRIX */
#else /* DEVICE_POINTER */
#ifdef REDISTRIBUTE_MATRIX #ifdef REDISTRIBUTE_MATRIX
#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
...@@ -137,12 +172,24 @@ ...@@ -137,12 +172,24 @@
#endif /* REDISTRIBUTE_MATRIX */ #endif /* REDISTRIBUTE_MATRIX */
#endif /* DEVICE_POINTER */
#ifdef DEVICE_POINTER
#ifdef REDISTRIBUTE_MATRIX
type(c_ptr) :: a, q
#endif
#else /* DEVICE_POINTER */
#ifdef REDISTRIBUTE_MATRIX #ifdef REDISTRIBUTE_MATRIX
MATH_DATATYPE(kind=rck), pointer :: a(:,:) MATH_DATATYPE(kind=rck), pointer :: a(:,:)
MATH_DATATYPE(kind=rck), pointer :: q(:,:) MATH_DATATYPE(kind=rck), pointer :: q(:,:)
#endif #endif
#endif /* DEVICE_POINTER */
#ifdef DEVICE_POINTER
type(c_ptr) :: ev
#else
real(kind=C_DATATYPE_KIND), intent(inout) :: ev(obj%na) real(kind=C_DATATYPE_KIND), intent(inout) :: ev(obj%na)
#endif
MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable :: hh_trans(:,:) MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable :: hh_trans(:,:)
integer(kind=c_int) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols integer(kind=c_int) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols
...@@ -296,6 +343,7 @@ ...@@ -296,6 +343,7 @@
success = .true. success = .true.
#ifndef DEVICE_POINTER
#ifdef REDISTRIBUTE_MATRIX #ifdef REDISTRIBUTE_MATRIX
if (present(qExtern)) then if (present(qExtern)) then
#else #else
...@@ -305,7 +353,7 @@ ...@@ -305,7 +353,7 @@
else else
obj%eigenvalues_only = .true. obj%eigenvalues_only = .true.
endif endif
#endif
na = obj%na na = obj%na
nev = obj%nev nev = obj%nev
nblk = obj%nblk nblk = obj%nblk
...@@ -346,13 +394,15 @@ ...@@ -346,13 +394,15 @@
call obj%timer%stop("mpi_communication") call obj%timer%stop("mpi_communication")
#ifndef DEVICE_POINTER
#ifdef REDISTRIBUTE_MATRIX #ifdef REDISTRIBUTE_MATRIX
#include "../helpers/elpa_redistribute_template.F90" #include "../helpers/elpa_redistribute_template.F90"
#endif /* REDISTRIBUTE_MATRIX */ #endif /* REDISTRIBUTE_MATRIX */
#endif
! special case na = 1 ! special case na = 1
if (na .eq. 1) then if (na .eq. 1) then
#ifndef DEVICE_POINTER
#if REALCASE == 1 #if REALCASE == 1
ev(1) = a(1,1) ev(1) = a(1,1)
#endif #endif
...@@ -362,7 +412,7 @@ ...@@ -362,7 +412,7 @@
if (.not.(obj%eigenvalues_only)) then if (.not.(obj%eigenvalues_only)) then
q(1,1) = ONE q(1,1) = ONE
endif endif
#endif
! restore original OpenMP settings ! restore original OpenMP settings
#ifdef WITH_OPENMP_TRADITIONAL #ifdef WITH_OPENMP_TRADITIONAL
! store the number of OpenMP threads used in the calling function ! store the number of OpenMP threads used in the calling function
...@@ -720,6 +770,7 @@ ...@@ -720,6 +770,7 @@
endif endif