Commit c058a0ba authored by Andreas Marek's avatar Andreas Marek
Browse files

Rename ELPA2 driver function

parent 7123a5c9
......@@ -88,9 +88,10 @@ module elpa2_impl
#define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_evp_real_2stage_double_impl: Fortran function to solve the double-precision real eigenvalue problem with a 2 stage approach
!> \brief elpa_solve_evp_real_2stage_all_host_arrays_double_impl: Fortran function to solve the double-precision real eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
......@@ -133,13 +134,64 @@ module elpa2_impl
#undef REALCASE
#undef DOUBLE_PRECISION
#define REALCASE 1
#define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_evp_real_2stage_device_pointer_double_impl: Fortran function to solve the double-precision real eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!>
!> \param useQR (optional) use QR decomposition
!> \param useGPU (optional) decide whether to use GPUs or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef DEVICE_POINTER
#undef REALCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_evp_real_2stage_single_impl: Fortran function to solve the single-precision real eigenvalue problem with a 2 stage approach
!> \brief elpa_solve_evp_real_2stage_all_host_arrays_single_impl: Fortran function to solve the single-precision real eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
......@@ -182,13 +234,64 @@ module elpa2_impl
#undef REALCASE
#undef SINGLE_PRECISION
#define REALCASE 1
#define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_evp_real_2stage_device_pointer_single_impl: Fortran function to solve the single-precision real eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!>
!> \param useQR (optional) use QR decomposition
!> \param useGPU (optional) decide whether GPUs should be used or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef DEVICE_POINTER
#undef REALCASE
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_REAL */
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h"
!> \brief elpa_solve_evp_complex_2stage_double_impl: Fortran function to solve the double-precision complex eigenvalue problem with a 2 stage approach
!> \brief elpa_solve_evp_complex_2stage_all_host_arrays_double_impl: Fortran function to solve the double-precision complex eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
......@@ -229,14 +332,62 @@ module elpa2_impl
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#undef ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!> \brief elpa_solve_evp_complex_2stage_device_pointer_double_impl: Fortran function to solve the double-precision complex eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!> \param useGPU (optional) decide whether GPUs should be used or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef DEVICE_POINTER
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h"
!> \brief elpa_solve_evp_complex_2stage_single_impl: Fortran function to solve the single-precision complex eigenvalue problem with a 2 stage approach
!> \brief elpa_solve_evp_complex_2stage_all_host_arrays_single_impl: Fortran function to solve the single-precision complex eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
......@@ -277,17 +428,64 @@ module elpa2_impl
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#undef ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!> \brief elpa_solve_evp_complex_2stage_device_pointer_single_impl: Fortran function to solve the single-precision complex eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!> \param useGPU (optional) decide whether GPUs should be used or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef DEVICE_POINTER
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_COMPLEX */
#ifdef HAVE_SKEWSYMMETRIC
#define REALCASE 1
#define DOUBLE_PRECISION 1
#define ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_skew_evp_real_2stage_double_impl: Fortran function to solve the double-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!> \brief elpa_solve_skew_evp_real_2stage_all_host_arrays_double_impl: Fortran function to solve the double-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
......@@ -331,13 +529,65 @@ module elpa2_impl
#undef DOUBLE_PRECISION
#undef ACTIVATE_SKEW
#define REALCASE 1
#define DOUBLE_PRECISION 1
#define ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_skew_evp_real_2stage_device_pointer_double_impl: Fortran function to solve the double-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!>
!> \param useQR (optional) use QR decomposition
!> \param useGPU (optional) decide whether to use GPUs or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef REALCASE
#undef DOUBLE_PRECISION
#undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#define SINGLE_PRECISION 1
#define ACTIVATE_SKEW
#undef DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_skew_evp_real_2stage_single_impl: Fortran function to solve the single-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!> \brief elpa_solve_skew_evp_real_2stage_all_host_arrays_single_impl: Fortran function to solve the single-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
......@@ -381,6 +631,57 @@ module elpa2_impl
#undef SINGLE_PRECISION
#undef ACTIVATE_SKEW
#define REALCASE 1
#define SINGLE_PRECISION 1
#define ACTIVATE_SKEW
#define DEVICE_POINTER
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
!> \brief elpa_solve_skew_evp_real_2stage_device_pointer_single_impl: Fortran function to solve the single-precision real skew-symmetric eigenvalue problem with a 2 stage approach
!>
!> Parameters
!>
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed
!>
!> \param a Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in Scalapack.
!> The full matrix must be set (not only one half like in scalapack).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q On output: Eigenvectors of a
!> Distribution is like in Scalapack.
!> Must be always dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols local columns of matrix a and q
!>
!> \param mpi_comm_rows MPI communicator for rows
!> \param mpi_comm_cols MPI communicator for columns
!> \param mpi_comm_all MPI communicator for the total processor set
!>
!> \param kernel specify ELPA2 kernel to use
!>
!> \param useQR (optional) use QR decomposition
!> \param useGPU (optional) decide whether GPUs should be used or not
!>
!> \result success logical, false if error occurred
!-------------------------------------------------------------------------------
#include "elpa2_template.F90"
#undef REALCASE
#undef SINGLE_PRECISION
#undef ACTIVATE_SKEW
#undef DEVICE_POINTER
#endif /* WANT_SINGLE_PRECISION_REAL */
#endif /* HAVE_SKEWSYMMETRIC */
......
......@@ -55,11 +55,34 @@
#include "elpa/elpa_simd_constants.h"
#include "../general/error_checking.inc"
#ifdef DEVICE_POINTER
#ifdef ACTIVATE_SKEW
function elpa_solve_skew_evp_&
#else
function elpa_solve_evp_&
#endif
#endif /* ACTIVATE_SKEW */
&MATH_DATATYPE&
&_&
&2stage_device_pointer_&
&PRECISION&
&_impl (obj, &
#ifdef REDISTRIBUTE_MATRIX
aExtern, &
#else
a, &
#endif /* REDISTRIBUTE_MATRIX */
ev, &
#ifdef REDISTRIBUTE_MATRIX
qExtern) result(success)
#else
q) result(success)
#endif /* REDISTRIBUTE_MATRIX */
#else /* DEVICE_POINTER */
#ifdef ACTIVATE_SKEW
function elpa_solve_skew_evp_&
#else
function elpa_solve_evp_&
#endif /* ACTIVATE_SKEW */
&MATH_DATATYPE&
&_&
&2stage_all_host_arrays_&
......@@ -69,13 +92,14 @@
aExtern, &
#else
a, &
#endif
#endif /* REDISTRIBUTE_MATRIX */
ev, &
#ifdef REDISTRIBUTE_MATRIX
qExtern) result(success)
#else
q) result(success)
#endif
#endif /* REDISTRIBUTE_MATRIX */
#endif /* DEVICE_POINTER */
!use matrix_plot
use elpa_abstract_impl
......@@ -107,6 +131,17 @@
logical :: useQRActual
#endif
integer(kind=c_int) :: kernel, kernelByUser
#ifdef DEVICE_POINTER
#ifdef REDISTRIBUTE_MATRIX
type(c_ptr) :: aExtern, qExtern
#else /* REDISTRIBUTE_MATRIX */
type(c_ptr) :: a, q
#endif /* REDISTRIBUTE_MATRIX */
#else /* DEVICE_POINTER */
#ifdef REDISTRIBUTE_MATRIX
#ifdef USE_ASSUMED_SIZE
......@@ -137,12 +172,24 @@
#endif /* REDISTRIBUTE_MATRIX */
#endif /* DEVICE_POINTER */
#ifdef DEVICE_POINTER
#ifdef REDISTRIBUTE_MATRIX
type(c_ptr) :: a, q
#endif
#else /* DEVICE_POINTER */
#ifdef REDISTRIBUTE_MATRIX
MATH_DATATYPE(kind=rck), pointer :: a(:,:)
MATH_DATATYPE(kind=rck), pointer :: q(:,:)
#endif
#endif /* DEVICE_POINTER */
#ifdef DEVICE_POINTER
type(c_ptr) :: ev
#else
real(kind=C_DATATYPE_KIND), intent(inout) :: ev(obj%na)
#endif
MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable :: hh_trans(:,:)
integer(kind=c_int) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols
......@@ -296,6 +343,7 @@
success = .true.
#ifndef DEVICE_POINTER
#ifdef REDISTRIBUTE_MATRIX
if (present(qExtern)) then
#else
......@@ -305,7 +353,7 @@
else
obj%eigenvalues_only = .true.
endif
#endif
na = obj%na
nev = obj%nev
nblk = obj%nblk
......@@ -346,13 +394,15 @@
call obj%timer%stop("mpi_communication")
#ifndef DEVICE_POINTER
#ifdef REDISTRIBUTE_MATRIX
#include "../helpers/elpa_redistribute_template.F90"
#endif /* REDISTRIBUTE_MATRIX */
#endif
! special case na = 1
if (na .eq. 1) then
#ifndef DEVICE_POINTER
#if REALCASE == 1
ev(1) = a(1,1)
#endif
......@@ -362,7 +412,7 @@
if (.not.(obj%eigenvalues_only)) then
q(1,1) = ONE
endif
#endif
! restore original OpenMP settings
#ifdef WITH_OPENMP_TRADITIONAL
! store the number of OpenMP threads used in the calling function
......@@ -720,6 +770,7 @@
endif
#endif /* REALCASE */
#ifndef DEVICE_POINTER
if (.not. obj%eigenvalues_only) then
q_actual => q(1:matrixRows,1:matrixCols)
else
......@@ -727,6 +778,7 @@
check_allocate("elpa2_template: q_dummy", istat, errorMessage)
q_actual => q_dummy(1:matrixRows,1:matrixCols)
endif
#endif
! set the default values for each of the 5 compute steps
do_bandred = .true.
......@@ -826,6 +878,8 @@
#ifdef HAVE_LIKWID
call likwid_markerStartRegion("bandred")
#endif
#ifndef DEVICE_POINTER
! Reduction full -> band
call bandred_&
&MATH_DATATYPE&
......@@ -838,6 +892,8 @@
useQRActual, &
#endif
nrThreads, isSkewsymmetric)
#endif
#ifdef HAVE_LIKWID
call likwid_markerStopRegion("bandred")
#endif
......@@ -855,19 +911,25 @@
#ifdef HAVE_LIKWID
call likwid_markerStartRegion("tridiag")
#endif
#ifndef DEVICE_POINTER
call tridiag_band_&
&MATH_DATATYPE&
&_&
&PRECISION&
(obj, na, nbw, nblk, a, matrixRows, ev, e, matrixCols, hh_trans, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
do_useGPU_tridiag_band, wantDebug, nrThreads, isSkewsymmetric)
#endif
#ifndef DEVICE_POINTER
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
call mpi_bcast(ev, int(na,kind=MPI_KIND), MPI_REAL_PRECISION, 0_MPI_KIND, int(mpi_comm_all,kind=MPI_KIND), mpierr)
call mpi_bcast(e, int(na,kind=MPI_KIND), MPI_REAL_PRECISION, 0_MPI_KIND, int(mpi_comm_all,kind=MPI_KIND), mpierr)
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
#endif
#ifdef HAVE_LIKWID
call likwid_markerStopRegion("tridiag")
#endif
......@@ -890,6 +952,8 @@
#ifdef HAVE_LIKWID
call likwid_markerStartRegion("solve")
#endif
#ifndef DEVICE_POINTER
call solve_tridi_&
&PRECISION &
(obj, na, nev, ev, e, &
......@@ -901,6 +965,9 @@
#endif
nblk, matrixCols, mpi_comm_all, mpi_comm_rows, mpi_comm_cols, do_useGPU_solve_tridi, wantDebug, &
success, nrThreads)
#endif
#ifdef HAVE_LIKWID
call likwid_markerStopRegion("solve")
#endif
......@@ -931,12 +998,14 @@
stop
endif
#ifndef DEVICE_POINTER
check_pd = 0
do i = 1, na
if (ev(i) .gt. thres_pd) then
check_pd = check_pd + 1
endif
enddo
#endif
if (check_pd .lt. na) then
! not positive definite => eigenvectors needed
do_trans_to_band = .true.
......@@ -951,8 +1020,9 @@
#if COMPLEXCASE == 1
if (do_trans_to_band) then
! q must be given, which is why from here on we can use q and not q_actual
#ifndef DEVICE_POINTER
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
#endif
endif
! make sure q_real is deallocated when using check_pd
......@@ -963,6 +1033,7 @@
#endif
if (isSkewsymmetric) then
#ifndef DEVICE_POINTER
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
......@@ -985,6 +1056,7 @@
q(i,1:matrixCols) = 0
end if
end do
#endif
endif
! Backtransform stage 1
if (do_trans_to_band) then
......@@ -992,7 +1064,7 @@