Commit a0f52c60 authored by Andreas Marek's avatar Andreas Marek

Start to prepare ELPA1 for new interface

parent 2eb3a354
......@@ -13,9 +13,11 @@ libelpa@SUFFIX@_public_la_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@modules @FC_MODINC@
libelpa@SUFFIX@_public_la_SOURCES = \
src/elpa.F90 \
src/elpa1/elpa1.F90 \
src/elpa1/elpa1_new_interface.F90 \
src/elpa2/elpa2.F90 \
src/elpa2/elpa2_new_interface.F90 \
src/elpa1/elpa1_auxiliary.F90 \
src/elpa1/elpa1_auxiliary_new_interface.F90 \
src/elpa1/elpa1_utilities.F90 \
src/elpa2/elpa2_utilities.F90 \
src/elpa_t.F90 \
......@@ -55,6 +57,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/elpa2_compute_real_template.X90 \
src/elpa2/elpa2_compute_complex_template.X90 \
src/elpa1/elpa1_template.X90 \
src/elpa1/elpa1_template_new_interface.X90 \
src/elpa2/elpa2_template.X90 \
src/elpa2/elpa2_template_new_interface.X90 \
src/elpa1_c_interface_template.X90 \
......@@ -80,9 +83,13 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/redist_band.X90 \
src/sanity.X90 \
src/elpa1/elpa_cholesky_template.X90 \
src/elpa1/elpa_cholesky_template_new_interface.X90 \
src/elpa1/elpa_invert_trm.X90 \
src/elpa1/elpa_invert_trm_new_interface.X90 \
src/elpa1/elpa_multiply_a_b.X90 \
src/elpa1/elpa_multiply_a_b_new_interface.X90 \
src/elpa1/elpa_solve_tridi.X90 \
src/elpa1/elpa_solve_tridi_new_interface.X90 \
src/precision_macros.h
lib_LTLIBRARIES = libelpa@SUFFIX@.la
......@@ -1006,6 +1013,7 @@ EXTRA_DIST = \
src/elpa1/elpa1_compute_template.X90 \
src/elpa1/elpa1_merge_systems_real_template.X90 \
src/elpa1/elpa1_solve_tridi_real_template.X90 \
src/elpa1/elpa1_solve_tridi_real_template_new_interface.X90 \
src/elpa1/elpa1_tools_template.X90 \
src/elpa1/elpa1_trans_ev_template.X90 \
src/elpa1/elpa1_tridiag_template.X90 \
......@@ -1015,6 +1023,7 @@ EXTRA_DIST = \
src/elpa2/elpa2_herm_matrix_allreduce_complex_template.X90 \
src/elpa2/elpa2_symm_matrix_allreduce_real_template.X90 \
src/elpa1/elpa1_template.X90 \
src/elpa1/elpa1_template_new_interface.X90 \
src/elpa2/elpa2_template.X90 \
src/elpa2/elpa2_template_new_interface.X90 \
src/elpa1_c_interface_template.X90 \
......@@ -1038,9 +1047,13 @@ EXTRA_DIST = \
src/elpa2/compute_hh_trafo.X90 \
src/sanity.X90 \
src/elpa1/elpa_cholesky_template.X90 \
src/elpa1/elpa_cholesky_template_new_interface.X90 \
src/elpa1/elpa_invert_trm.X90 \
src/elpa1/elpa_invert_trm_new_interface.X90 \
src/elpa1/elpa_multiply_a_b.X90 \
src/elpa1/elpa_multiply_a_b_new_interface.X90 \
src/elpa1/elpa_solve_tridi.X90 \
src/elpa1/elpa_solve_tridi_new_interface.X90 \
src/elpa2/qr/elpa_qrkernels.X90 \
src/elpa2/GPU/ev_tridi_band_gpu_c_v2_complex_template.Xcu \
src/elpa2/GPU/ev_tridi_band_gpu_c_v2_real_template.Xcu \
......
......@@ -68,6 +68,7 @@ module ELPA1_COMPUTE
public :: trans_ev_real
public :: solve_tridi_double
public :: solve_tridi_double_new
interface tridiag_real
module procedure tridiag_real_double
......@@ -81,6 +82,7 @@ module ELPA1_COMPUTE
public :: tridiag_real_single ! Transform real single-precision symmetric matrix to tridiagonal form
public :: trans_ev_real_single ! Transform real single-precision eigenvectors of a tridiagonal matrix back
public :: solve_tridi_single
public :: solve_tridi_single_new
#endif
public :: tridiag_complex_double ! Transform complex hermitian matrix to tridiagonal form
......
......@@ -70,6 +70,7 @@
#include "elpa1_tridiag_template.X90"
#include "elpa1_trans_ev_template.X90"
#include "elpa1_solve_tridi_real_template.X90"
#include "elpa1_solve_tridi_real_template_new_interface.X90"
#include "elpa1_merge_systems_real_template.X90"
#include "elpa1_tools_template.X90"
......
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
! This particular source code file contains additions, changes and
! enhancements authored by Intel Corporation which is not part of
! the ELPA consortium.
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!> \mainpage
!> Eigenvalue SoLvers for Petaflop-Applications (ELPA)
!> \par
!> http://elpa.mpcdf.mpg.de
!>
!> \par
!> The ELPA library was originally created by the ELPA consortium,
!> consisting of the following organizations:
!>
!> - Max Planck Computing and Data Facility (MPCDF), formerly known as
!> Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!> - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!> Informatik,
!> - Technische Universität München, Lehrstuhl für Informatik mit
!> Schwerpunkt Wissenschaftliches Rechnen,
!> - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!> - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
!> Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!> and
!> - IBM Deutschland GmbH
!>
!> Some parts and enhancements of ELPA have been contributed and authored
!> by the Intel Corporation which is not part of the ELPA consortium.
!>
!> Contributions to the ELPA source have been authored by (in alphabetical order):
!>
!> \author T. Auckenthaler, Volker Blum, A. Heinecke, L. Huedepohl, R. Johanni, Werner Jürgens, and A. Marek
#include "config-f90.h"
!> \brief Fortran module which provides the routines to use the one-stage ELPA solver
module ELPA1_new
use, intrinsic :: iso_c_binding
use elpa_utilities
use elpa1_auxiliary_new
use elpa1_utilities
implicit none
! The following routines are public:
private
public :: elpa_get_communicators_new !< Sets MPI row/col communicators as needed by ELPA
public :: elpa_solve_evp_real_1stage_double_new !< Driver routine for real double-precision 1-stage eigenvalue problem
#ifdef WANT_SINGLE_PRECISION_REAL
public :: elpa_solve_evp_real_1stage_single_new !< Driver routine for real single-precision 1-stage eigenvalue problem
#endif
public :: elpa_solve_evp_complex_1stage_double_new !< Driver routine for complex 1-stage eigenvalue problem
#ifdef WANT_SINGLE_PRECISION_COMPLEX
public :: elpa_solve_evp_complex_1stage_single_new !< Driver routine for complex 1-stage eigenvalue problem
#endif
! imported from elpa1_auxiliary
public :: elpa_mult_at_b_real_double_new !< Multiply double-precision real matrices A**T * B
public :: elpa_mult_ah_b_complex_double_new !< Multiply double-precision complex matrices A**H * B
public :: elpa_invert_trm_real_double_new !< Invert double-precision real triangular matrix
public :: elpa_invert_trm_complex_double_new !< Invert double-precision complex triangular matrix
public :: elpa_cholesky_real_double_new !< Cholesky factorization of a double-precision real matrix
public :: elpa_cholesky_complex_double_new !< Cholesky factorization of a double-precision complex matrix
public :: elpa_solve_tridi_double_new !< Solve a double-precision tridiagonal eigensystem with divide and conquer method
#ifdef WANT_SINGLE_PRECISION_REAL
public :: elpa_mult_at_b_real_single_new !< Multiply single-precision real matrices A**T * B
public :: elpa_invert_trm_real_single_new !< Invert single-precision real triangular matrix
public :: elpa_cholesky_real_single_new !< Cholesky factorization of a single-precision real matrix
public :: elpa_solve_tridi_single_new !< Solve a single-precision tridiagonal eigensystem with divide and conquer method
#endif
#ifdef WANT_SINGLE_PRECISION_COMPLEX
public :: elpa_mult_ah_b_complex_single_new !< Multiply single-precision complex matrices A**H * B
public :: elpa_invert_trm_complex_single_new !< Invert single-precision complex triangular matrix
public :: elpa_cholesky_complex_single_new !< Cholesky factorization of a single-precision complex matrix
#endif
! Timing results, set by every call to solve_evp_xxx
real(kind=c_double), public :: time_evp_fwd !< time for forward transformations (to tridiagonal form)
real(kind=c_double), public :: time_evp_solve !< time for solving the tridiagonal system
real(kind=c_double), public :: time_evp_back !< time for back transformations of eigenvectors
logical, public :: elpa_print_times = .false. !< Set elpa_print_times to .true. for explicit timing outputs
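! Illustrative note (not part of this commit): set
!   elpa_print_times = .true.
! before a solver call to print the three phase timings declared above
! (time_evp_fwd, time_evp_solve, time_evp_back).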
!> \brief elpa_solve_evp_real_1stage_double_new: Fortran function to solve the real eigenvalue problem with 1-stage solver. This is called by "elpa_solve_evp_real"
!>
! Parameters
!
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed.
!> The smallest nev eigenvalues/eigenvectors are calculated.
!>
!> \param a(lda,matrixCols) Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in ScaLAPACK.
!> The full matrix must be set (not only one half like in ScaLAPACK).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev(na) On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q(ldq,matrixCols) On output: Eigenvectors of a
!> Distribution is like in ScaLAPACK.
!> Must always be dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols distributed number of matrix columns
!>
!> \param mpi_comm_rows MPI-Communicator for rows
!> \param mpi_comm_cols MPI-Communicator for columns
!> \param mpi_comm_all global MPI communicator
!> \param useGPU use GPU version (.true. or .false.)
!>
!> \result success
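! A minimal calling sketch for the routine documented above (illustrative
! only; na, nev, a, lda, ev, q, ldq, nblk, matrixCols and the communicators
! are assumed to be set up by the caller as described in the parameter list):
#if 0
      logical :: success
      success = elpa_solve_evp_real_1stage_double_new(na, nev, a, lda, ev, q, ldq,  &
                                                      nblk, matrixCols,             &
                                                      mpi_comm_rows, mpi_comm_cols, &
                                                      mpi_comm_all, useGPU=.false.)
      if (.not. success) print *, "1-stage solver failed"
#endif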
contains
!-------------------------------------------------------------------------------
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes; these are set here.
! mpi_comm_rows/mpi_comm_cols can be freed with MPI_Comm_free when they are
! no longer needed.
!
! Parameters
!
!> \param mpi_comm_global Global communicator for the calculations (in)
!>
!> \param my_prow Row coordinate of the calling process in the process grid (in)
!>
!> \param my_pcol Column coordinate of the calling process in the process grid (in)
!>
!> \param mpi_comm_rows Communicator for communicating within rows of processes (out)
!>
!> \param mpi_comm_cols Communicator for communicating within columns of processes (out)
!> \result mpierr integer error value of mpi_comm_split function
function elpa_get_communicators_new(mpi_comm_global, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols) result(mpierr)
! use precision
use elpa_mpi
use iso_c_binding
implicit none
integer(kind=c_int), intent(in) :: mpi_comm_global, my_prow, my_pcol
integer(kind=c_int), intent(out) :: mpi_comm_rows, mpi_comm_cols
integer(kind=c_int) :: mpierr
! mpi_comm_rows is used for communicating WITHIN rows, i.e. all processes
! having the same column coordinate share one mpi_comm_rows.
! So the "color" for splitting is my_pcol and the "key" is my row coordinate.
! Analogous for mpi_comm_cols
call mpi_comm_split(mpi_comm_global,my_pcol,my_prow,mpi_comm_rows,mpierr)
call mpi_comm_split(mpi_comm_global,my_prow,my_pcol,mpi_comm_cols,mpierr)
end function elpa_get_communicators_new
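! Usage sketch (illustrative only; the near-square grid layout and all
! variable names are assumptions of this example, not part of this commit):
#if 0
      integer :: mpierr, myid, nprocs, np_rows, np_cols, my_prow, my_pcol
      integer :: mpi_comm_rows, mpi_comm_cols
      call mpi_comm_rank(mpi_comm_world, myid, mpierr)
      call mpi_comm_size(mpi_comm_world, nprocs, mpierr)
      ! largest np_cols <= sqrt(nprocs) that divides nprocs
      do np_cols = int(sqrt(real(nprocs))), 2, -1
        if (mod(nprocs, np_cols) == 0) exit
      enddo
      np_rows = nprocs / np_cols
      ! column-major placement of processes in the grid
      my_prow = mod(myid, np_rows)
      my_pcol = myid / np_rows
      mpierr  = elpa_get_communicators_new(mpi_comm_world, my_prow, my_pcol, &
                                           mpi_comm_rows, mpi_comm_cols)
#endif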
!> \brief elpa_solve_evp_real_1stage_double_new: Fortran function to solve the real double-precision eigenvalue problem with 1-stage solver
!>
! Parameters
!
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed.
!> The smallest nev eigenvalues/eigenvectors are calculated.
!>
!> \param a(lda,matrixCols) Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in ScaLAPACK.
!> The full matrix must be set (not only one half like in ScaLAPACK).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev(na) On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q(ldq,matrixCols) On output: Eigenvectors of a
!> Distribution is like in ScaLAPACK.
!> Must always be dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols distributed number of matrix columns
!>
!> \param mpi_comm_rows MPI-Communicator for rows
!> \param mpi_comm_cols MPI-Communicator for columns
!> \param mpi_comm_all global MPI communicator
!> \param useGPU use GPU version (.true. or .false.)
!>
!> \result success
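! Note: the precision/datatype variants that follow are all generated from
! the same template. The REALCASE/COMPLEXCASE and *_PRECISION macros select
! the variant, and precision_macros.h maps them onto concrete kind and name
! macros before elpa1_template_new_interface.X90 is included.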
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "../precision_macros.h"
#include "elpa1_template_new_interface.X90"
#undef REALCASE
#undef DOUBLE_PRECISION
#ifdef WANT_SINGLE_PRECISION_REAL
!> \brief elpa_solve_evp_real_1stage_single_new: Fortran function to solve the real single-precision eigenvalue problem with 1-stage solver
!>
! Parameters
!
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed.
!> The smallest nev eigenvalues/eigenvectors are calculated.
!>
!> \param a(lda,matrixCols) Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in ScaLAPACK.
!> The full matrix must be set (not only one half like in ScaLAPACK).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev(na) On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q(ldq,matrixCols) On output: Eigenvectors of a
!> Distribution is like in ScaLAPACK.
!> Must always be dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols distributed number of matrix columns
!>
!> \param mpi_comm_rows MPI-Communicator for rows
!> \param mpi_comm_cols MPI-Communicator for columns
!> \param mpi_comm_all global MPI communicator
!> \param useGPU use GPU version (.true. or .false.)
!>
!> \result success
#define REALCASE 1
#define SINGLE_PRECISION 1
#include "../precision_macros.h"
#include "elpa1_template_new_interface.X90"
#undef REALCASE
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_REAL */
!> \brief elpa_solve_evp_complex_1stage_double_new: Fortran function to solve the complex double-precision eigenvalue problem with 1-stage solver
!>
! Parameters
!
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed.
!> The smallest nev eigenvalues/eigenvectors are calculated.
!>
!> \param a(lda,matrixCols) Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in ScaLAPACK.
!> The full matrix must be set (not only one half like in ScaLAPACK).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev(na) On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q(ldq,matrixCols) On output: Eigenvectors of a
!> Distribution is like in ScaLAPACK.
!> Must always be dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols distributed number of matrix columns
!>
!> \param mpi_comm_rows MPI-Communicator for rows
!> \param mpi_comm_cols MPI-Communicator for columns
!> \param mpi_comm_all global MPI communicator
!> \param useGPU use GPU version (.true. or .false.)
!>
!> \result success
#define COMPLEXCASE 1
#define DOUBLE_PRECISION 1
#include "../precision_macros.h"
#include "elpa1_template_new_interface.X90"
#undef DOUBLE_PRECISION
#undef COMPLEXCASE
#ifdef WANT_SINGLE_PRECISION_COMPLEX
!> \brief elpa_solve_evp_complex_1stage_single_new: Fortran function to solve the complex single-precision eigenvalue problem with 1-stage solver
!>
! Parameters
!
!> \param na Order of matrix a
!>
!> \param nev Number of eigenvalues needed.
!> The smallest nev eigenvalues/eigenvectors are calculated.
!>
!> \param a(lda,matrixCols) Distributed matrix for which eigenvalues are to be computed.
!> Distribution is like in ScaLAPACK.
!> The full matrix must be set (not only one half like in ScaLAPACK).
!> Destroyed on exit (upper and lower half).
!>
!> \param lda Leading dimension of a
!>
!> \param ev(na) On output: eigenvalues of a, every processor gets the complete set
!>
!> \param q(ldq,matrixCols) On output: Eigenvectors of a
!> Distribution is like in ScaLAPACK.
!> Must always be dimensioned to the full size (corresponding to (na,na))
!> even if only a part of the eigenvalues is needed.
!>
!> \param ldq Leading dimension of q
!>
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
!>
!> \param matrixCols distributed number of matrix columns
!>
!> \param mpi_comm_rows MPI-Communicator for rows
!> \param mpi_comm_cols MPI-Communicator for columns
!> \param mpi_comm_all global MPI communicator
!> \param useGPU use GPU version (.true. or .false.)
!>
!> \result success
#define COMPLEXCASE 1
#define SINGLE_PRECISION 1
#include "../precision_macros.h"
#include "elpa1_template_new_interface.X90"
#undef COMPLEXCASE
#undef SINGLE_PRECISION
#endif /* WANT_SINGLE_PRECISION_COMPLEX */
end module ELPA1_new
#if 0
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
! This particular source code file contains additions, changes and
! enhancements authored by Intel Corporation which is not part of
! the ELPA consortium.
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
#endif
#include "../sanity.X90"
function elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
&_new (na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all, &
useGPU) result(success)
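! (With REALCASE and DOUBLE_PRECISION defined via precision_macros.h, the
!  concatenated name above expands to elpa_solve_evp_real_1stage_double_new;
!  the other variants follow the same pattern.)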
use precision
use cuda_functions
use mod_check_for_gpu
#ifdef HAVE_DETAILED_TIMINGS
use timings
#else
use timings_dummy
#endif
use iso_c_binding
use elpa_mpi
use elpa1_compute
implicit none
integer(kind=c_int), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all
real(kind=REAL_DATATYPE), intent(out) :: ev(na)
#if REALCASE == 1
#ifdef USE_ASSUMED_SIZE
real(kind=C_DATATYPE_KIND), intent(inout) :: a(lda,*)
real(kind=C_DATATYPE_KIND), intent(out) :: q(ldq,*)
#else
real(kind=C_DATATYPE_KIND), intent(inout) :: a(lda,matrixCols)
real(kind=C_DATATYPE_KIND), intent(out) :: q(ldq,matrixCols)
#endif
real(kind=C_DATATYPE_KIND), allocatable :: tau(:)
#endif /* REALCASE */
#if COMPLEXCASE == 1
#ifdef USE_ASSUMED_SIZE
complex(kind=C_DATATYPE_KIND), intent(inout) :: a(lda,*)
complex(kind=C_DATATYPE_KIND), intent(out) :: q(ldq,*)
#else
complex(kind=C_DATATYPE_KIND), intent(inout) :: a(lda,matrixCols)
complex(kind=C_DATATYPE_KIND), intent(out) :: q(ldq,matrixCols)
#endif
real(kind=REAL_DATATYPE), allocatable :: q_real(:,:)
complex(kind=C_DATATYPE_KIND), allocatable :: tau(:)
integer(kind=c_int) :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
#endif /* COMPLEXCASE */
logical, intent(in), optional :: useGPU
logical :: success
logical :: do_useGPU
integer(kind=ik) :: numberOfGPUDevices
integer(kind=c_int) :: my_pe, n_pes, my_prow, my_pcol, mpierr
real(kind=C_DATATYPE_KIND), allocatable :: e(:)
real(kind=c_double) :: ttt0, ttt1 ! MPI_WTIME always needs double
logical, save :: firstCall = .true.
logical :: wantDebug
integer(kind=c_int) :: istat
character(200) :: errorMessage
call timer%start("elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
&PRECISION&
&")
call timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_all,my_pe,mpierr)
call mpi_comm_size(mpi_comm_all,n_pes,mpierr)
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
#if COMPLEXCASE == 1
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
#endif
call timer%stop("mpi_communication")
success = .true.
wantDebug = .false.
if (firstCall) then
! are debug messages desired?
wantDebug = debug_messages_via_environment_variable()
firstCall = .false.
endif
do_useGPU = .false.
if (present(useGPU)) then
! user defined GPU usage via the optional argument in the API call
if (useGPU) then
if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
do_useGPU = .true.
! set the necessary parameters
cudaMemcpyHostToDevice = cuda_memcpyHostToDevice()
cudaMemcpyDeviceToHost = cuda_memcpyDeviceToHost()
cudaMemcpyDeviceToDevice = cuda_memcpyDeviceToDevice()
cudaHostRegisterPortable = cuda_hostRegisterPortable()
cudaHostRegisterMapped = cuda_hostRegisterMapped()
else
print *,"GPUs are requested but not detected! Aborting..."