Unverified commit c03e7828, authored by Andreas Marek

Merge branch 'master' into ELPA_GPU

parents a368d0a2 62a29931
......@@ -9,7 +9,8 @@ AM_LDFLAGS = $(SCALAPACK_LDFLAGS)
lib_LTLIBRARIES = libelpa@SUFFIX@.la
libelpa@SUFFIX@_la_LINK = $(FCLINK) $(AM_LDFLAGS) -version-info $(ELPA_SO_VERSION) -lstdc++
libelpa@SUFFIX@_la_SOURCES = src/elpa_utilities.F90 \
libelpa@SUFFIX@_la_SOURCES = src/mod_precision.f90 \
src/elpa_utilities.F90 \
src/elpa1_compute.F90 \
src/elpa1.F90 \
src/elpa2_utilities.F90 \
......
......@@ -47,15 +47,15 @@ module mod_check_for_gpu
function check_for_gpu(myid, numberOfDevices) result(gpuAvailable)
use cuda_functions
use precision
implicit none
include 'mpif.h'
integer, intent(in) :: myid
logical :: success
integer, intent(out) :: numberOfDevices
integer :: deviceNumber, mpierr, maxNumberOfDevices
logical :: gpuAvailable
character(len=1024) :: envname
integer(kind=ik), intent(in) :: myid
logical :: success
integer(kind=ik), intent(out) :: numberOfDevices
integer(kind=ik) :: deviceNumber, mpierr, maxNumberOfDevices
logical :: gpuAvailable
character(len=1024) :: envname
gpuAvailable = .false.
......
......@@ -81,7 +81,7 @@
#include "config-f90.h"
!> \brief Fortran module which provides the routines to use the one-stage ELPA solver
module ELPA1
use elpa1_compute
use precision
use elpa_utilities
use elpa1_compute
......@@ -104,9 +104,9 @@ module ELPA1
! Timing results, set by every call to solve_evp_xxx
real*8, public :: time_evp_fwd !< time for forward transformations (to tridiagonal form)
real*8, public :: time_evp_solve !< time for solving the tridiagonal system
real*8, public :: time_evp_back !< time for back transformations of eigenvectors
real(kind=rk), public :: time_evp_fwd !< time for forward transformations (to tridiagonal form)
real(kind=rk), public :: time_evp_solve !< time for solving the tridiagonal system
real(kind=rk), public :: time_evp_back !< time for back transformations of eigenvectors
logical, public :: elpa_print_times = .false. !< Set elpa_print_times to .true. for explicit timing outputs
......@@ -235,13 +235,13 @@ contains
function get_elpa_communicators(mpi_comm_global, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols) result(mpierr)
use precision
implicit none
integer, intent(in) :: mpi_comm_global, my_prow, my_pcol
integer, intent(out) :: mpi_comm_rows, mpi_comm_cols
integer(kind=ik), intent(in) :: mpi_comm_global, my_prow, my_pcol
integer(kind=ik), intent(out) :: mpi_comm_rows, mpi_comm_cols
integer :: mpierr
integer(kind=ik) :: mpierr
! mpi_comm_rows is used for communicating WITHIN rows, i.e. all processes
! having the same column coordinate share one mpi_comm_rows.
......@@ -290,21 +290,21 @@ end function get_elpa_communicators
function solve_evp_real_1stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols) result(success)
use precision
#ifdef HAVE_DETAILED_TIMINGS
use timings
use timings
#endif
implicit none
integer, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
real*8 :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
real(kind=rk) :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)
integer :: my_prow, my_pcol, mpierr
real*8, allocatable :: e(:), tau(:)
real*8 :: ttt0, ttt1
logical :: success
logical, save :: firstCall = .true.
logical :: wantDebug
integer(kind=ik) :: my_prow, my_pcol, mpierr
real(kind=rk), allocatable :: e(:), tau(:)
real(kind=rk) :: ttt0, ttt1
logical :: success
logical, save :: firstCall = .true.
logical :: wantDebug
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_evp_real_1stage")
......@@ -390,24 +390,24 @@ end function solve_evp_real_1stage
function solve_evp_complex_1stage(na, nev, a, lda, ev, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols) result(success)
#ifdef HAVE_DETAILED_TIMINGS
use timings
use timings
#endif
use precision
implicit none
integer, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
complex*16 :: a(lda,matrixCols), q(ldq,matrixCols)
real*8 :: ev(na)
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
complex(kind=ck) :: a(lda,matrixCols), q(ldq,matrixCols)
real(kind=rk) :: ev(na)
integer :: my_prow, my_pcol, np_rows, np_cols, mpierr
integer :: l_rows, l_cols, l_cols_nev
real*8, allocatable :: q_real(:,:), e(:)
complex*16, allocatable :: tau(:)
real*8 ttt0, ttt1
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: l_rows, l_cols, l_cols_nev
real(kind=rk), allocatable :: q_real(:,:), e(:)
complex(kind=ck), allocatable :: tau(:)
real(kind=rk) :: ttt0, ttt1
logical :: success
logical, save :: firstCall = .true.
logical :: wantDebug
logical :: success
logical, save :: firstCall = .true.
logical :: wantDebug
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_evp_complex_1stage")
......
This diff is collapsed.
......@@ -142,34 +142,34 @@ contains
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use cuda_functions
use mod_check_for_gpu
implicit none
logical, intent(in), optional :: useQR
logical :: useQRActual, useQREnvironment
integer, intent(in), optional :: THIS_REAL_ELPA_KERNEL_API
integer :: THIS_REAL_ELPA_KERNEL
integer, intent(in) :: na, nev, lda, ldq, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all
integer, intent(in) :: nblk
real*8, intent(inout) :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)
real*8, allocatable :: hh_trans_real(:,:)
integer :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr
integer :: nbw, num_blocks
real*8, allocatable :: tmat(:,:,:), e(:)
real*8 :: ttt0, ttt1, ttts
integer :: i
logical :: success
logical, save :: firstCall = .true.
logical :: wantDebug
integer :: istat
character(200) :: errorMessage
logical :: useGPU
integer :: numberOfGPUDevices
use precision
use cuda_functions
use mod_check_for_gpu
implicit none
logical, intent(in), optional :: useQR
logical :: useQRActual, useQREnvironment
integer(kind=ik), intent(in), optional :: THIS_REAL_ELPA_KERNEL_API
integer(kind=ik) :: THIS_REAL_ELPA_KERNEL
integer(kind=ik), intent(in) :: na, nev, lda, ldq, matrixCols, mpi_comm_rows, &
mpi_comm_cols, mpi_comm_all
integer(kind=ik), intent(in) :: nblk
real(kind=rk), intent(inout) :: a(lda,matrixCols), ev(na), q(ldq,matrixCols)
real(kind=rk), allocatable :: hh_trans_real(:,:)
integer(kind=ik) :: my_pe, n_pes, my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: nbw, num_blocks
real(kind=rk), allocatable :: tmat(:,:,:), e(:)
real(kind=rk) :: ttt0, ttt1, ttts
integer(kind=ik) :: i
logical :: success
logical, save :: firstCall = .true.
logical :: wantDebug
integer(kind=ik) :: istat
character(200) :: errorMessage
logical :: useGPU
integer(kind=ik) :: numberOfGPUDevices
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_evp_real_2stage")
......@@ -339,7 +339,7 @@ contains
! Backtransform stage 1
ttt0 = MPI_Wtime()
call trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, matrixCols, hh_trans_real, &
call trans_ev_tridi_to_band_real(na, nev, nblk, nbw, q, ldq, matrixCols, hh_trans_real, &
mpi_comm_rows, mpi_comm_cols, wantDebug, useGPU, success, &
THIS_REAL_ELPA_KERNEL)
if (.not.(success)) return
......@@ -419,32 +419,32 @@ function solve_evp_complex_2stage(na, nev, a, lda, ev, q, ldq, nblk, &
mpi_comm_all, THIS_COMPLEX_ELPA_KERNEL_API) result(success)
#ifdef HAVE_DETAILED_TIMINGS
use timings
use timings
#endif
use cuda_functions
use mod_check_for_gpu
implicit none
integer, intent(in), optional :: THIS_COMPLEX_ELPA_KERNEL_API
integer :: THIS_COMPLEX_ELPA_KERNEL
integer, intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all
complex*16, intent(inout) :: a(lda,matrixCols), q(ldq,matrixCols)
real*8, intent(inout) :: ev(na)
complex*16, allocatable :: hh_trans_complex(:,:)
integer :: my_prow, my_pcol, np_rows, np_cols, mpierr, my_pe, n_pes
integer :: l_cols, l_rows, l_cols_nev, nbw, num_blocks
complex*16, allocatable :: tmat(:,:,:)
real*8, allocatable :: q_real(:,:), e(:)
real*8 :: ttt0, ttt1, ttts
integer :: i
logical :: success, wantDebug
logical, save :: firstCall = .true.
integer :: istat
character(200) :: errorMessage
logical :: useGPU
integer :: numberOfGPUDevices
use precision
use cuda_functions
use mod_check_for_gpu
implicit none
integer(kind=ik), intent(in), optional :: THIS_COMPLEX_ELPA_KERNEL_API
integer(kind=ik) :: THIS_COMPLEX_ELPA_KERNEL
integer(kind=ik), intent(in) :: na, nev, lda, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, mpi_comm_all
complex(kind=ck), intent(inout) :: a(lda,matrixCols), q(ldq,matrixCols)
real(kind=rk), intent(inout) :: ev(na)
complex(kind=ck), allocatable :: hh_trans_complex(:,:)
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr, my_pe, n_pes
integer(kind=ik) :: l_cols, l_rows, l_cols_nev, nbw, num_blocks
complex(kind=ck), allocatable :: tmat(:,:,:)
real(kind=rk), allocatable :: q_real(:,:), e(:)
real(kind=rk) :: ttt0, ttt1, ttts
integer(kind=ik) :: i
logical :: success, wantDebug
logical, save :: firstCall = .true.
integer(kind=ik) :: istat
character(200) :: errorMessage
logical :: useGPU
integer(kind=ik) :: numberOfGPUDevices
#ifdef HAVE_DETAILED_TIMINGS
call timer%start("solve_evp_complex_2stage")
......
This diff is collapsed.
......@@ -59,14 +59,14 @@ module complex_generic_kernel
public single_hh_trafo_complex_generic
contains
subroutine single_hh_trafo_complex_generic(q, hh, nb, nq, ldq)
use precision
implicit none
integer, intent(in) :: nb, nq, ldq
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(*)
integer(kind=ik), intent(in) :: nb, nq, ldq
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
integer i
integer(kind=ik) :: i
! Safety only:
......@@ -93,15 +93,15 @@ contains
! --------------------------------------------------------------------------------------------------
subroutine double_hh_trafo_complex_generic(q, hh, nb, nq, ldq, ldh)
use precision
implicit none
integer, intent(in) :: nb, nq, ldq, ldh
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(ldh,*)
complex*16 s
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
complex(kind=ck) :: s
integer i
integer(kind=ik) :: i
! Safety only:
......@@ -138,16 +138,16 @@ contains
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_complex_kernel_12(q, hh, nb, ldq)
use precision
implicit none
integer, intent(in) :: nb, ldq
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(*)
integer(kind=ik), intent(in) :: nb, ldq
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
complex*16 x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc
complex*16 h1, tau1
integer i
complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8, x9, xa, xb, xc
complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i
x1 = q(1,1)
......@@ -231,16 +231,16 @@ contains
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_complex_kernel_8(q, hh, nb, ldq)
use precision
implicit none
integer, intent(in) :: nb, ldq
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(*)
integer(kind=ik), intent(in) :: nb, ldq
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
complex*16 x1, x2, x3, x4, x5, x6, x7, x8
complex*16 h1, tau1
integer i
complex(kind=ck) :: x1, x2, x3, x4, x5, x6, x7, x8
complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i
x1 = q(1,1)
......@@ -304,16 +304,17 @@ contains
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_complex_kernel_4(q, hh, nb, ldq)
use precision
implicit none
integer, intent(in) :: nb, ldq
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(*)
integer(kind=ik), intent(in) :: nb, ldq
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
complex*16 x1, x2, x3, x4
complex*16 h1, tau1
integer i
complex(kind=ck) :: x1, x2, x3, x4
complex(kind=ck) :: h1, tau1
integer(kind=ik) :: i
x1 = q(1,1)
......@@ -357,17 +358,17 @@ contains
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_complex_kernel_4_2hv(q, hh, nb, ldq, ldh, s)
use precision
implicit none
integer, intent(in) :: nb, ldq, ldh
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(ldh,*)
complex*16, intent(in) :: s
integer(kind=ik), intent(in) :: nb, ldq, ldh
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
complex(kind=ck), intent(in) :: s
complex*16 x1, x2, x3, x4, y1, y2, y3, y4
complex*16 h1, h2, tau1, tau2
integer i
complex(kind=ck) :: x1, x2, x3, x4, y1, y2, y3, y4
complex(kind=ck) :: h1, h2, tau1, tau2
integer(kind=ik) :: i
x1 = q(1,2)
x2 = q(2,2)
......@@ -443,17 +444,17 @@ contains
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_complex_kernel_8_2hv(q, hh, nb, ldq, ldh, s)
use precision
implicit none
integer, intent(in) :: nb, ldq, ldh
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(ldh,*)
complex*16, intent(in) :: s
integer(kind=ik), intent(in) :: nb, ldq, ldh
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
complex(kind=ck), intent(in) :: s
complex*16 x1, x2, x3, x4, x5, x6 ,x7, x8, y1, y2, y3, y4, y5, y6, y7, y8
complex*16 h1, h2, tau1, tau2
integer i
complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, y1, y2, y3, y4, y5, y6, y7, y8
complex(kind=ck) :: h1, h2, tau1, tau2
integer(kind=ik) :: i
x1 = q(1,2)
x2 = q(2,2)
......@@ -574,17 +575,18 @@ contains
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_complex_kernel_12_2hv(q, hh, nb, ldq, ldh, s)
use precision
implicit none
integer, intent(in) :: nb, ldq, ldh
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(ldh,*)
complex*16, intent(in) :: s
integer(kind=ik), intent(in) :: nb, ldq, ldh
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
complex(kind=ck), intent(in) :: s
complex*16 x1, x2, x3, x4, x5, x6 ,x7, x8, x9, x10, x11, x12, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12
complex*16 h1, h2, tau1, tau2
integer i
complex(kind=ck) :: x1, x2, x3, x4, x5, x6 ,x7, x8, x9, x10, x11, x12, y1, y2, y3, y4, y5, y6, &
y7, y8, y9, y10, y11, y12
complex(kind=ck) :: h1, h2, tau1, tau2
integer(kind=ik) :: i
x1 = q(1,2)
x2 = q(2,2)
......
......@@ -61,15 +61,16 @@ module complex_generic_simple_kernel
public single_hh_trafo_complex_generic_simple
contains
subroutine single_hh_trafo_complex_generic_simple(q, hh, nb, nq, ldq)
use precision
implicit none
integer, intent(in) :: nb, nq, ldq
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(*)
integer(kind=ik), intent(in) :: nb, nq, ldq
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(*)
integer i
complex*16 h1, tau1, x(nq)
integer(kind=ik) :: i
complex(kind=ck) :: h1, tau1, x(nq)
! Just one Householder transformation
......@@ -92,15 +93,15 @@ contains
! --------------------------------------------------------------------------------------------------
subroutine double_hh_trafo_complex_generic_simple(q, hh, nb, nq, ldq, ldh)
use precision
implicit none
integer, intent(in) :: nb, nq, ldq, ldh
complex*16, intent(inout) :: q(ldq,*)
complex*16, intent(in) :: hh(ldh,*)
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
complex(kind=ck), intent(inout) :: q(ldq,*)
complex(kind=ck), intent(in) :: hh(ldh,*)
complex*16 s, h1, h2, tau1, tau2, x(nq), y(nq)
integer i
complex(kind=ck) :: s, h1, h2, tau1, tau2, x(nq), y(nq)
integer(kind=ik) :: i
! Calculate dot product of the two Householder vectors
......
......@@ -59,15 +59,15 @@
! public double_hh_trafo_generic
!contains
subroutine double_hh_trafo_generic(q, hh, nb, nq, ldq, ldh)
use precision
implicit none
integer, intent(in) :: nb, nq, ldq, ldh
real*8, intent(inout) :: q(ldq,*)
real*8, intent(in) :: hh(ldh,*)
integer(kind=ik), intent(in) :: nb, nq, ldq, ldh
real(kind=rk), intent(inout) :: q(ldq,*)
real(kind=rk), intent(in) :: hh(ldh,*)
real*8 s
integer i
real(kind=rk) :: s
integer(kind=ik) :: i
! equivalence(q(1,1),q_complex(1,1))
......@@ -97,7 +97,7 @@
else if(nq-i+1 > 0) then
call hh_trafo_kernel_4_generic(q(i,1),hh, nb, ldq, ldh, s)
endif
end subroutine double_hh_trafo_generic
! --------------------------------------------------------------------------------------------------
......@@ -108,16 +108,16 @@
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_kernel_12_generic(q, hh, nb, ldq, ldh, s)
use precision
implicit none
integer, intent(in) :: nb, ldq, ldh
complex*16, intent(inout) :: q(ldq/2,*)
real*8, intent(in) :: hh(ldh,*), s
integer(kind=ik), intent(in) :: nb, ldq, ldh
complex(kind=ck), intent(inout) :: q(ldq/2,*)
real(kind=rk), intent(in) :: hh(ldh,*), s
complex*16 x1, x2, x3, x4, x5, x6, y1, y2, y3, y4, y5, y6
real*8 h1, h2, tau1, tau2
integer i
complex(kind=ck) :: x1, x2, x3, x4, x5, x6, y1, y2, y3, y4, y5, y6
real(kind=rk) :: h1, h2, tau1, tau2
integer(kind=ik) :: i
x1 = q(1,2)
......@@ -215,16 +215,16 @@
! --------------------------------------------------------------------------------------------------
subroutine hh_trafo_kernel_8_generic(q, hh, nb, ldq, ldh, s)
use precision
implicit none
integer, intent(in) :: nb, ldq, ldh
complex*16, intent(inout) :: q(ldq/2,*)
real*8, intent(in) :: hh(ldh,*), s
integer(kind=ik), intent(in) :: nb, ldq, ldh
complex(kind=ck), intent(inout) :: q(ldq/2,*)
real(kind=rk), intent(in) :: hh(ldh,*), s
complex*16 x1, x2, x3, x4, y1, y2, y3, y4
real*8 h1, h2, tau1, tau2
integer i
complex(kind=ck) :: x1, x2, x3, x4, y1, y2, y3, y4
real(kind=rk) :: h1, h2,