! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
! This particular source code file contains additions, changes and
! enhancements authored by Intel Corporation which is not part of
! the ELPA consortium.
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of version 3 of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>.
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
#include "../general/sanity.F90"
use elpa1_compute
use elpa_utilities
use elpa_mpi
use precision
use elpa_abstract_impl
#ifdef WITH_OPENMP
use omp_lib
#endif
implicit none
#include "../general/precision_kinds.F90"
class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef USE_ASSUMED_SIZE
MATH_DATATYPE(kind=rck) :: a(obj%local_nrows,*)
#else
MATH_DATATYPE(kind=rck) :: a(obj%local_nrows,obj%local_ncols)
#endif
integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
integer(kind=ik) :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
integer(kind=ik) :: n, nc, i, info
integer(kind=ik) :: lcs, lce, lrs, lre
integer(kind=ik) :: tile_size, l_rows_tile, l_cols_tile
MATH_DATATYPE(kind=rck), allocatable :: tmp1(:), tmp2(:,:), tmatr(:,:), tmatc(:,:)
logical :: wantDebug
logical :: success
integer(kind=ik) :: istat, debug, error
character(200) :: errorMessage
integer(kind=ik) :: nrThreads
call obj%timer%start("elpa_cholesky_&
&MATH_DATATYPE&
&_&
&PRECISION&
&")
#ifdef WITH_OPENMP
nrThreads=omp_get_max_threads()
#else
nrThreads=1
#endif
na = obj%na
lda = obj%local_nrows
nblk = obj%nblk
matrixCols = obj%local_ncols
call obj%get("mpi_comm_rows",mpi_comm_rows,error )
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
call obj%get("mpi_comm_cols",mpi_comm_cols,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
call obj%get("debug",debug,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
if (debug == 1) then
wantDebug = .true.
else
wantDebug = .false.
endif
call obj%timer%start("mpi_communication")
call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
call obj%timer%stop("mpi_communication")
success = .true.
! Matrix is split into tiles; work is done only for tiles on the diagonal or above
tile_size = nblk*least_common_multiple(np_rows,np_cols) ! minimum global tile size
tile_size = ((128*max(np_rows,np_cols)-1)/tile_size+1)*tile_size ! make local tiles at least 128 wide
l_rows_tile = tile_size/np_rows ! local rows of a tile
l_cols_tile = tile_size/np_cols ! local cols of a tile
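! Worked example of the tile sizing above: with nblk=16, np_rows=4,
! np_cols=6 we get tile_size = 16*lcm(4,6) = 192, then
! tile_size = ((128*6-1)/192+1)*192 = 4*192 = 768 (integer division),
! so each tile spans l_rows_tile = 768/4 = 192 local rows and
! l_cols_tile = 768/6 = 128 local columns, i.e. at least 128 wide.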
l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a
allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&: error when allocating tmp1 "//errorMessage
stop 1
endif
allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&
&: error when allocating tmp2 "//errorMessage
stop 1
endif
tmp1 = 0
tmp2 = 0
allocate(tmatr(l_rows,nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&
&: error when allocating tmatr "//errorMessage
stop 1
endif
allocate(tmatc(l_cols,nblk), stat=istat, errmsg=errorMessage)
if (istat .ne. 0) then
print *,"elpa_cholesky_&
&MATH_DATATYPE&
&: error when allocating tmatc "//errorMessage
stop 1
endif
tmatr = 0
tmatc = 0
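! Main loop over block columns: a right-looking blocked Cholesky
! factorization. In each step the diagonal block is factorized (POTRF),
! the block row to its right is transformed by a triangular solve (TRSM),
! and the trailing matrix is updated by a rank-nblk product (GEMM) in
! the tile loop at the end of the iteration.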
do n = 1, na, nblk
! Calculate first local row and column of the still remaining matrix
! on the local processor
l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)
l_rowx = local_index(n+nblk, my_prow, np_rows, nblk, +1)
l_colx = local_index(n+nblk, my_pcol, np_cols, nblk, +1)
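! l_row1/l_col1 are the first local indices belonging to block column n;
! l_rowx/l_colx are the first local indices of the trailing part that
! starts at global index n+nblk.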
if (n+nblk > na) then
! This is the last step, just do a Cholesky-Factorization
! of the remaining block
if (my_prow==prow(n, nblk, np_rows) .and. my_pcol==pcol(n, nblk, np_cols)) then
call obj%timer%start("blas")
call PRECISION_POTRF('U', na-n+1, a(l_row1,l_col1), lda, info)
call obj%timer%stop("blas")
if (info/=0) then
if (wantDebug) write(error_unit,*) "elpa_cholesky_&
&MATH_DATATYPE&
#if REALCASE == 1
&: Error in dpotrf: ",info
#endif
#if COMPLEXCASE == 1
&: Error in zpotrf: ",info
#endif
success = .false.
return
endif
endif
exit ! Loop
endif
if (my_prow==prow(n, nblk, np_rows)) then
if (my_pcol==pcol(n, nblk, np_cols)) then
! The process owning the upper left remaining block does the
! Cholesky-Factorization of this block
call obj%timer%start("blas")
call PRECISION_POTRF('U', nblk, a(l_row1,l_col1), lda, info)
call obj%timer%stop("blas")
if (info/=0) then
if (wantDebug) write(error_unit,*) "elpa_cholesky_&
&MATH_DATATYPE&
#if REALCASE == 1
&: Error in dpotrf 2: ",info
#endif
#if COMPLEXCASE == 1
&: Error in zpotrf 2: ",info
#endif
success = .false.
return
endif
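! Pack the upper triangle of the factorized nblk x nblk diagonal block
! column by column into tmp1; e.g. for nblk=3 this yields
! tmp1 = (u11, u12, u22, u13, u23, u33), i.e. nblk*(nblk+1)/2 elements,
! matching the broadcast count below.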
nc = 0
do i=1,nblk
tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
nc = nc+i
enddo
endif
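! Broadcast the packed factor along mpi_comm_cols so that every process
! column of the owning process row can apply the triangular solve to its
! share of the block row.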
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
call MPI_Bcast(tmp1, nblk*(nblk+1)/2, &
#if REALCASE == 1
MPI_REAL_PRECISION, &
#endif
#if COMPLEXCASE == 1
MPI_COMPLEX_PRECISION, &
#endif
pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
nc = 0
do i=1,nblk
tmp2(1:i,i) = tmp1(nc+1:nc+i)
nc = nc+i
enddo
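! tmp2 now holds the upper triangular factor U11 of the diagonal block.
! The TRSM solves op(U11) * X = A(n:n+nblk-1, n+nblk:na), where op is
! transpose in the real case and conjugate transpose in the complex case,
! and overwrites this block row of a with X.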
call obj%timer%start("blas")
if (l_cols-l_colx+1>0) &
call PRECISION_TRSM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', nblk, l_cols-l_colx+1, ONE, tmp2, &
ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
call obj%timer%stop("blas")
endif
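! Distribute the factorized block row for the trailing update: the owning
! process row copies it (conjugated in the complex case) into tmatc, and
! each of its nblk columns is broadcast along mpi_comm_rows to all
! process rows.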
do i=1,nblk
#if REALCASE == 1
if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = a(l_row1+i-1,l_colx:l_cols)
#endif
#if COMPLEXCASE == 1
if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = conjg(a(l_row1+i-1,l_colx:l_cols))
#endif
#ifdef WITH_MPI
call obj%timer%start("mpi_communication")
if (l_cols-l_colx+1>0) &
call MPI_Bcast(tmatc(l_colx,i), l_cols-l_colx+1, MPI_MATH_DATATYPE_PRECISION, &
prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
enddo
! this has to be checked since it was changed substantially when the code was made type safe
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
(obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
tmatr, ubound(tmatr,dim=1), mpi_comm_rows, &
n, na, nblk, nblk, nrThreads)
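! Trailing matrix update on tiles on or above the diagonal:
! a(lrs:lre,lcs:lce) = a(lrs:lre,lcs:lce) - tmatr * tmatc**T
! (conjugate transpose in the complex case), a rank-nblk update using
! tmatc and its row-distributed transpose tmatr built above.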
do i=0,(na-1)/tile_size
lcs = max(l_colx,i*l_cols_tile+1)
lce = min(l_cols,(i+1)*l_cols_tile)
lrs = l_rowx
lre = min(l_rows,(i+1)*l_rows_tile)
if (lce<lcs .or. lre<lrs) cycle
call obj%timer%start("blas")
call PRECISION_GEMM('N', BLAS_TRANS_OR_CONJ, lre-lrs+1, lce-lcs+1, nblk, -ONE, &
tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
ONE, a(lrs,lcs), lda)
call obj%timer%stop("blas")
enddo