elpa / elpa · Commits

Commit 1aa89171, authored Aug 07, 2020 by Andreas Marek

Cleanup of ELPA1

parent 69972fbf · Changes 7 · Pipelines 1
src/elpa1/elpa_cholesky_template.F90
@@ -44,300 +44,300 @@
#include "../general/sanity.F90"
#include "../general/error_checking.inc"
  use elpa1_compute
  use elpa_utilities
  use elpa_mpi
  use precision
  use elpa_abstract_impl
  use elpa_omp
  use elpa_blas_interfaces

  implicit none
#include "../general/precision_kinds.F90"
  class(elpa_abstract_impl_t), intent(inout) :: obj
  integer(kind=ik)                           :: na, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef USE_ASSUMED_SIZE
  MATH_DATATYPE(kind=rck)                    :: a(obj%local_nrows,*)
#else
  MATH_DATATYPE(kind=rck)                    :: a(obj%local_nrows,obj%local_ncols)
#endif
  integer(kind=ik)                           :: my_prow, my_pcol, np_rows, np_cols
  integer(kind=MPI_KIND)                     :: mpierr, my_prowMPI, my_pcolMPI, np_rowsMPI, np_colsMPI
  integer(kind=ik)                           :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
  integer(kind=ik)                           :: n, nc, i, info
  integer(kind=BLAS_KIND)                    :: infoBLAS
  integer(kind=ik)                           :: lcs, lce, lrs, lre
  integer(kind=ik)                           :: tile_size, l_rows_tile, l_cols_tile
  MATH_DATATYPE(kind=rck), allocatable       :: tmp1(:), tmp2(:,:), tmatr(:,:), tmatc(:,:)
  logical                                    :: wantDebug
  logical                                    :: success
  integer(kind=ik)                           :: istat, debug, error
  character(200)                             :: errorMessage
  integer(kind=ik)                           :: nrThreads

  call obj%timer%start("elpa_cholesky_&
  &MATH_DATATYPE&
  &_&
  &PRECISION&
  &")
#ifdef WITH_OPENMP
  ! store the number of OpenMP threads used in the calling function
  ! restore this at the end of ELPA 2
  omp_threads_caller = omp_get_max_threads()

  ! check the number of threads that ELPA should use internally
  call obj%get("omp_threads", nrThreads, error)
  call omp_set_num_threads(nrThreads)
#else
  nrThreads = 1
#endif
  na         = obj%na
  matrixRows = obj%local_nrows
  nblk       = obj%nblk
  matrixCols = obj%local_ncols
  call obj%get("mpi_comm_rows", mpi_comm_rows, error)
  if (error .ne. ELPA_OK) then
    print *, "Problem getting option for mpi_comm_rows. Aborting..."
    stop
  endif
  call obj%get("mpi_comm_cols", mpi_comm_cols, error)
  if (error .ne. ELPA_OK) then
    print *, "Problem getting option for mpi_comm_cols. Aborting..."
    stop
  endif

  call obj%get("debug", debug, error)
  if (error .ne. ELPA_OK) then
    print *, "Problem getting option for debug settings. Aborting..."
    stop
  endif
  if (debug == 1) then
    wantDebug = .true.
  else
    wantDebug = .false.
  endif
  call obj%timer%start("mpi_communication")
  call mpi_comm_rank(int(mpi_comm_rows,kind=MPI_KIND), my_prowMPI, mpierr)
  call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
  call mpi_comm_rank(int(mpi_comm_cols,kind=MPI_KIND), my_pcolMPI, mpierr)
  call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)

  my_prow = int(my_prowMPI, kind=c_int)
  np_rows = int(np_rowsMPI, kind=c_int)
  my_pcol = int(my_pcolMPI, kind=c_int)
  np_cols = int(np_colsMPI, kind=c_int)

  call obj%timer%stop("mpi_communication")
  success = .true.
  ! Matrix is split into tiles; work is done only for tiles on the diagonal or above
  tile_size = nblk*least_common_multiple(np_rows,np_cols) ! minimum global tile size
  tile_size = ((128*max(np_rows,np_cols)-1)/tile_size+1)*tile_size ! make local tiles at least 128 wide

  l_rows_tile = tile_size/np_rows ! local rows of a tile
  l_cols_tile = tile_size/np_cols ! local cols of a tile

  l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
  l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a

  allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
  check_allocate("elpa_cholesky: tmp1", istat, errorMessage)

  allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
  check_allocate("elpa_cholesky: tmp2", istat, errorMessage)

  tmp1 = 0
  tmp2 = 0

  allocate(tmatr(l_rows,nblk), stat=istat, errmsg=errorMessage)
  check_allocate("elpa_cholesky: tmatr", istat, errorMessage)

  allocate(tmatc(l_cols,nblk), stat=istat, errmsg=errorMessage)
  check_allocate("elpa_cholesky: tmatc", istat, errorMessage)

  tmatr = 0
  tmatc = 0
  do n = 1, na, nblk

    ! Calculate first local row and column of the still remaining matrix
    ! on the local processor
    l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
    l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)

    l_rowx = local_index(n+nblk, my_prow, np_rows, nblk, +1)
    l_colx = local_index(n+nblk, my_pcol, np_cols, nblk, +1)

    if (n+nblk > na) then

      ! This is the last step, just do a Cholesky-Factorization
      ! of the remaining block

      if (my_prow==prow(n, nblk, np_rows) .and. my_pcol==pcol(n, nblk, np_cols)) then
        call obj%timer%start("blas")

        call PRECISION_POTRF('U', int(na-n+1,kind=BLAS_KIND), a(l_row1,l_col1), &
                             int(matrixRows,kind=BLAS_KIND), infoBLAS )
        info = int(infoBLAS,kind=ik)
        call obj%timer%stop("blas")

        if (info/=0) then
          if (wantDebug) write(error_unit,*) "elpa_cholesky_&
          &MATH_DATATYPE&
#if REALCASE == 1
          &: Error in dpotrf: ",info
#endif
#if COMPLEXCASE == 1
          &: Error in zpotrf: ",info
#endif
          success = .false.
          return
        endif
      endif
      exit ! Loop
    endif

    if (my_prow==prow(n, nblk, np_rows)) then

      if (my_pcol==pcol(n, nblk, np_cols)) then

        ! The process owning the upper left remaining block does the
        ! Cholesky-Factorization of this block
        call obj%timer%start("blas")

        call PRECISION_POTRF('U', int(nblk,kind=BLAS_KIND), a(l_row1,l_col1), &
                             int(matrixRows,kind=BLAS_KIND), infoBLAS )
        info = int(infoBLAS,kind=ik)
        call obj%timer%stop("blas")

        if (info/=0) then
          if (wantDebug) write(error_unit,*) "elpa_cholesky_&
          &MATH_DATATYPE&
#if REALCASE == 1
          &: Error in dpotrf 2: ",info
#endif
#if COMPLEXCASE == 1
          &: Error in zpotrf 2: ",info
#endif
          success = .false.
          return
        endif

        nc = 0
        do i = 1, nblk
          tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
          nc = nc+i
        enddo
      endif
#ifdef WITH_MPI
      call obj%timer%start("mpi_communication")

      call MPI_Bcast(tmp1, int(nblk*(nblk+1)/2,kind=MPI_KIND), &
#if REALCASE == 1
                     MPI_REAL_PRECISION, &
#endif
#if COMPLEXCASE == 1
                     MPI_COMPLEX_PRECISION, &
#endif
                     int(pcol(n, nblk, np_cols),kind=MPI_KIND), int(mpi_comm_cols,kind=MPI_KIND), mpierr)

      call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
      nc = 0
      do i = 1, nblk
        tmp2(1:i,i) = tmp1(nc+1:nc+i)
        nc = nc+i
      enddo

      call obj%timer%start("blas")
      if (l_cols-l_colx+1 > 0) &
        call PRECISION_TRSM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', int(nblk,kind=BLAS_KIND), &
                            int(l_cols-l_colx+1,kind=BLAS_KIND), ONE, tmp2, &
                            int(ubound(tmp2,dim=1),kind=BLAS_KIND), a(l_row1,l_colx), int(matrixRows,kind=BLAS_KIND) )
      call obj%timer%stop("blas")
    endif

    do i = 1, nblk
#if REALCASE == 1
      if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = a(l_row1+i-1,l_colx:l_cols)
#endif
#if COMPLEXCASE == 1
      if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = conjg(a(l_row1+i-1,l_colx:l_cols))
#endif
#ifdef WITH_MPI
      call obj%timer%start("mpi_communication")
      if (l_cols-l_colx+1 > 0) &
        call MPI_Bcast(tmatc(l_colx,i), int(l_cols-l_colx+1,kind=MPI_KIND), MPI_MATH_DATATYPE_PRECISION, &
                       int(prow(n, nblk, np_rows),kind=MPI_KIND), int(mpi_comm_rows,kind=MPI_KIND), mpierr)

      call obj%timer%stop("mpi_communication")
#endif /* WITH_MPI */
    enddo
    ! this has to be checked since it was changed substantially when doing type safe
    call elpa_transpose_vectors_&
    &MATH_DATATYPE&
    &_&
    &PRECISION &
    (obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
     tmatr, ubound(tmatr,dim=1), mpi_comm_rows, &
     n, na, nblk, nblk, nrThreads)

    do i = 0, (na-1)/tile_size
      lcs = max(l_colx, i*l_cols_tile+1)
      lce = min(l_cols, (i+1)*l_cols_tile)
      lrs = l_rowx
      lre = min(l_rows, (i+1)*l_rows_tile)
      if (lce < lcs .or. lre < lrs) cycle

      call obj%timer%start("blas")
      call PRECISION_GEMM('N', BLAS_TRANS_OR_CONJ, int(lre-lrs+1,kind=BLAS_KIND), int(lce-lcs+1,kind=BLAS_KIND), &
                          int(nblk,kind=BLAS_KIND), -ONE, &
                          tmatr(lrs,1), int(ubound(tmatr,dim=1),kind=BLAS_KIND), tmatc(lcs,1), &
                          int(ubound(tmatc,dim=1),kind=BLAS_KIND), &
                          ONE, a(lrs,lcs), int(matrixRows,kind=BLAS_KIND))
      call obj%timer%stop("blas")
    enddo
  enddo

  deallocate(tmp1, tmp2, tmatr, tmatc, stat=istat, errmsg=errorMessage)
  check_deallocate("elpa_cholesky: tmp1, tmp2, tmatr, tmatc", istat, errorMessage)

  ! Set the lower triangle to 0, it contains garbage (from the above matrix multiplications)
  do i = 1, na
    if (my_pcol==pcol(i, nblk, np_cols)) then
      ! column i is on local processor
      l_col1 = local_index(i  , my_pcol, np_cols, nblk, +1)  ! local column number
      l_row1 = local_index(i+1, my_prow, np_rows, nblk, +1)  ! first row below diagonal
      a(l_row1:l_rows,l_col1) = 0
    endif
  enddo

  ! restore original OpenMP settings
#ifdef WITH_OPENMP
  ! store the number of OpenMP threads used in the calling function
  ! restore this at the end of ELPA 2
  call omp_set_num_threads(omp_threads_caller)
#endif
  call obj%timer%stop("elpa_cholesky_&
  &MATH_DATATYPE&
  &_&
  &PRECISION&
  &")
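
For reference, the distributed loop above follows the standard blocked, upper-triangular Cholesky scheme: factor the diagonal block (PRECISION_POTRF), solve the panel to its right (PRECISION_TRSM), broadcast the panel along the process rows and columns, and subtract its outer product from the trailing matrix (PRECISION_GEMM). Below is a minimal serial sketch of that same scheme; it is not ELPA code, the names are purely illustrative, and it assumes only the standard LAPACK/BLAS routines dpotrf, dtrsm and dgemm.

! Serial sketch (illustration only) of the blocked upper Cholesky A = U^T * U
! that the ELPA template above distributes over a 2D process grid.
subroutine blocked_cholesky_upper(a, n, nblk, info)
  implicit none
  integer, intent(in)             :: n, nblk
  double precision, intent(inout) :: a(n,n)
  integer, intent(out)            :: info
  integer :: j, jb, nrest

  info = 0
  do j = 1, n, nblk
    jb = min(nblk, n-j+1)

    ! Factor the current diagonal block (upper triangle)
    call dpotrf('U', jb, a(j,j), n, info)
    if (info /= 0) return

    nrest = n - (j+jb) + 1
    if (nrest > 0) then
      ! Panel solve: A(j:j+jb-1, j+jb:n) <- U11^{-T} * A(j:j+jb-1, j+jb:n)
      call dtrsm('L', 'U', 'T', 'N', jb, nrest, 1.0d0, a(j,j), n, a(j,j+jb), n)

      ! Trailing update: A22 <- A22 - U12^T * U12
      call dgemm('T', 'N', nrest, nrest, jb, -1.0d0, a(j,j+jb), n, &
                 a(j,j+jb), n, 1.0d0, a(j+jb,j+jb), n)
    endif
  enddo
end subroutine

The tile_size arithmetic near the top of the routine only changes how the trailing update is chunked across the grid: for example, with nblk = 64 on a 4 x 3 process grid, least_common_multiple(4,3) = 12 gives tile_size = 768; since 128*max(4,3) = 512 already fits inside one such tile, the rounding line keeps 768, i.e. l_rows_tile = 192 and l_cols_tile = 256.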
src/elpa1/elpa_invert_trm.F90
@@ -53,214 +53,214 @@
#include "../general/sanity.F90"
#include "../general/error_checking.inc"
  use precision
  use elpa1_compute
  use elpa_utilities
  use elpa_mpi
  use elpa_abstract_impl
  use elpa_blas_interfaces

  implicit none
#include "../general/precision_kinds.F90"
  class(elpa_abstract_impl_t), intent(inout) :: obj
  integer(kind=ik)                           :: na, matrixRows, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef USE_ASSUMED_SIZE
  MATH_DATATYPE(kind=rck)                    :: a(obj%local_nrows,*)
#else
  MATH_DATATYPE(kind=rck)                    :: a(obj%local_nrows,obj%local_ncols)
#endif
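
The USE_ASSUMED_SIZE switch seen in both files selects between two declarations of the matrix argument a. The following stand-alone sketch is not ELPA code and uses illustrative names only; it simply shows what the two Fortran dummy-argument forms mean.

! Assumed-size: only the leading dimension is known to the callee; the
! compiler cannot bounds-check the last dimension.
subroutine touch_assumed_size(nrows, a)
  implicit none
  integer, intent(in)             :: nrows
  double precision, intent(inout) :: a(nrows,*)
  a(1,1) = 0.0d0
end subroutine

! Explicit-shape: the full extent is passed, so the array can be
! bounds-checked and used in whole-array operations.
subroutine touch_explicit_shape(nrows, ncols, a)
  implicit none
  integer, intent(in)             :: nrows, ncols
  double precision, intent(inout) :: a(nrows,ncols)
  a(1:nrows,1:ncols) = 0.0d0
end subroutine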