Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
5011a02f
Commit
5011a02f
authored
Aug 09, 2016
by
Andreas Marek
Browse files
Merge branch 'bugfixes_current_release' into ELPA_GPU
parents
b33d2ca9
43d232a1
Changes
6
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
5011a02f
...
...
@@ -281,7 +281,9 @@ dist_files_DATA = \
test
/Fortran/test_transpose_multiply_real.F90
\
test
/Fortran/test_transpose_multiply_complex.F90
\
test
/Fortran/test_cholesky_real.F90
\
test
/Fortran/test_invert_trm_real.F90
\
test
/Fortran/test_cholesky_complex.F90
\
test
/Fortran/test_invert_trm_complex.F90
\
src/elpa2_print_kernels.F90
#end needed
...
...
@@ -310,7 +312,9 @@ noinst_PROGRAMS = \
elpa1_real_transpose_multiply@SUFFIX@
\
elpa1_complex_transpose_multiply@SUFFIX@
\
elpa1_real_cholesky@SUFFIX@
\
elpa1_real_invert_trm@SUFFIX@
\
elpa1_complex_cholesky@SUFFIX@
\
elpa1_complex_invert_trm@SUFFIX@
\
elpa1_test_real_with_c@SUFFIX@
\
elpa1_test_real_c_version@SUFFIX@
\
elpa1_test_complex_c_version@SUFFIX@
\
...
...
@@ -325,6 +329,7 @@ noinst_PROGRAMS += \
elpa2_test_complex_default_single_precision@SUFFIX@
\
elpa1_complex_transpose_multiply_single_precision@SUFFIX@
\
elpa1_complex_cholesky_single_precision@SUFFIX@
\
elpa1_complex_invert_trm_single_precision@SUFFIX@
\
elpa2_test_complex_api_single_precision@SUFFIX@
endif
...
...
@@ -337,6 +342,7 @@ noinst_PROGRAMS += \
elpa2_test_real_api_single_precision@SUFFIX@
\
elpa1_real_transpose_multiply_single_precision@SUFFIX@
\
elpa1_real_cholesky_single_precision@SUFFIX@
\
elpa1_real_invert_trm_single_precision@SUFFIX@
\
elpa1_real_toeplitz_single_precision@SUFFIX@
endif
...
...
@@ -429,11 +435,21 @@ elpa1_real_cholesky@SUFFIX@_LDADD = $(build_lib)
elpa1_real_cholesky@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_real_cholesky@SUFFIX@
_DEPENDENCIES
=
test
/Fortran/elpa_print_headers.X90
elpa1_real_invert_trm@SUFFIX@
_SOURCES
=
test
/Fortran/test_invert_trm_real.F90
elpa1_real_invert_trm@SUFFIX@
_LDADD
=
$(build_lib)
elpa1_real_invert_trm@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_real_invert_trm@SUFFIX@
_DEPENDENCIES
=
test
/Fortran/elpa_print_headers.X90
elpa1_complex_cholesky@SUFFIX@
_SOURCES
=
test
/Fortran/test_cholesky_complex.F90
elpa1_complex_cholesky@SUFFIX@
_LDADD
=
$(build_lib)
elpa1_complex_cholesky@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_complex_cholesky@SUFFIX@
_DEPENDENCIES
=
test
/Fortran/elpa_print_headers.X90
elpa1_complex_invert_trm@SUFFIX@
_SOURCES
=
test
/Fortran/test_invert_trm_complex.F90
elpa1_complex_invert_trm@SUFFIX@
_LDADD
=
$(build_lib)
elpa1_complex_invert_trm@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_complex_invert_trm@SUFFIX@
_DEPENDENCIES
=
test
/Fortran/elpa_print_headers.X90
elpa2_test_real@SUFFIX@
_SOURCES
=
test
/Fortran/test_real2.F90
elpa2_test_real@SUFFIX@
_LDADD
=
$(build_lib)
elpa2_test_real@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
...
...
@@ -505,6 +521,11 @@ elpa1_real_cholesky_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa1_real_cholesky_single_precision@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_real_cholesky_single_precision@SUFFIX@
_DEPENDENCIES
=
test
/Fortran/elpa_print_headers.X90
elpa1_real_invert_trm_single_precision@SUFFIX@
_SOURCES
=
test
/Fortran/test_invert_trm_real_single.F90
elpa1_real_invert_trm_single_precision@SUFFIX@
_LDADD
=
$(build_lib)
elpa1_real_invert_trm_single_precision@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_real_invert_trm_single_precision@SUFFIX@
_DEPENDENCIES
=
test
/Fortran/elpa_print_headers.X90
elpa2_test_real_default_single_precision@SUFFIX@
_SOURCES
=
test
/Fortran/test_real2_default.F90
elpa2_test_real_default_single_precision@SUFFIX@
_LDADD
=
$(build_lib)
#elpa2_test_real_default_single_precision@SUFFIX@_LDFLAGS = -static
...
...
@@ -550,6 +571,11 @@ elpa1_complex_cholesky_single_precision@SUFFIX@_LDADD = $(build_lib)
elpa1_complex_cholesky_single_precision@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_complex_cholesky_single_precision@SUFFIX@
_DEPENDENCIES
=
test
/Fortran/elpa_print_headers.X90
elpa1_complex_invert_trm_single_precision@SUFFIX@
_SOURCES
=
test
/Fortran/test_invert_trm_complex_single.F90
elpa1_complex_invert_trm_single_precision@SUFFIX@
_LDADD
=
$(build_lib)
elpa1_complex_invert_trm_single_precision@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_complex_invert_trm_single_precision@SUFFIX@
_DEPENDENCIES
=
test
/Fortran/elpa_print_headers.X90
elpa2_test_complex_api_single_precision@SUFFIX@
_SOURCES
=
test
/Fortran/test_complex2_api_single.F90
elpa2_test_complex_api_single_precision@SUFFIX@
_LDADD
=
$(build_lib)
elpa2_test_complex_api_single_precision@SUFFIX@
_FCFLAGS
=
$(AM_FCFLAGS)
@FC_MODOUT@private_modules @FC_MODINC@private_modules
...
...
@@ -599,7 +625,9 @@ check_SCRIPTS = \
elpa1_real_transpose_multiply@SUFFIX@.sh
\
elpa1_complex_transpose_multiply@SUFFIX@.sh
\
elpa1_real_cholesky@SUFFIX@.sh
\
elpa1_real_invert_trm@SUFFIX@.sh
\
elpa1_complex_cholesky@SUFFIX@.sh
\
elpa1_complex_invert_trm@SUFFIX@.sh
\
elpa2_print_kernels@SUFFIX@
\
elpa1_test_real_c_version@SUFFIX@.sh
\
elpa1_test_complex_c_version@SUFFIX@.sh
\
...
...
@@ -614,6 +642,7 @@ check_SCRIPTS += \
elpa2_test_real_qr_single_precision@SUFFIX@.sh
\
elpa1_real_transpose_multiply_single_precision@SUFFIX@.sh
\
elpa1_real_cholesky_single_precision@SUFFIX@.sh
\
elpa1_real_invert_trm_single_precision@SUFFIX@.sh
\
elpa2_test_real_api_single_precision@SUFFIX@.sh
endif
...
...
@@ -624,6 +653,7 @@ check_SCRIPTS += \
elpa2_test_complex_default_single_precision@SUFFIX@.sh
\
elpa1_complex_transpose_multiply_single_precision@SUFFIX@.sh
\
elpa1_complex_cholesky_single_precision@SUFFIX@.sh
\
elpa1_complex_invert_trm_single_precision@SUFFIX@.sh
\
elpa2_test_complex_api_single_precision@SUFFIX@.sh
endif
...
...
src/elpa1_auxiliary.F90
View file @
5011a02f
...
...
@@ -96,7 +96,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be factorized.
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> On return, the upper triangle contains the Cholesky factor
!> and the lower triangle is set to 0.
!> \param lda Leading dimension of a
...
...
@@ -115,7 +115,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be inverted
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> The lower triangle is not referenced.
!> \param lda Leading dimension of a
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
...
...
@@ -134,7 +134,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be factorized.
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> On return, the upper triangle contains the Cholesky factor
!> and the lower triangle is set to 0.
!> \param lda Leading dimension of a
...
...
@@ -155,7 +155,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be inverted
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> The lower triangle is not referenced.
!> \param lda Leading dimension of a
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
...
...
@@ -267,7 +267,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be factorized.
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> On return, the upper triangle contains the Cholesky factor
!> and the lower triangle is set to 0.
!> \param lda Leading dimension of a
...
...
@@ -525,7 +525,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be factorized.
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> On return, the upper triangle contains the Cholesky factor
!> and the lower triangle is set to 0.
!> \param lda Leading dimension of a
...
...
@@ -779,7 +779,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be inverted
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> The lower triangle is not referenced.
!> \param lda Leading dimension of a
!> \param nblk blocksize of cyclic distribution, must be the same in both directions!
...
...
@@ -966,7 +966,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be inverted
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> The lower triangle is not referenced.
!> \param lda Leading dimension of a
!> \param matrixCols local columns of matrix a
...
...
@@ -1156,7 +1156,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be factorized.
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> On return, the upper triangle contains the Cholesky factor
!> and the lower triangle is set to 0.
!> \param lda Leading dimension of a
...
...
@@ -1410,7 +1410,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be factorized.
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> On return, the upper triangle contains the Cholesky factor
!> and the lower triangle is set to 0.
!> \param lda Leading dimension of a
...
...
@@ -1667,7 +1667,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be inverted
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> The lower triangle is not referenced.
!> \param lda Leading dimension of a
!> \param matrixCols local columns of matrix a
...
...
@@ -1875,7 +1875,7 @@ module elpa1_auxiliary
!> \param na Order of matrix
!> \param a(lda,matrixCols) Distributed matrix which should be inverted
!> Distribution is like in Scalapack.
!> Only upper triangle
is
needs to be set.
!> Only upper triangle needs to be set.
!> The lower triangle is not referenced.
!> \param lda Leading dimension of a
!> \param matrixCols local columns of matrix a
...
...
test/Fortran/test_invert_trm_complex.F90
0 → 100644
View file @
5011a02f
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
!>
program
test_invert_trm
use
precision
use
ELPA1
use
elpa_utilities
#ifdef WITH_OPENMP
use
test_util
#endif
use
mod_read_input_parameters
use
mod_check_correctness
use
mod_setup_mpi
use
mod_blacs_infrastructure
use
mod_prepare_matrix
use
elpa_mpi
#ifdef HAVE_REDIRECT
use
redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use
timings
#endif
use
output_types
implicit
none
!-------------------------------------------------------------------------------
! Please set system size parameters below!
! na: System size
! nev: Number of eigenvectors to be calculated
! nblk: Blocking factor in block cyclic distribution
!-------------------------------------------------------------------------------
integer
(
kind
=
ik
)
::
nblk
integer
(
kind
=
ik
)
::
na
,
nev
integer
(
kind
=
ik
)
::
np_rows
,
np_cols
,
na_rows
,
na_cols
integer
(
kind
=
ik
)
::
myid
,
nprocs
,
my_prow
,
my_pcol
,
mpi_comm_rows
,
mpi_comm_cols
integer
(
kind
=
ik
)
::
i
,
mpierr
,
my_blacs_ctxt
,
sc_desc
(
9
),
info
,
nprow
,
npcol
integer
,
external
::
numroc
real
(
kind
=
rk8
),
allocatable
::
ev
(:),
xr
(:,:)
complex
(
kind
=
ck8
),
allocatable
::
a
(:,:),
b
(:,:),
c
(:,:),
z
(:,:),
tmp1
(:,:),
tmp2
(:,:),
as
(:,:)
complex
(
kind
=
ck8
),
allocatable
::
d
(:),
e
(:),
bs
(:,:)
complex
(
kind
=
rk8
)
::
diagonalElement
,
subdiagonalElement
integer
(
kind
=
ik
)
::
loctmp
,
rowLocal
,
colLocal
complex
(
kind
=
ck8
),
parameter
::
CZERO
=
(
0._rk8
,
0._rk8
),
CONE
=
(
1._rk8
,
0._rk8
)
real
(
kind
=
rk8
)
::
norm
,
normmax
#ifdef WITH_MPI
real
(
kind
=
rk8
)
::
pzlange
#else
real
(
kind
=
rk8
)
::
zlange
#endif
integer
(
kind
=
ik
)
::
iseed
(
4096
)
! Random seed, size should be sufficient for every generator
complex
(
kind
=
ck8
),
parameter
::
pi
=
(
3.141592653589793238462643383279_rk8
,
0._rk8
)
integer
(
kind
=
ik
)
::
STATUS
#ifdef WITH_OPENMP
integer
(
kind
=
ik
)
::
omp_get_max_threads
,
required_mpi_thread_level
,
&
provided_mpi_thread_level
#endif
type
(
output_t
)
::
write_to_file
logical
::
success
character
(
len
=
8
)
::
task_suffix
integer
(
kind
=
ik
)
::
j
!-------------------------------------------------------------------------------
success
=
.true.
call
read_input_parameters
(
na
,
nev
,
nblk
,
write_to_file
)
!-------------------------------------------------------------------------------
! MPI Initialization
call
setup_mpi
(
myid
,
nprocs
)
STATUS
=
0
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call
timer
%
measure_flops
(
.true.
)
#endif
call
timer
%
measure_allocated_memory
(
.true.
)
call
timer
%
measure_virtual_memory
(
.true.
)
call
timer
%
measure_max_allocated_memory
(
.true.
)
call
timer
%
set_print_options
(&
#ifdef HAVE_LIBPAPI
print_flop_count
=
.true.
,
&
print_flop_rate
=
.true.
,
&
#endif
print_allocated_memory
=
.true.
,
&
print_virtual_memory
=
.true.
,
&
print_max_allocated_memory
=
.true.
)
call
timer
%
enable
()
call
timer
%
start
(
"program"
)
#endif
do
np_cols
=
NINT
(
SQRT
(
REAL
(
nprocs
))),
2
,
-1
if
(
mod
(
nprocs
,
np_cols
)
==
0
)
exit
enddo
! at the end of the above loop, nprocs is always divisible by np_cols
np_rows
=
nprocs
/
np_cols
if
(
myid
==
0
)
then
print
'(3(a,i0))'
,
'Matrix size='
,
na
,
', Block size='
,
nblk
print
'(3(a,i0))'
,
'Number of processor rows='
,
np_rows
,
', cols='
,
np_cols
,
', total='
,
nprocs
print
*
endif
!-------------------------------------------------------------------------------
! Set up BLACS context and MPI communicators
!
! The BLACS context is only necessary for using Scalapack.
!
! For ELPA, the MPI communicators along rows/cols are sufficient,
! and the grid setup may be done in an arbitrary way as long as it is
! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
! process has a unique (my_prow,my_pcol) pair).
call
set_up_blacsgrid
(
mpi_comm_world
,
my_blacs_ctxt
,
np_rows
,
np_cols
,
&
nprow
,
npcol
,
my_prow
,
my_pcol
)
if
(
myid
==
0
)
then
print
'(a)'
,
'| Past BLACS_Gridinfo.'
end
if
! All ELPA routines need MPI communicators for communicating within
! rows or columns of processes, these are set in get_elpa_communicators.
mpierr
=
get_elpa_communicators
(
mpi_comm_world
,
my_prow
,
my_pcol
,
&
mpi_comm_rows
,
mpi_comm_cols
)
if
(
myid
==
0
)
then
print
'(a)'
,
'| Past split communicator setup for rows and columns.'
end
if
call
set_up_blacs_descriptor
(
na
,
nblk
,
my_prow
,
my_pcol
,
np_rows
,
np_cols
,
&
na_rows
,
na_cols
,
sc_desc
,
my_blacs_ctxt
,
info
)
if
(
myid
==
0
)
then
print
'(a)'
,
'| Past scalapack descriptor setup.'
end
if
!-------------------------------------------------------------------------------
! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
start
(
"set up matrix"
)
#endif
allocate
(
a
(
na_rows
,
na_cols
))
allocate
(
b
(
na_rows
,
na_cols
))
allocate
(
bs
(
na_rows
,
na_cols
))
allocate
(
c
(
na_rows
,
na_cols
))
allocate
(
z
(
na_rows
,
na_cols
))
allocate
(
as
(
na_rows
,
na_cols
))
allocate
(
ev
(
na
))
allocate
(
xr
(
na_rows
,
na_cols
))
call
prepare_matrix_double
(
na
,
myid
,
sc_desc
,
iseed
,
xr
,
b
,
z
,
bs
)
deallocate
(
xr
)
bs
(:,:)
=
b
(:,:)
a
(:,:)
=
CONE
-
CONE
diagonalElement
=
(
2.546_rk8
,
0.0_rk8
)
do
i
=
1
,
na
if
(
map_global_array_index_to_local_index
(
i
,
i
,
rowLocal
,
colLocal
,
nblk
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
))
then
a
(
rowLocal
,
colLocal
)
=
diagonalElement
*
abs
(
cos
(
pi
*
real
(
i
,
kind
=
rk8
)/
real
(
na
+1
,
kind
=
rk8
)
))
endif
enddo
as
(:,:)
=
a
(:,:)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"set up matrix"
)
#endif
!-------------------------------------------------------------------------------
! Calculate eigenvalues/eigenvectors
if
(
myid
==
0
)
then
print
'(a)'
,
'| Setting up tridiagonal matrix ... '
print
*
end
if
#ifdef WITH_MPI
call
mpi_barrier
(
mpi_comm_world
,
mpierr
)
! for correct timings only
#endif
success
=
elpa_cholesky_complex_double
(
na
,
a
,
na_rows
,
nblk
,
na_cols
,
mpi_comm_rows
,
mpi_comm_cols
,
.true.
)
if
(
.not.
(
success
))
then
write
(
error_unit
,
*
)
" elpa_cholesky_complex produced an error! Aborting..."
#ifdef WITH_MPI
call
MPI_ABORT
(
mpi_comm_world
,
1
,
mpierr
)
#endif
endif
as
(:,:)
=
a
(:,:)
if
(
myid
==
0
)
then
print
'(a)'
,
'| Setting up tridiagonal matrix complete.'
print
*
end
if
if
(
myid
==
0
)
then
print
'(a)'
,
'| Inverting tridiagonal matrix ... '
print
*
end
if
#ifdef WITH_MPI
call
mpi_barrier
(
mpi_comm_world
,
mpierr
)
! for correct timings only
#endif
success
=
elpa_invert_trm_complex_double
(
na
,
a
,
na_rows
,
nblk
,
na_cols
,
mpi_comm_rows
,
mpi_comm_cols
,
.true.
)
if
(
.not.
(
success
))
then
write
(
error_unit
,
*
)
" elpa_invert_trm_complex produced an error! Aborting..."
#ifdef WITH_MPI
call
MPI_ABORT
(
mpi_comm_world
,
1
,
mpierr
)
#endif
endif
if
(
myid
==
0
)
then
print
'(a)'
,
'| Inversion of tridiagonal matrix complete.'
print
*
end
if
!-------------------------------------------------------------------------------
! Test correctness of result (using plain scalapack routines)
allocate
(
tmp1
(
na_rows
,
na_cols
))
allocate
(
tmp2
(
na_rows
,
na_cols
))
tmp1
(:,:)
=
0.0_ck8
! tmp1 = a * a^-1 ! should be unity matrix
#ifdef WITH_MPI
call
pzgemm
(
"N"
,
"N"
,
na
,
na
,
na
,
CONE
,
as
,
1
,
1
,
sc_desc
,
a
,
1
,
1
,
&
sc_desc
,
CZERO
,
tmp1
,
1
,
1
,
sc_desc
)
#else
call
zgemm
(
"N"
,
"N"
,
na
,
na
,
na
,
CONE
,
as
,
na
,
a
,
na
,
CZERO
,
tmp1
,
na
)
#endif
! tmp2 = b * tmp1
tmp2
(:,:)
=
0.0_ck8
#ifdef WITH_MPI
call
pzgemm
(
"N"
,
"N"
,
na
,
na
,
na
,
CONE
,
b
,
1
,
1
,
sc_desc
,
tmp1
,
1
,
1
,
&
sc_desc
,
CZERO
,
tmp2
,
1
,
1
,
sc_desc
)
#else
call
zgemm
(
"N"
,
"N"
,
na
,
na
,
na
,
CONE
,
b
,
na
,
tmp1
,
na
,
CZERO
,
tmp2
,
na
)
#endif
! compare tmp2 with c
tmp2
(:,:)
=
tmp2
(:,:)
-
bs
(:,:)
tmp1
(:,:)
=
0.0_ck8
#ifdef WITH_MPI
norm
=
pzlange
(
"M"
,
na
,
na
,
tmp2
,
1
,
1
,
sc_desc
,
tmp1
)
#else
norm
=
zlange
(
"M"
,
na
,
na
,
tmp2
,
na_rows
,
tmp1
)
#endif
#ifdef WITH_MPI
call
mpi_allreduce
(
norm
,
normmax
,
1
,
MPI_REAL8
,
MPI_MAX
,
MPI_COMM_WORLD
,
mpierr
)
#else
normmax
=
norm
#endif
if
(
myid
.eq.
0
)
then
print
*
,
" Maximum error of result: "
,
normmax
endif
if
(
normmax
.gt.
5e-11_rk8
)
then
status
=
1
endif
deallocate
(
a
)
deallocate
(
b
)
deallocate
(
bs
)
deallocate
(
c
)
deallocate
(
as
)
deallocate
(
z
)
deallocate
(
tmp1
)
deallocate
(
tmp2
)
deallocate
(
ev
)
#ifdef HAVE_DETAILED_TIMINGS
call
timer
%
stop
(
"program"
)
print
*
,
" "
print
*
,
"Timings program:"
print
*
,
" "
call
timer
%
print
(
"program"
)
print
*
,
" "
print
*
,
"End timings program"
print
*
,
" "
#endif
#ifdef WITH_MPI
call
blacs_gridexit
(
my_blacs_ctxt
)
call
mpi_finalize
(
mpierr
)
#endif
call
EXIT
(
STATUS
)
end
!-------------------------------------------------------------------------------
test/Fortran/test_invert_trm_complex_single.F90
0 → 100644
View file @
5011a02f
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
!>
program
test_invert_trm
use
precision