diff --git a/Makefile.am b/Makefile.am
index a1303792ad70bb2485301b8990dafe726715b4b9..cd6e49669532d1b5e86ed30aef027aeb6c28d9f0 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -199,7 +199,9 @@ dist_files_DATA = \
test/Fortran/test_transpose_multiply_real.F90 \
test/Fortran/test_transpose_multiply_complex.F90 \
test/Fortran/test_cholesky_real.F90 \
+ test/Fortran/test_invert_trm_real.F90 \
test/Fortran/test_cholesky_complex.F90 \
+ test/Fortran/test_invert_trm_complex.F90 \
src/elpa2_print_kernels.F90
dist_doc_DATA = README.md USERS_GUIDE.md INSTALL.md CONTRIBUTING.md LICENSE Changelog COPYING/COPYING COPYING/gpl.txt COPYING/lgpl.txt
@@ -226,7 +228,9 @@ noinst_PROGRAMS = \
elpa1_real_transpose_multiply@SUFFIX@ \
elpa1_complex_transpose_multiply@SUFFIX@ \
elpa1_real_cholesky@SUFFIX@ \
+ elpa1_real_invert_trm@SUFFIX@ \
elpa1_complex_cholesky@SUFFIX@ \
+ elpa1_complex_invert_trm@SUFFIX@ \
elpa1_test_real_with_c@SUFFIX@ \
elpa1_test_real_c_version@SUFFIX@ \
elpa1_test_complex_c_version@SUFFIX@ \
@@ -301,11 +305,21 @@ elpa1_real_cholesky@SUFFIX@_LDADD = $(build_lib)
elpa1_real_cholesky@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_real_cholesky@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
+elpa1_real_invert_trm@SUFFIX@_SOURCES = test/Fortran/test_invert_trm_real.F90
+elpa1_real_invert_trm@SUFFIX@_LDADD = $(build_lib)
+elpa1_real_invert_trm@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
+EXTRA_elpa1_real_invert_trm@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
+
elpa1_complex_cholesky@SUFFIX@_SOURCES = test/Fortran/test_cholesky_complex.F90
elpa1_complex_cholesky@SUFFIX@_LDADD = $(build_lib)
elpa1_complex_cholesky@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
EXTRA_elpa1_complex_cholesky@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
+elpa1_complex_invert_trm@SUFFIX@_SOURCES = test/Fortran/test_invert_trm_complex.F90
+elpa1_complex_invert_trm@SUFFIX@_LDADD = $(build_lib)
+elpa1_complex_invert_trm@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
+EXTRA_elpa1_complex_invert_trm@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
+
elpa1_test_real_with_c@SUFFIX@_SOURCES = test/Fortran/test_real_with_c.F90
elpa1_test_real_with_c@SUFFIX@_LDADD = $(build_lib)
elpa1_test_real_with_c@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
@@ -372,7 +386,9 @@ check_SCRIPTS = \
elpa1_real_transpose_multiply@SUFFIX@.sh \
elpa1_complex_transpose_multiply@SUFFIX@.sh \
elpa1_real_cholesky@SUFFIX@.sh \
+ elpa1_real_invert_trm@SUFFIX@.sh \
elpa1_complex_cholesky@SUFFIX@.sh \
+ elpa1_complex_invert_trm@SUFFIX@.sh \
elpa2_print_kernels@SUFFIX@ \
elpa1_test_real_c_version@SUFFIX@.sh \
elpa1_test_complex_c_version@SUFFIX@.sh \
diff --git a/test/Fortran/test_invert_trm_complex.F90 b/test/Fortran/test_invert_trm_complex.F90
new file mode 100644
index 0000000000000000000000000000000000000000..65e1e65cfe8aaadb2d82c106e31005b4e1026e2d
--- /dev/null
+++ b/test/Fortran/test_invert_trm_complex.F90
@@ -0,0 +1,361 @@
+! This file is part of ELPA.
+!
+! The ELPA library was originally created by the ELPA consortium,
+! consisting of the following organizations:
+!
+! - Max Planck Computing and Data Facility (MPCDF), formerly known as
+! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
+! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
+! Informatik,
+! - Technische Universität München, Lehrstuhl für Informatik mit
+! Schwerpunkt Wissenschaftliches Rechnen,
+! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
+! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
+! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
+! and
+! - IBM Deutschland GmbH
+!
+!
+! More information can be found here:
+! http://elpa.mpcdf.mpg.de/
+!
+! ELPA is free software: you can redistribute it and/or modify
+! it under the terms of the version 3 of the license of the
+! GNU Lesser General Public License as published by the Free
+! Software Foundation.
+!
+! ELPA is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU Lesser General Public License for more details.
+!
+! You should have received a copy of the GNU Lesser General Public License
+! along with ELPA. If not, see <http://www.gnu.org/licenses/>
+!
+! ELPA reflects a substantial effort on the part of the original
+! ELPA consortium, and we ask you to respect the spirit of the
+! license that we chose: i.e., please contribute any changes you
+! may have back to the original ELPA library distribution, and keep
+! any derivatives of ELPA under the same license that we chose for
+! the original distribution, the GNU Lesser General Public License.
+!
+!
+#include "config-f90.h"
+!>
+
+program test_invert_trm
+
+ use precision
+ use ELPA1
+ use elpa_utilities
+#ifdef WITH_OPENMP
+ use test_util
+#endif
+
+ use mod_read_input_parameters
+ use mod_check_correctness
+ use mod_setup_mpi
+ use mod_blacs_infrastructure
+ use mod_prepare_matrix
+
+ use elpa_mpi
+#ifdef HAVE_REDIRECT
+ use redirect
+#endif
+#ifdef HAVE_DETAILED_TIMINGS
+ use timings
+#endif
+ use output_types
+
+ implicit none
+
+ !-------------------------------------------------------------------------------
+ ! Please set system size parameters below!
+ ! na: System size
+ ! nev: Number of eigenvectors to be calculated
+ ! nblk: Blocking factor in block cyclic distribution
+ !-------------------------------------------------------------------------------
+ integer(kind=ik) :: nblk
+ integer(kind=ik) :: na, nev
+
+ integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols
+
+ integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
+ integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
+
+ integer, external :: numroc
+
+ real(kind=rk), allocatable :: ev(:), xr(:,:)
+ complex(kind=ck), allocatable :: a(:,:), b(:,:), c(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:)
+ complex(kind=ck), allocatable :: d(:), e(:), bs(:,:)
+ complex(kind=rk) :: diagonalElement, subdiagonalElement
+ integer(kind=ik) :: loctmp ,rowLocal, colLocal
+ complex(kind=ck), parameter :: CZERO = (0.d0,0.d0), CONE = (1.d0,0.d0)
+ real(kind=rk) :: norm, normmax
+#ifdef WITH_MPI
+ real(kind=rk) :: pzlange
+#else
+ real(kind=rk) :: zlange
+#endif
+
+ integer(kind=ik) :: iseed(4096) ! Random seed, size should be sufficient for every generator
+ complex(kind=ck), parameter :: pi = (3.141592653589793238462643383279d0, 0.d0)
+
+ integer(kind=ik) :: STATUS
+#ifdef WITH_OPENMP
+ integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, &
+ provided_mpi_thread_level
+#endif
+ type(output_t) :: write_to_file
+ logical :: success
+ character(len=8) :: task_suffix
+ integer(kind=ik) :: j
+ !-------------------------------------------------------------------------------
+
+ success = .true.
+
+ call read_input_parameters(na, nev, nblk, write_to_file)
+
+ !-------------------------------------------------------------------------------
+ ! MPI Initialization
+ call setup_mpi(myid, nprocs)
+
+ STATUS = 0
+
+#ifdef HAVE_DETAILED_TIMINGS
+
+ ! initialise the timing functionality
+
+#ifdef HAVE_LIBPAPI
+ call timer%measure_flops(.true.)
+#endif
+
+ call timer%measure_allocated_memory(.true.)
+ call timer%measure_virtual_memory(.true.)
+ call timer%measure_max_allocated_memory(.true.)
+
+ call timer%set_print_options(&
+#ifdef HAVE_LIBPAPI
+ print_flop_count=.true., &
+ print_flop_rate=.true., &
+#endif
+ print_allocated_memory = .true. , &
+ print_virtual_memory=.true., &
+ print_max_allocated_memory=.true.)
+
+
+ call timer%enable()
+
+ call timer%start("program")
+#endif
+
+ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
+ if(mod(nprocs,np_cols) == 0 ) exit
+ enddo
+
+ ! at the end of the above loop, nprocs is always divisible by np_cols
+
+ np_rows = nprocs/np_cols
+
+ if(myid==0) then
+ print '(3(a,i0))','Matrix size=',na,', Block size=',nblk
+ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
+ print *
+ endif
+
+ !-------------------------------------------------------------------------------
+ ! Set up BLACS context and MPI communicators
+ !
+ ! The BLACS context is only necessary for using Scalapack.
+ !
+ ! For ELPA, the MPI communicators along rows/cols are sufficient,
+ ! and the grid setup may be done in an arbitrary way as long as it is
+ ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every process has a unique (my_prow,my_pcol) pair).
+!
+! ELPA reflects a substantial effort on the part of the original
+! ELPA consortium, and we ask you to respect the spirit of the
+! license that we chose: i.e., please contribute any changes you
+! may have back to the original ELPA library distribution, and keep
+! any derivatives of ELPA under the same license that we chose for
+! the original distribution, the GNU Lesser General Public License.
+!
+!
+#include "config-f90.h"
+!>
+
+program test_invert_trm
+
+ use precision
+ use ELPA1
+ use elpa_utilities
+#ifdef WITH_OPENMP
+ use test_util
+#endif
+
+ use mod_read_input_parameters
+ use mod_check_correctness
+ use mod_setup_mpi
+ use mod_blacs_infrastructure
+ use mod_prepare_matrix
+
+ use elpa_mpi
+#ifdef HAVE_REDIRECT
+ use redirect
+#endif
+#ifdef HAVE_DETAILED_TIMINGS
+ use timings
+#endif
+ use output_types
+
+ implicit none
+
+ !-------------------------------------------------------------------------------
+ ! Please set system size parameters below!
+ ! na: System size
+ ! nev: Number of eigenvectors to be calculated
+ ! nblk: Blocking factor in block cyclic distribution
+ !-------------------------------------------------------------------------------
+ integer(kind=ik) :: nblk
+ integer(kind=ik) :: na, nev
+
+ integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols
+
+ integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
+ integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
+
+ integer, external :: numroc
+
+ real(kind=rk), allocatable :: a(:,:), b(:,:), c(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:), ev(:)
+ real(kind=rk), allocatable :: d(:), e(:), bs(:,:)
+ real(kind=rk) :: diagonalElement, subdiagonalElement
+ integer(kind=ik) :: loctmp ,rowLocal, colLocal
+
+ real(kind=rk) :: norm, normmax
+#ifdef WITH_MPI
+ real(kind=rk) :: pdlange
+#else
+ real(kind=rk) :: dlange
+#endif
+ integer(kind=ik) :: iseed(4096) ! Random seed, size should be sufficient for every generator
+ real(kind=rk), parameter :: pi = 3.141592653589793238462643383279_rk
+ integer(kind=ik) :: STATUS
+#ifdef WITH_OPENMP
+ integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, &
+ provided_mpi_thread_level
+#endif
+ type(output_t) :: write_to_file
+ logical :: success
+ character(len=8) :: task_suffix
+ integer(kind=ik) :: j
+ !-------------------------------------------------------------------------------
+
+ success = .true.
+
+ call read_input_parameters(na, nev, nblk, write_to_file)
+
+ !-------------------------------------------------------------------------------
+ ! MPI Initialization
+ call setup_mpi(myid, nprocs)
+
+ STATUS = 0
+
+#ifdef HAVE_DETAILED_TIMINGS
+
+ ! initialise the timing functionality
+
+#ifdef HAVE_LIBPAPI
+ call timer%measure_flops(.true.)
+#endif
+
+ call timer%measure_allocated_memory(.true.)
+ call timer%measure_virtual_memory(.true.)
+ call timer%measure_max_allocated_memory(.true.)
+
+ call timer%set_print_options(&
+#ifdef HAVE_LIBPAPI
+ print_flop_count=.true., &
+ print_flop_rate=.true., &
+#endif
+ print_allocated_memory = .true. , &
+ print_virtual_memory=.true., &
+ print_max_allocated_memory=.true.)
+
+
+ call timer%enable()
+
+ call timer%start("program")
+#endif
+
+ do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
+ if(mod(nprocs,np_cols) == 0 ) exit
+ enddo
+
+ ! at the end of the above loop, nprocs is always divisible by np_cols
+
+ np_rows = nprocs/np_cols
+
+ if(myid==0) then
+ print '(3(a,i0))','Matrix size=',na,', Block size=',nblk
+ print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
+ print *
+ endif
+
+ !-------------------------------------------------------------------------------
+ ! Set up BLACS context and MPI communicators
+ !
+ ! The BLACS context is only necessary for using Scalapack.
+ !
+ ! For ELPA, the MPI communicators along rows/cols are sufficient,
+ ! and the grid setup may be done in an arbitrary way as long as it is
+ ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every process has a unique (my_prow,my_pcol) pair).