diff --git a/Makefile.am b/Makefile.am index a1303792ad70bb2485301b8990dafe726715b4b9..cd6e49669532d1b5e86ed30aef027aeb6c28d9f0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -199,7 +199,9 @@ dist_files_DATA = \ test/Fortran/test_transpose_multiply_real.F90 \ test/Fortran/test_transpose_multiply_complex.F90 \ test/Fortran/test_cholesky_real.F90 \ + test/Fortran/test_invert_trm_real.F90 \ test/Fortran/test_cholesky_complex.F90 \ + test/Fortran/test_invert_trm_complex.F90 \ src/elpa2_print_kernels.F90 dist_doc_DATA = README.md USERS_GUIDE.md INSTALL.md CONTRIBUTING.md LICENSE Changelog COPYING/COPYING COPYING/gpl.txt COPYING/lgpl.txt @@ -226,7 +228,9 @@ noinst_PROGRAMS = \ elpa1_real_transpose_multiply@SUFFIX@ \ elpa1_complex_transpose_multiply@SUFFIX@ \ elpa1_real_cholesky@SUFFIX@ \ + elpa1_real_invert_trm@SUFFIX@ \ elpa1_complex_cholesky@SUFFIX@ \ + elpa1_complex_invert_trm@SUFFIX@ \ elpa1_test_real_with_c@SUFFIX@ \ elpa1_test_real_c_version@SUFFIX@ \ elpa1_test_complex_c_version@SUFFIX@ \ @@ -301,11 +305,21 @@ elpa1_real_cholesky@SUFFIX@_LDADD = $(build_lib) elpa1_real_cholesky@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules EXTRA_elpa1_real_cholesky@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 +elpa1_real_invert_trm@SUFFIX@_SOURCES = test/Fortran/test_invert_trm_real.F90 +elpa1_real_invert_trm@SUFFIX@_LDADD = $(build_lib) +elpa1_real_invert_trm@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules +EXTRA_elpa1_real_invert_trm@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 + elpa1_complex_cholesky@SUFFIX@_SOURCES = test/Fortran/test_cholesky_complex.F90 elpa1_complex_cholesky@SUFFIX@_LDADD = $(build_lib) elpa1_complex_cholesky@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules EXTRA_elpa1_complex_cholesky@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 +elpa1_complex_invert_trm@SUFFIX@_SOURCES = 
test/Fortran/test_invert_trm_complex.F90 +elpa1_complex_invert_trm@SUFFIX@_LDADD = $(build_lib) +elpa1_complex_invert_trm@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules +EXTRA_elpa1_complex_invert_trm@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90 + elpa1_test_real_with_c@SUFFIX@_SOURCES = test/Fortran/test_real_with_c.F90 elpa1_test_real_with_c@SUFFIX@_LDADD = $(build_lib) elpa1_test_real_with_c@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules @@ -372,7 +386,9 @@ check_SCRIPTS = \ elpa1_real_transpose_multiply@SUFFIX@.sh \ elpa1_complex_transpose_multiply@SUFFIX@.sh \ elpa1_real_cholesky@SUFFIX@.sh \ + elpa1_real_invert_trm@SUFFIX@.sh \ elpa1_complex_cholesky@SUFFIX@.sh \ + elpa1_complex_invert_trm@SUFFIX@.sh \ elpa2_print_kernels@SUFFIX@ \ elpa1_test_real_c_version@SUFFIX@.sh \ elpa1_test_complex_c_version@SUFFIX@.sh \ diff --git a/test/Fortran/test_invert_trm_complex.F90 b/test/Fortran/test_invert_trm_complex.F90 new file mode 100644 index 0000000000000000000000000000000000000000..65e1e65cfe8aaadb2d82c106e31005b4e1026e2d --- /dev/null +++ b/test/Fortran/test_invert_trm_complex.F90 @@ -0,0 +1,361 @@ +! This file is part of ELPA. +! +! The ELPA library was originally created by the ELPA consortium, +! consisting of the following organizations: +! +! - Max Planck Computing and Data Facility (MPCDF), formerly known as +! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), +! - Bergische Universität Wuppertal, Lehrstuhl für angewandte +! Informatik, +! - Technische Universität München, Lehrstuhl für Informatik mit +! Schwerpunkt Wissenschaftliches Rechnen, +! - Fritz-Haber-Institut, Berlin, Abt. Theorie, +! - Max-Planck-Institut für Mathematik in den Naturwissenschaften, +! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition, +! and +! - IBM Deutschland GmbH +! +! +! More information can be found here: +! http://elpa.mpcdf.mpg.de/ +! +! 
ELPA is free software: you can redistribute it and/or modify +! it under the terms of the version 3 of the license of the +! GNU Lesser General Public License as published by the Free +! Software Foundation. +! +! ELPA is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +! GNU Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public License +! along with ELPA. If not, see <http://www.gnu.org/licenses/> +! +! ELPA reflects a substantial effort on the part of the original +! ELPA consortium, and we ask you to respect the spirit of the +! license that we chose: i.e., please contribute any changes you +! may have back to the original ELPA library distribution, and keep +! any derivatives of ELPA under the same license that we chose for +! the original distribution, the GNU Lesser General Public License. +! +! +#include "config-f90.h" +!> + +program test_invert_trm + + use precision + use ELPA1 + use elpa_utilities +#ifdef WITH_OPENMP + use test_util +#endif + + use mod_read_input_parameters + use mod_check_correctness + use mod_setup_mpi + use mod_blacs_infrastructure + use mod_prepare_matrix + + use elpa_mpi +#ifdef HAVE_REDIRECT + use redirect +#endif +#ifdef HAVE_DETAILED_TIMINGS + use timings +#endif + use output_types + + implicit none + + !------------------------------------------------------------------------------- + ! Please set system size parameters below! + ! na: System size + ! nev: Number of eigenvectors to be calculated + ! 
nblk: Blocking factor in block cyclic distribution + !------------------------------------------------------------------------------- + integer(kind=ik) :: nblk + integer(kind=ik) :: na, nev + + integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols + + integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols + integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol + + integer, external :: numroc + + real(kind=rk), allocatable :: ev(:), xr(:,:) + complex(kind=ck), allocatable :: a(:,:), b(:,:), c(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:) + complex(kind=ck), allocatable :: d(:), e(:), bs(:,:) + complex(kind=rk) :: diagonalElement, subdiagonalElement + integer(kind=ik) :: loctmp ,rowLocal, colLocal + complex(kind=ck), parameter :: CZERO = (0.d0,0.d0), CONE = (1.d0,0.d0) + real(kind=rk) :: norm, normmax +#ifdef WITH_MPI + real(kind=rk) :: pzlange +#else + real(kind=rk) :: zlange +#endif + + integer(kind=ik) :: iseed(4096) ! Random seed, size should be sufficient for every generator + complex(kind=ck), parameter :: pi = (3.141592653589793238462643383279d0, 0.d0) + + integer(kind=ik) :: STATUS +#ifdef WITH_OPENMP + integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, & + provided_mpi_thread_level +#endif + type(output_t) :: write_to_file + logical :: success + character(len=8) :: task_suffix + integer(kind=ik) :: j + !------------------------------------------------------------------------------- + + success = .true. + + call read_input_parameters(na, nev, nblk, write_to_file) + + !------------------------------------------------------------------------------- + ! MPI Initialization + call setup_mpi(myid, nprocs) + + STATUS = 0 + +#ifdef HAVE_DETAILED_TIMINGS + + ! initialise the timing functionality + +#ifdef HAVE_LIBPAPI + call timer%measure_flops(.true.) +#endif + + call timer%measure_allocated_memory(.true.) + call timer%measure_virtual_memory(.true.) + call timer%measure_max_allocated_memory(.true.) 
+ + call timer%set_print_options(& +#ifdef HAVE_LIBPAPI + print_flop_count=.true., & + print_flop_rate=.true., & +#endif + print_allocated_memory = .true. , & + print_virtual_memory=.true., & + print_max_allocated_memory=.true.) + + + call timer%enable() + + call timer%start("program") +#endif + + do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 + if(mod(nprocs,np_cols) == 0 ) exit + enddo + + ! at the end of the above loop, nprocs is always divisible by np_cols + + np_rows = nprocs/np_cols + + if(myid==0) then + print '(3(a,i0))','Matrix size=',na,', Block size=',nblk + print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs + print * + endif + + !------------------------------------------------------------------------------- + ! Set up BLACS context and MPI communicators + ! + ! The BLACS context is only necessary for using Scalapack. + ! + ! For ELPA, the MPI communicators along rows/cols are sufficient, + ! and the grid setup may be done in an arbitrary way as long as it is + ! consistent (i.e. 0<=my_prow +! +! ELPA reflects a substantial effort on the part of the original +! ELPA consortium, and we ask you to respect the spirit of the +! license that we chose: i.e., please contribute any changes you +! may have back to the original ELPA library distribution, and keep +! any derivatives of ELPA under the same license that we chose for +! the original distribution, the GNU Lesser General Public License. +! +! +#include "config-f90.h" +!> + +program test_invert_trm + + use precision + use ELPA1 + use elpa_utilities +#ifdef WITH_OPENMP + use test_util +#endif + + use mod_read_input_parameters + use mod_check_correctness + use mod_setup_mpi + use mod_blacs_infrastructure + use mod_prepare_matrix + + use elpa_mpi +#ifdef HAVE_REDIRECT + use redirect +#endif +#ifdef HAVE_DETAILED_TIMINGS + use timings +#endif + use output_types + + implicit none + + !------------------------------------------------------------------------------- + ! 
Please set system size parameters below! + ! na: System size + ! nev: Number of eigenvectors to be calculated + ! nblk: Blocking factor in block cyclic distribution + !------------------------------------------------------------------------------- + integer(kind=ik) :: nblk + integer(kind=ik) :: na, nev + + integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols + + integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols + integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol + + integer, external :: numroc + + real(kind=rk), allocatable :: a(:,:), b(:,:), c(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:), ev(:) + real(kind=rk), allocatable :: d(:), e(:), bs(:,:) + real(kind=rk) :: diagonalElement, subdiagonalElement + integer(kind=ik) :: loctmp ,rowLocal, colLocal + + real(kind=rk) :: norm, normmax +#ifdef WITH_MPI + real(kind=rk) :: pdlange +#else + real(kind=rk) :: dlange +#endif + integer(kind=ik) :: iseed(4096) ! Random seed, size should be sufficient for every generator + real(kind=rk), parameter :: pi = 3.141592653589793238462643383279_rk + integer(kind=ik) :: STATUS +#ifdef WITH_OPENMP + integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, & + provided_mpi_thread_level +#endif + type(output_t) :: write_to_file + logical :: success + character(len=8) :: task_suffix + integer(kind=ik) :: j + !------------------------------------------------------------------------------- + + success = .true. + + call read_input_parameters(na, nev, nblk, write_to_file) + + !------------------------------------------------------------------------------- + ! MPI Initialization + call setup_mpi(myid, nprocs) + + STATUS = 0 + +#ifdef HAVE_DETAILED_TIMINGS + + ! initialise the timing functionality + +#ifdef HAVE_LIBPAPI + call timer%measure_flops(.true.) +#endif + + call timer%measure_allocated_memory(.true.) + call timer%measure_virtual_memory(.true.) + call timer%measure_max_allocated_memory(.true.) 
+ + call timer%set_print_options(& +#ifdef HAVE_LIBPAPI + print_flop_count=.true., & + print_flop_rate=.true., & +#endif + print_allocated_memory = .true. , & + print_virtual_memory=.true., & + print_max_allocated_memory=.true.) + + + call timer%enable() + + call timer%start("program") +#endif + + do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 + if(mod(nprocs,np_cols) == 0 ) exit + enddo + + ! at the end of the above loop, nprocs is always divisible by np_cols + + np_rows = nprocs/np_cols + + if(myid==0) then + print '(3(a,i0))','Matrix size=',na,', Block size=',nblk + print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs + print * + endif + + !------------------------------------------------------------------------------- + ! Set up BLACS context and MPI communicators + ! + ! The BLACS context is only necessary for using Scalapack. + ! + ! For ELPA, the MPI communicators along rows/cols are sufficient, + ! and the grid setup may be done in an arbitrary way as long as it is + ! consistent (i.e. 0<=my_prow