From ebbf0967af42f8bf9f039525ceff40e691c3e10e Mon Sep 17 00:00:00 2001 From: Andreas Marek Date: Thu, 30 Jun 2016 12:15:34 +0200 Subject: [PATCH] Simple test case for elpa_solve_tridi --- Makefile.am | 9 + src/elpa_utilities.F90 | 40 ++ .../test_real_toeplitz_matrix.F90 | 408 ++++++++++++++++++ 3 files changed, 457 insertions(+) create mode 100644 test/fortran_test_programs/test_real_toeplitz_matrix.F90 diff --git a/Makefile.am b/Makefile.am index 92d2d5b7..fda7df5e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -195,6 +195,7 @@ dist_files_DATA = \ test/fortran_test_programs/test_real2_choose_kernel_with_api.F90 \ test/fortran_test_programs/test_real.F90 \ test/fortran_test_programs/test_real_with_c.F90 \ + test/fortran_test_programs/test_real_toeplitz_matrix.F90 \ src/elpa2_print_kernels.F90 dist_doc_DATA = README.md USERS_GUIDE.md INSTALL.md CONTRIBUTING.md LICENSE Changelog COPYING/COPYING COPYING/gpl.txt COPYING/lgpl.txt @@ -217,6 +218,7 @@ noinst_PROGRAMS = \ elpa2_test_complex_default_kernel@SUFFIX@ \ elpa2_test_real_choose_kernel_with_api@SUFFIX@ \ elpa2_test_complex_choose_kernel_with_api@SUFFIX@ \ + elpa1_test_real_toeplitz_matrix@SUFFIX@ \ elpa1_test_real_with_c@SUFFIX@ if !WITH_OPENMP noinst_PROGRAMS += \ @@ -279,6 +281,11 @@ elpa1_test_real@SUFFIX@_LDADD = $(build_lib) elpa1_test_real@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules EXTRA_elpa1_test_real@SUFFIX@_DEPENDENCIES = test/fortran_test_programs/elpa_test_programs_print_headers.X90 +elpa1_test_real_toeplitz_matrix@SUFFIX@_SOURCES = test/fortran_test_programs/test_real_toeplitz_matrix.F90 +elpa1_test_real_toeplitz_matrix@SUFFIX@_LDADD = $(build_lib) +elpa1_test_real_toeplitz_matrix@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules +EXTRA_elpa1_test_real_toeplitz_matrix@SUFFIX@_DEPENDENCIES = test/fortran_test_programs/elpa_test_programs_print_headers.X90 + elpa1_test_real_with_c@SUFFIX@_SOURCES = test/fortran_test_programs/test_real_with_c.F90 elpa1_test_real_with_c@SUFFIX@_LDADD = $(build_lib) elpa1_test_real_with_c@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules @@ -340,9 +347,11 @@ check_SCRIPTS = \ elpa2_test_real_default_kernel@SUFFIX@.sh \ elpa1_test_complex@SUFFIX@.sh \ elpa2_test_complex@SUFFIX@.sh \ + elpa2_test_real_default_kernel_qr_decomposition@SUFFIX@.sh \ elpa2_test_complex_default_kernel@SUFFIX@.sh \ elpa2_test_real_choose_kernel_with_api@SUFFIX@.sh \ elpa2_test_complex_choose_kernel_with_api@SUFFIX@.sh \ + elpa1_test_real_toeplitz_matrix@SUFFIX@.sh \ elpa2_print_kernels@SUFFIX@ diff --git a/src/elpa_utilities.F90 b/src/elpa_utilities.F90 index cf28ca46..d10a5bb9 100644 --- a/src/elpa_utilities.F90 +++ b/src/elpa_utilities.F90 @@ -60,6 +60,7 @@ module ELPA_utilities private ! By default, all routines contained are private public :: debug_messages_via_environment_variable, pcol, prow, error_unit + public :: map_global_array_index_to_local_index #ifndef HAVE_ISO_FORTRAN_ENV integer(kind=ik), parameter :: error_unit = 0 #endif @@ -123,4 +124,43 @@ module ELPA_utilities !------------------------------------------------------------------------------- + function map_global_array_index_to_local_index(iGLobal, jGlobal, iLocal, jLocal , nblk, np_rows, np_cols, my_prow, my_pcol) & + result(possible) + use precision + + implicit none + + integer(kind=ik) :: pi, pj, li, lj, xi, xj + integer(kind=ik), intent(in) :: iGlobal, jGlobal, nblk, np_rows, np_cols, my_prow, my_pcol + integer(kind=ik), intent(out) :: iLocal, jLocal + logical :: possible + + possible = .true. + iLocal = 0 + jLocal = 0 + + pi = prow(iGlobal, nblk, np_rows) + + if (my_prow .ne. pi) then + possible = .false. + return + endif + + pj = pcol(jGlobal, nblk, np_cols) + + if (my_pcol .ne. pj) then + possible = .false. + return + endif + li = (iGlobal-1)/(np_rows*nblk) ! block number for rows + lj = (jGlobal-1)/(np_cols*nblk) ! block number for columns + + xi = mod( (iGlobal-1),nblk)+1 ! offset in block li + xj = mod( (jGlobal-1),nblk)+1 ! offset in block lj + + iLocal = li * nblk + xi + jLocal = lj * nblk + xj + + end function + end module ELPA_utilities diff --git a/test/fortran_test_programs/test_real_toeplitz_matrix.F90 b/test/fortran_test_programs/test_real_toeplitz_matrix.F90 new file mode 100644 index 00000000..585752a8 --- /dev/null +++ b/test/fortran_test_programs/test_real_toeplitz_matrix.F90 @@ -0,0 +1,408 @@ +! This file is part of ELPA. +! +! The ELPA library was originally created by the ELPA consortium, +! consisting of the following organizations: +! +! - Max Planck Computing and Data Facility (MPCDF), formerly known as +! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), +! - Bergische Universität Wuppertal, Lehrstuhl für angewandte +! Informatik, +! - Technische Universität München, Lehrstuhl für Informatik mit +! Schwerpunkt Wissenschaftliches Rechnen , +! - Fritz-Haber-Institut, Berlin, Abt. Theorie, +! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, +! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, +! and +! - IBM Deutschland GmbH +! +! +! More information can be found here: +! http://elpa.mpcdf.mpg.de/ +! +! ELPA is free software: you can redistribute it and/or modify +! it under the terms of the version 3 of the license of the +! GNU Lesser General Public License as published by the Free +! Software Foundation. +! +! ELPA is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +! GNU Lesser General Public License for more details. +! +! You should have received a copy of the GNU Lesser General Public License +! along with ELPA. If not, see +! +! ELPA reflects a substantial effort on the part of the original +! ELPA consortium, and we ask you to respect the spirit of the +! license that we chose: i.e., please contribute any changes you +! may have back to the original ELPA library distribution, and keep +! any derivatives of ELPA under the same license that we chose for +! the original distribution, the GNU Lesser General Public License. +! +! +#include "config-f90.h" +!> +!> Fortran test programm to demonstrates the use of +!> the elpa_solve_tridi library function +!> +program test_solve_tridi + +! +! Copyright of the original code rests with the authors inside the ELPA +! consortium. The copyright of any additional modifications shall rest +! with their original authors, but shall adhere to the licensing terms +! distributed along with the original code in the file "COPYING". +! +!------------------------------------------------------------------------------- + use precision + use ELPA1 + use elpa_utilities +#ifdef WITH_OPENMP + use test_util +#endif + + use mod_read_input_parameters + use mod_check_correctness + use mod_setup_mpi + use mod_blacs_infrastructure + use mod_prepare_matrix + + use elpa_mpi +#ifdef HAVE_REDIRECT + use redirect +#endif +#ifdef HAVE_DETAILED_TIMINGS + use timings +#endif + use output_types + + implicit none + + !------------------------------------------------------------------------------- + ! Please set system size parameters below! + ! na: System size + ! nev: Number of eigenvectors to be calculated + ! nblk: Blocking factor in block cyclic distribution + !------------------------------------------------------------------------------- + integer(kind=ik) :: nblk + integer(kind=ik) :: na, nev + + integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols + + integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols + integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol + + integer, external :: numroc + + real(kind=rk), allocatable :: a(:,:), d(:), e(:), ev_analytic(:), ev(:) + real(kind=rk) :: diagonalELement, subdiagonalElement + + real(kind=rk), allocatable :: tmp1(:,:), tmp2(:,:), as(:,:) + real(kind=rk) :: tmp + integer(kind=ik) :: loctmp ,rowLocal, colLocal + + + real(kind=rk) :: maxerr + + logical :: wantDebug + + real(kind=rk), parameter :: pi = 3.141592653589793238462643383279_rk + + integer(kind=ik) :: iseed(4096) ! Random seed, size should be sufficient for every generator + + integer(kind=ik) :: STATUS +#ifdef WITH_OPENMP + integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, & + provided_mpi_thread_level +#endif + type(output_t) :: write_to_file + logical :: success + character(len=8) :: task_suffix + integer(kind=ik) :: j + !------------------------------------------------------------------------------- + + success = .true. + + call read_input_parameters(na, nev, nblk, write_to_file) + + !------------------------------------------------------------------------------- + ! MPI Initialization + call setup_mpi(myid, nprocs) + + STATUS = 0 + +!#define DATATYPE REAL +!#define ELPA1 +!#include "elpa_test_programs_print_headers.X90" + +#ifdef HAVE_DETAILED_TIMINGS + + ! initialise the timing functionality + +#ifdef HAVE_LIBPAPI + call timer%measure_flops(.true.) +#endif + + call timer%measure_allocated_memory(.true.) + call timer%measure_virtual_memory(.true.) + call timer%measure_max_allocated_memory(.true.) + + call timer%set_print_options(& +#ifdef HAVE_LIBPAPI + print_flop_count=.true., & + print_flop_rate=.true., & +#endif + print_allocated_memory = .true. , & + print_virtual_memory=.true., & + print_max_allocated_memory=.true.) + + + call timer%enable() + + call timer%start("program") +#endif + + do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 + if(mod(nprocs,np_cols) == 0 ) exit + enddo + + ! at the end of the above loop, nprocs is always divisible by np_cols + + np_rows = nprocs/np_cols + + if(myid==0) then + print * + print '(a)','Test program for elpa_solve_tridi with a Toeplitz matrix' + print * + print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk + print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs + print * + endif + + !------------------------------------------------------------------------------- + ! Set up BLACS context and MPI communicators + ! + ! The BLACS context is only necessary for using Scalapack. + ! + ! For ELPA, the MPI communicators along rows/cols are sufficient, + ! and the grid setup may be done in an arbitrary way as long as it is + ! consistent (i.e. 0<=my_prow