! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
!>
!> Fortran test program that demonstrates the use of
!> the ELPA 2 real case library.
!> If "HAVE_REDIRECT" was defined at build time
!> the stdout and stderr output of each MPI task
!> can be redirected to files if the environment
!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
!> to "true".
!>
!> By calling executable [arg1] [arg2] [arg3] [arg4]
!> one can define the size (arg1), the number of
!> Eigenvectors to compute (arg2), and the blocking (arg3).
!> If these values are not set, default values (4000, 1500, 16)
!> are chosen.
!> If these values are set the 4th argument can be
!> "output", which specifies that the EV's are written to
!> an ascii file.
!>
!> The real ELPA 2 kernel is set in this program via
!> the API call. However, this can be overridden by setting
!> the environment variable "REAL_ELPA_KERNEL" to an
!> appropriate value.
!>
program test_real2_choose_kernel_with_api_single_precision
!-------------------------------------------------------------------------------
! Standard eigenvalue problem - REAL version
!
! This program demonstrates the use of the ELPA module
! together with standard scalapack routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
!-------------------------------------------------------------------------------
use precision
use elpa1
use elpa2
use mod_check_for_gpu, only : check_for_gpu
use elpa_utilities, only : error_unit
use elpa2_utilities
use mod_read_input_parameters
use mod_check_correctness
use mod_setup_mpi
use mod_blacs_infrastructure
use mod_prepare_matrix
use elpa_mpi
#ifdef WITH_OPENMP
use test_util
#endif
#ifdef HAVE_REDIRECT
use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
use timings
#endif
use output_types
implicit none
!-------------------------------------------------------------------------------
! Please set system size parameters below!
! na: System size
! nev: Number of eigenvectors to be calculated
! nblk: Blocking factor in block cyclic distribution
!-------------------------------------------------------------------------------
integer(kind=ik) :: nblk
integer(kind=ik) :: na, nev
integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols
integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
integer(kind=ik), external :: numroc
real(kind=rk4), allocatable :: a(:,:), z(:,:), as(:,:), ev(:)
integer(kind=ik) :: STATUS
#ifdef WITH_OPENMP
integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level
#endif
logical :: successELPA, success
integer(kind=ik) :: numberOfDevices
logical :: gpuAvailable
type(output_t) :: write_to_file
character(len=8) :: task_suffix
integer(kind=ik) :: j
#undef DOUBLE_PRECISION_REAL
successELPA = .true.
gpuAvailable = .false.
call read_input_parameters(na, nev, nblk, write_to_file)
!-------------------------------------------------------------------------------
! MPI Initialization
call setup_mpi(myid, nprocs)
gpuAvailable = check_for_gpu(myid, numberOfDevices)
STATUS = 0
#define REALCASE
#include "../elpa_print_headers.X90"
#ifdef HAVE_DETAILED_TIMINGS
! initialise the timing functionality
#ifdef HAVE_LIBPAPI
call timer%measure_flops(.true.)
#endif
call timer%measure_allocated_memory(.true.)
call timer%measure_virtual_memory(.true.)
call timer%measure_max_allocated_memory(.true.)
call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
print_flop_count=.true., &
print_flop_rate=.true., &
#endif
print_allocated_memory = .true. , &
print_virtual_memory=.true., &
print_max_allocated_memory=.true.)
call timer%enable()
call timer%start("program: test_real2_choose_kernel_with_api_single_precision")
#endif
!-------------------------------------------------------------------------------
! Selection of number of processor rows/columns
! We try to set up the grid square-like, i.e. start the search for possible
! divisors of nprocs with a number next to the square root of nprocs
! and decrement it until a divisor is found.
do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
if(mod(nprocs,np_cols) == 0 ) exit
enddo
! at the end of the above loop, nprocs is always divisible by np_cols
np_rows = nprocs/np_cols
if(myid==0) then
print *
print '(a)','Standard eigenvalue problem - REAL version'
if (gpuAvailable) then
print *,"with GPU Version"
endif
print *
print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk
print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
print *
print *, "This is an example how to determine the ELPA2 kernel with"
print *, "an api call. Note, however, that setting the kernel via"
print *, "an environment variable will always take precedence over"
print *, "everything else! "
print *
#ifdef WITH_ONE_SPECIFIC_COMPLEX_KERNEL
print *," However, this version of ELPA was build with only one of all the available"
print *," kernels, thus it will not be successful to call ELPA with another "
print *," kernel than the one specified at compile time!"
#endif
print *," "
#ifndef HAVE_ENVIRONMENT_CHECKING
print *, " Notice that it is not possible with this build to set the "
print *, " kernel via an environment variable! To change this re-install"
print *, " the library and have a look at the log files"
#endif
#ifndef WITH_ONE_SPECIFIC_REAL_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_GENERIC_SIMPLE"
#else /* WITH_ONE_SPECIFIC_REAL_KERNEL */
#ifdef WITH_REAL_GENERIC_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_GENERIC"
#endif
#ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_GENERIC_SIMPLE"
#endif
#ifdef WITH_REAL_GENERIC_SSE_ASSEMBLY_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_SSE"
#endif
#ifdef WITH_REAL_SSE_BLOCK2_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_SSE_BLOCK2"
#endif
#ifdef WITH_REAL_SSE_BLOCK4_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_SSE_BLOCK4"
#endif
#ifdef WITH_REAL_SSE_BLOCK6_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_SSE_BLOCK6"
#endif
#ifdef WITH_REAL_AVX_BLOCK2_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX_BLOCK2"
#endif
#ifdef WITH_REAL_AVX_BLOCK4_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX_BLOCK4"
#endif
#ifdef WITH_REAL_AVX_BLOCK6_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX_BLOCK6"
#endif
#ifdef WITH_REAL_AVX2_BLOCK2_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX2_BLOCK2"
#endif
#ifdef WITH_REAL_AVX2_BLOCK4_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX2_BLOCK4"
#endif
#ifdef WITH_REAL_AVX2_BLOCK6_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX2_BLOCK6"
#endif
#ifdef WITH_REAL_AVX512_BLOCK2_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX512_BLOCK2"
#endif
#ifdef WITH_REAL_AVX512_BLOCK4_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX512_BLOCK4"
#endif
#ifdef WITH_REAL_AVX512_BLOCK6_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_AVX512_BLOCK6"
#endif
#ifdef WITH_REAL_BGP_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_BGP"
#endif
#ifdef WITH_REAL_BGQ_KERNEL
print *, " The settings are: REAL_ELPA_KERNEL_BGQ"
#endif
#ifdef WITH_GPU_VERSION
print *, " The settings are: REAL_ELPA_GPU"
#endif
#endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */
print *
endif
!-------------------------------------------------------------------------------
! Set up BLACS context and MPI communicators
!
! The BLACS context is only necessary for using Scalapack.
!
! For ELPA, the MPI communicators along rows/cols are sufficient,
! and the grid setup may be done in an arbitrary way as long as it is
! consistent (i.e. 0<=my_prow