! This file is part of ELPA. ! ! The ELPA library was originally created by the ELPA consortium, ! consisting of the following organizations: ! ! - Max Planck Computing and Data Facility (MPCDF), formerly known as ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte ! Informatik, ! - Technische Universität München, Lehrstuhl für Informatik mit ! Schwerpunkt Wissenschaftliches Rechnen, ! - Fritz-Haber-Institut, Berlin, Abt. Theorie, ! - Max-Planck-Institut für Mathematik in den Naturwissenschaften, ! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition, ! and ! - IBM Deutschland GmbH ! ! ! More information can be found here: ! http://elpa.mpcdf.mpg.de/ ! ! ELPA is free software: you can redistribute it and/or modify ! it under the terms of the version 3 of the license of the ! GNU Lesser General Public License as published by the Free ! Software Foundation. ! ! ELPA is distributed in the hope that it will be useful, ! but WITHOUT ANY WARRANTY; without even the implied warranty of ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ! GNU Lesser General Public License for more details. ! ! You should have received a copy of the GNU Lesser General Public License ! along with ELPA. If not, see <http://www.gnu.org/licenses/> ! ! ELPA reflects a substantial effort on the part of the original ! ELPA consortium, and we ask you to respect the spirit of the ! license that we chose: i.e., please contribute any changes you ! may have back to the original ELPA library distribution, and keep ! any derivatives of ELPA under the same license that we chose for ! the original distribution, the GNU Lesser General Public License. ! ! #include "config-f90.h" !> !> Fortran test program to demonstrate the use of the !> ELPA 2 real case library. 
!> If "HAVE_REDIRECT" was defined at build time !> the stdout and stderr output of each MPI task !> can be redirected to files if the environment !> variable "REDIRECT_ELPA_TEST_OUTPUT" is set !> to "true". !> !> By calling executable [arg1] [arg2] [arg3] [arg4] !> one can define the size (arg1), the number of !> eigenvectors to compute (arg2), and the blocking (arg3). !> If these values are not set, default values (4000, 1500, 16) !> are chosen. !> If these values are set, the 4th argument can be !> "output", which specifies that the EVs are written to !> an ASCII file. !> !> The real ELPA 2 kernel is set in this program via !> the API call. However, this can be overridden by setting !> the environment variable "REAL_ELPA_KERNEL" to an !> appropriate value. !> program test_real2_choose_kernel_with_api_double_precision !------------------------------------------------------------------------------- ! Standard eigenvalue problem - REAL version ! ! This program demonstrates the use of the ELPA module ! together with standard ScaLAPACK routines ! ! Copyright of the original code rests with the authors inside the ELPA ! consortium. The copyright of any additional modifications shall rest ! with their original authors, but shall adhere to the licensing terms ! distributed along with the original code in the file "COPYING". ! !------------------------------------------------------------------------------- use precision use elpa1 use elpa2 use mod_check_for_gpu, only : check_for_gpu use elpa_utilities, only : error_unit use elpa2_utilities use mod_read_input_parameters use mod_check_correctness use mod_setup_mpi use mod_blacs_infrastructure use mod_prepare_matrix use elpa_mpi #ifdef WITH_OPENMP use test_util #endif #ifdef HAVE_REDIRECT use redirect #endif #ifdef HAVE_DETAILED_TIMINGS use timings #endif use output_types implicit none !------------------------------------------------------------------------------- ! Please set system size parameters below! ! 
na: System size ! nev: Number of eigenvectors to be calculated ! nblk: Blocking factor in block cyclic distribution !------------------------------------------------------------------------------- integer(kind=ik) :: nblk integer(kind=ik) :: na, nev integer(kind=ik) :: np_rows, np_cols, na_rows, na_cols integer(kind=ik) :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols integer(kind=ik) :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol integer(kind=ik), external :: numroc real(kind=rk8), allocatable :: a(:,:), z(:,:), as(:,:), ev(:) integer(kind=ik) :: STATUS #ifdef WITH_OPENMP integer(kind=ik) :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level #endif logical :: successELPA, success integer(kind=ik) :: numberOfDevices logical :: gpuAvailable type(output_t) :: write_to_file character(len=8) :: task_suffix integer(kind=ik) :: j #define DOUBLE_PRECISION_REAL 1 successELPA = .true. gpuAvailable = .false. call read_input_parameters(na, nev, nblk, write_to_file) !------------------------------------------------------------------------------- ! MPI Initialization call setup_mpi(myid, nprocs) gpuAvailable = check_for_gpu(myid, numberOfDevices) STATUS = 0 #define REALCASE #include "../elpa_print_headers.X90" #ifdef HAVE_DETAILED_TIMINGS ! initialise the timing functionality #ifdef HAVE_LIBPAPI call timer%measure_flops(.true.) #endif call timer%measure_allocated_memory(.true.) call timer%measure_virtual_memory(.true.) call timer%measure_max_allocated_memory(.true.) call timer%set_print_options(& #ifdef HAVE_LIBPAPI print_flop_count=.true., & print_flop_rate=.true., & #endif print_allocated_memory = .true. , & print_virtual_memory=.true., & print_max_allocated_memory=.true.) call timer%enable() call timer%start("program: test_real2_choose_kernel_with_api_double_precision") #endif !------------------------------------------------------------------------------- ! Selection of number of processor rows/columns ! 
We try to set up the grid square-like, i.e. start the search for possible ! divisors of nprocs with a number next to the square root of nprocs ! and decrement it until a divisor is found. do np_cols = NINT(SQRT(REAL(nprocs))),2,-1 if(mod(nprocs,np_cols) == 0 ) exit enddo ! at the end of the above loop, nprocs is always divisible by np_cols np_rows = nprocs/np_cols if(myid==0) then print * print '(a)','Standard eigenvalue problem - REAL version' if (gpuAvailable) then print *,"with GPU Version" endif print * print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs print * print *, "This is an example how to determine the ELPA2 kernel with" print *, "an api call. Note, however, that setting the kernel via" print *, "an environment variable will always take precedence over" print *, "everything else! " print * #ifdef WITH_ONE_SPECIFIC_REAL_KERNEL print *," However, this version of ELPA was build with only one of all the available" print *," kernels, thus it will not be successful to call ELPA with another " print *," kernel than the one specified at compile time!" #endif print *," " #ifndef HAVE_ENVIRONMENT_CHECKING print *, " Notice that it is not possible with this build to set the " print *, " kernel via an environment variable! 
To change this re-install" print *, " the library and have a look at the log files" #endif #ifndef WITH_ONE_SPECIFIC_REAL_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_GENERIC_SIMPLE" #else /* WITH_ONE_SPECIFIC_REAL_KERNEL */ #ifdef WITH_REAL_GENERIC_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_GENERIC" #endif #ifdef WITH_REAL_GENERIC_SIMPLE_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_GENERIC_SIMPLE" #endif #ifdef WITH_REAL_GENERIC_SSE_ASSEMBLY_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_SSE" #endif #ifdef WITH_REAL_SSE_BLOCK2_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_SSE_BLOCK2" #endif #ifdef WITH_REAL_SSE_BLOCK4_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_SSE_BLOCK4" #endif #ifdef WITH_REAL_SSE_BLOCK6_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_SSE_BLOCK6" #endif #ifdef WITH_REAL_AVX_BLOCK2_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX_BLOCK2" #endif #ifdef WITH_REAL_AVX_BLOCK4_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX_BLOCK4" #endif #ifdef WITH_REAL_AVX_BLOCK6_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX_BLOCK6" #endif #ifdef WITH_REAL_AVX2_BLOCK2_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX2_BLOCK2" #endif #ifdef WITH_REAL_AVX2_BLOCK4_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX2_BLOCK4" #endif #ifdef WITH_REAL_AVX2_BLOCK6_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX2_BLOCK6" #endif #ifdef WITH_REAL_AVX512_BLOCK2_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX512_BLOCK2" #endif #ifdef WITH_REAL_AVX512_BLOCK4_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX512_BLOCK4" #endif #ifdef WITH_REAL_AVX512_BLOCK6_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_AVX512_BLOCK6" #endif #ifdef WITH_REAL_BGP_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_BGP" #endif #ifdef WITH_REAL_BGQ_KERNEL print *, " The settings are: REAL_ELPA_KERNEL_BGQ" #endif #ifdef WITH_GPU_VERSION print *, " The settings are: 
REAL_ELPA_GPU" #endif #endif /* WITH_ONE_SPECIFIC_REAL_KERNEL */ print * endif !------------------------------------------------------------------------------- ! Set up BLACS context and MPI communicators ! ! The BLACS context is only necessary for using Scalapack. ! ! For ELPA, the MPI communicators along rows/cols are sufficient, ! and the grid setup may be done in an arbitrary way as long as it is ! consistent (i.e. 0<=my_prow