test_elpa2_real_qr_default_kernel.F90 13.2 KB
Newer Older
1 2 3 4 5
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
6 7
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8 9 10 11 12 13 14 15 16 17 18 19
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
20
!    http://elpa.mpcdf.mpg.de/
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
!>
!> Fortran test programm to demonstrates the use of
!> ELPA 2 real case library.
!> If "HAVE_REDIRECT" was defined at build time
!> the stdout and stderr output of each MPI task
!> can be redirected to files if the environment
!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
!> to "true".
!>
!> By calling executable [arg1] [arg2] [arg3] [arg4]
!> one can define the size (arg1), the number of
!> Eigenvectors to compute (arg2), and the blocking (arg3).
!> If these values are not set default values (4000, 1500, 16)
!> are choosen.
!> If these values are set the 4th argument can be
!> "output", which specifies that the EV's are written to
!> an ascii file.
!>
!> The real ELPA 2 kernel is set as the default kernel.
!> In this test case the qr_decomposition is used.
!> However, this can be overriden by setting
!> the environment variable "REAL_ELPA_KERNEL" to an
!> appropiate value.
!>
68
program test_real2_default_kernel_qr_decomposition_double_precision
69 70 71 72 73 74 75 76 77 78 79 80 81

!-------------------------------------------------------------------------------
! Standard eigenvalue problem - REAL version
!
! This program demonstrates the use of the ELPA module
! together with standard scalapack routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
!-------------------------------------------------------------------------------
82
   use precision
83 84
   use ELPA1
   use ELPA2
85

86
   use mod_check_for_gpu, only : check_for_gpu
87
   use elpa_utilities, only : error_unit
88
   use elpa2_utilities, only : get_actual_real_kernel_name
Andreas Marek's avatar
Andreas Marek committed
89 90 91 92 93
   use mod_read_input_parameters
   use mod_check_correctness
   use mod_setup_mpi
   use mod_blacs_infrastructure
   use mod_prepare_matrix
94
   use elpa_mpi
95 96 97 98 99 100 101 102 103 104 105
#ifdef WITH_OPENMP
   use test_util
#endif

#ifdef HAVE_REDIRECT
  use redirect
#endif

#ifdef HAVE_DETAILED_TIMINGS
 use timings
#endif
106
 use output_types
107 108 109 110 111 112 113 114 115
   implicit none

   !-------------------------------------------------------------------------------
   ! Please set system size parameters below!
   ! na:   System size
   ! nev:  Number of eigenvectors to be calculated
   ! nblk: Blocking factor in block cyclic distribution
   !-------------------------------------------------------------------------------

116 117
   integer(kind=ik)           :: nblk
   integer(kind=ik)           :: na, nev
118

119
   integer(kind=ik)           :: np_rows, np_cols, na_rows, na_cols
120

121 122
   integer(kind=ik)           :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
   integer(kind=ik)           :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
123

124
   integer, external          :: numroc
125

126
   real(kind=rk8), allocatable :: a(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:), ev(:)
127

128 129
   integer(kind=ik)           :: iseed(4096) ! Random seed, size should be sufficient for every generator
   integer(kind=ik)           :: STATUS
130
#ifdef WITH_OPENMP
131
   integer(kind=ik)           :: omp_get_max_threads,  required_mpi_thread_level, provided_mpi_thread_level
132
#endif
133 134 135
   logical                    :: successELPA, success
   integer(kind=ik)           :: numberOfDevices
   logical                    :: gpuAvailable
136
   type(output_t)             :: write_to_file
137 138
   character(len=8)           :: task_suffix
   integer(kind=ik)           :: j
139

140
#define DOUBLE_PRECISION_REAL 1
141 142 143

   successELPA   = .true.
   gpuAvailable  = .false.
144

145 146 147 148 149

   if (COMMAND_ARGUMENT_COUNT() /= 0) then
     write(error_unit,*) "This program does not support any command-line arguments"
     stop 1
   endif
150 151 152 153 154

   nblk = 2
   na   = 4000
   nev  = 1500

155 156
   !-------------------------------------------------------------------------------
   !  MPI Initialization
Andreas Marek's avatar
Andreas Marek committed
157
   call setup_mpi(myid, nprocs)
158

159
   gpuAvailable = check_for_gpu(myid, numberOfDevices)
160

161
   STATUS = 0
Andreas Marek's avatar
Andreas Marek committed
162
#define REALCASE
163
#include "elpa_test_programs_print_headers.X90"
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186

#ifdef HAVE_DETAILED_TIMINGS

   ! initialise the timing functionality

#ifdef HAVE_LIBPAPI
   call timer%measure_flops(.true.)
#endif

   call timer%measure_allocated_memory(.true.)
   call timer%measure_virtual_memory(.true.)
   call timer%measure_max_allocated_memory(.true.)

   call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
                print_flop_count=.true., &
                print_flop_rate=.true., &
#endif
                print_allocated_memory = .true. , &
                print_virtual_memory=.true., &
                print_max_allocated_memory=.true.)


187
   call timer%enable()
188

189
   call timer%start("program: test_real2_default_kernel_qr_decomposition_double_precision")
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
#endif
   !-------------------------------------------------------------------------------
   ! Selection of number of processor rows/columns
   ! We try to set up the grid square-like, i.e. start the search for possible
   ! divisors of nprocs with a number next to the square root of nprocs
   ! and decrement it until a divisor is found.

   do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
      if(mod(nprocs,np_cols) == 0 ) exit
   enddo
   ! at the end of the above loop, nprocs is always divisible by np_cols

   np_rows = nprocs/np_cols

   if(myid==0) then
      print *
      print '(a)','Standard eigenvalue problem - REAL version'
207 208 209
      if (gpuAvailable) then
        print *,"with GPU version"
      endif
210 211 212 213 214 215 216 217 218 219 220 221
      print *
      print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk
      print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
      print *
      print *, "This is an example how ELPA2 chooses a default kernel,"
#ifdef HAVE_ENVIRONMENT_CHECKING
      print *, "or takes the kernel defined in the environment variable,"
#endif
      print *, "since the ELPA API call does not contain any kernel specification"
      print *
      print *, " The settings are: ",trim(get_actual_real_kernel_name())," as real kernel"
      print *
222 223 224 225 226 227
#ifdef WITH_ONE_SPECIFIC_COMPLEX_KERNEL
      print *," However, this version of ELPA was build with only one of all the available"
      print *," kernels, thus it will not be successful to call ELPA with another "
      print *," kernel than the one specified at compile time!"
#endif
      print *," "
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
#ifndef HAVE_ENVIRONMENT_CHECKING
      print *, " Notice that it is not possible with this build to set the "
      print *, " kernel via an environment variable! To change this re-install"
      print *, " the library and have a look at the log files"
#endif
      print *, " The qr-decomposition is used via the api call"
   endif

   !-------------------------------------------------------------------------------
   ! Set up BLACS context and MPI communicators
   !
   ! The BLACS context is only necessary for using Scalapack.
   !
   ! For ELPA, the MPI communicators along rows/cols are sufficient,
   ! and the grid setup may be done in an arbitrary way as long as it is
   ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
   ! process has a unique (my_prow,my_pcol) pair).

Andreas Marek's avatar
Andreas Marek committed
246 247
   call set_up_blacsgrid(mpi_comm_world, my_blacs_ctxt, np_rows, np_cols, &
                         nprow, npcol, my_prow, my_pcol)
248 249 250 251 252 253

   if (myid==0) then
     print '(a)','| Past BLACS_Gridinfo.'
   end if

   ! All ELPA routines need MPI communicators for communicating within
254
   ! rows or columns of processes, these are set in get_elpa_communicators.
255

256
   mpierr = get_elpa_communicators(mpi_comm_world, my_prow, my_pcol, &
257
                                   mpi_comm_rows, mpi_comm_cols)
258 259 260 261 262

   if (myid==0) then
     print '(a)','| Past split communicator setup for rows and columns.'
   end if

Andreas Marek's avatar
Andreas Marek committed
263 264
   call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, &
                                na_rows, na_cols, sc_desc, my_blacs_ctxt, info)
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280

   if (myid==0) then
     print '(a)','| Past scalapack descriptor setup.'
   end if

   !-------------------------------------------------------------------------------
   ! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
   call timer%start("set up matrix")
#endif
   allocate(a (na_rows,na_cols))
   allocate(z (na_rows,na_cols))
   allocate(as(na_rows,na_cols))

   allocate(ev(na))

281
   call prepare_matrix_double(na, myid, sc_desc, iseed,  a, z, as)
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301

#ifdef HAVE_DETAILED_TIMINGS
   call timer%stop("set up matrix")
#endif
   ! set print flag in elpa1
   elpa_print_times = .true.

   !-------------------------------------------------------------------------------
   ! Calculate eigenvalues/eigenvectors

   if (myid==0) then
     print '(a)','| Entering two-stage ELPA solver ... '
     print *
   end if


   ! ELPA is called without any kernel specification in the API,
   ! furthermore, if the environment variable is not set, the
   ! default kernel is called. Otherwise, the kernel defined in the
   ! environment variable
302
#ifdef WITH_MPI
303
   call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
304
#endif
305

306
   successELPA = solve_evp_real_2stage_double(na, nev, a, na_rows, ev, z, na_rows, nblk, &
307
                              na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world,   &
308 309
                              useQR=.true.)

310
   if (.not.(successELPA)) then
311
      write(error_unit,*) "solve_evp_real_2stage produced an error! Aborting..."
312
#ifdef WITH_MPI
Andreas Marek's avatar
Andreas Marek committed
313
      call MPI_ABORT(mpi_comm_world, 1, mpierr)
314
#endif
315 316 317 318 319 320 321 322 323 324 325 326
   endif

   if (myid==0) then
     print '(a)','| Two-step ELPA solver complete.'
     print *
   end if

   if(myid == 0) print *,'Time transform to tridi :',time_evp_fwd
   if(myid == 0) print *,'Time solve tridi        :',time_evp_solve
   if(myid == 0) print *,'Time transform back EVs :',time_evp_back
   if(myid == 0) print *,'Total time (sum above)  :',time_evp_back+time_evp_solve+time_evp_fwd

327
   if(write_to_file%eigenvectors) then
328 329 330 331 332 333 334 335 336 337
     write(unit = task_suffix, fmt = '(i8.8)') myid
     open(17,file="EVs_real2_out_task_"//task_suffix(1:8)//".txt",form='formatted',status='new')
     write(17,*) "Part of eigenvectors: na_rows=",na_rows,"of na=",na," na_cols=",na_cols," of na=",na

     do i=1,na_rows
       do j=1,na_cols
         write(17,*) "row=",i," col=",j," element of eigenvector=",z(i,j)
       enddo
     enddo
     close(17)
338
   endif
339 340

   if(write_to_file%eigenvalues) then
341 342 343 344 345 346 347 348
      if (myid == 0) then
         open(17,file="EVs_real2_out.txt",form='formatted',status='new')
         do i=1,na
            write(17,*) i,ev(i)
         enddo
         close(17)
      endif
   endif
349 350


351 352 353 354 355
   !-------------------------------------------------------------------------------
   ! Test correctness of result (using plain scalapack routines)
   allocate(tmp1(na_rows,na_cols))
   allocate(tmp2(na_rows,na_cols))

356
   status = check_correctness_double(na, nev, as, z, ev, sc_desc, myid, tmp1, tmp2)
357

Andreas Marek's avatar
Andreas Marek committed
358 359
   deallocate(a)
   deallocate(as)
360 361 362 363 364 365 366

   deallocate(z)
   deallocate(tmp1)
   deallocate(tmp2)
   deallocate(ev)

#ifdef HAVE_DETAILED_TIMINGS
367
   call timer%stop("program: test_real2_default_kernel_qr_decomposition_double_precision")
368
   print *," "
369 370
   print *,"Timings program: test_real2_default_kernel_qr_decomposition_double_precision"
   call timer%print("program: test_real2_default_kernel_qr_decomposition_double_precision")
371
   print *," "
372
   print *,"End timings program: test_real2_default_kernel_qr_decomposition_double_precision"
373
#endif
374
#ifdef WITH_MPI
375 376
   call blacs_gridexit(my_blacs_ctxt)
   call mpi_finalize(mpierr)
377
#endif
378 379 380 381
   call EXIT(STATUS)
end

!-------------------------------------------------------------------------------