test_real2_qr.F90 13.7 KB
Newer Older
1
2
3
4
5
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
6
7
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8
9
10
11
12
13
14
15
16
17
18
19
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
20
!    http://elpa.mpcdf.mpg.de/
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
!>
!> Fortran test programm to demonstrates the use of
!> ELPA 2 real case library.
!> If "HAVE_REDIRECT" was defined at build time
!> the stdout and stderr output of each MPI task
!> can be redirected to files if the environment
!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
!> to "true".
!>
!> By calling executable [arg1] [arg2] [arg3] [arg4]
!> one can define the size (arg1), the number of
!> Eigenvectors to compute (arg2), and the blocking (arg3).
!> If these values are not set default values (4000, 1500, 16)
!> are choosen.
!> If these values are set the 4th argument can be
!> "output", which specifies that the EV's are written to
!> an ascii file.
!>
!> The real ELPA 2 kernel is set as the default kernel.
!> In this test case the qr_decomposition is used.
!> However, this can be overriden by setting
!> the environment variable "REAL_ELPA_KERNEL" to an
!> appropiate value.
!>
68
program test_real2_default_kernel_qr_decomposition_double_precision
69
70
71
72
73
74
75
76
77
78
79
80
81

!-------------------------------------------------------------------------------
! Standard eigenvalue problem - REAL version
!
! This program demonstrates the use of the ELPA module
! together with standard scalapack routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
!-------------------------------------------------------------------------------
82
   use precision
83
   use elpa1_legacy
84
   use elpa2_legacy
85

86
   use mod_check_for_gpu, only : check_for_gpu
87
   use elpa_utilities, only : error_unit
88
   use elpa2_utilities, only : elpa_get_actual_real_kernel_name
Andreas Marek's avatar
Andreas Marek committed
89
90
91
92
93
   use mod_read_input_parameters
   use mod_check_correctness
   use mod_setup_mpi
   use mod_blacs_infrastructure
   use mod_prepare_matrix
94
   use elpa_mpi
95
96
97
98
99
100
101
102
103
104
105
#ifdef WITH_OPENMP
   use test_util
#endif

#ifdef HAVE_REDIRECT
  use redirect
#endif

#ifdef HAVE_DETAILED_TIMINGS
 use timings
#endif
106
 use output_types
107
108
109
110
111
112
113
114
115
   implicit none

   !-------------------------------------------------------------------------------
   ! Please set system size parameters below!
   ! na:   System size
   ! nev:  Number of eigenvectors to be calculated
   ! nblk: Blocking factor in block cyclic distribution
   !-------------------------------------------------------------------------------

116
117
   integer(kind=ik)           :: nblk
   integer(kind=ik)           :: na, nev
118

119
   integer(kind=ik)           :: np_rows, np_cols, na_rows, na_cols
120

121
122
   integer(kind=ik)           :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
   integer(kind=ik)           :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol
123

124
   integer, external          :: numroc
125

126
   real(kind=rk8), allocatable :: a(:,:), z(:,:), as(:,:), ev(:)
127

128
   integer(kind=ik)           :: STATUS
129
#ifdef WITH_OPENMP
130
   integer(kind=ik)           :: omp_get_max_threads,  required_mpi_thread_level, provided_mpi_thread_level
131
#endif
132
133
134
   logical                    :: successELPA, success
   integer(kind=ik)           :: numberOfDevices
   logical                    :: gpuAvailable
135
   type(output_t)             :: write_to_file
136
137
   character(len=8)           :: task_suffix
   integer(kind=ik)           :: j
138

139
#define DOUBLE_PRECISION_REAL 1
140
141
142

   successELPA   = .true.
   gpuAvailable  = .false.
143

144
   !write_to_file = .false.
145
   call read_input_parameters(na, nev, nblk, write_to_file)
146

147
148
149
150
!   if (COMMAND_ARGUMENT_COUNT() /= 0) then
!     write(error_unit,*) "This program does not support any command-line arguments"
!     stop 1
!   endif
151

152
153
154
!   nblk = 2
!   na   = 4000
!   nev  = 1500
155

156
157
158
159
!   ! make sure na, nbl is even
!   if (mod(nblk,2 ) .ne. 0) then
!     nblk = nblk - 1
!   endif
160

161
162
163
164
165
166
167
168
!   ! make sure na is even
!   if (mod(na,2) .ne. 0) then
!     na = na - 1
!   endif
!   ! make sure na is at least 34
!   if (na .lt. 34) then
!     na = 34
!   endif
169

170
171
   !-------------------------------------------------------------------------------
   !  MPI Initialization
Andreas Marek's avatar
Andreas Marek committed
172
   call setup_mpi(myid, nprocs)
173

174
   gpuAvailable = check_for_gpu(myid, numberOfDevices)
175

176
   STATUS = 0
177
178
179
180
181
182
183
184
185
186
187
188
189

   if (nblk .lt. 64) then
     status = 1
     if (myid .eq. 0) print *,"At the moment QR decomposition need blocksize of at least 64"
     if (na .lt. 64) then
       if (myid .eq. 0) print *,"This is why the matrix size must also be at least 64 or only 1 MPI task can be used"
     endif

#ifdef WITH_MPI
      call mpi_finalize(mpierr)
#endif
      call EXIT(0)
   endif
Andreas Marek's avatar
Andreas Marek committed
190
#define REALCASE
191
#include "elpa_print_headers.X90"
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

#ifdef HAVE_DETAILED_TIMINGS

   ! initialise the timing functionality

#ifdef HAVE_LIBPAPI
   call timer%measure_flops(.true.)
#endif

   call timer%measure_allocated_memory(.true.)
   call timer%measure_virtual_memory(.true.)
   call timer%measure_max_allocated_memory(.true.)

   call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
                print_flop_count=.true., &
                print_flop_rate=.true., &
#endif
                print_allocated_memory = .true. , &
                print_virtual_memory=.true., &
                print_max_allocated_memory=.true.)


215
   call timer%enable()
216

217
   call timer%start("program: test_real2_default_kernel_qr_decomposition_double_precision")
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#endif
   !-------------------------------------------------------------------------------
   ! Selection of number of processor rows/columns
   ! We try to set up the grid square-like, i.e. start the search for possible
   ! divisors of nprocs with a number next to the square root of nprocs
   ! and decrement it until a divisor is found.

   do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
      if(mod(nprocs,np_cols) == 0 ) exit
   enddo
   ! at the end of the above loop, nprocs is always divisible by np_cols

   np_rows = nprocs/np_cols

   if(myid==0) then
      print *
      print '(a)','Standard eigenvalue problem - REAL version'
235
236
237
      if (gpuAvailable) then
        print *,"with GPU version"
      endif
238
239
240
241
242
243
244
245
246
247
      print *
      print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk
      print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
      print *
      print *, "This is an example how ELPA2 chooses a default kernel,"
#ifdef HAVE_ENVIRONMENT_CHECKING
      print *, "or takes the kernel defined in the environment variable,"
#endif
      print *, "since the ELPA API call does not contain any kernel specification"
      print *
248
      print *, " The settings are: ",trim(elpa_get_actual_real_kernel_name())," as real kernel"
249
      print *
250
251
252
253
254
255
#ifdef WITH_ONE_SPECIFIC_COMPLEX_KERNEL
      print *," However, this version of ELPA was build with only one of all the available"
      print *," kernels, thus it will not be successful to call ELPA with another "
      print *," kernel than the one specified at compile time!"
#endif
      print *," "
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
#ifndef HAVE_ENVIRONMENT_CHECKING
      print *, " Notice that it is not possible with this build to set the "
      print *, " kernel via an environment variable! To change this re-install"
      print *, " the library and have a look at the log files"
#endif
      print *, " The qr-decomposition is used via the api call"
   endif

   !-------------------------------------------------------------------------------
   ! Set up BLACS context and MPI communicators
   !
   ! The BLACS context is only necessary for using Scalapack.
   !
   ! For ELPA, the MPI communicators along rows/cols are sufficient,
   ! and the grid setup may be done in an arbitrary way as long as it is
   ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
   ! process has a unique (my_prow,my_pcol) pair).

Andreas Marek's avatar
Andreas Marek committed
274
275
   call set_up_blacsgrid(mpi_comm_world, my_blacs_ctxt, np_rows, np_cols, &
                         nprow, npcol, my_prow, my_pcol)
276
277
278
279
280
281

   if (myid==0) then
     print '(a)','| Past BLACS_Gridinfo.'
   end if

   ! All ELPA routines need MPI communicators for communicating within
282
   ! rows or columns of processes, these are set in elpa_get_communicators.
283

284
   mpierr = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
285
                                   mpi_comm_rows, mpi_comm_cols)
286
287
288
289
290

   if (myid==0) then
     print '(a)','| Past split communicator setup for rows and columns.'
   end if

Andreas Marek's avatar
Andreas Marek committed
291
292
   call set_up_blacs_descriptor(na ,nblk, my_prow, my_pcol, np_rows, np_cols, &
                                na_rows, na_cols, sc_desc, my_blacs_ctxt, info)
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308

   if (myid==0) then
     print '(a)','| Past scalapack descriptor setup.'
   end if

   !-------------------------------------------------------------------------------
   ! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
   call timer%start("set up matrix")
#endif
   allocate(a (na_rows,na_cols))
   allocate(z (na_rows,na_cols))
   allocate(as(na_rows,na_cols))

   allocate(ev(na))

309
   call prepare_matrix_double(na, myid, sc_desc, a, z, as)
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329

#ifdef HAVE_DETAILED_TIMINGS
   call timer%stop("set up matrix")
#endif
   ! set print flag in elpa1
   elpa_print_times = .true.

   !-------------------------------------------------------------------------------
   ! Calculate eigenvalues/eigenvectors

   if (myid==0) then
     print '(a)','| Entering two-stage ELPA solver ... '
     print *
   end if


   ! ELPA is called without any kernel specification in the API,
   ! furthermore, if the environment variable is not set, the
   ! default kernel is called. Otherwise, the kernel defined in the
   ! environment variable
330
#ifdef WITH_MPI
331
   call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
332
#endif
333

334
   successELPA = elpa_solve_evp_real_2stage_double(na, nev, a, na_rows, ev, z, na_rows, nblk, &
335
                              na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world,   &
336
337
                              useQR=.true.)

338
   if (.not.(successELPA)) then
339
      write(error_unit,*) "solve_evp_real_2stage produced an error! Aborting..."
340
#ifdef WITH_MPI
Andreas Marek's avatar
Andreas Marek committed
341
      call MPI_ABORT(mpi_comm_world, 1, mpierr)
342
#endif
343
344
345
346
347
348
349
350
351
352
353
354
   endif

   if (myid==0) then
     print '(a)','| Two-step ELPA solver complete.'
     print *
   end if

   if(myid == 0) print *,'Time transform to tridi :',time_evp_fwd
   if(myid == 0) print *,'Time solve tridi        :',time_evp_solve
   if(myid == 0) print *,'Time transform back EVs :',time_evp_back
   if(myid == 0) print *,'Total time (sum above)  :',time_evp_back+time_evp_solve+time_evp_fwd

355
   if(write_to_file%eigenvectors) then
356
357
358
359
360
361
362
363
364
365
     write(unit = task_suffix, fmt = '(i8.8)') myid
     open(17,file="EVs_real2_out_task_"//task_suffix(1:8)//".txt",form='formatted',status='new')
     write(17,*) "Part of eigenvectors: na_rows=",na_rows,"of na=",na," na_cols=",na_cols," of na=",na

     do i=1,na_rows
       do j=1,na_cols
         write(17,*) "row=",i," col=",j," element of eigenvector=",z(i,j)
       enddo
     enddo
     close(17)
366
   endif
367
368

   if(write_to_file%eigenvalues) then
369
370
371
372
373
374
375
376
      if (myid == 0) then
         open(17,file="EVs_real2_out.txt",form='formatted',status='new')
         do i=1,na
            write(17,*) i,ev(i)
         enddo
         close(17)
      endif
   endif
377
378


379
380
   !-------------------------------------------------------------------------------
   ! Test correctness of result (using plain scalapack routines)
381
   status = check_correctness_double(na, nev, as, z, ev, sc_desc, myid)
382

Andreas Marek's avatar
Andreas Marek committed
383
384
   deallocate(a)
   deallocate(as)
385
386
387
388
389

   deallocate(z)
   deallocate(ev)

#ifdef HAVE_DETAILED_TIMINGS
390
   call timer%stop("program: test_real2_default_kernel_qr_decomposition_double_precision")
391
   print *," "
392
393
   print *,"Timings program: test_real2_default_kernel_qr_decomposition_double_precision"
   call timer%print("program: test_real2_default_kernel_qr_decomposition_double_precision")
394
   print *," "
395
   print *,"End timings program: test_real2_default_kernel_qr_decomposition_double_precision"
396
#endif
397
#ifdef WITH_MPI
398
399
   call blacs_gridexit(my_blacs_ctxt)
   call mpi_finalize(mpierr)
400
#endif
401
402
403
404
   call EXIT(STATUS)
end

!-------------------------------------------------------------------------------