test.F90 13.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"

! Define one of TEST_REAL or TEST_COMPLEX
! Define one of TEST_SINGLE or TEST_DOUBLE
! Define one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE
! Define TEST_GPU \in [0, 1]
49
! Define either TEST_ALL_KERNELS or a TEST_KERNEL \in [any valid kernel]
50 51 52 53 54 55 56 57 58 59 60 61 62

#if !(defined(TEST_REAL) ^ defined(TEST_COMPLEX))
error: define exactly one of TEST_REAL or TEST_COMPLEX
#endif

#if !(defined(TEST_SINGLE) ^ defined(TEST_DOUBLE))
error: define exactly one of TEST_SINGLE or TEST_DOUBLE
#endif

#if !(defined(TEST_SOLVER_1STAGE) ^ defined(TEST_SOLVER_2STAGE))
error: define exactly one of TEST_SOLVER_1STAGE or TEST_SOLVER_2STAGE
#endif

63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
#ifdef TEST_SOLVER_1STAGE
#ifdef TEST_ALL_KERNELS
error: TEST_ALL_KERNELS cannot be defined for TEST_SOLVER_1STAGE
#endif
#ifdef TEST_KERNEL
error: TEST_KERNEL cannot be defined for TEST_SOLVER_1STAGE
#endif
#endif

#ifdef TEST_SOLVER_2STAGE
#if !(defined(TEST_KERNEL) ^ defined(TEST_ALL_KERNELS))
error: define either TEST_ALL_KERNELS or a valid TEST_KERNEL
#endif
#endif


79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
#ifdef TEST_SINGLE
#  define EV_TYPE real(kind=C_FLOAT)
#  ifdef TEST_REAL
#    define MATRIX_TYPE real(kind=C_FLOAT)
#  else
#    define MATRIX_TYPE complex(kind=C_FLOAT_COMPLEX)
#  endif
#else
#  define EV_TYPE real(kind=C_DOUBLE)
#  ifdef TEST_REAL
#    define MATRIX_TYPE real(kind=C_DOUBLE)
#  else
#    define MATRIX_TYPE complex(kind=C_DOUBLE_COMPLEX)
#  endif
#endif

95 96 97 98 99 100 101
#ifdef TEST_REAL
#define KERNEL_KEY "real_kernel"
#endif
#ifdef TEST_COMPLEX
#define KERNEL_KEY "complex_kernel"
#endif

102 103 104 105
#include "assert.h"

program test
   use elpa
106 107 108 109 110 111 112

   use test_util
   use test_setup_mpi
   use test_prepare_matrix
   use test_read_input_parameters
   use test_blacs_infrastructure
   use test_check_correctness
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134

   implicit none

   ! matrix dimensions
   integer :: na, nev, nblk

   ! mpi
   integer :: myid, nprocs
   integer :: na_cols, na_rows  ! local matrix size
   integer :: np_cols, np_rows  ! number of MPI processes per column/row
   integer :: my_prow, my_pcol  ! local MPI task position (my_prow, my_pcol) in the grid (0..np_cols -1, 0..np_rows -1)
   integer :: mpierr

   ! blacs
   integer :: my_blacs_ctxt, sc_desc(9), info, nprow, npcol

   ! The Matrix
   MATRIX_TYPE, allocatable :: a(:,:), as(:,:)
   ! eigenvectors
   MATRIX_TYPE, allocatable :: z(:,:)
   ! eigenvalues
   EV_TYPE, allocatable :: ev(:)
135 136 137 138 139 140 141 142 143 144 145
#if defined(__EIGENVALUES) || defined(__SOLVE_TRIDIAGONAL)
   EV_TYPE, allocatable :: d(:), sd(:), ev_analytic(:), ds(:), sds(:)
   EV_TYPE              :: diagonalELement, subdiagonalElement, tmp, maxerr
#ifdef TEST_DOUBLE
   EV_TYPE, parameter   :: pi = 3.141592653589793238462643383279_rk8
#else
   EV_TYPE, parameter   :: pi = 3.1415926535897932_rk4
#endif
   integer              :: loctmp ,rowLocal, colLocal, j,ii
#endif

146

147
   integer :: error, status
148 149 150

   type(output_t) :: write_to_file
   class(elpa_t), pointer :: e
151
#ifdef TEST_ALL_KERNELS
152
   integer :: i
153
#endif
154
   integer :: kernel
155

156 157 158
#if defined(TEST_COMPLEX) && defined(__SOLVE_TRIDIAGONAL)
   stop 77
#endif
159
   call read_input_parameters_traditional(na, nev, nblk, write_to_file)
160 161 162 163 164 165 166 167
   call setup_mpi(myid, nprocs)

   do np_cols = NINT(SQRT(REAL(nprocs))),2,-1
      if(mod(nprocs,np_cols) == 0 ) exit
   enddo

   np_rows = nprocs/np_cols

168 169 170 171 172 173 174 175 176
   if (myid == 0) then
     print '((a,i0))', 'Matrix size: ', na
     print '((a,i0))', 'Num eigenvectors: ', nev
     print '((a,i0))', 'Blocksize: ', nblk
     print '((a,i0))', 'Num MPI proc: ', nprocs
     print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
     print *,''
   endif

177 178 179 180 181 182 183 184 185 186
   call set_up_blacsgrid(mpi_comm_world, my_blacs_ctxt, np_rows, np_cols, &
                         nprow, npcol, my_prow, my_pcol)

   call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, &
                                na_rows, na_cols, sc_desc, my_blacs_ctxt, info)

   allocate(a (na_rows,na_cols), as(na_rows,na_cols))
   allocate(z (na_rows,na_cols))
   allocate(ev(na))

187 188 189 190 191 192
#if defined(__EIGENVALUES) || defined(__SOLVE_TRIDIAGONAL)
   allocate(d (na), ds(na))
   allocate(sd (na), sds(na))
   allocate(ev_analytic(na))
#endif

193 194 195 196
   a(:,:) = 0.0
   z(:,:) = 0.0
   ev(:) = 0.0

197
#ifdef __EIGENVECTORS
198
   call prepare_matrix(na, myid, sc_desc, a, z, as)
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
#endif

#if defined(__EIGENVALUES) || defined(__SOLVE_TRIDIAGONAL)
   ! set up simple toeplitz matrix
#ifdef TEST_DOUBLE
   diagonalElement = 0.45_rk8
   subdiagonalElement =  0.78_rk8
#else
   diagonalElement = 0.45_rk4
   subdiagonalElement =  0.78_rk4
#endif

   d(:) = diagonalElement
   sd(:) = subdiagonalElement

   ! set up the diagonal and subdiagonals (for general solver test)
   do ii=1, na ! for diagonal elements
     if (map_global_array_index_to_local_index(ii, ii, rowLocal, colLocal, nblk, np_rows, np_cols, my_prow, my_pcol)) then
       a(rowLocal,colLocal) = diagonalElement
     endif
   enddo
   do ii=1, na-1
     if (map_global_array_index_to_local_index(ii, ii+1, rowLocal, colLocal, nblk, np_rows, np_cols, my_prow, my_pcol)) then
       a(rowLocal,colLocal) = subdiagonalElement
     endif
   enddo

   do ii=2, na
     if (map_global_array_index_to_local_index(ii, ii-1, rowLocal, colLocal, nblk, np_rows, np_cols, my_prow, my_pcol)) then
       a(rowLocal,colLocal) = subdiagonalElement
     endif
   enddo
   ds = d
   sds = sd
   as = a
#endif
235 236 237 238 239 240 241 242

   if (elpa_init(CURRENT_API_VERSION) /= ELPA_OK) then
     print *, "ELPA API version not supported"
     stop 1
   endif

   e => elpa_allocate()

243 244 245 246 247 248 249 250 251 252
   call e%set("na", na, error)
   assert_elpa_ok(error)
   call e%set("nev", nev, error)
   assert_elpa_ok(error)
   call e%set("local_nrows", na_rows, error)
   assert_elpa_ok(error)
   call e%set("local_ncols", na_cols, error)
   assert_elpa_ok(error)
   call e%set("nblk", nblk, error)
   assert_elpa_ok(error)
253 254

#ifdef WITH_MPI
255 256 257 258 259 260
   call e%set("mpi_comm_parent", MPI_COMM_WORLD, error)
   assert_elpa_ok(error)
   call e%set("process_row", my_prow, error)
   assert_elpa_ok(error)
   call e%set("process_col", my_pcol, error)
   assert_elpa_ok(error)
261
#endif
262

Andreas Marek's avatar
Andreas Marek committed
263 264
   call e%set("timings",1)

265 266 267 268 269 270 271
   assert_elpa_ok(e%setup())

#ifdef TEST_SOLVER_1STAGE
   call e%set("solver", ELPA_SOLVER_1STAGE)
#else
   call e%set("solver", ELPA_SOLVER_2STAGE)
#endif
272
   assert_elpa_ok(error)
273

274 275
   call e%set("gpu", TEST_GPU, error)
   assert_elpa_ok(error)
276

277 278 279
#ifdef TEST_ALL_KERNELS
   do i = 0, elpa_option_cardinality(KERNEL_KEY)
     kernel = elpa_option_enumerate(KERNEL_KEY, i)
280
#endif
281
#ifdef TEST_KERNEL
282
     kernel = TEST_KERNEL
283
#endif
284 285

#ifdef TEST_SOLVER_2STAGE
286
     call e%set(KERNEL_KEY, kernel, error)
287 288 289
#ifdef TEST_KERNEL
     assert_elpa_ok(error)
#else
290 291 292
     if (error /= ELPA_OK) then
       cycle
     endif
293
#endif
294
     if (myid == 0) print *, elpa_int_value_to_string(KERNEL_KEY, kernel), " kernel"
295 296 297

     call e%timer_start(elpa_int_value_to_string(KERNEL_KEY, kernel))
#else
298 299

#ifdef __EIGENVECTORS
300
     call e%timer_start("e%eigenvectors()")
301 302 303 304 305 306 307
#endif
#ifdef __EIGENVALUES
     call e%timer_start("e%eigenvalues()")
#endif
#ifdef __SOLVE_TRIDIAGONAL
     call e%timer_start("e%solve_tridiagonal()")
#endif
308
#endif
309

310
     ! The actual solve step
311
#ifdef __EIGENVECTORS
312
     call e%eigenvectors(a, ev, z, error)
313 314 315 316 317 318 319 320 321
#endif
#ifdef __EIGENVALUES
     call e%eigenvalues(a, ev, error)
#endif
#if defined(__SOLVE_TRIDIAGONAL) && !defined(TEST_COMPLEX)
     call e%solve_tridiagonal(d, sd, z, error)
     ev(:) = d(:)
#endif

322 323
     assert_elpa_ok(error)

324 325 326
#ifdef TEST_SOLVER_2STAGE
     call e%timer_stop(elpa_int_value_to_string(KERNEL_KEY, kernel))
#else
327
#ifdef __EIGENVECTORS
328
     call e%timer_stop("e%eigenvectors()")
329 330 331 332 333 334 335
#endif
#ifdef __EIGENVALUES
     call e%timer_stop("e%eigenvalues()")
#endif
#ifdef __SOLVE_TRIDIAGONAL
     call e%timer_stop("e%solve_tridiagonal()")
#endif
336 337
#endif

338
     if (myid .eq. 0) then
339 340 341
#ifdef TEST_SOLVER_2STAGE
       call e%print_times(elpa_int_value_to_string(KERNEL_KEY, kernel))
#else
342
#ifdef __EIGENVECTORS
343
       call e%print_times("e%eigenvectors()")
344 345 346 347 348 349 350
#endif
#ifdef __EIGENVALUES
       call e%print_times("e%eigenvalues()")
#endif
#ifdef __SOLVE_TRIDIAGONAL
     call e%print_times("e%solve_tridiagonal()")
#endif
351
#endif
352
     endif
353 354

#ifdef __EIGENVECTORS
355 356
     status = check_correctness(na, nev, as, z, ev, sc_desc, myid)
     if (status /= 0) then
Andreas Marek's avatar
Andreas Marek committed
357
       if (myid == 0) print *, "Result incorrect!"
358 359
       call exit(status)
     endif
Andreas Marek's avatar
Andreas Marek committed
360
     if (myid == 0) print *, ""
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
#endif
#if defined(__EIGENVALUES) || defined(__SOLVE_TRIDIAGONAL)
     status = 0
     ! analytic solution
     do ii=1, na
#ifdef TEST_DOUBLE
       ev_analytic(ii) = diagonalElement + 2.0 * subdiagonalElement *cos( pi*real(ii,kind=rk8)/ real(na+1,kind=rk8) )
#else
       ev_analytic(ii) = diagonalElement + 2.0 * subdiagonalElement *cos( pi*real(ii,kind=rk4)/ real(na+1,kind=rk4) )
#endif
     enddo

     ! sort analytic solution:

     ! this hack is neihter elegant, nor optimized: for huge matrixes it might be expensive
     ! a proper sorting algorithmus might be implemented here

     tmp    = minval(ev_analytic)
     loctmp = minloc(ev_analytic, 1)

     ev_analytic(loctmp) = ev_analytic(1)
     ev_analytic(1) = tmp

     do ii=2, na
       tmp = ev_analytic(ii)
       do j= ii, na
         if (ev_analytic(j) .lt. tmp) then
           tmp    = ev_analytic(j)
           loctmp = j
         endif
       enddo
       ev_analytic(loctmp) = ev_analytic(ii)
       ev_analytic(ii) = tmp
     enddo

     ! compute a simple error max of eigenvalues
     maxerr = 0.0
     maxerr = maxval( (ev(:) - ev_analytic(:))/ev_analytic(:) , 1)

#ifdef TEST_DOUBLE
     if (maxerr .gt. 8.e-13) then
#else
     if (maxerr .gt. 8.e-4) then
#endif
       status = 1
       if (myid .eq. 0) then
         print *,"Eigenvalues differ from analytic solution: maxerr = ",maxerr
       endif
     endif

     if (status /= 0) then
       call exit(status)
     endif
#ifdef __SOLVE_TRIDIAGONAL
     ! check eigenvectors
     status = check_correctness(na, nev, as, z, ev, sc_desc, myid)
     if (status /= 0) then
       if (myid == 0) print *, "Result incorrect!"
       call exit(status)
     endif
     if (myid == 0) print *, ""
#endif

#endif
425 426 427

#ifdef TEST_ALL_KERNELS
     a(:,:) = as(:,:)
428 429 430 431
#if defined(__EIGENVALUES) || defined(__SOLVE_TRIDIAGONAL)
     d = ds
     sd = sds
#endif
432 433
   end do
#endif
Andreas Marek's avatar
Andreas Marek committed
434

435 436 437 438 439 440 441 442
   call elpa_deallocate(e)
   call elpa_uninit()

   deallocate(a)
   deallocate(as)
   deallocate(z)
   deallocate(ev)

443 444 445 446 447 448
#ifdef __EIGENVALUES
   deallocate(d, ds)
   deallocate(sd, sds)
   deallocate(ev_analytic)
#endif

449 450 451 452 453 454 455
#ifdef WITH_MPI
   call blacs_gridexit(my_blacs_ctxt)
   call mpi_finalize(mpierr)
#endif

   call exit(status)

456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
#if defined(__EIGENVALUES) || defined(__SOLVE_TRIDIAGONAL)
   contains

     !Processor col for global col number
     pure function pcol(global_col, nblk, np_cols) result(local_col)
       implicit none
       integer(kind=c_int), intent(in) :: global_col, nblk, np_cols
       integer(kind=c_int)             :: local_col
       local_col = MOD((global_col-1)/nblk,np_cols)
     end function

     !Processor row for global row number
     pure function prow(global_row, nblk, np_rows) result(local_row)
       implicit none
       integer(kind=c_int), intent(in) :: global_row, nblk, np_rows
       integer(kind=c_int)             :: local_row
       local_row = MOD((global_row-1)/nblk,np_rows)
     end function

     function map_global_array_index_to_local_index(iGLobal, jGlobal, iLocal, jLocal , nblk, np_rows, np_cols, my_prow, my_pcol) &
       result(possible)
       implicit none

       integer(kind=c_int)              :: pi, pj, li, lj, xi, xj
       integer(kind=c_int), intent(in)  :: iGlobal, jGlobal, nblk, np_rows, np_cols, my_prow, my_pcol
       integer(kind=c_int), intent(out) :: iLocal, jLocal
       logical                       :: possible

       possible = .true.
       iLocal = 0
       jLocal = 0

       pi = prow(iGlobal, nblk, np_rows)

       if (my_prow .ne. pi) then
         possible = .false.
         return
       endif

       pj = pcol(jGlobal, nblk, np_cols)

       if (my_pcol .ne. pj) then
         possible = .false.
         return
       endif
       li = (iGlobal-1)/(np_rows*nblk) ! block number for rows
       lj = (jGlobal-1)/(np_cols*nblk) ! block number for columns

       xi = mod( (iGlobal-1),nblk)+1   ! offset in block li
       xj = mod( (jGlobal-1),nblk)+1   ! offset in block lj

       iLocal = li * nblk + xi
       jLocal = lj * nblk + xj

     end function
#endif
512
end program