elpa_t.F90 48.8 KB
Newer Older
1
2
3
!
!    Copyright 2017, L. Hüdepohl and A. Marek, MPCDF
!
Andreas Marek's avatar
Andreas Marek committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
48
#include "config-f90.h"
49

Andreas Marek's avatar
Andreas Marek committed
50
module elpa_type
51
  use elpa_constants
52
53
  use, intrinsic :: iso_c_binding

54
  public :: elpa_init, elpa_initialized, elpa_uninit, elpa_allocate, elpa_t, c_int, c_double, c_float
55

56
  type :: elpa_t
Andreas Marek's avatar
Andreas Marek committed
57
   private
58
   type(c_ptr)         :: index = C_NULL_PTR
59

60
61
62
63
64
   integer(kind=c_int), pointer :: na => NULL()
   integer(kind=c_int), pointer :: nev => NULL()
   integer(kind=c_int), pointer :: local_nrows => NULL()
   integer(kind=c_int), pointer :: local_ncols => NULL()
   integer(kind=c_int), pointer :: nblk => NULL()
65

66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
   contains
     procedure, public :: setup => elpa_setup

     generic, public   :: set => elpa_set_integer, &
                                 elpa_set_double

     procedure, public :: get => elpa_get_integer
     procedure, public :: is_set => elpa_is_set_integer
     !procedure, public :: get_double => elpa_get_double
     !procedure, public :: is_set_double => elpa_is_set_double

     generic, public :: solve => &
         elpa_solve_real_double, &
         elpa_solve_real_single, &
         elpa_solve_complex_double, &
         elpa_solve_complex_single

     generic, public :: hermitian_multiply => &
         elpa_multiply_at_b_double, &
         elpa_multiply_ah_b_double, &
         elpa_multiply_at_b_single, &
         elpa_multiply_ah_b_single

     generic, public :: cholesky => &
         elpa_cholesky_double_real, &
         elpa_cholesky_single_real, &
         elpa_cholesky_double_complex, &
         elpa_cholesky_single_complex

     generic, public :: invert_tridiagonal => &
         elpa_invert_trm_double_real, &
         elpa_invert_trm_single_real, &
         elpa_invert_trm_double_complex, &
         elpa_invert_trm_single_complex

     generic, public :: solve_tridi => &
         elpa_solve_tridi_double_real, &
         elpa_solve_tridi_single_real
104
105
106
107
108

     procedure, public :: destroy => elpa_destroy

     ! privates:
     procedure, private :: elpa_set_integer
109
     procedure, private :: elpa_set_double
110

111
     procedure, private :: elpa_solve_real_double
112
     procedure, private :: elpa_solve_real_single
113
     procedure, private :: elpa_solve_complex_double
114
     procedure, private :: elpa_solve_complex_single
115

116
117
118
119
     procedure, private :: elpa_multiply_at_b_double
     procedure, private :: elpa_multiply_at_b_single
     procedure, private :: elpa_multiply_ah_b_double
     procedure, private :: elpa_multiply_ah_b_single
120

121
122
123
124
     procedure, private :: elpa_cholesky_double_real
     procedure, private :: elpa_cholesky_single_real
     procedure, private :: elpa_cholesky_double_complex
     procedure, private :: elpa_cholesky_single_complex
125
126
127
128
129

     procedure, private :: elpa_invert_trm_double_real
     procedure, private :: elpa_invert_trm_single_real
     procedure, private :: elpa_invert_trm_double_complex
     procedure, private :: elpa_invert_trm_single_complex
130
131
132

     procedure, private :: elpa_solve_tridi_double_real
     procedure, private :: elpa_solve_tridi_single_real
133
134
135

     procedure, private :: associate_int => elpa_associate_int

136
137
138
139
140
141
142
143
144
145
146
147
148
  end type elpa_t

  logical :: initDone = .false.

  integer, parameter :: earliest_api_version = 20170403
  integer, parameter :: current_api_version  = 20170403

  contains

    function elpa_init(api_version) result(success)
      use elpa_utilities, only : error_unit
      implicit none
      integer, intent(in) :: api_version
149
      integer             :: success
150
151
152
153
154
155
156
157
158

      if (earliest_api_version <= api_version .and. api_version <= current_api_version) then
        initDone = .true.
        success = ELPA_OK
      else
        write(error_unit, "(a,i0,a)") "ELPA: Error API version ", api_version," is not supported by this library"
        success = ELPA_ERROR
      endif
    end function
Andreas Marek's avatar
Andreas Marek committed
159

160
161
162
163
164
165
166
167
168
169
170

    function elpa_initialized() result(state)
      logical :: state
      state = initDone
    end function


    subroutine elpa_uninit()
    end subroutine


171
    function elpa_allocate(success) result(obj)
Andreas Marek's avatar
Andreas Marek committed
172
173
      use precision
      use elpa_utilities, only : error_unit
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
174
      use elpa_generated_fortran_interfaces
Andreas Marek's avatar
Andreas Marek committed
175
      implicit none
Andreas Marek's avatar
Andreas Marek committed
176

177
178
      type(elpa_t)                 :: obj
      integer, optional            :: success
Andreas Marek's avatar
Andreas Marek committed
179

Andreas Marek's avatar
Andreas Marek committed
180
181
      ! check whether init has ever been called
      if (.not.(elpa_initialized())) then
182
        write(error_unit, *) "elpa_allocate(): you must call elpa_init() once before creating instances of ELPA"
183
184
185
        if(present(success)) then
          success = ELPA_ERROR
        endif
Andreas Marek's avatar
Andreas Marek committed
186
187
        return
      endif
Andreas Marek's avatar
Andreas Marek committed
188

189
190
191
192
193
194
195
196
      obj%index = elpa_index_instance()

      ! Associate some important integer pointers for convenience
      obj%na = obj%associate_int("na")
      obj%nev = obj%associate_int("nev")
      obj%local_nrows = obj%associate_int("local_nrows")
      obj%local_ncols = obj%associate_int("local_ncols")
      obj%nblk = obj%associate_int("nblk")
Andreas Marek's avatar
Andreas Marek committed
197

198
199
200
      if(present(success)) then
        success = ELPA_OK
      endif
Andreas Marek's avatar
Andreas Marek committed
201

202
    end function
Andreas Marek's avatar
Andreas Marek committed
203

204
    function elpa_setup(self) result(success)
205
      use elpa1_impl, only : elpa_get_communicators_impl
206
207
208
      class(elpa_t), intent(inout) :: self
      integer :: success
      integer :: mpi_comm_rows, mpi_comm_cols, mpierr
209

210
      success = ELPA_ERROR
211

212
213
214
      if (self%is_set("mpi_comm_parent") == ELPA_OK .and. &
          self%is_set("process_row") == ELPA_OK .and. &
          self%is_set("process_col") == ELPA_OK) then
215

216
217
218
219
220
221
        mpierr = elpa_get_communicators_impl(&
                        self%get("mpi_comm_parent"), &
                        self%get("process_row"), &
                        self%get("process_col"), &
                        mpi_comm_rows, &
                        mpi_comm_cols)
222

223
224
225
226
227
        call self%set("mpi_comm_rows", mpi_comm_rows)
        call self%set("mpi_comm_cols", mpi_comm_cols)

        success = ELPA_OK
      endif
228

229
230
231
      if (self%is_set("mpi_comm_rows") == ELPA_OK .and. self%is_set("mpi_comm_cols") == ELPA_OK) then
        success = ELPA_OK
      endif
232
233

    end function
234
235

    subroutine elpa_set_integer(self, name, value, success)
236
      use iso_c_binding
237
238
      use elpa_generated_fortran_interfaces
      use elpa_utilities, only : error_unit
239
      implicit none
240
241
242
243
244
      class(elpa_t)                   :: self
      character(*), intent(in)        :: name
      integer(kind=c_int), intent(in) :: value
      integer, optional               :: success
      integer                         :: actual_success
245

246
      actual_success = elpa_index_set_int_value(self%index, name // c_null_char, value)
247

248
249
250
251
252
253
254
255
      if (present(success)) then
        success = actual_success

      else if (actual_success /= ELPA_OK) then
        write(error_unit,'(a,a,i0,a)') "ELPA: Error setting option '", name, "' to value ", value, &
                "and you did not check for errors!"

      end if
256
257
    end subroutine

258
259

    function elpa_get_integer(self, name, success) result(value)
260
      use iso_c_binding
261
      use elpa_generated_fortran_interfaces
262
      implicit none
263
264
265
266
      class(elpa_t)                  :: self
      character(*), intent(in)       :: name
      integer(kind=c_int)            :: value
      integer, intent(out), optional :: success
267

268
      value = elpa_index_get_int_value(self%index, name // c_null_char, success)
269

270
    end function
Andreas Marek's avatar
Andreas Marek committed
271

272
273
274
275
276
277
278
279
280
281
282
283
284
    function elpa_is_set_integer(self, name) result(success)
      use iso_c_binding
      use elpa_generated_fortran_interfaces
      implicit none
      class(elpa_t)            :: self
      character(*), intent(in) :: name
      integer                  :: success

      success = elpa_index_int_value_is_set(self%index, name // c_null_char)

    end function

    subroutine elpa_set_double(self, name, value, success)
Andreas Marek's avatar
Andreas Marek committed
285
      use iso_c_binding
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
286
      use elpa_generated_fortran_interfaces
287
      use elpa_utilities, only : error_unit
Andreas Marek's avatar
Andreas Marek committed
288
289
      implicit none
      class(elpa_t)                   :: self
290
      character(*), intent(in)        :: name
291
      real(kind=c_double), intent(in) :: value
292
293
      integer, optional               :: success
      integer                         :: actual_success
Andreas Marek's avatar
Andreas Marek committed
294

295
      actual_success = elpa_index_set_double_value(self%index, name // c_null_char, value)
Andreas Marek's avatar
Andreas Marek committed
296

297
298
299
300
      if (present(success)) then
        success = actual_success

      else if (actual_success /= ELPA_OK) then
301
        write(error_unit,'(a,a,es12.5,a)') "ELPA: Error setting option '", name, "' to value ", value, &
302
303
304
305
                "and you did not check for errors!"

      end if
    end subroutine
Andreas Marek's avatar
Andreas Marek committed
306
307


308
    function elpa_get_double(self, name, success) result(value)
Andreas Marek's avatar
Andreas Marek committed
309
      use iso_c_binding
Lorenz Huedepohl's avatar
Lorenz Huedepohl committed
310
      use elpa_generated_fortran_interfaces
Andreas Marek's avatar
Andreas Marek committed
311
      implicit none
312
313
      class(elpa_t)                  :: self
      character(*), intent(in)       :: name
314
      real(kind=c_double)            :: value
315
316
317
      integer, intent(out), optional :: success
      type(c_ptr) :: c_success_ptr

318
      value = elpa_index_get_double_value(self%index, name // c_null_char, success)
319
320

    end function
Andreas Marek's avatar
Andreas Marek committed
321
322


323
    function elpa_associate_int(self, name) result(value)
Andreas Marek's avatar
Andreas Marek committed
324
      use iso_c_binding
325
      use elpa_generated_fortran_interfaces
Andreas Marek's avatar
Andreas Marek committed
326
      implicit none
327
328
329
      class(elpa_t)                  :: self
      character(*), intent(in)       :: name
      integer(kind=c_int), pointer   :: value
Andreas Marek's avatar
Andreas Marek committed
330

331
332
333
334
335
      type(c_ptr)                    :: value_p

      value_p = elpa_index_get_int_loc(self%index, name // c_null_char)
      call c_f_pointer(value_p, value)
    end function
Andreas Marek's avatar
Andreas Marek committed
336

337
338

    subroutine elpa_solve_real_double(self, a, ev, q, success)
339
340
      use elpa2_impl
      use elpa1_impl
341
      use elpa_utilities, only : error_unit
342
      use precision
Andreas Marek's avatar
Andreas Marek committed
343
344
      use iso_c_binding
      implicit none
345
      class(elpa_t)       :: self
Andreas Marek's avatar
Andreas Marek committed
346

347
348
349
#ifdef USE_ASSUMED_SIZE
      real(kind=c_double) :: a(self%local_nrows, *), q(self%local_nrows, *)
#else
350
      real(kind=c_double) :: a(self%local_nrows, self%local_ncols), q(self%local_nrows, self%local_ncols)
351
#endif
352
      real(kind=c_double) :: ev(self%na)
353
354

      real(kind=c_double) :: time_evp_fwd, time_evp_solve, time_evp_back
355
356
      integer, optional   :: success
      integer(kind=c_int) :: success_internal
357
      logical             :: success_l, summary_timings
358

359
      logical             :: useGPU, useQR
360
      integer(kind=c_int) :: kernel
361

362
363
364
365
366
367
368
369
370
371
372
373
      if (self%get("summary_timings",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry summary timings"
          stop
        endif

        summary_timings = .true.
      else
        summary_timings = .false.
      endif


374
375
376
377
378
379
380
381
382
383
384
      if (self%get("gpu",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry gpu"
          stop
        endif

        useGPU = .true.
      else
        useGPU = .false.
      endif

385
386
387
388
389
390
391
392
393
394
395
396
      if (self%get("qr",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry qr"
          stop
        endif

        useQR = .true.
      else
        useQR = .false.
      endif


397
398
399
400
401
      if (self%get("solver",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry solver"
          stop
        endif
402
        success_l = elpa_solve_evp_real_1stage_double_impl(self%na, self%nev, a, self%local_nrows, ev, q,  &
403
404
405
406
                                                           self%local_nrows,  self%nblk, self%local_ncols, &
                                                           self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), &
                                                           self%get("mpi_comm_parent"), useGPU, time_evp_fwd,     &
                                                           time_evp_solve, time_evp_back, summary_timings)
407
408
409
410
411
412

      else if (self%get("solver",success_internal) .eq. 2) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry solver"
          stop
        endif
413
414
415
416
417
        kernel = self%get("real_kernel",success_internal)
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry kernel"
          stop
        endif
418
        success_l = elpa_solve_evp_real_2stage_double_impl(self%na, self%nev, a, self%local_nrows, ev, q,  &
419
420
421
422
423
                                                           self%local_nrows,  self%nblk, self%local_ncols, &
                                                           self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), &
                                                           self%get("mpi_comm_parent"), time_evp_fwd,     &
                                                           time_evp_solve, time_evp_back, summary_timings, useGPU, &
                                                           kernel, useQR)
424
425
426
427
      else
        print *,"unknown solver"
        stop
      endif
428
429
430
431
432
433
434
435
436
437

      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in solve() and you did not check for errors!"
      endif
Andreas Marek's avatar
Andreas Marek committed
438

439
440
441
442
443
444
      if (self%get("summary_timings",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry summary timings"
          stop
        endif

445
446
447
        call self%set("time_evp_fwd", time_evp_fwd)
        call self%set("time_evp_solve", time_evp_solve)
        call self%set("time_evp_back", time_evp_back)
448
449
      else

450
451
452
        call self%set("time_evp_fwd", -1.0_rk8)
        call self%set("time_evp_solve", -1.0_rk8)
        call self%set("time_evp_back", -1.0_rk8)
453
      endif
454
455
    end subroutine

456

457
    subroutine elpa_solve_real_single(self, a, ev, q, success)
458
459
      use elpa2_impl
      use elpa1_impl
460
      use elpa_utilities, only : error_unit
461
      use precision
462
463
464
      use iso_c_binding
      implicit none
      class(elpa_t)       :: self
465
466
467
#ifdef USE_ASSUMED_SIZE
      real(kind=c_float)  :: a(self%local_nrows, *), q(self%local_nrows, *)
#else
468
      real(kind=c_float)  :: a(self%local_nrows, self%local_ncols), q(self%local_nrows, self%local_ncols)
469
#endif
470
      real(kind=c_float)  :: ev(self%na)
471
472

      real(kind=c_double) :: time_evp_fwd, time_evp_solve, time_evp_back
473
474
      integer, optional   :: success
      integer(kind=c_int) :: success_internal
475
      logical             :: success_l, summary_timings
476

477
478
      logical             :: useGPU, useQR
      integer(kind=c_int) :: THIS_ELPA_KERNEL_API
479

480
#ifdef WANT_SINGLE_PRECISION_REAL
481
482
483
484
485
486
487
488
489
490
      if (self%get("timings",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry summary timings"
          stop
        endif

        summary_timings = .true.
      else
        summary_timings = .false.
      endif
491
492
493
494
495
496
497
498
499
500
501
502

      if (self%get("gpu",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry gpu"
          stop
        endif

        useGPU = .true.
      else
        useGPU = .false.
      endif

503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
      if (self%get("qr",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry qr"
          stop
        endif

        useQR = .true.
      else
        useQR = .false.
      endif

      THIS_ELPA_KERNEL_API = self%get("real_kernel",success_internal)
      if (success_internal .ne. ELPA_OK) then
        print *,"Could not querry kernel"
        stop
      endif

520
521
522
523
524
      if (self%get("solver",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry solver"
          stop
        endif
525
        success_l = elpa_solve_evp_real_1stage_single_impl(self%na, self%nev, a, self%local_nrows, ev, q,  &
526
                                                          self%local_nrows,  self%nblk, self%local_ncols, &
527
528
                                                          self%get("mpi_comm_rows"), self%get("mpi_comm_cols"),         &
                                                          self%get("mpi_comm_parent"), useGPU, time_evp_fwd,     &
529
                                                          time_evp_solve, time_evp_back, summary_timings)
530
531
532
533
534
535

      else if (self%get("solver",success_internal) .eq. 2) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry solver"
          stop
        endif
536
        success_l = elpa_solve_evp_real_2stage_single_impl(self%na, self%nev, a, self%local_nrows, ev, q,  &
537
                                                          self%local_nrows,  self%nblk, self%local_ncols, &
538
539
                                                          self%get("mpi_comm_rows"), self%get("mpi_comm_cols"),         &
                                                          self%get("mpi_comm_parent"), time_evp_fwd,     &
540
541
                                                          time_evp_solve, time_evp_back, summary_timings, useGPU, &
                                                          THIS_ELPA_KERNEL_API, useQR)
542
543
544
545
      else
        print *,"unknown solver"
        stop
      endif
546
547
548
549
550
551
552
553
554
555

      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in solve() and you did not check for errors!"
      endif
556
557
558
559
560
561
562
563


      if (self%get("summary_timings",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry summary timings"
          stop
        endif

564
565
566
        call self%set("time_evp_fwd", time_evp_fwd)
        call self%set("time_evp_solve", time_evp_solve)
        call self%set("time_evp_back", time_evp_back)
567
568
      else

569
570
571
        call self%set("time_evp_fwd", -1.0_rk8)
        call self%set("time_evp_solve", -1.0_rk8)
        call self%set("time_evp_back", -1.0_rk8)
572
      endif
573
#else
574
      print *,"This installation of the ELPA library has not been build with single-precision support"
575
576
577
578
      success = ELPA_ERROR
#endif
    end subroutine

579
580

    subroutine elpa_solve_complex_double(self, a, ev, q, success)
581
582
      use elpa2_impl
      use elpa1_impl
583
      use elpa_utilities, only : error_unit
584
      use precision
585
586
587
588
      use iso_c_binding
      implicit none
      class(elpa_t)                  :: self

589
590
591
#ifdef USE_ASSUMED_SIZE
      complex(kind=c_double_complex) :: a(self%local_nrows, *), q(self%local_nrows, *)
#else
592
      complex(kind=c_double_complex) :: a(self%local_nrows, self%local_ncols), q(self%local_nrows, self%local_ncols)
593
#endif
594
      real(kind=c_double)            :: ev(self%na)
595

596
597
      real(kind=c_double) :: time_evp_fwd, time_evp_solve, time_evp_back

598
599
      integer, optional              :: success
      integer(kind=c_int)            :: success_internal
600
      logical                        :: success_l, summary_timings
601

602
      logical                        :: useGPU
603
      integer(kind=c_int) :: THIS_ELPA_KERNEL_API
604
605
606
607
608
609
610
611
612
613
      if (self%get("timings",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry summary timings"
          stop
        endif

        summary_timings = .true.
      else
        summary_timings = .false.
      endif
614
615
616
617
618
619
620
621
622
623
624
625

      if (self%get("gpu",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry gpu"
          stop
        endif

        useGPU = .true.
      else
        useGPU = .false.
      endif

626
      THIS_ELPA_KERNEL_API = self%get("complex_kernel",success_internal)
627
628
629
630
631
      if (success_internal .ne. ELPA_OK) then
        print *,"Could not querry kernel"
        stop
      endif

632
633
634
635
636
      if (self%get("solver",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry solver"
          stop
        endif
637
        success_l = elpa_solve_evp_complex_1stage_double_impl(self%na, self%nev, a, self%local_nrows, ev, q,  &
638
                                                          self%local_nrows,  self%nblk, self%local_ncols, &
639
640
                                                          self%get("mpi_comm_rows"), self%get("mpi_comm_cols"),         &
                                                          self%get("mpi_comm_parent"), useGPU, time_evp_fwd,     &
641
                                                          time_evp_solve, time_evp_back, summary_timings)
642
643
644
645
646
647

      else if (self%get("solver",success_internal) .eq. 2) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry solver"
          stop
        endif
648
        success_l = elpa_solve_evp_complex_2stage_double_impl(self%na, self%nev, a, self%local_nrows, ev, q,  &
649
                                                          self%local_nrows,  self%nblk, self%local_ncols, &
650
651
                                                          self%get("mpi_comm_rows"), self%get("mpi_comm_cols"),         &
                                                          self%get("mpi_comm_parent"), time_evp_fwd,     &
652
653
                                                          time_evp_solve, time_evp_back, summary_timings, useGPU, &
                                                          THIS_ELPA_KERNEL_API)
654
655
656
657
      else
        print *,"unknown solver"
        stop
      endif
658
659
660
661
662
663
664
665
666
667
668

      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in solve() and you did not check for errors!"
      endif

669
670
671
672
673
674
      if (self%get("summary_timings",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry summary timings"
          stop
        endif

675
676
677
        call self%set("time_evp_fwd", time_evp_fwd)
        call self%set("time_evp_solve", time_evp_solve)
        call self%set("time_evp_back", time_evp_back)
678
679
      else

680
681
682
        call self%set("time_evp_fwd", -1.0_rk8)
        call self%set("time_evp_solve", -1.0_rk8)
        call self%set("time_evp_back", -1.0_rk8)
683
      endif
684
685
686
    end subroutine


687
    subroutine elpa_solve_complex_single(self, a, ev, q, success)
688
689
      use elpa2_impl
      use elpa1_impl
690
691
692
      use elpa_utilities, only : error_unit

      use iso_c_binding
693
      use precision
694
      implicit none
695
      class(elpa_t)                 :: self
696
697
698
699
700
701
#ifdef USE_ASSUMED_SIZE
      complex(kind=ck4)             :: a(self%local_nrows, *), q(self%local_nrows, *)
#else
      complex(kind=ck4)             :: a(self%local_nrows, self%local_ncols), q(self%local_nrows, self%local_ncols)
#endif
      real(kind=rk4)                :: ev(self%na)
702

703
      real(kind=c_double) :: time_evp_fwd, time_evp_solve, time_evp_back
704
705
      integer, optional             :: success
      integer(kind=c_int)           :: success_internal
706
      logical                       :: success_l, summary_timings
707

708
      logical                       :: useGPU
709
      integer(kind=c_int) :: THIS_ELPA_KERNEL_API
710
#ifdef WANT_SINGLE_PRECISION_COMPLEX
711
712
713
714
715
716
717
718
719
720
721
722

      if (self%get("summary_timings",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry summary timings"
          stop
        endif

        summary_timings = .true.
      else
        summary_timings = .false.
      endif

723
724
725
726
727
728
729
730
731
732
733
      if (self%get("gpu",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry gpu"
          stop
        endif

        useGPU = .true.
      else
        useGPU = .false.
      endif

734
      THIS_ELPA_KERNEL_API = self%get("complex_kernel",success_internal)
735
736
737
738
739
      if (success_internal .ne. ELPA_OK) then
        print *,"Could not querry kernel"
        stop
      endif

740
741
742
743
744
      if (self%get("solver",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry solver"
          stop
        endif
745
        success_l = elpa_solve_evp_complex_1stage_single_impl(self%na, self%nev, a, self%local_nrows, ev, q,  &
746
                                                          self%local_nrows,  self%nblk, self%local_ncols, &
747
748
                                                          self%get("mpi_comm_rows"), self%get("mpi_comm_cols"),         &
                                                          self%get("mpi_comm_parent"), useGPU, time_evp_fwd,     &
749
                                                          time_evp_solve, time_evp_back, summary_timings)
750
751
752
753
754
755

      else if (self%get("solver",success_internal) .eq. 2) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry solver"
          stop
        endif
756
        success_l = elpa_solve_evp_complex_2stage_single_impl(self%na, self%nev, a, self%local_nrows, ev, q,  &
757
                                                          self%local_nrows,  self%nblk, self%local_ncols, &
758
759
                                                          self%get("mpi_comm_rows"), self%get("mpi_comm_cols"),         &
                                                          self%get("mpi_comm_parent"),  time_evp_fwd,     &
760
761
                                                          time_evp_solve, time_evp_back, summary_timings, useGPU, &
                                                          THIS_ELPA_KERNEL_API)
762
763
764
765
      else
        print *,"unknown solver"
        stop
      endif
766
767
768
769
770
771
772
773
774
775

      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in solve() and you did not check for errors!"
      endif
776
777
778
779
780
781
782

      if (self%get("summary_timings",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry summary timings"
          stop
        endif

783
784
785
        call self%set("time_evp_fwd", time_evp_fwd)
        call self%set("time_evp_solve", time_evp_solve)
        call self%set("time_evp_back", time_evp_back)
786
787
      else

788
789
790
        call self%set("time_evp_fwd", -1.0_rk8)
        call self%set("time_evp_solve", -1.0_rk8)
        call self%set("time_evp_back", -1.0_rk8)
791
792
      endif

793
#else
794
      print *,"This installation of the ELPA library has not been build with single-precision support"
795
796
797
798
      success = ELPA_ERROR
#endif
    end subroutine

799

800
801
802
    subroutine elpa_multiply_at_b_double (self,uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, &
                                          c, ldc, ldcCols, success)
      use iso_c_binding
803
      use elpa1_auxiliary_impl
804
      use precision
805
806
807
      implicit none
      class(elpa_t)                   :: self
      character*1                     :: uplo_a, uplo_c
808
      integer(kind=ik), intent(in)    :: na, lda, ldaCols, ldb, ldbCols, ldc, ldcCols, ncb
809
810
811
#ifdef USE_ASSUMED_SIZE
      real(kind=rk8)                  :: a(lda,*), b(ldb,*), c(ldc,*)
#else
812
      real(kind=rk8)                  :: a(lda,ldaCols), b(ldb,ldbCols), c(ldc,ldcCols)
813
#endif
814
815
816
      integer, optional               :: success
      logical                         :: success_l

817
      success_l = elpa_mult_at_b_real_double_impl(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, self%nblk, &
818
                              self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), c, ldc, ldcCols)
819
820
821
822
823
824
825
826
827
828
829
      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in multiply_a_b() and you did not check for errors!"
      endif
    end subroutine

830

831
832
833
    subroutine elpa_multiply_at_b_single (self,uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, &
                                          c, ldc, ldcCols, success)
      use iso_c_binding
834
      use elpa1_auxiliary_impl
835
      use precision
836
837
838
      implicit none
      class(elpa_t)                   :: self
      character*1                     :: uplo_a, uplo_c
839
      integer(kind=ik), intent(in)    :: na, lda, ldaCols, ldb, ldbCols, ldc, ldcCols, ncb
840
841
842
#ifdef USE_ASSUMED_SIZE
      real(kind=rk4)                  :: a(lda,*), b(ldb,*), c(ldc,*)
#else
843
      real(kind=rk4)                  :: a(lda,ldaCols), b(ldb,ldbCols), c(ldc,ldcCols)
844
#endif
845
846
847
      integer, optional               :: success
      logical                         :: success_l
#ifdef WANT_SINGLE_PRECISION_REAL
848
      success_l = elpa_mult_at_b_real_single_impl(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, self%nblk, &
849
                              self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), c, ldc, ldcCols)
850
851
852
853
854
855
856
857
858
      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in multiply_a_b() and you did not check for errors!"
      endif
859
860
861
#else
      print *,"This installation of the ELPA library has not been build with single-precision support"
      success = ELPA_ERROR
862
863
864
#endif
    end subroutine

865

866
867
868
    subroutine elpa_multiply_ah_b_double (self,uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, &
                                          c, ldc, ldcCols, success)
      use iso_c_binding
869
      use elpa1_auxiliary_impl
870
      use precision
871
872
873
      implicit none
      class(elpa_t)                   :: self
      character*1                     :: uplo_a, uplo_c
874
      integer(kind=ik), intent(in)    :: na, lda, ldaCols, ldb, ldbCols, ldc, ldcCols, ncb
875
876
877
#ifdef USE_ASSUMED_SIZE
      complex(kind=ck8)               :: a(lda,*), b(ldb,*), c(ldc,*)
#else
878
      complex(kind=ck8)               :: a(lda,ldaCols), b(ldb,ldbCols), c(ldc,ldcCols)
879
#endif
880
881
882
      integer, optional               :: success
      logical                         :: success_l

883
      success_l = elpa_mult_ah_b_complex_double_impl(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, self%nblk, &
884
                              self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), c, ldc, ldcCols)
885
886
887
888
889
890
891
892
893
894
895
      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in multiply_a_b() and you did not check for errors!"
      endif
    end subroutine

896

897
898
899
    subroutine elpa_multiply_ah_b_single (self,uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, &
                                          c, ldc, ldcCols, success)
      use iso_c_binding
900
      use elpa1_auxiliary_impl
901
      use precision
902
903
904
      implicit none
      class(elpa_t)                   :: self
      character*1                     :: uplo_a, uplo_c
905
      integer(kind=ik), intent(in)    :: na, lda, ldaCols, ldb, ldbCols, ldc, ldcCols, ncb
906
907
908
#ifdef USE_ASSUMED_SIZE
      complex(kind=ck4)               :: a(lda,*), b(ldb,*), c(ldc,*)
#else
909
      complex(kind=ck4)               :: a(lda,ldaCols), b(ldb,ldbCols), c(ldc,ldcCols)
910
#endif
911
912
913
914
      integer, optional               :: success
      logical                         :: success_l

#ifdef WANT_SINGLE_PRECISION_COMPLEX
915
      success_l = elpa_mult_ah_b_complex_single_impl(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, self%nblk, &
916
                              self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), c, ldc, ldcCols)
917
918
919
920
921
922
923
924
925
      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in multiply_a_b() and you did not check for errors!"
      endif
926
927
928
#else
      print *,"This installation of the ELPA library has not been build with single-precision support"
      success = ELPA_ERROR
929
930
931
#endif
    end subroutine

932

933
    subroutine elpa_cholesky_double_real (self, a, success)
934
      use iso_c_binding
935
      use elpa1_auxiliary_impl
936
937
938
      use precision
      implicit none
      class(elpa_t)                   :: self
939
940
941
#ifdef USE_ASSUMED_SIZE
      real(kind=rk8)                  :: a(self%local_nrows,*)
#else
942
      real(kind=rk8)                  :: a(self%local_nrows,self%local_ncols)
943
#endif
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
      integer, optional               :: success
      logical                         :: success_l
      integer(kind=c_int)             :: success_internal
      logical                         :: wantDebugIntern

      if (self%get("wantDebug",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry wantDebug"
          stop
        endif

        wantDebugIntern = .true.
      else
        wantDebugIntern = .false.
      endif

960

961
      success_l = elpa_cholesky_real_double_impl (self%na, a, self%local_nrows, self%nblk, &
962
                                                 self%local_ncols, self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), &
963
964
965
966
967
968
969
970
971
972
973
974
                                                 wantDebugIntern)
      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in cholesky() and you did not check for errors!"
      endif
    end subroutine

975

976
    subroutine elpa_cholesky_single_real(self, a, success)
977
      use iso_c_binding
978
      use elpa1_auxiliary_impl
979
980
981
      use precision
      implicit none
      class(elpa_t)                   :: self
982
983
984
#ifdef USE_ASSUMED_SIZE
      real(kind=rk4)                  :: a(self%local_nrows,*)
#else
985
      real(kind=rk4)                  :: a(self%local_nrows,self%local_ncols)
986
#endif
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
      integer, optional               :: success
      logical                         :: success_l
      integer(kind=c_int)             :: success_internal
      logical                         :: wantDebugIntern

      if (self%get("wantDebug",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry wantDebug"
          stop
        endif

        wantDebugIntern = .true.
      else
        wantDebugIntern = .false.
      endif

#if WANT_SINGLE_PRECISION_REAL
1004
      success_l = elpa_cholesky_real_single_impl (self%na, a, self%local_nrows, self%nblk, &
1005
                                                 self%local_ncols, self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), &
1006
                                                 wantDebugIntern)
1007
1008
1009
#else
      print *,"This installation of the ELPA library has not been build with single-precision support"
      success = ELPA_ERROR
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
#endif
      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in cholesky() and you did not check for errors!"
      endif
    end subroutine

1022

1023
    subroutine elpa_cholesky_double_complex (self, a, success)
1024
      use iso_c_binding
1025
      use elpa1_auxiliary_impl
1026
1027
1028
      use precision
      implicit none
      class(elpa_t)                   :: self
1029
1030
1031
1032
1033
#ifdef USE_ASSUMED_SIZE
      complex(kind=ck8)               :: a(self%local_nrows,*)
#else
      complex(kind=ck8)               :: a(self%local_nrows,self%local_ncols)
#endif
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
      integer, optional               :: success
      logical                         :: success_l
      integer(kind=c_int)             :: success_internal
      logical                         :: wantDebugIntern

      if (self%get("wantDebug",success_internal) .eq. 1) then
        if (success_internal .ne. ELPA_OK) then
          print *,"Could not querry wantDebug"
          stop
        endif

        wantDebugIntern = .true.
      else
        wantDebugIntern = .false.
      endif

1050
      success_l = elpa_cholesky_complex_double_impl (self%na, a, self%local_nrows, self%nblk, &
1051
                                                 self%local_ncols, self%get("mpi_comm_rows"), self%get("mpi_comm_cols"), &
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
                                                 wantDebugIntern)
      if (present(success)) then
        if (success_l) then
          success = ELPA_OK
        else
          success = ELPA_ERROR
        endif
      else if (.not. success_l) then
        write(error_unit,'(a)') "ELPA: Error in cholesky() and you did not check for errors!"
      endif
    end subroutine

1064

1065
    subroutine elpa_cholesky_single_complex (self, a, success)
1066
      use iso_c_binding
1067
      use elpa1_auxiliary_impl
1068
1069
1070
      use precision
      implicit none
      class(elpa_t)                   :: self
1071
1072
1073
1074
1075
#ifdef USE_ASSUMED_SIZE
      complex(kind=ck4)               :: a(self%local_nrows,*)
#else
      complex(kind=ck4)               :: a(self%local_nrows,self%local_ncols)
#endif
1076
1077
1078
1079
1080
1081