elpa1_auxiliary.F90 37.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
13
!    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
!      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".

#include "config-f90.h"

Andreas Marek's avatar
Andreas Marek committed
55
56
!> \brief Fortran module which provides helper routines for matrix calculations
module ELPA1_AUXILIARY
57
  use elpa_utilities
Andreas Marek's avatar
Andreas Marek committed
58

59
60
  implicit none

61
62
  public :: elpa_mult_at_b_real_double      !< Multiply double-precision real matrices A**T * B
  public :: mult_at_b_real                  !< Old, deprecated interface to multiply double-precision real matrices A**T * B. DO NOT USE
63

64
65
  public :: elpa_mult_ah_b_complex_double   !< Multiply double-precision complex matrices A**H * B
  public :: mult_ah_b_complex               !< Old, deprecated interface to multiply double-precision complex matrices A**H * B. DO NOT USE
66

67
68
  public :: elpa_invert_trm_real_double     !< Invert double-precision real triangular matrix
  public :: invert_trm_real                 !< Old, deprecated interface for inversion of double-precision real triangular matrix. DO NOT USE
69

70
71
  public :: elpa_invert_trm_complex_double  !< Invert double-precision complex triangular matrix
  public :: invert_trm_complex              !< Old, deprecated interface to invert double-precision complex triangular matrix. DO NOT USE
72

73
74
  public :: elpa_cholesky_real_double       !< Cholesky factorization of a double-precision real matrix
  public :: cholesky_real                   !< Old, deprecated name for Cholesky factorization of a double-precision real matrix. DO NOT USE
75

76
77
  public :: elpa_cholesky_complex_double    !< Cholesky factorization of a double-precision complex matrix
  public :: cholesky_complex                !< Old, deprecated interface for a Cholesky factorization of a double-precision complex matrix. DO NOT USE
78

79
80
  public :: elpa_solve_tridi_double         !< Solve tridiagonal eigensystem for a double-precision matrix with divide and conquer method
  public :: solve_tridi                     !< Old, deprecated interface to solve tridiagonal eigensystem for a double-precision matrix with divide and conquer method
81
82

#ifdef WANT_SINGLE_PRECISION_REAL
83
84
85
86
  public :: elpa_cholesky_real_single       !< Cholesky factorization of a single-precision real matrix
  public :: elpa_invert_trm_real_single     !< Invert single-precision real triangular matrix
  public :: elpa_mult_at_b_real_single      !< Multiply single-precision real matrices A**T * B
  public :: elpa_solve_tridi_single         !< Solve tridiagonal eigensystem for a single-precision matrix with divide and conquer method
87
88
89
#endif

#ifdef WANT_SINGLE_PRECISION_COMPLEX
90
91
92
  public :: elpa_cholesky_complex_single    !< Cholesky factorization of a single-precision complex matrix
  public :: elpa_invert_trm_complex_single  !< Invert single-precision complex triangular matrix
  public :: elpa_mult_ah_b_complex_single   !< Multiply single-precision complex matrices A**H * B
93
94
#endif

95
!> \brief  cholesky_real: old, deprecated interface for Cholesky factorization of a double-precision real symmetric matrix
96
97
98
99
100
!> \details
!>
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be factorized.
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
101
!>                              Only upper triangle needs to be set.
102
103
104
105
106
107
108
109
!>                              On return, the upper triangle contains the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param  matrixCols           local columns of matrix a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
110
!> \result success              logical, reports success or failure
111
  interface cholesky_real
    ! Legacy generic name kept for existing callers; it dispatches to the
    ! double-precision real Cholesky routine of this module.
    module procedure elpa_cholesky_real_double
  end interface cholesky_real

115
!> \brief  Old, deprecated interface invert_trm_real: Inverts a double-precision real upper triangular matrix
116
117
118
119
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be inverted
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
120
!>                              Only upper triangle needs to be set.
121
122
123
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
Andreas Marek's avatar
Andreas Marek committed
124
!> \param  matrixCols           local columns of matrix a
125
126
127
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
128
!> \result success              logical, reports success or failure
129
130

  interface invert_trm_real
    ! Legacy generic name kept for existing callers; it dispatches to the
    ! double-precision real triangular inversion routine of this module.
    module procedure elpa_invert_trm_real_double
  end interface invert_trm_real

134
!> \brief  old, deprecated interface cholesky_complex: Cholesky factorization of a double-precision complex hermitian matrix
135
136
137
138
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be factorized.
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
139
!>                              Only upper triangle needs to be set.
140
141
142
143
!>                              On return, the upper triangle contains the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
Andreas Marek's avatar
Andreas Marek committed
144
!> \param  matrixCols           local columns of matrix a
145
146
147
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
148
!> \result success              logical, reports success or failure
149
150
151


  interface cholesky_complex
    ! Legacy generic name for the Cholesky factorization of a double-precision
    ! complex hermitian matrix. It has to dispatch to the complex routine:
    ! binding it to the real routine makes calls with a complex matrix
    ! unresolvable at compile time.
    module procedure elpa_cholesky_complex_double
  end interface cholesky_complex

155
!> \brief  old, deprecated interface invert_trm_complex: Inverts a double-precision complex upper triangular matrix
156
157
158
159
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be inverted
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
160
!>                              Only upper triangle needs to be set.
161
162
163
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
Andreas Marek's avatar
Andreas Marek committed
164
!> \param  matrixCols           local columns of matrix a
165
166
167
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
168
!> \result success              logical, reports success or failure
169
170

  interface invert_trm_complex
    ! Legacy generic name kept for existing callers; it dispatches to the
    ! double-precision complex triangular inversion routine of this module.
    module procedure elpa_invert_trm_complex_double
  end interface invert_trm_complex

174
!> \brief  mult_at_b_real: Performs C : = A**T * B for double matrices
175
!> this is the old, deprecated interface for the newer elpa_mult_at_b_real_double
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details

!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the transpose of A is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one shouldn't rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
  interface mult_at_b_real
    ! Legacy generic name kept for existing callers; it dispatches to the
    ! double-precision real A**T * B routine of this module.
    module procedure elpa_mult_at_b_real_double
  end interface mult_at_b_real

209
!> \brief  Old, deprecated interface mult_ah_b_complex: Performs C : = A**H * B for double-precision matrices
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details
!>
!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the transpose of A is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one shouldn't rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
  interface mult_ah_b_complex
    ! Legacy generic name kept for existing callers; it dispatches to the
    ! double-precision complex A**H * B specific procedure.
    module procedure elpa_mult_ah_b_complex_double
  end interface mult_ah_b_complex


244
!> \brief  solve_tridi: Old, deprecated interface to solve a double-precision tridiagonal eigensystem for a double-precision matrix with divide and conquer method
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
!> \details
!>
!> \param na                    Matrix dimension
!> \param nev                   number of eigenvalues/vectors to be computed
!> \param d                     array d(na) on input diagonal elements of tridiagonal matrix, on
!>                              output the eigenvalues in ascending order
!> \param e                     array e(na) on input subdiagonal elements of matrix, on exit destroyed
!> \param q                     on exit : matrix q(ldq,matrixCols) contains the eigenvectors
!> \param ldq                   leading dimension of matrix q
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param matrixCols            columns of matrix q
!> \param mpi_comm_rows         MPI communicator for rows
!> \param mpi_comm_cols         MPI communicator for columns
!> \param wantDebug             logical, give more debug information if .true.
!> \result success              logical, .true. on success, else .false.
  interface solve_tridi
    ! Legacy generic name kept for existing callers; it dispatches to the
    ! double-precision tridiagonal solver specific procedure.
    module procedure elpa_solve_tridi_double
  end interface solve_tridi
263

264
265
  contains

266
#define REALCASE 1
#define DOUBLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_cholesky_real_double: Cholesky factorization of a double-precision real symmetric matrix
!> \details
!>
!> \param  na                   Order of the matrix
!> \param  a(lda,matrixCols)    Distributed matrix to be factorized; the distribution
!>                              is the same as in ScaLAPACK.
!>                              Only the upper triangle has to be set on entry.
!>                              On return the upper triangle holds the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  matrixCols           Local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_cholesky_real_double(na, a, lda, nblk, matrixCols, &
                                       mpi_comm_rows, mpi_comm_cols, wantDebug) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_cholesky_template.X90"
    end function elpa_cholesky_real_double
293

294
#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#define SINGLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_cholesky_real_single: Cholesky factorization of a single-precision real symmetric matrix
!> \details
!>
!> \param  na                   Order of the matrix
!> \param  a(lda,matrixCols)    Distributed matrix to be factorized; the distribution
!>                              is the same as in ScaLAPACK.
!>                              Only the upper triangle has to be set on entry.
!>                              On return the upper triangle holds the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  matrixCols           Local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_cholesky_real_single(na, a, lda, nblk, matrixCols, &
                                       mpi_comm_rows, mpi_comm_cols, wantDebug) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_cholesky_template.X90"
    end function elpa_cholesky_real_single

#endif /* WANT_SINGLE_PRECISION_REAL */
323

Andreas Marek's avatar
Andreas Marek committed
324
325
326
#define REALCASE 1
#define DOUBLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_invert_trm_real_double: Inverts a double-precision real upper triangular matrix
!> \details
!>
!> \param  na                   Order of the matrix
!> \param  a(lda,matrixCols)    Distributed matrix to be inverted; the distribution
!>                              is the same as in ScaLAPACK.
!>                              Only the upper triangle has to be set.
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  matrixCols           Local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_invert_trm_real_double(na, a, lda, nblk, matrixCols, &
                                         mpi_comm_rows, mpi_comm_cols, wantDebug) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_invert_trm.X90"
    end function elpa_invert_trm_real_double
344
345

#ifdef WANT_SINGLE_PRECISION_REAL
! This section is guarded with ifdef (not a plain if) so that it is compiled
! under exactly the same condition as the public statement that exports
! elpa_invert_trm_real_single and as the other single-precision sections.
#define REALCASE 1
#define SINGLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_invert_trm_real_single: Inverts a single-precision real upper triangular matrix
!> \details
!>
!> \param  na                   Order of the matrix
!> \param  a(lda,matrixCols)    Distributed matrix to be inverted; the distribution
!>                              is the same as in ScaLAPACK.
!>                              Only the upper triangle has to be set.
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  matrixCols           Local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_invert_trm_real_single(na, a, lda, nblk, matrixCols, &
                                         mpi_comm_rows, mpi_comm_cols, wantDebug) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_invert_trm.X90"
    end function elpa_invert_trm_real_single

#endif /* WANT_SINGLE_PRECISION_REAL */

370

Andreas Marek's avatar
Andreas Marek committed
371
372
373
374
#define COMPLEXCASE 1
#define DOUBLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_cholesky_complex_double: Cholesky factorization of a double-precision complex hermitian matrix
!> \details
!>
!> \param  na                   Order of the matrix
!> \param  a(lda,matrixCols)    Distributed matrix to be factorized; the distribution
!>                              is the same as in ScaLAPACK.
!>                              Only the upper triangle has to be set on entry.
!>                              On return the upper triangle holds the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  matrixCols           Local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_cholesky_complex_double(na, a, lda, nblk, matrixCols, &
                                          mpi_comm_rows, mpi_comm_cols, wantDebug) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_cholesky_template.X90"
    end function elpa_cholesky_complex_double
395

Andreas Marek's avatar
Andreas Marek committed
396

397
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#define COMPLEXCASE 1
#define SINGLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_cholesky_complex_single: Cholesky factorization of a single-precision complex hermitian matrix
!> \details
!>
!> \param  na                   Order of the matrix
!> \param  a(lda,matrixCols)    Distributed matrix to be factorized; the distribution
!>                              is the same as in ScaLAPACK.
!>                              Only the upper triangle has to be set on entry.
!>                              On return the upper triangle holds the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  matrixCols           Local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_cholesky_complex_single(na, a, lda, nblk, matrixCols, &
                                          mpi_comm_rows, mpi_comm_cols, wantDebug) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_cholesky_template.X90"
    end function elpa_cholesky_complex_single

#endif /* WANT_SINGLE_PRECISION_COMPLEX */
424

Andreas Marek's avatar
Andreas Marek committed
425
426
427
#define COMPLEXCASE 1
#define DOUBLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_invert_trm_complex_double: Inverts a double-precision complex upper triangular matrix
!> \details
!>
!> \param  na                   Order of the matrix
!> \param  a(lda,matrixCols)    Distributed matrix to be inverted; the distribution
!>                              is the same as in ScaLAPACK.
!>                              Only the upper triangle has to be set.
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  matrixCols           Local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_invert_trm_complex_double(na, a, lda, nblk, matrixCols, &
                                            mpi_comm_rows, mpi_comm_cols, wantDebug) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_invert_trm.X90"
    end function elpa_invert_trm_complex_double
447
448

#ifdef WANT_SINGLE_PRECISION_COMPLEX
#define COMPLEXCASE 1
#define SINGLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_invert_trm_complex_single: Inverts a single-precision complex upper triangular matrix
!> \details
!>
!> \param  na                   Order of the matrix
!> \param  a(lda,matrixCols)    Distributed matrix to be inverted; the distribution
!>                              is the same as in ScaLAPACK.
!>                              Only the upper triangle has to be set.
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  matrixCols           Local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_invert_trm_complex_single(na, a, lda, nblk, matrixCols, &
                                            mpi_comm_rows, mpi_comm_cols, wantDebug) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_invert_trm.X90"
    end function elpa_invert_trm_complex_single

#endif /* WANT_SINGLE_PRECISION_COMPLEX */
473

Andreas Marek's avatar
Andreas Marek committed
474
475
476
#define REALCASE 1
#define DOUBLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_mult_at_b_real_double: Performs C := A**T * B
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details
!>
!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: this refers to the original A (as set in the calling program),
!>                                           whereas the transpose of A is used for the calculation.
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param  uplo_c               'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: even when uplo_c is 'U' or 'L', the other triangle may be
!>                                           written to a certain extent, so do not rely on the content there
!> \param  na                   Number of rows/columns of A, number of rows of B and C
!> \param  ncb                  Number of columns of B and C
!> \param  a                    matrix a
!> \param  lda                  leading dimension of matrix a
!> \param  ldaCols              NOTE(review): not described in the original header; presumably the
!>                              number of local columns of a - confirm against the included template
!> \param  b                    matrix b
!> \param  ldb                  leading dimension of matrix b
!> \param  ldbCols              NOTE(review): presumably the number of local columns of b - confirm
!> \param  nblk                 Block size of the cyclic distribution; must be the same in both directions
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  c                    matrix c
!> \param  ldc                  leading dimension of matrix c
!> \param  ldcCols              NOTE(review): presumably the number of local columns of c - confirm
!> \result success
    function elpa_mult_at_b_real_double(uplo_a, uplo_c, na, ncb, &
                                        a, lda, ldaCols, b, ldb, ldbCols, nblk, &
                                        mpi_comm_rows, mpi_comm_cols, &
                                        c, ldc, ldcCols) result(success)
      ! The whole body of this function comes from the template included below.
#include "elpa_multiply_a_b.X90"
    end function elpa_mult_at_b_real_double
513
514

#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#define SINGLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_mult_at_b_real_single: Performs C := A**T * B
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details
!>
!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the transpose of A is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one should not rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param ldaCols               columns of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param ldbCols               columns of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
!> \param ldcCols               presumably columns of matrix c (by analogy with ldaCols/ldbCols; confirm in template)
!> \result success

    function elpa_mult_at_b_real_single(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, nblk, &
                              mpi_comm_rows, mpi_comm_cols, c, ldc, ldcCols) result(success)
      ! Declarations and executable part come entirely from the template
      ! included below; the REALCASE / SINGLE_PRECISION macros defined above
      ! are in effect while it is expanded.

#include "elpa_multiply_a_b.X90"

    end function elpa_mult_at_b_real_single

#endif /* WANT_SINGLE_PRECISION_REAL */

560

Andreas Marek's avatar
Andreas Marek committed
561
562
563
564
#define COMPLEXCASE 1
#define DOUBLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_mult_ah_b_complex_double: Performs C := A**H * B
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details
!>
!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the conjugate transpose of A (A**H) is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one should not rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param ldaCols               columns of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param ldbCols               columns of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
!> \param ldcCols               presumably columns of matrix c (by analogy with ldaCols/ldbCols; confirm in template)
!> \result success

    function elpa_mult_ah_b_complex_double(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, nblk, &
                                 mpi_comm_rows, mpi_comm_cols, c, ldc, ldcCols) result(success)
      ! Declarations and executable part come entirely from the template
      ! included below; the COMPLEXCASE / DOUBLE_PRECISION macros defined
      ! above are in effect while it is expanded.
#include "elpa_multiply_a_b.X90"

    end function elpa_mult_ah_b_complex_double
604

605
#ifdef WANT_SINGLE_PRECISION_COMPLEX
#define COMPLEXCASE 1
#define SINGLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_mult_ah_b_complex_single: Performs C := A**H * B
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details
!>
!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the conjugate transpose of A (A**H) is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one should not rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param ldaCols               columns of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param ldbCols               columns of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
!> \param ldcCols               presumably columns of matrix c (by analogy with ldaCols/ldbCols; confirm in template)
!> \result success

    function elpa_mult_ah_b_complex_single(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, nblk, &
                                 mpi_comm_rows, mpi_comm_cols, c, ldc, ldcCols) result(success)
      ! Declarations and executable part come entirely from the template
      ! included below; the COMPLEXCASE / SINGLE_PRECISION macros defined
      ! above are in effect while it is expanded.

#include "elpa_multiply_a_b.X90"

    end function elpa_mult_ah_b_complex_single

#endif /* WANT_SINGLE_PRECISION_COMPLEX */

653
654
655
#define REALCASE 1
#define DOUBLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_solve_tridi_double: Solve tridiagonal eigensystem for a double-precision matrix with divide and conquer method
!> \details
!>
!> \param na                    Matrix dimension
!> \param nev                   number of eigenvalues/vectors to be computed
!> \param d                     array d(na) on input diagonal elements of tridiagonal matrix, on
!>                              output the eigenvalues in ascending order
!> \param e                     array e(na) on input subdiagonal elements of matrix, on exit destroyed
!> \param q                     on exit : matrix q(ldq,matrixCols) contains the eigenvectors
!> \param ldq                   leading dimension of matrix q
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param matrixCols            columns of matrix q
!> \param mpi_comm_rows         MPI communicator for rows
!> \param mpi_comm_cols         MPI communicator for columns
!> \param wantDebug             logical, give more debug information if .true.
!> \result success              logical, .true. on success, else .false.

    function elpa_solve_tridi_double(na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, wantDebug) &
          result(success)
      ! Declarations and executable part come entirely from the template
      ! included below; the REALCASE / DOUBLE_PRECISION macros defined above
      ! are in effect while it is expanded.

#include "elpa_solve_tridi.X90"

    end function elpa_solve_tridi_double
680
681


682
#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#define SINGLE_PRECISION
#include "precision_macros.h"

!> \brief  elpa_solve_tridi_single: Solve tridiagonal eigensystem for a single-precision matrix with divide and conquer method
!> \details
!>
!> \param na                    Matrix dimension
!> \param nev                   number of eigenvalues/vectors to be computed
!> \param d                     array d(na) on input diagonal elements of tridiagonal matrix, on
!>                              output the eigenvalues in ascending order
!> \param e                     array e(na) on input subdiagonal elements of matrix, on exit destroyed
!> \param q                     on exit : matrix q(ldq,matrixCols) contains the eigenvectors
!> \param ldq                   leading dimension of matrix q
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param matrixCols            columns of matrix q
!> \param mpi_comm_rows         MPI communicator for rows
!> \param mpi_comm_cols         MPI communicator for columns
!> \param wantDebug             logical, give more debug information if .true.
!> \result success              logical, .true. on success, else .false.

    function elpa_solve_tridi_single(na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
                                     mpi_comm_cols, wantDebug) result(success)
      ! Declarations and executable part come entirely from the template
      ! included below; the REALCASE / SINGLE_PRECISION macros defined above
      ! are in effect while it is expanded.

#include "elpa_solve_tridi.X90"

    end function elpa_solve_tridi_single

#endif /* WANT_SINGLE_PRECISION_REAL */

713
714
715



716
717
end module elpa1_auxiliary