elpa1_auxiliary.F90 37.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
13
!    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
!      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
Andreas Marek's avatar
Andreas Marek committed
52 53
!
! This file has been rewritten by A. Marek, MPCDF
54 55
#include "config-f90.h"

Andreas Marek's avatar
Andreas Marek committed
56
!> \brief Fortran module which provides helper routines for matrix calculations
57
module ELPA1_AUXILIARY
58
  use elpa_utilities
Andreas Marek's avatar
Andreas Marek committed
59

60 61
  implicit none

62 63
  ! --- double-precision entry points; each new name is followed by its deprecated generic alias ---
  public :: elpa_mult_at_b_real_double      !< C = A**T * B for double-precision real matrices
  public :: mult_at_b_real                  !< Deprecated name for the double-precision real A**T * B product. DO NOT USE

  public :: elpa_mult_ah_b_complex_double   !< C = A**H * B for double-precision complex matrices
  public :: mult_ah_b_complex               !< Deprecated name for the double-precision complex A**H * B product. DO NOT USE

  public :: elpa_invert_trm_real_double     !< Inversion of a double-precision real triangular matrix
  public :: invert_trm_real                 !< Deprecated name for the double-precision real triangular inversion. DO NOT USE

  public :: elpa_invert_trm_complex_double  !< Inversion of a double-precision complex triangular matrix
  public :: invert_trm_complex              !< Deprecated name for the double-precision complex triangular inversion. DO NOT USE

  public :: elpa_cholesky_real_double       !< Cholesky factorization of a double-precision real matrix
  public :: cholesky_real                   !< Deprecated name for the double-precision real Cholesky factorization. DO NOT USE

  public :: elpa_cholesky_complex_double    !< Cholesky factorization of a double-precision complex matrix
  public :: cholesky_complex                !< Deprecated name for the double-precision complex Cholesky factorization. DO NOT USE

  public :: elpa_solve_tridi_double         !< Divide and conquer solver for a double-precision tridiagonal eigensystem
  public :: solve_tridi                     !< Deprecated name for the double-precision tridiagonal solver

  ! --- single-precision real entry points, exported only when that variant is built ---
#ifdef WANT_SINGLE_PRECISION_REAL
  public :: elpa_cholesky_real_single       !< Cholesky factorization of a single-precision real matrix
  public :: elpa_invert_trm_real_single     !< Inversion of a single-precision real triangular matrix
  public :: elpa_mult_at_b_real_single      !< C = A**T * B for single-precision real matrices
  public :: elpa_solve_tridi_single         !< Divide and conquer solver for a single-precision tridiagonal eigensystem
#endif

  ! --- single-precision complex entry points, exported only when that variant is built ---
#ifdef WANT_SINGLE_PRECISION_COMPLEX
  public :: elpa_cholesky_complex_single    !< Cholesky factorization of a single-precision complex matrix
  public :: elpa_invert_trm_complex_single  !< Inversion of a single-precision complex triangular matrix
  public :: elpa_mult_ah_b_complex_single   !< C = A**H * B for single-precision complex matrices
#endif

96
!> \brief  cholesky_real: old, deprecated interface for Cholesky factorization of a double-precision real symmetric matrix
97 98 99 100 101
!> \details
!>
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be factorized.
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
102
!>                              Only upper triangle needs to be set.
103 104 105 106 107 108 109 110
!>                              On return, the upper triangle contains the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param                       matrixCols  local columns of matrix a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
111
!> \result success               logical, reports success or failure
112
  interface cholesky_real
    ! Deprecated generic name kept for existing callers; it forwards to the
    ! double-precision real Cholesky routine of this module.
    module procedure elpa_cholesky_real_double
  end interface cholesky_real

116
!> \brief  Old, deprecated interface invert_trm_real: Inverts a double-precision real upper triangular matrix
117 118 119 120
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be inverted
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
121
!>                              Only upper triangle needs to be set.
122 123 124
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
Andreas Marek's avatar
Andreas Marek committed
125
!> \param  matrixCols           local columns of matrix a
126 127 128
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
129
!> \result success              logical, reports success or failure
130 131

  interface invert_trm_real
    ! Deprecated generic name kept for existing callers; it forwards to the
    ! double-precision real triangular inversion routine of this module.
    module procedure elpa_invert_trm_real_double
  end interface invert_trm_real

135
!> \brief  old, deprecated interface cholesky_complex: Cholesky factorization of a double-precision complex hermitian matrix
136 137 138 139
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be factorized.
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
140
!>                              Only upper triangle needs to be set.
141 142 143 144
!>                              On return, the upper triangle contains the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
Andreas Marek's avatar
Andreas Marek committed
145
!> \param  matrixCols           local columns of matrix a
146 147 148
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
149
!> \result success              logical, reports success or failure
150 151 152


  interface cholesky_complex
    ! Deprecated generic name for the double-precision complex Cholesky
    ! factorization. It has to bind the complex routine: binding the real
    ! routine here made the generic unusable with complex matrices.
    module procedure elpa_cholesky_complex_double
  end interface cholesky_complex

156
!> \brief  old, deprecated interface invert_trm_complex: Inverts a double-precision complex upper triangular matrix
157 158 159 160
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be inverted
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
161
!>                              Only upper triangle needs to be set.
162 163 164
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
Andreas Marek's avatar
Andreas Marek committed
165
!> \param  matrixCols           local columns of matrix a
166 167 168
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
169
!> \result success              logical, reports success or failure
170 171

  interface invert_trm_complex
    ! Deprecated generic name kept for existing callers; it forwards to the
    ! double-precision complex triangular inversion routine of this module.
    module procedure elpa_invert_trm_complex_double
  end interface invert_trm_complex

175
!> \brief  mult_at_b_real: Performs C : = A**T * B for double matrices
176
!> this is the old, deprecated interface for the newer elpa_mult_at_b_real
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details

!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the transpose of A is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one shouldn't rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
  interface mult_at_b_real
    ! Deprecated generic name kept for existing callers; it forwards to the
    ! double-precision real A**T * B routine of this module.
    module procedure elpa_mult_at_b_real_double
  end interface mult_at_b_real

210
!> \brief  Old, deprecated interface mult_ah_b_complex: Performs C : = A**H * B for double-precision matrices
211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details
!>
!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the conjugate transpose of A (A**H) is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one shouldn't rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
  interface mult_ah_b_complex
    ! Deprecated generic name kept for existing callers; it forwards to the
    ! double-precision complex A**H * B routine.
    module procedure elpa_mult_ah_b_complex_double
  end interface mult_ah_b_complex


245
!> \brief  solve_tridi: Old, deprecated interface to solve a double-precision tridiagonal eigensystem with the divide and conquer method
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261
!> \details
!>
!> \param na                    Matrix dimension
!> \param nev                   number of eigenvalues/vectors to be computed
!> \param d                     array d(na) on input diagonal elements of tridiagonal matrix, on
!>                              output the eigenvalues in ascending order
!> \param e                     array e(na) on input subdiagonal elements of matrix, on exit destroyed
!> \param q                     on exit : matrix q(ldq,matrixCols) contains the eigenvectors
!> \param ldq                   leading dimension of matrix q
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param matrixCols            columns of matrix q
!> \param mpi_comm_rows         MPI communicator for rows
!> \param mpi_comm_cols         MPI communicator for columns
!> \param wantDebug             logical, give more debug information if .true.
!> \result success              logical, .true. on success, else .false.
  interface solve_tridi
    ! Deprecated generic name kept for existing callers; it forwards to the
    ! double-precision tridiagonal divide and conquer solver.
    module procedure elpa_solve_tridi_double
  end interface solve_tridi
264

265 266
  contains

267
!> \brief  cholesky_real_double: Cholesky factorization of a double-precision real symmetric matrix
268
!> \details
269 270 271 272
!>
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be factorized.
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
273
!>                              Only upper triangle needs to be set.
274 275 276 277
!>                              On return, the upper triangle contains the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
278
!> \param  matrixCols           local columns of matrix a
279 280 281 282
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
!> \result success              logical, reports success or failure
283

Andreas Marek's avatar
Andreas Marek committed
284 285
#define REALCASE 1
#define DOUBLE_PRECISION
286
#include "../../general/precision_macros.h"
Andreas Marek's avatar
Andreas Marek committed
287

288

289 290
    function elpa_cholesky_real_double(na, a, lda, nblk, matrixCols, &
                                       mpi_comm_rows, mpi_comm_cols, wantDebug) &
                                       result(success)
      ! Argument declarations and all executable statements are supplied by the
      ! included Cholesky template, which is also included by the other
      ! Cholesky functions of this module. Presumably the preprocessor settings
      ! made just before this function select the real double-precision variant.
#include "./elpa_cholesky_template.X90"
    end function elpa_cholesky_real_double
293

294
#ifdef WANT_SINGLE_PRECISION_REAL
Andreas Marek's avatar
Andreas Marek committed
295 296
#define REALCASE 1
#define SINGLE_PRECISION
297
#include "../../general/precision_macros.h"
298

299
!> \brief  cholesky_real_single: Cholesky factorization of a single-precision real symmetric matrix
300
!> \details
301
!>
302
!> \param  na                   Order of matrix
303
!> \param  a(lda,matrixCols)    Distributed matrix which should be factorized.
304
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
305
!>                              Only upper triangle needs to be set.
306 307
!>                              On return, the upper triangle contains the Cholesky factor
!>                              and the lower triangle is set to 0.
308 309
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
Andreas Marek's avatar
Andreas Marek committed
310
!> \param  matrixCols           local columns of matrix a
311 312 313 314 315
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
!> \result success              logical, reports success or failure

316
    function elpa_cholesky_real_single(na, a, lda, nblk, matrixCols, &
                                       mpi_comm_rows, mpi_comm_cols, wantDebug) &
                                       result(success)
      ! Argument declarations and all executable statements are supplied by the
      ! included Cholesky template, which is also included by the other
      ! Cholesky functions of this module. Presumably the preprocessor settings
      ! made earlier in this guarded section select the real single-precision variant.
#include "./elpa_cholesky_template.X90"
    end function elpa_cholesky_real_single
320

321
#endif /* WANT_SINGLE_PRECISION_REAL */
322

Andreas Marek's avatar
Andreas Marek committed
323 324
#define REALCASE 1
#define DOUBLE_PRECISION
325
#include "../../general/precision_macros.h"
326
!> \brief  elpa_invert_trm_real_double: Inverts a double-precision real upper triangular matrix
327
!> \details
328 329 330
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be inverted
!>                              Distribution is like in Scalapack.
Andreas Marek's avatar
Andreas Marek committed
331
!>                              Only upper triangle needs to be set.
332 333 334
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
Andreas Marek's avatar
Andreas Marek committed
335
!> \param  matrixCols           local columns of matrix a
336 337 338
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
339
!> \result success              logical, reports success or failure
340
     function elpa_invert_trm_real_double(na, a, lda, nblk, matrixCols, &
                                          mpi_comm_rows, mpi_comm_cols, wantDebug) &
                                          result(success)
       ! Argument declarations and all executable statements are supplied by the
       ! included triangular-inversion template, which is also included by the
       ! other inversion functions of this module. Presumably the preprocessor
       ! settings made just before the header comment select the real
       ! double-precision variant.
#include "./elpa_invert_trm.X90"
     end function elpa_invert_trm_real_double
343 344

#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#define SINGLE_PRECISION
#include "../../general/precision_macros.h"

!> \brief  elpa_invert_trm_real_single: Inverts a single-precision real upper triangular matrix
!> \details
!>         This function is only built when the single-precision real variant is enabled;
!>         the guard is tested the same way as the guard around the matching public statement.
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be inverted
!>                              Distribution is like in Scalapack.
!>                              Only upper triangle needs to be set.
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
!> \param  matrixCols           local columns of matrix a
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param  wantDebug            logical, more debug information on failure
!> \result success              logical, reports success or failure
    function elpa_invert_trm_real_single(na, a, lda, nblk, matrixCols, &
                                         mpi_comm_rows, mpi_comm_cols, wantDebug) &
                                         result(success)
      ! Argument declarations and all executable statements are supplied by the
      ! included triangular-inversion template, which is also included by the
      ! other inversion functions of this module.
#include "./elpa_invert_trm.X90"
    end function elpa_invert_trm_real_single

#endif /* WANT_SINGLE_PRECISION_REAL */

369

Andreas Marek's avatar
Andreas Marek committed
370 371
#define COMPLEXCASE 1
#define DOUBLE_PRECISION
372
#include "../../general/precision_macros.h"
Andreas Marek's avatar
Andreas Marek committed
373

374
!> \brief  elpa_cholesky_complex_double: Cholesky factorization of a double-precision complex hermitian matrix
375 376 377 378
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be factorized.
!>                              Distribution is like in Scalapack.
379
!>                              Only upper triangle needs to be set.
380 381 382 383 384 385 386 387
!>                              On return, the upper triangle contains the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param                       matrixCols  local columns of matrix a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
388 389
!> \result success              logical, reports success or failure
    function elpa_cholesky_complex_double(na, a, lda, nblk, matrixCols, &
                                          mpi_comm_rows, mpi_comm_cols, wantDebug) &
                                          result(success)
      ! Argument declarations and all executable statements are supplied by the
      ! included Cholesky template, which is also included by the other
      ! Cholesky functions of this module. Presumably the preprocessor settings
      ! made just before the header comment select the complex double-precision
      ! variant.
#include "./elpa_cholesky_template.X90"
    end function elpa_cholesky_complex_double
392

Andreas Marek's avatar
Andreas Marek committed
393

394
#ifdef WANT_SINGLE_PRECISION_COMPLEX
Andreas Marek's avatar
Andreas Marek committed
395 396
#define COMPLEXCASE 1
#define SINGLE_PRECISION
397
#include "../../general/precision_macros.h"
Andreas Marek's avatar
Andreas Marek committed
398

399
!> \brief  elpa_cholesky_complex_single: Cholesky factorization of a single-precision complex hermitian matrix
400 401 402 403
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be factorized.
!>                              Distribution is like in Scalapack.
404
!>                              Only upper triangle needs to be set.
405 406 407 408 409 410 411 412
!>                              On return, the upper triangle contains the Cholesky factor
!>                              and the lower triangle is set to 0.
!> \param  lda                  Leading dimension of a
!> \param                       matrixCols  local columns of matrix a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
413 414
!> \result success              logical, reports success or failure
    function elpa_cholesky_complex_single(na, a, lda, nblk, matrixCols, &
                                          mpi_comm_rows, mpi_comm_cols, wantDebug) &
                                          result(success)
      ! Argument declarations and all executable statements are supplied by the
      ! included Cholesky template, which is also included by the other
      ! Cholesky functions of this module. Presumably the preprocessor settings
      ! made earlier in this guarded section select the complex single-precision
      ! variant.
#include "./elpa_cholesky_template.X90"
    end function elpa_cholesky_complex_single
417 418

#endif /* WANT_SINGLE_PRECISION_COMPLEX */
419

Andreas Marek's avatar
Andreas Marek committed
420 421
#define COMPLEXCASE 1
#define DOUBLE_PRECISION
422
#include "../../general/precision_macros.h"
423

424
!> \brief  elpa_invert_trm_complex_double: Inverts a double-precision complex upper triangular matrix
425 426 427 428
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be inverted
!>                              Distribution is like in Scalapack.
429
!>                              Only upper triangle needs to be set.
430 431 432 433 434 435 436
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param                       matrixCols  local columns of matrix a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
437 438
!> \result success              logical, reports success or failure

439
    function elpa_invert_trm_complex_double(na, a, lda, nblk, matrixCols, &
                                            mpi_comm_rows, mpi_comm_cols, wantDebug) &
                                            result(success)
      ! Argument declarations and all executable statements are supplied by the
      ! included triangular-inversion template, which is also included by the
      ! other inversion functions of this module. Presumably the preprocessor
      ! settings made just before the header comment select the complex
      ! double-precision variant.
#include "./elpa_invert_trm.X90"
    end function elpa_invert_trm_complex_double
442 443

#ifdef WANT_SINGLE_PRECISION_COMPLEX
Andreas Marek's avatar
Andreas Marek committed
444 445
#define COMPLEXCASE 1
#define SINGLE_PRECISION
446
#include "../../general/precision_macros.h"
Andreas Marek's avatar
Andreas Marek committed
447

448
!> \brief  elpa_invert_trm_complex_single: Inverts a single-precision complex upper triangular matrix
449 450 451 452
!> \details
!> \param  na                   Order of matrix
!> \param  a(lda,matrixCols)    Distributed matrix which should be inverted
!>                              Distribution is like in Scalapack.
453
!>                              Only upper triangle needs to be set.
454 455 456 457 458 459 460
!>                              The lower triangle is not referenced.
!> \param  lda                  Leading dimension of a
!> \param                       matrixCols  local columns of matrix a
!> \param  nblk                 blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param wantDebug             logical, more debug information on failure
461 462 463
!> \result success              logical, reports success or failure

    function elpa_invert_trm_complex_single(na, a, lda, nblk, matrixCols, &
                                            mpi_comm_rows, mpi_comm_cols, wantDebug) &
                                            result(success)
      ! Argument declarations and all executable statements are supplied by the
      ! included triangular-inversion template, which is also included by the
      ! other inversion functions of this module. Presumably the preprocessor
      ! settings made earlier in this guarded section select the complex
      ! single-precision variant.
#include "./elpa_invert_trm.X90"
    end function elpa_invert_trm_complex_single
466

467
#endif /* WANT_SINGLE_PRECISION_COMPLEX */
468

Andreas Marek's avatar
Andreas Marek committed
469 470
#define REALCASE 1
#define DOUBLE_PRECISION
471
#include "../../general/precision_macros.h"
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501
!> \brief  mult_at_b_real_double: Performs C : = A**T * B
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details

!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the transpose of A is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one shouldn't rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
502
!> \result success
503

504 505
    function elpa_mult_at_b_real_double(uplo_a, uplo_c, na, ncb, &
                                        a, lda, ldaCols, &
                                        b, ldb, ldbCols, &
                                        nblk, mpi_comm_rows, mpi_comm_cols, &
                                        c, ldc, ldcCols) result(success)
      ! Argument declarations and all executable statements are supplied by the
      ! included multiplication template.
      ! NOTE(review): ldaCols, ldbCols and ldcCols are not described in the
      ! header comment of this function; presumably they are the local column
      ! counts of a, b and c - TODO confirm against the template.
#include "./elpa_multiply_a_b.X90"
    end function elpa_mult_at_b_real_double
508 509

! Guard with #ifdef, like the other single-precision sections of this file, so
! that the test depends only on whether the macro is defined and not on its value.
#ifdef WANT_SINGLE_PRECISION_REAL
#define REALCASE 1
#define SINGLE_PRECISION
#include "../../general/precision_macros.h"
Andreas Marek's avatar
Andreas Marek committed
513

514
!> \brief  elpa_mult_at_b_real_single: Performs C : = A**T * B
515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
!> \details

!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the transpose of A is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one shouldn't rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
!> \param ldaCols               columns of matrix a
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
!> \param ldbCols               columns of matrix b
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
!> \param ldcCols               columns of matrix c
544
!> \result success
545

546 547
    function elpa_mult_at_b_real_single(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, nblk, &
                              mpi_comm_rows, mpi_comm_cols, c, ldc, ldcCols) result(success)
Andreas Marek's avatar
Andreas Marek committed
548

549
      ! Nothing is written inline here: every declaration and statement between
      ! the function statement and "end function" comes from the included file.
      ! The same include is used by the other elpa_mult_* functions in this file.
      ! NOTE(review): it is presumably specialised by the REALCASE and
      ! SINGLE_PRECISION macros defined before this function - confirm against
      ! elpa_multiply_a_b.X90 and precision_macros.h.
#include "./elpa_multiply_a_b.X90"
550

551
    end function elpa_mult_at_b_real_single
552

553 554
#endif /* WANT_SINGLE_PRECISION_REAL */

555

Andreas Marek's avatar
Andreas Marek committed
556 557
#define COMPLEXCASE 1
#define DOUBLE_PRECISION
558
#include "../../general/precision_macros.h"
Andreas Marek's avatar
Andreas Marek committed
559

560
!> \brief  elpa_mult_ah_b_complex_double: Performs C : = A**H * B
561 562 563 564
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
565
!> \details
566
!>
567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582
!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the conjugate transpose of A (A**H) is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one shouldn't rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
583
!> \param ldaCols               columns of matrix a
584 585
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
586
!> \param ldbCols               columns of matrix b
587 588 589 590 591
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
!> \param ldcCols               columns of matrix c
592
!> \result success
593

594 595
    function elpa_mult_ah_b_complex_double(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, nblk, &
                                 mpi_comm_rows, mpi_comm_cols, c, ldc, ldcCols) result(success)
596
      ! Nothing is written inline here: every declaration and statement between
      ! the function statement and "end function" comes from the included file.
      ! The same include is used by the other elpa_mult_* functions in this file.
      ! NOTE(review): it is presumably specialised by the COMPLEXCASE and
      ! DOUBLE_PRECISION macros defined before this function - confirm against
      ! elpa_multiply_a_b.X90 and precision_macros.h.
#include "./elpa_multiply_a_b.X90"
597

598
    end function elpa_mult_ah_b_complex_double
599

600
#ifdef WANT_SINGLE_PRECISION_COMPLEX
Andreas Marek's avatar
Andreas Marek committed
601 602
#define COMPLEXCASE 1
#define SINGLE_PRECISION
603
#include "../../general/precision_macros.h"
604

605
!> \brief  elpa_mult_ah_b_complex_single: Performs C : = A**H * B
606 607 608 609
!>         where   A is a square matrix (na,na) which is optionally upper or lower triangular
!>                 B is a (na,ncb) matrix
!>                 C is a (na,ncb) matrix where optionally only the upper or lower
!>                   triangle may be computed
610
!> \details
611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627
!>
!> \param  uplo_a               'U' if A is upper triangular
!>                              'L' if A is lower triangular
!>                              anything else if A is a full matrix
!>                              Please note: This pertains to the original A (as set in the calling program)
!>                                           whereas the conjugate transpose of A (A**H) is used for calculations
!>                              If uplo_a is 'U' or 'L', the other triangle is not used at all,
!>                              i.e. it may contain arbitrary numbers
!> \param uplo_c                'U' if only the upper diagonal part of C is needed
!>                              'L' if only the lower diagonal part of C is needed
!>                              anything else if the full matrix C is needed
!>                              Please note: Even when uplo_c is 'U' or 'L', the other triangle may be
!>                                            written to a certain extent, i.e. one shouldn't rely on the content there!
!> \param na                    Number of rows/columns of A, number of rows of B and C
!> \param ncb                   Number of columns  of B and C
!> \param a                     matrix a
!> \param lda                   leading dimension of matrix a
628
!> \param ldaCols               columns of matrix a
629 630
!> \param b                     matrix b
!> \param ldb                   leading dimension of matrix b
631
!> \param ldbCols               columns of matrix b
632 633 634 635 636
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param  mpi_comm_rows        MPI communicator for rows
!> \param  mpi_comm_cols        MPI communicator for columns
!> \param c                     matrix c
!> \param ldc                   leading dimension of matrix c
!> \param ldcCols               columns of matrix c
637
!> \result success
638

639 640
    function elpa_mult_ah_b_complex_single(uplo_a, uplo_c, na, ncb, a, lda, ldaCols, b, ldb, ldbCols, nblk, &
                                 mpi_comm_rows, mpi_comm_cols, c, ldc, ldcCols) result(success)
641

642
      ! Nothing is written inline here: every declaration and statement between
      ! the function statement and "end function" comes from the included file.
      ! The same include is used by the other elpa_mult_* functions in this file.
      ! NOTE(review): it is presumably specialised by the COMPLEXCASE and
      ! SINGLE_PRECISION macros defined before this function - confirm against
      ! elpa_multiply_a_b.X90 and precision_macros.h.
#include "./elpa_multiply_a_b.X90"
643

644
    end function elpa_mult_ah_b_complex_single
645 646 647

#endif /* WANT_SINGLE_PRECISION_COMPLEX */

648 649
#define REALCASE 1
#define DOUBLE_PRECISION
650
#include "../../general/precision_macros.h"
651 652

!> \brief  elpa_solve_tridi_double: Solve tridiagonal eigensystem for a double-precision matrix with divide and conquer method
653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
!> \details
!>
!> \param na                    Matrix dimension
!> \param nev                   number of eigenvalues/vectors to be computed
!> \param d                     array d(na) on input diagonal elements of tridiagonal matrix, on
!>                              output the eigenvalues in ascending order
!> \param e                     array e(na) on input subdiagonal elements of matrix, on exit destroyed
!> \param q                     on exit : matrix q(ldq,matrixCols) contains the eigenvectors
!> \param ldq                   leading dimension of matrix q
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param matrixCols            columns of matrix q
!> \param mpi_comm_rows         MPI communicator for rows
!> \param mpi_comm_cols         MPI communicator for columns
!> \param wantDebug             logical, give more debug information if .true.
!> \result success              logical, .true. on success, else .false.

669 670
    function elpa_solve_tridi_double(na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, wantDebug) &
          result(success)

      ! Nothing is written inline here: every declaration and statement between
      ! the function statement and "end function" comes from the included file,
      ! which is also used by the single-precision variant in this file.
      ! NOTE(review): it is presumably specialised by the REALCASE and
      ! DOUBLE_PRECISION macros defined before this function - confirm against
      ! elpa_solve_tridi.X90 and precision_macros.h.
#include "./elpa_solve_tridi.X90"

    end function elpa_solve_tridi_double
675 676


677
#ifdef WANT_SINGLE_PRECISION_REAL
678 679
#define REALCASE 1
#define SINGLE_PRECISION
680
#include "../../general/precision_macros.h"
681

682
!> \brief  elpa_solve_tridi_single: Solve tridiagonal eigensystem for a single-precision matrix with divide and conquer method
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
!> \details
!>
!> \param na                    Matrix dimension
!> \param nev                   number of eigenvalues/vectors to be computed
!> \param d                     array d(na) on input diagonal elements of tridiagonal matrix, on
!>                              output the eigenvalues in ascending order
!> \param e                     array e(na) on input subdiagonal elements of matrix, on exit destroyed
!> \param q                     on exit : matrix q(ldq,matrixCols) contains the eigenvectors
!> \param ldq                   leading dimension of matrix q
!> \param nblk                  blocksize of cyclic distribution, must be the same in both directions!
!> \param matrixCols            columns of matrix q
!> \param mpi_comm_rows         MPI communicator for rows
!> \param mpi_comm_cols         MPI communicator for columns
!> \param wantDebug             logical, give more debug information if .true.
!> \result success              logical, .true. on success, else .false.

699 700
    function elpa_solve_tridi_single(na, nev, d, e, q, ldq, nblk, matrixCols, mpi_comm_rows, &
                                     mpi_comm_cols, wantDebug) result(success)

      ! Nothing is written inline here: every declaration and statement between
      ! the function statement and "end function" comes from the included file,
      ! which is also used by the double-precision variant in this file.
      ! NOTE(review): it is presumably specialised by the REALCASE and
      ! SINGLE_PRECISION macros defined before this function - confirm against
      ! elpa_solve_tridi.X90 and precision_macros.h.
#include "./elpa_solve_tridi.X90"

    end function elpa_solve_tridi_single

706 707
#endif /* WANT_SINGLE_PRECISION_REAL */

708 709 710



711
end module elpa1_auxiliary