elpa_cholesky_template.F90 10.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.

45
#include "../general/sanity.F90"
46
47
48
49
     use elpa1_compute
     use elpa_utilities
     use elpa_mpi
     use precision
50
     use elpa_abstract_impl
51
     implicit none
Pavel Kus's avatar
Pavel Kus committed
52
#include "../general/precision_kinds.F90"
53
      class(elpa_abstract_impl_t), intent(inout) :: obj
54
      integer(kind=ik)              :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
55
#ifdef USE_ASSUMED_SIZE
Pavel Kus's avatar
Pavel Kus committed
56
      MATH_DATATYPE(kind=rck)      :: a(obj%local_nrows,*)
57
#else
Pavel Kus's avatar
Pavel Kus committed
58
      MATH_DATATYPE(kind=rck)      :: a(obj%local_nrows,obj%local_ncols)
59
60
61
62
63
64
65
#endif
      integer(kind=ik)              :: my_prow, my_pcol, np_rows, np_cols, mpierr
      integer(kind=ik)              :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
      integer(kind=ik)              :: n, nc, i, info
      integer(kind=ik)              :: lcs, lce, lrs, lre
      integer(kind=ik)              :: tile_size, l_rows_tile, l_cols_tile

Pavel Kus's avatar
Pavel Kus committed
66
      MATH_DATATYPE(kind=rck), allocatable    :: tmp1(:), tmp2(:,:), tmatr(:,:), tmatc(:,:)
67
      logical                       :: wantDebug
68
      logical                       :: success
69
      integer(kind=ik)              :: istat, debug
70
71
      character(200)                :: errorMessage

72
      call obj%timer%start("elpa_cholesky_&
73
74
      &MATH_DATATYPE&
      &_&
75
      &PRECISION&
76
      &")
77

78
79
80
81
82
      na         = obj%na
      lda        = obj%local_nrows
      nblk       = obj%nblk
      matrixCols = obj%local_ncols

83
84
      call obj%get("mpi_comm_rows",mpi_comm_rows )
      call obj%get("mpi_comm_cols",mpi_comm_cols)
85

86
87
      call obj%get("debug",debug)
      if (debug == 1) then
88
89
90
91
92
        wantDebug = .true.
      else
        wantDebug = .false.
      endif

93
      call obj%timer%start("mpi_communication")
94
95
96
97
      call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
      call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
      call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
      call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
98
      call obj%timer%stop("mpi_communication")
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
      success = .true.

      ! Matrix is split into tiles; work is done only for tiles on the diagonal or above

      tile_size = nblk*least_common_multiple(np_rows,np_cols) ! minimum global tile size
      tile_size = ((128*max(np_rows,np_cols)-1)/tile_size+1)*tile_size ! make local tiles at least 128 wide

      l_rows_tile = tile_size/np_rows ! local rows of a tile
      l_cols_tile = tile_size/np_cols ! local cols of a tile

      l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
      l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a

      allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
115
  &MATH_DATATYPE&: error when allocating tmp1 "//errorMessage
116
117
118
119
120
121
        stop 1
      endif

      allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
122
123
  &MATH_DATATYPE&
  &: error when allocating tmp2 "//errorMessage
124
125
126
127
128
129
130
131
132
        stop 1
      endif

      tmp1 = 0
      tmp2 = 0

      allocate(tmatr(l_rows,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
133
134
  &MATH_DATATYPE&
  &: error when allocating tmatr "//errorMessage
135
136
137
138
139
140
        stop 1
      endif

      allocate(tmatc(l_cols,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
141
142
  &MATH_DATATYPE&
  &: error when allocating tmatc "//errorMessage
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
        stop 1
      endif

      tmatr = 0
      tmatc = 0

      do n = 1, na, nblk
        ! Calculate first local row and column of the still remaining matrix
        ! on the local processor

        l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
        l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)

        l_rowx = local_index(n+nblk, my_prow, np_rows, nblk, +1)
        l_colx = local_index(n+nblk, my_pcol, np_cols, nblk, +1)

        if (n+nblk > na) then

          ! This is the last step, just do a Cholesky-Factorization
          ! of the remaining block

          if (my_prow==prow(n, nblk, np_rows) .and. my_pcol==pcol(n, nblk, np_cols)) then
165
            call obj%timer%start("blas")
166
167

            call PRECISION_POTRF('U', na-n+1, a(l_row1,l_col1), lda, info)
168
            call obj%timer%stop("blas")
169
170
171

            if (info/=0) then
              if (wantDebug) write(error_unit,*) "elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
172
        &MATH_DATATYPE&
173
174

#if REALCASE == 1
Andreas Marek's avatar
Retab  
Andreas Marek committed
175
        &: Error in dpotrf: ",info
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#endif
#if COMPLEXCASE == 1
              &: Error in zpotrf: ",info
#endif
              success = .false.
              return
            endif

          endif

          exit ! Loop

        endif

        if (my_prow==prow(n, nblk, np_rows)) then

          if (my_pcol==pcol(n, nblk, np_cols)) then

            ! The process owning the upper left remaining block does the
            ! Cholesky-Factorization of this block
196
            call obj%timer%start("blas")
197
198

            call PRECISION_POTRF('U', nblk, a(l_row1,l_col1), lda, info)
199
            call obj%timer%stop("blas")
200
201
202

            if (info/=0) then
              if (wantDebug) write(error_unit,*) "elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
203
        &MATH_DATATYPE&
204
205

#if REALCASE == 1
Andreas Marek's avatar
Retab  
Andreas Marek committed
206
        &: Error in dpotrf 2: ",info
207
208
#endif
#if COMPLEXCASE == 1
Andreas Marek's avatar
Retab  
Andreas Marek committed
209
        &: Error in zpotrf 2: ",info
210
211
212
213
214
215
216
217
218
219
220
221
222

#endif
              success = .false.
              return
            endif

            nc = 0
            do i=1,nblk
              tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
              nc = nc+i
            enddo
          endif
#ifdef WITH_MPI
223
          call obj%timer%start("mpi_communication")
224
225
226

          call MPI_Bcast(tmp1, nblk*(nblk+1)/2,      &
#if REALCASE == 1
Andreas Marek's avatar
Retab  
Andreas Marek committed
227
                   MPI_REAL_PRECISION,         &
228
229
230
231
#endif
#if COMPLEXCASE == 1
                         MPI_COMPLEX_PRECISION,      &
#endif
Andreas Marek's avatar
Retab  
Andreas Marek committed
232
       pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
233

234
          call obj%timer%stop("mpi_communication")
235
236
237
238
239
240
241
242

#endif /* WITH_MPI */
          nc = 0
          do i=1,nblk
            tmp2(1:i,i) = tmp1(nc+1:nc+i)
            nc = nc+i
          enddo

243
          call obj%timer%start("blas")
244
          if (l_cols-l_colx+1>0) &
Pavel Kus's avatar
Pavel Kus committed
245
246
              call PRECISION_TRSM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', nblk, l_cols-l_colx+1, ONE, tmp2, &
                            ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
247
          call obj%timer%stop("blas")
248
249
250
251
252
253
254
255
256
257
258
259
260
        endif

        do i=1,nblk

#if REALCASE == 1
          if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = a(l_row1+i-1,l_colx:l_cols)
#endif
#if COMPLEXCASE == 1
          if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = conjg(a(l_row1+i-1,l_colx:l_cols))
#endif

#ifdef WITH_MPI

261
          call obj%timer%start("mpi_communication")
262
          if (l_cols-l_colx+1>0) &
Pavel Kus's avatar
Pavel Kus committed
263
264
            call MPI_Bcast(tmatc(l_colx,i), l_cols-l_colx+1, MPI_MATH_DATATYPE_PRECISION, &
                           prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
265

266
          call obj%timer%stop("mpi_communication")
267
268
269
270
#endif /* WITH_MPI */
        enddo
        ! this has to be checked since it was changed substantially when doing type safe
        call elpa_transpose_vectors_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
271
272
273
274
  &MATH_DATATYPE&
  &_&
  &PRECISION &
                 (obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
275
276
277
278
279
280
281
282
283
                                      tmatr, ubound(tmatr,dim=1), mpi_comm_rows, &
                                      n, na, nblk, nblk)

        do i=0,(na-1)/tile_size
          lcs = max(l_colx,i*l_cols_tile+1)
          lce = min(l_cols,(i+1)*l_cols_tile)
          lrs = l_rowx
          lre = min(l_rows,(i+1)*l_rows_tile)
          if (lce<lcs .or. lre<lrs) cycle
284
          call obj%timer%start("blas")
Pavel Kus's avatar
Pavel Kus committed
285
          call PRECISION_GEMM('N', BLAS_TRANS_OR_CONJ, lre-lrs+1, lce-lcs+1, nblk, -ONE,  &
286
                              tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
Pavel Kus's avatar
Pavel Kus committed
287
                              ONE, a(lrs,lcs), lda)
288
          call obj%timer%stop("blas")
289
290
291
292
293
294
295
296

        enddo

      enddo

      deallocate(tmp1, tmp2, tmatr, tmatc, stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
297
298
  &MATH_DATATYPE&
  &: error when deallocating tmp1 "//errorMessage
299
300
301
302
303
304
305
306
307
308
309
310
311
        stop 1
      endif

      ! Set the lower triangle to 0, it contains garbage (form the above matrix multiplications)

      do i=1,na
        if (my_pcol==pcol(i, nblk, np_cols)) then
          ! column i is on local processor
          l_col1 = local_index(i  , my_pcol, np_cols, nblk, +1) ! local column number
          l_row1 = local_index(i+1, my_prow, np_rows, nblk, +1) ! first row below diagonal
          a(l_row1:l_rows,l_col1) = 0
        endif
      enddo
312
      call obj%timer%stop("elpa_cholesky_&
313
314
315
      &MATH_DATATYPE&
      &_&
      &PRECISION&
316
      &")