elpa_cholesky_template.F90 11.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.

45
#include "../general/sanity.F90"
46
47
48
49
     use elpa1_compute
     use elpa_utilities
     use elpa_mpi
     use precision
50
     use elpa_abstract_impl
Andreas Marek's avatar
Andreas Marek committed
51
52
53
#ifdef WITH_OPENMP
     use omp_lib
#endif
54
     implicit none
Pavel Kus's avatar
Pavel Kus committed
55
#include "../general/precision_kinds.F90"
56
      class(elpa_abstract_impl_t), intent(inout) :: obj
57
      integer(kind=ik)              :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
58
#ifdef USE_ASSUMED_SIZE
Pavel Kus's avatar
Pavel Kus committed
59
      MATH_DATATYPE(kind=rck)      :: a(obj%local_nrows,*)
60
#else
Pavel Kus's avatar
Pavel Kus committed
61
      MATH_DATATYPE(kind=rck)      :: a(obj%local_nrows,obj%local_ncols)
62
63
64
65
66
67
68
#endif
      integer(kind=ik)              :: my_prow, my_pcol, np_rows, np_cols, mpierr
      integer(kind=ik)              :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
      integer(kind=ik)              :: n, nc, i, info
      integer(kind=ik)              :: lcs, lce, lrs, lre
      integer(kind=ik)              :: tile_size, l_rows_tile, l_cols_tile

Pavel Kus's avatar
Pavel Kus committed
69
      MATH_DATATYPE(kind=rck), allocatable    :: tmp1(:), tmp2(:,:), tmatr(:,:), tmatc(:,:)
70
      logical                       :: wantDebug
71
      logical                       :: success
Andreas Marek's avatar
Andreas Marek committed
72
      integer(kind=ik)              :: istat, debug, error
73
      character(200)                :: errorMessage
74
      integer(kind=ik)              :: nrThreads
75

76
      call obj%timer%start("elpa_cholesky_&
77
78
      &MATH_DATATYPE&
      &_&
79
      &PRECISION&
80
      &")
81

Andreas Marek's avatar
Andreas Marek committed
82
#ifdef WITH_OPENMP
Andreas Marek's avatar
Andreas Marek committed
83
84
85
      !nrThreads=omp_get_max_threads()
      call obj%get("omp_threads",nrThreads,error)
      call omp_set_num_threads(nrThreads)
Andreas Marek's avatar
Andreas Marek committed
86
#else
87
      nrThreads=1
Andreas Marek's avatar
Andreas Marek committed
88
89
#endif

90
91
92
93
94
      na         = obj%na
      lda        = obj%local_nrows
      nblk       = obj%nblk
      matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
95
96
97
98
99
100
101
102
103
104
      call obj%get("mpi_comm_rows",mpi_comm_rows,error )
      if (error .ne. ELPA_OK) then
        print *,"Problem getting option. Aborting..."
        stop
      endif
      call obj%get("mpi_comm_cols",mpi_comm_cols,error)
      if (error .ne. ELPA_OK) then
        print *,"Problem getting option. Aborting..."
        stop
      endif
105

Andreas Marek's avatar
Andreas Marek committed
106
107
108
109
110
      call obj%get("debug",debug,error)
      if (error .ne. ELPA_OK) then
        print *,"Problem getting option. Aborting..."
        stop
      endif
111
      if (debug == 1) then
112
113
114
115
116
        wantDebug = .true.
      else
        wantDebug = .false.
      endif

117
      call obj%timer%start("mpi_communication")
118
119
120
121
      call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
      call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
      call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
      call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
122
      call obj%timer%stop("mpi_communication")
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
      success = .true.

      ! Matrix is split into tiles; work is done only for tiles on the diagonal or above

      tile_size = nblk*least_common_multiple(np_rows,np_cols) ! minimum global tile size
      tile_size = ((128*max(np_rows,np_cols)-1)/tile_size+1)*tile_size ! make local tiles at least 128 wide

      l_rows_tile = tile_size/np_rows ! local rows of a tile
      l_cols_tile = tile_size/np_cols ! local cols of a tile

      l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
      l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a

      allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
139
  &MATH_DATATYPE&: error when allocating tmp1 "//errorMessage
140
141
142
143
144
145
        stop 1
      endif

      allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
146
147
  &MATH_DATATYPE&
  &: error when allocating tmp2 "//errorMessage
148
149
150
151
152
153
154
155
156
        stop 1
      endif

      tmp1 = 0
      tmp2 = 0

      allocate(tmatr(l_rows,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
157
158
  &MATH_DATATYPE&
  &: error when allocating tmatr "//errorMessage
159
160
161
162
163
164
        stop 1
      endif

      allocate(tmatc(l_cols,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
165
166
  &MATH_DATATYPE&
  &: error when allocating tmatc "//errorMessage
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
        stop 1
      endif

      tmatr = 0
      tmatc = 0

      do n = 1, na, nblk
        ! Calculate first local row and column of the still remaining matrix
        ! on the local processor

        l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
        l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)

        l_rowx = local_index(n+nblk, my_prow, np_rows, nblk, +1)
        l_colx = local_index(n+nblk, my_pcol, np_cols, nblk, +1)

        if (n+nblk > na) then

          ! This is the last step, just do a Cholesky-Factorization
          ! of the remaining block

          if (my_prow==prow(n, nblk, np_rows) .and. my_pcol==pcol(n, nblk, np_cols)) then
189
            call obj%timer%start("blas")
190
191

            call PRECISION_POTRF('U', na-n+1, a(l_row1,l_col1), lda, info)
192
            call obj%timer%stop("blas")
193
194
195

            if (info/=0) then
              if (wantDebug) write(error_unit,*) "elpa_cholesky_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
196
        &MATH_DATATYPE&
197
198

#if REALCASE == 1
Andreas Marek's avatar
Retab    
Andreas Marek committed
199
        &: Error in dpotrf: ",info
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#endif
#if COMPLEXCASE == 1
              &: Error in zpotrf: ",info
#endif
              success = .false.
              return
            endif

          endif

          exit ! Loop

        endif

        if (my_prow==prow(n, nblk, np_rows)) then

          if (my_pcol==pcol(n, nblk, np_cols)) then

            ! The process owning the upper left remaining block does the
            ! Cholesky-Factorization of this block
220
            call obj%timer%start("blas")
221
222

            call PRECISION_POTRF('U', nblk, a(l_row1,l_col1), lda, info)
223
            call obj%timer%stop("blas")
224
225
226

            if (info/=0) then
              if (wantDebug) write(error_unit,*) "elpa_cholesky_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
227
        &MATH_DATATYPE&
228
229

#if REALCASE == 1
Andreas Marek's avatar
Retab    
Andreas Marek committed
230
        &: Error in dpotrf 2: ",info
231
232
#endif
#if COMPLEXCASE == 1
Andreas Marek's avatar
Retab    
Andreas Marek committed
233
        &: Error in zpotrf 2: ",info
234
235
236
237
238
239
240
241
242
243
244
245
246

#endif
              success = .false.
              return
            endif

            nc = 0
            do i=1,nblk
              tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
              nc = nc+i
            enddo
          endif
#ifdef WITH_MPI
247
          call obj%timer%start("mpi_communication")
248
249
250

          call MPI_Bcast(tmp1, nblk*(nblk+1)/2,      &
#if REALCASE == 1
Andreas Marek's avatar
Retab    
Andreas Marek committed
251
                   MPI_REAL_PRECISION,         &
252
253
254
255
#endif
#if COMPLEXCASE == 1
                         MPI_COMPLEX_PRECISION,      &
#endif
Andreas Marek's avatar
Retab    
Andreas Marek committed
256
       pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
257

258
          call obj%timer%stop("mpi_communication")
259
260
261
262
263
264
265
266

#endif /* WITH_MPI */
          nc = 0
          do i=1,nblk
            tmp2(1:i,i) = tmp1(nc+1:nc+i)
            nc = nc+i
          enddo

267
          call obj%timer%start("blas")
268
          if (l_cols-l_colx+1>0) &
Pavel Kus's avatar
Pavel Kus committed
269
270
              call PRECISION_TRSM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', nblk, l_cols-l_colx+1, ONE, tmp2, &
                            ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
271
          call obj%timer%stop("blas")
272
273
274
275
276
277
278
279
280
281
282
283
284
        endif

        do i=1,nblk

#if REALCASE == 1
          if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = a(l_row1+i-1,l_colx:l_cols)
#endif
#if COMPLEXCASE == 1
          if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = conjg(a(l_row1+i-1,l_colx:l_cols))
#endif

#ifdef WITH_MPI

285
          call obj%timer%start("mpi_communication")
286
          if (l_cols-l_colx+1>0) &
Pavel Kus's avatar
Pavel Kus committed
287
288
            call MPI_Bcast(tmatc(l_colx,i), l_cols-l_colx+1, MPI_MATH_DATATYPE_PRECISION, &
                           prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
289

290
          call obj%timer%stop("mpi_communication")
291
292
293
294
#endif /* WITH_MPI */
        enddo
        ! this has to be checked since it was changed substantially when doing type safe
        call elpa_transpose_vectors_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
295
296
297
298
  &MATH_DATATYPE&
  &_&
  &PRECISION &
                 (obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
299
                                      tmatr, ubound(tmatr,dim=1), mpi_comm_rows, &
300
                                      n, na, nblk, nblk, nrThreads)
301
302
303
304
305
306
307

        do i=0,(na-1)/tile_size
          lcs = max(l_colx,i*l_cols_tile+1)
          lce = min(l_cols,(i+1)*l_cols_tile)
          lrs = l_rowx
          lre = min(l_rows,(i+1)*l_rows_tile)
          if (lce<lcs .or. lre<lrs) cycle
308
          call obj%timer%start("blas")
Pavel Kus's avatar
Pavel Kus committed
309
          call PRECISION_GEMM('N', BLAS_TRANS_OR_CONJ, lre-lrs+1, lce-lcs+1, nblk, -ONE,  &
310
                              tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
Pavel Kus's avatar
Pavel Kus committed
311
                              ONE, a(lrs,lcs), lda)
312
          call obj%timer%stop("blas")
313
314
315
316
317
318
319
320

        enddo

      enddo

      deallocate(tmp1, tmp2, tmatr, tmatc, stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
321
322
  &MATH_DATATYPE&
  &: error when deallocating tmp1 "//errorMessage
323
324
325
326
327
328
329
330
331
332
333
334
335
        stop 1
      endif

      ! Set the lower triangle to 0, it contains garbage (form the above matrix multiplications)

      do i=1,na
        if (my_pcol==pcol(i, nblk, np_cols)) then
          ! column i is on local processor
          l_col1 = local_index(i  , my_pcol, np_cols, nblk, +1) ! local column number
          l_row1 = local_index(i+1, my_prow, np_rows, nblk, +1) ! first row below diagonal
          a(l_row1:l_rows,l_col1) = 0
        endif
      enddo
336
      call obj%timer%stop("elpa_cholesky_&
337
338
339
      &MATH_DATATYPE&
      &_&
      &PRECISION&
340
      &")