elpa_cholesky_template.F90 11 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.

45
#include "../general/sanity.F90"
46 47 48 49
     use elpa1_compute
     use elpa_utilities
     use elpa_mpi
     use precision
50
     use elpa_abstract_impl
Andreas Marek's avatar
Andreas Marek committed
51 52 53
#ifdef WITH_OPENMP
     use omp_lib
#endif
54
     implicit none
Pavel Kus's avatar
Pavel Kus committed
55
#include "../general/precision_kinds.F90"
56
      class(elpa_abstract_impl_t), intent(inout) :: obj
57
      integer(kind=ik)              :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
58
#ifdef USE_ASSUMED_SIZE
Pavel Kus's avatar
Pavel Kus committed
59
      MATH_DATATYPE(kind=rck)      :: a(obj%local_nrows,*)
60
#else
Pavel Kus's avatar
Pavel Kus committed
61
      MATH_DATATYPE(kind=rck)      :: a(obj%local_nrows,obj%local_ncols)
62 63 64 65 66 67 68
#endif
      integer(kind=ik)              :: my_prow, my_pcol, np_rows, np_cols, mpierr
      integer(kind=ik)              :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
      integer(kind=ik)              :: n, nc, i, info
      integer(kind=ik)              :: lcs, lce, lrs, lre
      integer(kind=ik)              :: tile_size, l_rows_tile, l_cols_tile

Pavel Kus's avatar
Pavel Kus committed
69
      MATH_DATATYPE(kind=rck), allocatable    :: tmp1(:), tmp2(:,:), tmatr(:,:), tmatc(:,:)
70
      logical                       :: wantDebug
71
      logical                       :: success
Andreas Marek's avatar
Andreas Marek committed
72
      integer(kind=ik)              :: istat, debug, error
73
      character(200)                :: errorMessage
74
      integer(kind=ik)              :: nrThreads
75

76
      call obj%timer%start("elpa_cholesky_&
77 78
      &MATH_DATATYPE&
      &_&
79
      &PRECISION&
80
      &")
81

Andreas Marek's avatar
Andreas Marek committed
82
#ifdef WITH_OPENMP
83
      nrThreads=omp_get_max_threads()
Andreas Marek's avatar
Andreas Marek committed
84
#else
85
      nrThreads=1
Andreas Marek's avatar
Andreas Marek committed
86 87
#endif

88 89 90 91 92
      na         = obj%na
      lda        = obj%local_nrows
      nblk       = obj%nblk
      matrixCols = obj%local_ncols

Andreas Marek's avatar
Andreas Marek committed
93 94 95 96 97 98 99 100 101 102
      call obj%get("mpi_comm_rows",mpi_comm_rows,error )
      if (error .ne. ELPA_OK) then
        print *,"Problem getting option. Aborting..."
        stop
      endif
      call obj%get("mpi_comm_cols",mpi_comm_cols,error)
      if (error .ne. ELPA_OK) then
        print *,"Problem getting option. Aborting..."
        stop
      endif
103

Andreas Marek's avatar
Andreas Marek committed
104 105 106 107 108
      call obj%get("debug",debug,error)
      if (error .ne. ELPA_OK) then
        print *,"Problem getting option. Aborting..."
        stop
      endif
109
      if (debug == 1) then
110 111 112 113 114
        wantDebug = .true.
      else
        wantDebug = .false.
      endif

115
      call obj%timer%start("mpi_communication")
116 117 118 119
      call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
      call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
      call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
      call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
120
      call obj%timer%stop("mpi_communication")
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
      success = .true.

      ! Matrix is split into tiles; work is done only for tiles on the diagonal or above

      tile_size = nblk*least_common_multiple(np_rows,np_cols) ! minimum global tile size
      tile_size = ((128*max(np_rows,np_cols)-1)/tile_size+1)*tile_size ! make local tiles at least 128 wide

      l_rows_tile = tile_size/np_rows ! local rows of a tile
      l_cols_tile = tile_size/np_cols ! local cols of a tile

      l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
      l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a

      allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
137
  &MATH_DATATYPE&: error when allocating tmp1 "//errorMessage
138 139 140 141 142 143
        stop 1
      endif

      allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
144 145
  &MATH_DATATYPE&
  &: error when allocating tmp2 "//errorMessage
146 147 148 149 150 151 152 153 154
        stop 1
      endif

      tmp1 = 0
      tmp2 = 0

      allocate(tmatr(l_rows,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
155 156
  &MATH_DATATYPE&
  &: error when allocating tmatr "//errorMessage
157 158 159 160 161 162
        stop 1
      endif

      allocate(tmatc(l_cols,nblk), stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
163 164
  &MATH_DATATYPE&
  &: error when allocating tmatc "//errorMessage
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
        stop 1
      endif

      tmatr = 0
      tmatc = 0

      do n = 1, na, nblk
        ! Calculate first local row and column of the still remaining matrix
        ! on the local processor

        l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
        l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)

        l_rowx = local_index(n+nblk, my_prow, np_rows, nblk, +1)
        l_colx = local_index(n+nblk, my_pcol, np_cols, nblk, +1)

        if (n+nblk > na) then

          ! This is the last step, just do a Cholesky-Factorization
          ! of the remaining block

          if (my_prow==prow(n, nblk, np_rows) .and. my_pcol==pcol(n, nblk, np_cols)) then
187
            call obj%timer%start("blas")
188 189

            call PRECISION_POTRF('U', na-n+1, a(l_row1,l_col1), lda, info)
190
            call obj%timer%stop("blas")
191 192 193

            if (info/=0) then
              if (wantDebug) write(error_unit,*) "elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
194
        &MATH_DATATYPE&
195 196

#if REALCASE == 1
Andreas Marek's avatar
Retab  
Andreas Marek committed
197
        &: Error in dpotrf: ",info
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
#endif
#if COMPLEXCASE == 1
              &: Error in zpotrf: ",info
#endif
              success = .false.
              return
            endif

          endif

          exit ! Loop

        endif

        if (my_prow==prow(n, nblk, np_rows)) then

          if (my_pcol==pcol(n, nblk, np_cols)) then

            ! The process owning the upper left remaining block does the
            ! Cholesky-Factorization of this block
218
            call obj%timer%start("blas")
219 220

            call PRECISION_POTRF('U', nblk, a(l_row1,l_col1), lda, info)
221
            call obj%timer%stop("blas")
222 223 224

            if (info/=0) then
              if (wantDebug) write(error_unit,*) "elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
225
        &MATH_DATATYPE&
226 227

#if REALCASE == 1
Andreas Marek's avatar
Retab  
Andreas Marek committed
228
        &: Error in dpotrf 2: ",info
229 230
#endif
#if COMPLEXCASE == 1
Andreas Marek's avatar
Retab  
Andreas Marek committed
231
        &: Error in zpotrf 2: ",info
232 233 234 235 236 237 238 239 240 241 242 243 244

#endif
              success = .false.
              return
            endif

            nc = 0
            do i=1,nblk
              tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
              nc = nc+i
            enddo
          endif
#ifdef WITH_MPI
245
          call obj%timer%start("mpi_communication")
246 247 248

          call MPI_Bcast(tmp1, nblk*(nblk+1)/2,      &
#if REALCASE == 1
Andreas Marek's avatar
Retab  
Andreas Marek committed
249
                   MPI_REAL_PRECISION,         &
250 251 252 253
#endif
#if COMPLEXCASE == 1
                         MPI_COMPLEX_PRECISION,      &
#endif
Andreas Marek's avatar
Retab  
Andreas Marek committed
254
       pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
255

256
          call obj%timer%stop("mpi_communication")
257 258 259 260 261 262 263 264

#endif /* WITH_MPI */
          nc = 0
          do i=1,nblk
            tmp2(1:i,i) = tmp1(nc+1:nc+i)
            nc = nc+i
          enddo

265
          call obj%timer%start("blas")
266
          if (l_cols-l_colx+1>0) &
Pavel Kus's avatar
Pavel Kus committed
267 268
              call PRECISION_TRSM('L', 'U', BLAS_TRANS_OR_CONJ, 'N', nblk, l_cols-l_colx+1, ONE, tmp2, &
                            ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
269
          call obj%timer%stop("blas")
270 271 272 273 274 275 276 277 278 279 280 281 282
        endif

        do i=1,nblk

#if REALCASE == 1
          if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = a(l_row1+i-1,l_colx:l_cols)
#endif
#if COMPLEXCASE == 1
          if (my_prow==prow(n, nblk, np_rows)) tmatc(l_colx:l_cols,i) = conjg(a(l_row1+i-1,l_colx:l_cols))
#endif

#ifdef WITH_MPI

283
          call obj%timer%start("mpi_communication")
284
          if (l_cols-l_colx+1>0) &
Pavel Kus's avatar
Pavel Kus committed
285 286
            call MPI_Bcast(tmatc(l_colx,i), l_cols-l_colx+1, MPI_MATH_DATATYPE_PRECISION, &
                           prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
287

288
          call obj%timer%stop("mpi_communication")
289 290 291 292
#endif /* WITH_MPI */
        enddo
        ! this has to be checked since it was changed substantially when doing type safe
        call elpa_transpose_vectors_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
293 294 295 296
  &MATH_DATATYPE&
  &_&
  &PRECISION &
                 (obj, tmatc, ubound(tmatc,dim=1), mpi_comm_cols, &
297
                                      tmatr, ubound(tmatr,dim=1), mpi_comm_rows, &
298
                                      n, na, nblk, nblk, nrThreads)
299 300 301 302 303 304 305

        do i=0,(na-1)/tile_size
          lcs = max(l_colx,i*l_cols_tile+1)
          lce = min(l_cols,(i+1)*l_cols_tile)
          lrs = l_rowx
          lre = min(l_rows,(i+1)*l_rows_tile)
          if (lce<lcs .or. lre<lrs) cycle
306
          call obj%timer%start("blas")
Pavel Kus's avatar
Pavel Kus committed
307
          call PRECISION_GEMM('N', BLAS_TRANS_OR_CONJ, lre-lrs+1, lce-lcs+1, nblk, -ONE,  &
308
                              tmatr(lrs,1), ubound(tmatr,dim=1), tmatc(lcs,1), ubound(tmatc,dim=1), &
Pavel Kus's avatar
Pavel Kus committed
309
                              ONE, a(lrs,lcs), lda)
310
          call obj%timer%stop("blas")
311 312 313 314 315 316 317 318

        enddo

      enddo

      deallocate(tmp1, tmp2, tmatr, tmatc, stat=istat, errmsg=errorMessage)
      if (istat .ne. 0) then
        print *,"elpa_cholesky_&
Andreas Marek's avatar
Retab  
Andreas Marek committed
319 320
  &MATH_DATATYPE&
  &: error when deallocating tmp1 "//errorMessage
321 322 323 324 325 326 327 328 329 330 331 332 333
        stop 1
      endif

      ! Set the lower triangle to 0, it contains garbage (form the above matrix multiplications)

      do i=1,na
        if (my_pcol==pcol(i, nblk, np_cols)) then
          ! column i is on local processor
          l_col1 = local_index(i  , my_pcol, np_cols, nblk, +1) ! local column number
          l_row1 = local_index(i+1, my_prow, np_rows, nblk, +1) ! first row below diagonal
          a(l_row1:l_rows,l_col1) = 0
        endif
      enddo
334
      call obj%timer%stop("elpa_cholesky_&
335 336 337
      &MATH_DATATYPE&
      &_&
      &PRECISION&
338
      &")