elpa_invert_trm.F90 9.07 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!    This particular source code file contains additions, changes and
!    enhancements authored by Intel Corporation which is not part of
!    the ELPA consortium.
!
!    More information can be found here:
!    http://elpa.mpcdf.mpg.de/
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
!
! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".

53
#include "../general/sanity.F90"
54
55
56
57
58

       use precision
       use elpa1_compute
       use elpa_utilities
       use elpa_mpi
59
       use elpa_abstract_impl
60
       implicit none
Pavel Kus's avatar
Pavel Kus committed
61
#include "../general/precision_kinds.F90"
62
       class(elpa_abstract_impl_t), intent(inout) :: obj
63
64
       integer(kind=ik)             :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
#ifdef USE_ASSUMED_SIZE
Pavel Kus's avatar
Pavel Kus committed
65
       MATH_DATATYPE(kind=rck)     :: a(obj%local_nrows,*)
66
#else
Pavel Kus's avatar
Pavel Kus committed
67
       MATH_DATATYPE(kind=rck)     :: a(obj%local_nrows,obj%local_ncols)
68
69
70
71
72
#endif

       integer(kind=ik)             :: my_prow, my_pcol, np_rows, np_cols, mpierr
       integer(kind=ik)             :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
       integer(kind=ik)             :: n, nc, i, info, ns, nb
Pavel Kus's avatar
Pavel Kus committed
73
       MATH_DATATYPE(kind=rck), allocatable   :: tmp1(:), tmp2(:,:), tmat1(:,:), tmat2(:,:)
74
       logical                      :: wantDebug
75
       logical                      :: success
76
       integer(kind=ik)             :: istat, debug
77
78
       character(200)               :: errorMessage

79
      call obj%timer%start("elpa_invert_trm_&
Andreas Marek's avatar
Andreas Marek committed
80
81
82
83
84
      &MATH_DATATYPE&
      &_&
      &PRECISION&
      &")

85
86
87
88
89
       na         = obj%na
       lda        = obj%local_nrows
       nblk       = obj%nblk
       matrixCols = obj%local_ncols

90
91
       call obj%get("mpi_comm_rows",mpi_comm_rows)
       call obj%get("mpi_comm_cols",mpi_comm_cols)
92

93
94
       call obj%get("debug", debug)
       if (debug == 1) then
95
96
97
98
         wantDebug = .true.
       else
         wantDebug = .true.
       endif
99
       call obj%timer%start("mpi_communication")
100
101
102
103
       call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
       call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
       call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
       call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
104
       call obj%timer%stop("mpi_communication")
105
106
107
108
109
110
111
112
       success = .true.

       l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
       l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a

       allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
       if (istat .ne. 0) then
         print *,"elpa_invert_trm_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
113
114
   &MATH_DATATYPE&
   &: error when allocating tmp1 "//errorMessage
115
116
117
118
119
120
         stop 1
       endif

       allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
       if (istat .ne. 0) then
         print *,"elpa_invert_trm_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
121
122
   &MATH_DATATYPE&
   &: error when allocating tmp2 "//errorMessage
123
124
125
126
127
128
129
130
131
         stop 1
       endif

       tmp1 = 0
       tmp2 = 0

       allocate(tmat1(l_rows,nblk), stat=istat, errmsg=errorMessage)
       if (istat .ne. 0) then
         print *,"elpa_invert_trm_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
132
133
   &MATH_DATATYPE&
   &: error when allocating tmat1 "//errorMessage
134
135
136
137
138
139
         stop 1
       endif

       allocate(tmat2(nblk,l_cols), stat=istat, errmsg=errorMessage)
       if (istat .ne. 0) then
         print *,"elpa_invert_trm_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
140
141
   &MATH_DATATYPE&
   &: error when allocating tmat2 "//errorMessage
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
         stop 1
       endif

       tmat1 = 0
       tmat2 = 0


       ns = ((na-1)/nblk)*nblk + 1

       do n = ns,1,-nblk

         l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
         l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)

         nb = nblk
         if (na-n+1 < nblk) nb = na-n+1

         l_rowx = local_index(n+nb, my_prow, np_rows, nblk, +1)
         l_colx = local_index(n+nb, my_pcol, np_cols, nblk, +1)

         if (my_prow==prow(n, nblk, np_rows)) then

           if (my_pcol==pcol(n, nblk, np_cols)) then
165
             call obj%timer%start("blas")
166
167
168

             call PRECISION_TRTRI('U', 'N', nb, a(l_row1,l_col1), lda, info)

169
             call obj%timer%stop("blas")
170
171
172

             if (info/=0) then
               if (wantDebug) write(error_unit,*) "elpa_invert_trm_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
173
         &MATH_DATATYPE&
174
175

#if REALCASE == 1
Andreas Marek's avatar
Retab    
Andreas Marek committed
176
         &: Error in DTRTRI"
177
178
#endif
#if COMPLEXCASE == 1
Andreas Marek's avatar
Retab    
Andreas Marek committed
179
         &: Error in ZTRTRI"
180
181
182
#endif

               success = .false.
183
               call obj%timer%stop("elpa_invert_trm_&
Andreas Marek's avatar
Andreas Marek committed
184
185
186
187
               &MATH_DATATYPE&
               &_&
               &PRECISION&
               &")
188
189
190
191
192
193
194
195
196
197
               return
             endif

             nc = 0
             do i=1,nb
               tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
               nc = nc+i
             enddo
           endif
#ifdef WITH_MPI
198
           call obj%timer%start("mpi_communication")
Pavel Kus's avatar
Pavel Kus committed
199
           call MPI_Bcast(tmp1, nb*(nb+1)/2, MPI_MATH_DATATYPE_PRECISION,       &
200
                          pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
201
           call obj%timer%stop("mpi_communication")
202
203
204
205
206
207
208
#endif /* WITH_MPI */
           nc = 0
           do i=1,nb
             tmp2(1:i,i) = tmp1(nc+1:nc+i)
             nc = nc+i
           enddo

209
           call obj%timer%start("blas")
210
           if (l_cols-l_colx+1>0) &
Pavel Kus's avatar
Pavel Kus committed
211
        call PRECISION_TRMM('L', 'U', 'N', 'N', nb, l_cols-l_colx+1, ONE, &
212
                                   tmp2, ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
213
           call obj%timer%stop("blas")
214
215
216
217
218
219
220
221
222
223
224
225
226
           if (l_colx<=l_cols)   tmat2(1:nb,l_colx:l_cols) = a(l_row1:l_row1+nb-1,l_colx:l_cols)
           if (my_pcol==pcol(n, nblk, np_cols)) tmat2(1:nb,l_col1:l_col1+nb-1) = tmp2(1:nb,1:nb) ! tmp2 has the lower left triangle 0

         endif

         if (l_row1>1) then
           if (my_pcol==pcol(n, nblk, np_cols)) then
             tmat1(1:l_row1-1,1:nb) = a(1:l_row1-1,l_col1:l_col1+nb-1)
             a(1:l_row1-1,l_col1:l_col1+nb-1) = 0
           endif

           do i=1,nb
#ifdef WITH_MPI
227
             call obj%timer%start("mpi_communication")
Andreas Marek's avatar
Andreas Marek committed
228
             call MPI_Bcast(tmat1(1,i), l_row1-1, MPI_MATH_DATATYPE_PRECISION, &
229
230
                             pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)

231
             call obj%timer%stop("mpi_communication")
232
233
234
235
#endif /* WITH_MPI */
           enddo
         endif
#ifdef WITH_MPI
236
         call obj%timer%start("mpi_communication")
237
         if (l_cols-l_col1+1>0) &
Pavel Kus's avatar
Pavel Kus committed
238
      call MPI_Bcast(tmat2(1,l_col1), (l_cols-l_col1+1)*nblk, MPI_MATH_DATATYPE_PRECISION, &
239
240
                             prow(n, nblk, np_rows), mpi_comm_rows, mpierr)

241
          call obj%timer%stop("mpi_communication")
242
243
#endif /* WITH_MPI */

244
         call obj%timer%start("blas")
245
         if (l_row1>1 .and. l_cols-l_col1+1>0) &
Pavel Kus's avatar
Pavel Kus committed
246
247
           call PRECISION_GEMM('N', 'N', l_row1-1, l_cols-l_col1+1, nb, -ONE, &
                                tmat1, ubound(tmat1,dim=1), tmat2(1,l_col1), ubound(tmat2,dim=1), ONE, &
248
249
                                 a(1,l_col1), lda)

250
         call obj%timer%stop("blas")
251
252
253
254
255
256

       enddo

       deallocate(tmp1, tmp2, tmat1, tmat2, stat=istat, errmsg=errorMessage)
       if (istat .ne. 0) then
         print *,"elpa_invert_trm_&
Andreas Marek's avatar
Retab    
Andreas Marek committed
257
258
   &MATH_DATATYPE&
   &: error when deallocating tmp1 "//errorMessage
259
260
261
         stop 1
       endif

262
       call obj%timer%stop("elpa_invert_trm_&
Andreas Marek's avatar
Andreas Marek committed
263
264
265
266
       &MATH_DATATYPE&
       &_&
       &PRECISION&
       &")