elpa_transpose_vectors.F90 8.44 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
14
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
15
16
17
18
19
20
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
21
!    http://elpa.mpcdf.mpg.de/
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
47
! Author: Andreas Marek, MPCDF
48
49
#endif

50
#include "config-f90.h"
51
#include "../general/sanity.F90"
52
#include "../general/error_checking.inc"
53

Andreas Marek's avatar
Andreas Marek committed
54
#undef ROUTINE_NAME
55
#ifdef SKEW_SYMMETRIC_BUILD
56
57
58
59
60
61
62
#define ROUTINE_NAME elpa_transpose_vectors_ss_
#else
#define ROUTINE_NAME elpa_transpose_vectors_
#endif


subroutine ROUTINE_NAME&
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
63
64
65
&MATH_DATATYPE&
&_&
&PRECISION &
Andreas Marek's avatar
Andreas Marek committed
66
(obj, vmat_s, ld_s, comm_s, vmat_t, ld_t, comm_t, nvs, nvr, nvc, nblk, nrThreads)
67

68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
!-------------------------------------------------------------------------------
! This routine transposes an array of vectors which are distributed in
! communicator comm_s into its transposed form distributed in communicator comm_t.
! There must be an identical copy of vmat_s in every communicator comm_s.
! After this routine, there is an identical copy of vmat_t in every communicator comm_t.
!
! vmat_s    original array of vectors
! ld_s      leading dimension of vmat_s
! comm_s    communicator over which vmat_s is distributed
! vmat_t    array of vectors in transposed form
! ld_t      leading dimension of vmat_t
! comm_t    communicator over which vmat_t is distributed
! nvs       global index where to start in vmat_s/vmat_t
!           Please note: this is kind of a hint, some values before nvs will be
!           accessed in vmat_s/put into vmat_t
! nvr       global length of vmat_s/vmat_t
! nvc       number of columns in vmat_s/vmat_t
! nblk      block size of block cyclic distribution
!
!-------------------------------------------------------------------------------
Andreas Marek's avatar
Andreas Marek committed
88
89
  use precision
  use elpa_abstract_impl
90
#ifdef WITH_OPENMP_TRADITIONAL
Andreas Marek's avatar
Andreas Marek committed
91
  use omp_lib
92
#endif
Andreas Marek's avatar
Andreas Marek committed
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
  use elpa_mpi

  implicit none
  class(elpa_abstract_impl_t), intent(inout) :: obj
  integer(kind=ik), intent(in)                      :: ld_s, comm_s, ld_t, comm_t, nvs, nvr, nvc, nblk
  MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in)   :: vmat_s(ld_s,nvc)
  MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout):: vmat_t(ld_t,nvc)

  MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable  :: aux(:)
  integer(kind=ik)                                  :: myps, mypt, nps, npt
  integer(kind=MPI_KIND)                            :: mypsMPI, myptMPI, npsMPI, nptMPI
  integer(kind=ik)                                  :: n, lc, k, i, ips, ipt, ns, nl
  integer(kind=MPI_KIND)                            :: mpierr
  integer(kind=ik)                                  :: lcm_s_t, nblks_tot, nblks_comm, nblks_skip
  integer(kind=ik)                                  :: auxstride
  integer(kind=ik), intent(in)                      :: nrThreads
  integer(kind=ik)                                  :: istat
  character(200)                                    :: errorMessage

  call obj%timer%start("&
          &ROUTINE_NAME&
  &MATH_DATATYPE&
  &" // &
  &PRECISION_SUFFIX &
  )

  call obj%timer%start("mpi_communication")
  call mpi_comm_rank(int(comm_s,kind=MPI_KIND),mypsMPI, mpierr)
  call mpi_comm_size(int(comm_s,kind=MPI_KIND),npsMPI ,mpierr)
  call mpi_comm_rank(int(comm_t,kind=MPI_KIND),myptMPI, mpierr)
  call mpi_comm_size(int(comm_t,kind=MPI_KIND),nptMPI ,mpierr)
  myps = int(mypsMPI,kind=c_int)
  nps = int(npsMPI,kind=c_int)
  mypt = int(myptMPI,kind=c_int)
  npt = int(nptMPI,kind=c_int)


  call obj%timer%stop("mpi_communication")
  ! The basic idea of this routine is that for every block (in the block cyclic
  ! distribution), the processor within comm_t which owns the diagonal
  ! broadcasts its values of vmat_s to all processors within comm_t.
  ! Of course this has not to be done for every block separately, since
  ! the communictation pattern repeats in the global matrix after
  ! the least common multiple of (nps,npt) blocks

  lcm_s_t   = least_common_multiple(nps,npt) ! least common multiple of nps, npt

  nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr

  ! Get the number of blocks to be skipped at the begin.
  ! This must be a multiple of lcm_s_t (else it is getting complicated),
  ! thus some elements before nvs will be accessed/set.

  nblks_skip = ((nvs-1)/(nblk*lcm_s_t))*lcm_s_t

  allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ), stat=istat, errmsg=errorMessage)
  check_allocate("elpa_transpose_vectors: aux", istat, errorMessage)
150
#ifdef WITH_OPENMP_TRADITIONAL
Andreas Marek's avatar
Andreas Marek committed
151
  !$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
Andreas Marek's avatar
Andreas Marek committed
152
#endif
Andreas Marek's avatar
Andreas Marek committed
153
  do n = 0, lcm_s_t-1
154

Andreas Marek's avatar
Andreas Marek committed
155
156
    ips = mod(n,nps)
    ipt = mod(n,npt)
157

Andreas Marek's avatar
Andreas Marek committed
158
    if (mypt == ipt) then
159

Andreas Marek's avatar
Andreas Marek committed
160
161
162
163
164
165
      nblks_comm = (nblks_tot-nblks_skip-n+lcm_s_t-1)/lcm_s_t
      auxstride = nblk * nblks_comm
!      if(nblks_comm==0) cycle
      if (nblks_comm .ne. 0) then
        if (myps == ips) then
!          k = 0
166
#ifdef WITH_OPENMP_TRADITIONAL
Andreas Marek's avatar
Andreas Marek committed
167
          !$omp do
Andreas Marek's avatar
Andreas Marek committed
168
#endif
Andreas Marek's avatar
Andreas Marek committed
169
170
171
172
173
174
175
          do lc=1,nvc
            do i = nblks_skip+n, nblks_tot-1, lcm_s_t
              k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
              ns = (i/nps)*nblk ! local start of block i
              nl = min(nvr-i*nblk,nblk) ! length
              aux(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
!              k = k+nblk
176
            enddo
Andreas Marek's avatar
Andreas Marek committed
177
178
          enddo
        endif
179

180
#ifdef WITH_OPENMP_TRADITIONAL
Andreas Marek's avatar
Andreas Marek committed
181
182
        !$omp barrier
        !$omp master
Andreas Marek's avatar
Andreas Marek committed
183
#endif
184

185
#ifdef WITH_MPI
186
        call obj%timer%start("mpi_communication")
187

188
        call MPI_Bcast(aux, int(nblks_comm*nblk*nvc,kind=MPI_KIND),    &
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
189
#if REALCASE == 1
Andreas Marek's avatar
Andreas Marek committed
190
                      MPI_REAL_PRECISION,    &
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
191
192
#endif
#if COMPLEXCASE == 1
Andreas Marek's avatar
Andreas Marek committed
193
                      MPI_COMPLEX_PRECISION, &
194
#endif
Andreas Marek's avatar
Andreas Marek committed
195
                      int(ips,kind=MPI_KIND), int(comm_s,kind=MPI_KIND), mpierr)
196
197


Andreas Marek's avatar
Andreas Marek committed
198
        call obj%timer%stop("mpi_communication")
199
200
#endif /* WITH_MPI */

201
#ifdef WITH_OPENMP_TRADITIONAL
Andreas Marek's avatar
Andreas Marek committed
202
203
        !$omp end master
        !$omp barrier
Andreas Marek's avatar
Andreas Marek committed
204

Andreas Marek's avatar
Andreas Marek committed
205
        !$omp do
Andreas Marek's avatar
Andreas Marek committed
206
#endif
Andreas Marek's avatar
Andreas Marek committed
207
208
209
210
211
212
!        k = 0
        do lc=1,nvc
          do i = nblks_skip+n, nblks_tot-1, lcm_s_t
            k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
            ns = (i/npt)*nblk ! local start of block i
            nl = min(nvr-i*nblk,nblk) ! length
213
#ifdef SKEW_SYMMETRIC_BUILD
Andreas Marek's avatar
Andreas Marek committed
214
            vmat_t(ns+1:ns+nl,lc) = - aux(k+1:k+nl)
215
#else
Andreas Marek's avatar
Andreas Marek committed
216
            vmat_t(ns+1:ns+nl,lc) = aux(k+1:k+nl)
217
#endif
Andreas Marek's avatar
Andreas Marek committed
218
219
220
!            k = k+nblk
          enddo
        enddo
221
      endif
Andreas Marek's avatar
Andreas Marek committed
222
    endif
223

Andreas Marek's avatar
Andreas Marek committed
224
  enddo
225
#ifdef WITH_OPENMP_TRADITIONAL
Andreas Marek's avatar
Andreas Marek committed
226
  !$omp end parallel
Andreas Marek's avatar
Andreas Marek committed
227
#endif
Andreas Marek's avatar
Andreas Marek committed
228
229
230
231
232
233
234
235
236
  deallocate(aux, stat=istat, errmsg=errorMessage)
  check_deallocate("elpa_transpose_vectors: aux", istat, errorMessage)

  call obj%timer%stop("&
  &ROUTINE_NAME&
  &MATH_DATATYPE&
  &" // &
  &PRECISION_SUFFIX &
  )
237
238
239

end subroutine