elpa_transpose_vectors.X90 7.42 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
14
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
15
16
17
18
19
20
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
21
!    http://elpa.mpcdf.mpg.de/
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
47
! Author: Andreas Marek, MPCDF
48
49
#endif

50
51
#include "config-f90.h"

Andreas Marek's avatar
Cleanup    
Andreas Marek committed
52
53
54
55
56
subroutine elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
             (vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvs,nvr,nvc,nblk)
57

58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
!-------------------------------------------------------------------------------
! This routine transposes an array of vectors which are distributed in
! communicator comm_s into its transposed form distributed in communicator comm_t.
! There must be an identical copy of vmat_s in every communicator comm_s.
! After this routine, there is an identical copy of vmat_t in every communicator comm_t.
!
! vmat_s    original array of vectors
! ld_s      leading dimension of vmat_s
! comm_s    communicator over which vmat_s is distributed
! vmat_t    array of vectors in transposed form
! ld_t      leading dimension of vmat_t
! comm_t    communicator over which vmat_t is distributed
! nvs       global index where to start in vmat_s/vmat_t
!           Please note: this is kind of a hint, some values before nvs will be
!           accessed in vmat_s/put into vmat_t
! nvr       global length of vmat_s/vmat_t
! nvc       number of columns in vmat_s/vmat_t
! nblk      block size of block cyclic distribution
!
!-------------------------------------------------------------------------------
Andreas Marek's avatar
Andreas Marek committed
78
    use precision
79
80
#ifdef HAVE_DETAILED_TIMINGS
    use timings
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
81
82
#else
    use timings_dummy
83
#endif
84
!   use ELPA1 ! for least_common_multiple
Andreas Marek's avatar
Andreas Marek committed
85
86
#ifdef WITH_OPENMP
   use omp_lib
87
#endif
88
89
   use elpa_mpi

90
91
   implicit none

Andreas Marek's avatar
Andreas Marek committed
92
   integer(kind=ik), intent(in)              :: ld_s, comm_s, ld_t, comm_t, nvs, nvr, nvc, nblk
93
94
   MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in)                      :: vmat_s(ld_s,nvc)
   MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout)                   :: vmat_t(ld_t,nvc)
95

96
   MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable                     :: aux(:)
Andreas Marek's avatar
Andreas Marek committed
97
98
99
100
   integer(kind=ik)                          :: myps, mypt, nps, npt
   integer(kind=ik)                          :: n, lc, k, i, ips, ipt, ns, nl, mpierr
   integer(kind=ik)                          :: lcm_s_t, nblks_tot, nblks_comm, nblks_skip
   integer(kind=ik)                          :: auxstride
101

Andreas Marek's avatar
Cleanup    
Andreas Marek committed
102
103
104
105
106
   call timer%start("elpa_transpose_vectors_&
   &MATH_DATATYPE&
   &" // &
   &PRECISION_SUFFIX &
   )
107
108

   call timer%start("mpi_communication")
109
110
111
112
113
   call mpi_comm_rank(comm_s,myps,mpierr)
   call mpi_comm_size(comm_s,nps ,mpierr)
   call mpi_comm_rank(comm_t,mypt,mpierr)
   call mpi_comm_size(comm_t,npt ,mpierr)

114
   call timer%stop("mpi_communication")
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
   ! The basic idea of this routine is that for every block (in the block cyclic
   ! distribution), the processor within comm_t which owns the diagonal
   ! broadcasts its values of vmat_s to all processors within comm_t.
   ! Of course this has not to be done for every block separately, since
   ! the communictation pattern repeats in the global matrix after
   ! the least common multiple of (nps,npt) blocks

   lcm_s_t   = least_common_multiple(nps,npt) ! least common multiple of nps, npt

   nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr

   ! Get the number of blocks to be skipped at the begin.
   ! This must be a multiple of lcm_s_t (else it is getting complicated),
   ! thus some elements before nvs will be accessed/set.

   nblks_skip = ((nvs-1)/(nblk*lcm_s_t))*lcm_s_t

   allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
Andreas Marek's avatar
Andreas Marek committed
133
#ifdef WITH_OPENMP
Andreas Marek's avatar
Andreas Marek committed
134
135
   !$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
#endif
136
137
138
139
140
141
142
143
   do n = 0, lcm_s_t-1

      ips = mod(n,nps)
      ipt = mod(n,npt)

      if(mypt == ipt) then

         nblks_comm = (nblks_tot-nblks_skip-n+lcm_s_t-1)/lcm_s_t
Andreas Marek's avatar
Andreas Marek committed
144
145
146
         auxstride = nblk * nblks_comm
!         if(nblks_comm==0) cycle
         if (nblks_comm .ne. 0) then
147
         if(myps == ips) then
Andreas Marek's avatar
Andreas Marek committed
148
149
150
151
!            k = 0
#ifdef WITH_OPENMP
            !$omp do
#endif
152
153
            do lc=1,nvc
               do i = nblks_skip+n, nblks_tot-1, lcm_s_t
Andreas Marek's avatar
Andreas Marek committed
154
                  k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
155
156
157
                  ns = (i/nps)*nblk ! local start of block i
                  nl = min(nvr-i*nblk,nblk) ! length
                  aux(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
Andreas Marek's avatar
Andreas Marek committed
158
!                  k = k+nblk
159
160
161
162
               enddo
            enddo
         endif

Andreas Marek's avatar
Andreas Marek committed
163
164
165
166
#ifdef WITH_OPENMP
         !$omp barrier
         !$omp master
#endif
167

168
#ifdef WITH_MPI
169
        call timer%start("mpi_communication")
170

Andreas Marek's avatar
Cleanup    
Andreas Marek committed
171
172
173
174
175
176
        call MPI_Bcast(aux, nblks_comm*nblk*nvc,    &
#if REALCASE == 1
                       MPI_REAL_PRECISION,    &
#endif
#if COMPLEXCASE == 1
                       MPI_COMPLEX_PRECISION, &
177
#endif
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
178
                       ips, comm_s, mpierr)
179
180


181
         call timer%stop("mpi_communication")
182
183
#endif /* WITH_MPI */

Andreas Marek's avatar
Andreas Marek committed
184
185
186
187
188
189
190
#ifdef WITH_OPENMP
         !$omp end master
         !$omp barrier

         !$omp do
#endif
!         k = 0
191
192
         do lc=1,nvc
            do i = nblks_skip+n, nblks_tot-1, lcm_s_t
Andreas Marek's avatar
Andreas Marek committed
193
               k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
194
195
196
               ns = (i/npt)*nblk ! local start of block i
               nl = min(nvr-i*nblk,nblk) ! length
               vmat_t(ns+1:ns+nl,lc) = aux(k+1:k+nl)
Andreas Marek's avatar
Andreas Marek committed
197
!               k = k+nblk
198
199
            enddo
         enddo
Andreas Marek's avatar
Andreas Marek committed
200
         endif
201
202
203
      endif

   enddo
Andreas Marek's avatar
Andreas Marek committed
204
205
206
#ifdef WITH_OPENMP
   !$omp end parallel
#endif
207
   deallocate(aux)
208

Andreas Marek's avatar
Cleanup    
Andreas Marek committed
209
210
211
212
213
   call timer%stop("elpa_transpose_vectors_&
   &MATH_DATATYPE&
   &" // &
   &PRECISION_SUFFIX &
   )
214
215
216

end subroutine