elpa_reduce_add_vectors.X90 6.73 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
#if 0
!    This file is part of ELPA.
!
!    The ELPA library was originally created by the ELPA consortium,
!    consisting of the following organizations:
!
!    - Max Planck Computing and Data Facility (MPCDF), formerly known as
!      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
!    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
!      Informatik,
!    - Technische Universität München, Lehrstuhl für Informatik mit
!      Schwerpunkt Wissenschaftliches Rechnen ,
!    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
14
!    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
15
16
17
18
19
20
!      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
!      and
!    - IBM Deutschland GmbH
!
!
!    More information can be found here:
21
!    http://elpa.mpcdf.mpg.de/
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
!
!    ELPA is free software: you can redistribute it and/or modify
!    it under the terms of the version 3 of the license of the
!    GNU Lesser General Public License as published by the Free
!    Software Foundation.
!
!    ELPA is distributed in the hope that it will be useful,
!    but WITHOUT ANY WARRANTY; without even the implied warranty of
!    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
!    GNU Lesser General Public License for more details.
!
!    You should have received a copy of the GNU Lesser General Public License
!    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
!
!    ELPA reflects a substantial effort on the part of the original
!    ELPA consortium, and we ask you to respect the spirit of the
!    license that we chose: i.e., please contribute any changes you
!    may have back to the original ELPA library distribution, and keep
!    any derivatives of ELPA under the same license that we chose for
!    the original distribution, the GNU Lesser General Public License.
!
43
! Author: Andreas Marek, MPCDF
44
45
#endif

46
47
#include "config-f90.h"

Andreas Marek's avatar
Cleanup    
Andreas Marek committed
48
49
50
51
52
subroutine elpa_reduce_add_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
            (vmat_s,ld_s,comm_s,vmat_t,ld_t,comm_t,nvr,nvc,nblk)
53

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
!-------------------------------------------------------------------------------
! This routine does a reduce of all vectors in vmat_s over the communicator comm_t.
! The result of the reduce is gathered on the processors owning the diagonal
! and added to the array of vectors vmat_t (which is distributed over comm_t).
!
! Opposed to elpa_transpose_vectors, there is NO identical copy of vmat_s
! in the different members within vmat_t (else a reduce wouldn't be necessary).
! After this routine, an allreduce of vmat_t has to be done.
!
! vmat_s    array of vectors to be reduced and added
! ld_s      leading dimension of vmat_s
! comm_s    communicator over which vmat_s is distributed
! vmat_t    array of vectors to which vmat_s is added
! ld_t      leading dimension of vmat_t
! comm_t    communicator over which vmat_t is distributed
! nvr       global length of vmat_s/vmat_t
! nvc       number of columns in vmat_s/vmat_t
! nblk      block size of block cyclic distribution
!
!-------------------------------------------------------------------------------

Andreas Marek's avatar
Andreas Marek committed
75
   use precision
76
!   use ELPA1 ! for least_common_multiple
Andreas Marek's avatar
Andreas Marek committed
77
78
#ifdef WITH_OPENMP
   use omp_lib
79
80
81
#endif
#ifdef HAVE_DETAILED_TIMINGS
   use timings
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
82
83
#else
   use timings_dummy
Andreas Marek's avatar
Andreas Marek committed
84
#endif
85
   use elpa_mpi
86
87
88
   implicit none


Andreas Marek's avatar
Andreas Marek committed
89
   integer(kind=ik), intent(in)              :: ld_s, comm_s, ld_t, comm_t, nvr, nvc, nblk
90
91
   MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in)                      :: vmat_s(ld_s,nvc)
   MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout)                   :: vmat_t(ld_t,nvc)
92

93
   MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable                     :: aux1(:), aux2(:)
Andreas Marek's avatar
Andreas Marek committed
94
95
96
97
   integer(kind=ik)                          :: myps, mypt, nps, npt
   integer(kind=ik)                          :: n, lc, k, i, ips, ipt, ns, nl, mpierr
   integer(kind=ik)                          :: lcm_s_t, nblks_tot
   integer(kind=ik)                          :: auxstride, tylerk, error_unit
98

Andreas Marek's avatar
Cleanup    
Andreas Marek committed
99
100
101
102
103
   call timer%start("elpa_reduce_add_vectors_&
   &MATH_DATATYPE&
   &" // &
   &PRECISION_SUFFIX &
   )
104
105

   call timer%start("mpi_communication")
106
107
108
109
   call mpi_comm_rank(comm_s,myps,mpierr)
   call mpi_comm_size(comm_s,nps ,mpierr)
   call mpi_comm_rank(comm_t,mypt,mpierr)
   call mpi_comm_size(comm_t,npt ,mpierr)
110
   call timer%stop("mpi_communication")
111
112
113
114
115
116
117
118
119
120
121
122
123
124

   ! Look to elpa_transpose_vectors for the basic idea!

   ! The communictation pattern repeats in the global matrix after
   ! the least common multiple of (nps,npt) blocks

   lcm_s_t   = least_common_multiple(nps,npt) ! least common multiple of nps, npt

   nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr

   allocate(aux1( ((nblks_tot+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
   allocate(aux2( ((nblks_tot+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
   aux1(:) = 0
   aux2(:) = 0
Andreas Marek's avatar
Andreas Marek committed
125
126
127
#ifdef WITH_OPENMP
   !$omp parallel private(ips, ipt, auxstride, lc, i, k, ns, nl)
#endif
128
129
130
131
132
   do n = 0, lcm_s_t-1

      ips = mod(n,nps)
      ipt = mod(n,npt)

Andreas Marek's avatar
Andreas Marek committed
133
134
      auxstride = nblk * ((nblks_tot - n + lcm_s_t - 1)/lcm_s_t)

135
136
      if(myps == ips) then

Andreas Marek's avatar
Andreas Marek committed
137
138
139
140
!         k = 0
#ifdef WITH_OPENMP
         !$omp do
#endif
141
142
         do lc=1,nvc
            do i = n, nblks_tot-1, lcm_s_t
Andreas Marek's avatar
Andreas Marek committed
143
	       k = (i - n)/lcm_s_t * nblk + (lc - 1) * auxstride
144
145
146
               ns = (i/nps)*nblk ! local start of block i
               nl = min(nvr-i*nblk,nblk) ! length
               aux1(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
Andreas Marek's avatar
Andreas Marek committed
147
!               k = k+nblk
148
149
150
            enddo
         enddo

Andreas Marek's avatar
Andreas Marek committed
151
152
153
154
155
         k = nvc * auxstride
#ifdef WITH_OPENMP
         !$omp barrier
         !$omp master
#endif
156

157
#ifdef WITH_MPI
158
         call timer%start("mpi_communication")
159

Andreas Marek's avatar
Cleanup    
Andreas Marek committed
160
161
162
         if (k>0) call mpi_reduce(aux1, aux2, k, &
#if REALCASE == 1
                                  MPI_REAL_PRECISION,  &
163
#endif
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
164
165
#if COMPLEXCASE == 1
                                  MPI_COMPLEX_PRECISION, &
166
#endif
Andreas Marek's avatar
Cleanup    
Andreas Marek committed
167
                                  MPI_SUM, ipt, comm_t, mpierr)
Andreas Marek's avatar
Andreas Marek committed
168

169
          call timer%stop("mpi_communication")
170

171
172
173
#else /* WITH_MPI */
         if(k>0) aux2 = aux1
#endif /* WITH_MPI */
174

Andreas Marek's avatar
Andreas Marek committed
175
176
177
178
179
180
181
182
183
#ifdef WITH_OPENMP
         !$omp end master
         !$omp barrier
#endif
         if (mypt == ipt) then
!            k = 0
#ifdef WITH_OPENMP
         !$omp do
#endif
184
185
            do lc=1,nvc
               do i = n, nblks_tot-1, lcm_s_t
Andreas Marek's avatar
Andreas Marek committed
186
	          k = (i - n)/lcm_s_t * nblk + (lc - 1) * auxstride
187
188
189
                  ns = (i/npt)*nblk ! local start of block i
                  nl = min(nvr-i*nblk,nblk) ! length
                  vmat_t(ns+1:ns+nl,lc) = vmat_t(ns+1:ns+nl,lc) + aux2(k+1:k+nl)
Andreas Marek's avatar
Andreas Marek committed
190
!                  k = k+nblk
191
192
193
194
195
196
197
               enddo
            enddo
         endif

      endif

   enddo
Andreas Marek's avatar
Andreas Marek committed
198
199
200
#ifdef WITH_OPENMP
   !$omp end parallel
#endif
201
202
203
204

   deallocate(aux1)
   deallocate(aux2)

Andreas Marek's avatar
Cleanup    
Andreas Marek committed
205
206
207
208
209
   call timer%stop("elpa_reduce_add_vectors_&
   &MATH_DATATYPE&
   &" // &
   &PRECISION_SUFFIX &
   )
210
211
212
end subroutine