distributed_do.py 13.7 KB
Newer Older
1
2
3
4
import numpy as np
from .random import Random
from mpi4py import MPI

Martin Reinecke's avatar
Martin Reinecke committed
5
6
7
8
9
10
11
# Names exported by ``from <module> import *``; everything else in this
# module (leading-underscore helpers, the MPI communicator) is private.
__all__ = ["ntask", "rank", "master", "local_shape", "data_object", "full",
           "empty", "zeros", "ones", "empty_like", "vdot", "abs", "exp",
           "log", "sqrt", "bincount", "from_object", "from_random",
           "local_data", "ibegin", "np_allreduce_sum", "distaxis",
           "from_local_data", "from_global_data", "to_global_data",
           "redistribute", "default_distaxis"]

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
12
13
14
# MPI bookkeeping shared by every function in this module.
_comm = MPI.COMM_WORLD    # communicator used for all collective operations
ntask = _comm.Get_size()  # number of MPI tasks in the communicator
rank = _comm.Get_rank()   # index of this task, 0 <= rank < ntask
master = (rank == 0)      # True on exactly one task (rank 0)
16
17


Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
18
def _shareSize(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
19
    return (nwork//nshares) + int(myshare < nwork % nshares)
Martin Reinecke's avatar
Martin Reinecke committed
20

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
21
22

def _shareRange(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
23
24
    nbase = nwork//nshares
    additional = nwork % nshares
Martin Reinecke's avatar
Martin Reinecke committed
25
    lo = myshare*nbase + min(myshare, additional)
Martin Reinecke's avatar
Martin Reinecke committed
26
    hi = lo + nbase + int(myshare<additional)
Martin Reinecke's avatar
Martin Reinecke committed
27
28
    return lo, hi

29

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
30
def local_shape(shape, distaxis):
    """Return the shape of this task's portion of an array of global `shape`.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    distaxis : int
        Axis along which the array is split across tasks; -1 means the array
        is not distributed.

    Returns
    -------
    `shape` itself if it is empty or `distaxis` is -1; otherwise a tuple in
    which the entry for `distaxis` is replaced by this task's share.
    """
    if len(shape) == 0 or distaxis == -1:
        return shape
    shape2 = list(shape)
    shape2[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(shape2)

Martin Reinecke's avatar
Martin Reinecke committed
37

38
39
class data_object(object):
    """Array that may be split along one axis across all MPI tasks.

    Attributes (all private):
      _shape    -- global shape, as a tuple
      _distaxis -- axis along which the data is split; -1 means every task
                   holds the complete array
      _data     -- this task's local portion of the data
    """

    def __init__(self, shape, data, distaxis):
        """Wrap the local array `data` as part of a global array of `shape`.

        A zero-dimensional `shape` cannot be split, so `distaxis` is forced
        to -1 in that case.
        """
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            distaxis = -1
        self._distaxis = distaxis
        self._data = data

    def _sanity_checks(self):
        """Verify that this object is consistent on and across all tasks.

        This is a collective operation (it calls ``Allgather``), so it must
        be invoked on every task.

        Raises
        ------
        ValueError
            If the distribution axis is out of range, if the distribution
            axis or global shape differ between tasks, or if the local data
            does not have the shape expected for this task.
        """
        # check whether the distaxis is consistent
        if self._distaxis < -1 or self._distaxis >= len(self._shape):
            raise ValueError("distribution axis out of range")
        itmp = np.array(self._distaxis, dtype=int)
        otmp = np.empty(ntask, dtype=int)
        _comm.Allgather(itmp, otmp)
        if np.any(otmp != self._distaxis):
            raise ValueError("distribution axis differs between tasks")
        # check whether the global shape is consistent
        itmp = np.array(self._shape, dtype=int)
        otmp = np.empty((ntask, len(self._shape)), dtype=int)
        _comm.Allgather(itmp, otmp)
        for i in range(ntask):
            if np.any(otmp[i, :] != self._shape):
                raise ValueError("global shape differs between tasks")
        # check shape of local data
        if self._distaxis < 0:
            if self._data.shape != self._shape:
                raise ValueError("local data has unexpected shape")
        else:
            expected = list(self._shape)
            expected[self._distaxis] = _shareSize(self._shape[self._distaxis],
                                                  ntask, rank)
            # Compare as tuples so that a rank mismatch is reported instead
            # of triggering a broadcasting error.
            if self._data.shape != tuple(expected):
                raise ValueError("local data has unexpected shape")

    @property
    def dtype(self):
        """Data type of the local array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape of the array."""
        return self._shape

    @property
    def size(self):
        """Total number of elements of the global array."""
        return np.prod(self._shape)

    @property
    def real(self):
        """Real part, with the same shape and distribution."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Imaginary part, with the same shape and distribution."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate as a new `data_object`."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate as a new `data_object`."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Apply the reduction `op` over `axis`, combining tasks via `mpiop`.

        Parameters
        ----------
        op : str
            Name of the ndarray reduction method ("sum", "min", "max").
        mpiop : MPI operation
            The matching MPI reduction used to combine per-task results.
        axis : None, int or sequence of int
            Axes to reduce over; None (or all axes) reduces to a scalar.
            Negative axes count from the last axis.

        Returns
        -------
        A scalar for a full reduction, otherwise a `data_object`.
        """
        if axis is not None:
            if isinstance(axis, (int, np.integer)):
                axis = (axis,)
            ndim = len(self._shape)
            # Make all axes non-negative: -1 is reserved as the "not
            # distributed" marker of _distaxis and must not match an axis.
            axis = tuple(ax + ndim if ax < 0 else ax for ax in axis)
            if len(axis) == len(self._data.shape):
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if self._distaxis == -1:
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # The distributed axis is contracted away: reduce locally, then
            # combine the partial results of all tasks.
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)

        # perform the contraction on the local data
        res = getattr(self._data, op)(axis=axis)
        if self._distaxis == -1:
            return from_global_data(res, distaxis=0)
        # The distributed axis survives, but moves towards the front by one
        # position for every contracted axis that preceded it.
        shift = sum(1 for ax in axis if ax < self._distaxis)
        shp = list(res.shape)
        shp[self._distaxis-shift] = self.shape[self._distaxis]
        return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (all elements if None)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def min(self, axis=None):
        """Minimum over `axis` (all elements if None)."""
        return self._contraction_helper("min", MPI.MIN, axis)

    def max(self, axis=None):
        """Maximum over `axis` (all elements if None)."""
        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self):
        """Mean over all elements of the global array."""
        return self.sum()/self.size

    def std(self):
        """Standard deviation over all elements of the global array."""
        return np.sqrt(self.var())

    # FIXME: to be improved!
    def var(self):
        """Variance over all elements of the global array."""
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the ndarray special method `op` to the local data and `other`.

        If `other` is a `data_object`, it must have the same global shape and
        distribution axis. In-place operations return `self`; all others
        return a new `data_object`.

        Raises
        ------
        ValueError
            If `other` is a `data_object` with a different shape or
            distribution.
        """
        a = self._data
        if isinstance(other, data_object):
            if self._shape != other._shape:
                raise ValueError("shapes are incompatible.")
            if self._distaxis != other._distaxis:
                raise ValueError("distributions are incompatible.")
            b = other._data
        else:
            b = other

        tval = getattr(a, op)(b)
        if tval is NotImplemented:
            # Let Python try the reflected operation instead of wrapping the
            # sentinel in a data_object.
            return NotImplemented
        if tval is a:
            return self
        return data_object(self._shape, tval, self._distaxis)

    def __add__(self, other):
        return self._binary_helper(other, op='__add__')

    def __radd__(self, other):
        return self._binary_helper(other, op='__radd__')

    def __iadd__(self, other):
        return self._binary_helper(other, op='__iadd__')

    def __sub__(self, other):
        return self._binary_helper(other, op='__sub__')

    def __rsub__(self, other):
        return self._binary_helper(other, op='__rsub__')

    def __isub__(self, other):
        return self._binary_helper(other, op='__isub__')

    def __mul__(self, other):
        return self._binary_helper(other, op='__mul__')

    def __rmul__(self, other):
        return self._binary_helper(other, op='__rmul__')

    def __imul__(self, other):
        return self._binary_helper(other, op='__imul__')

    # __div__/__rdiv__/__idiv__ are only ever invoked by Python 2.
    def __div__(self, other):
        return self._binary_helper(other, op='__div__')

    def __rdiv__(self, other):
        return self._binary_helper(other, op='__rdiv__')

    def __idiv__(self, other):
        return self._binary_helper(other, op='__idiv__')

    def __truediv__(self, other):
        return self._binary_helper(other, op='__truediv__')

    def __rtruediv__(self, other):
        return self._binary_helper(other, op='__rtruediv__')

    def __pow__(self, other):
        return self._binary_helper(other, op='__pow__')

    def __rpow__(self, other):
        return self._binary_helper(other, op='__rpow__')

    def __ipow__(self, other):
        return self._binary_helper(other, op='__ipow__')

    # Comparisons are element-wise and return a data_object, like ndarray.
    def __eq__(self, other):
        return self._binary_helper(other, op='__eq__')

    def __ne__(self, other):
        return self._binary_helper(other, op='__ne__')

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, np.abs(self._data), self._distaxis)

    def all(self):
        """Return whether the sum of all elements equals the element count."""
        return self.sum() == self.size

    def any(self):
        """Return whether the sum of all elements is nonzero."""
        return self.sum() != 0
236
237


Martin Reinecke's avatar
Martin Reinecke committed
238
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a `data_object` of global `shape` filled with `fill_value`.

    Only this task's local portion (see `local_shape`) is allocated.
    """
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
241
242


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
243
def empty(shape, dtype=None, distaxis=0):
    """Return an uninitialized `data_object` of global `shape`.

    Only this task's local portion (see `local_shape`) is allocated.
    """
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
246
247


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
248
def zeros(shape, dtype=None, distaxis=0):
    """Return a `data_object` of global `shape` filled with zeros.

    Only this task's local portion (see `local_shape`) is allocated.
    """
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
251
252


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
253
def ones(shape, dtype=None, distaxis=0):
    """Return a `data_object` of global `shape` filled with ones.

    Only this task's local portion (see `local_shape`) is allocated.
    """
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
256
257
258
259
260
261
262


def empty_like(a, dtype=None):
    """Return an uninitialized `data_object` shaped and distributed like `a`.

    `dtype` overrides the data type of `a` if it is not None.
    """
    # data_object needs the global shape and the distribution axis in
    # addition to the local array; passing only the array raises TypeError.
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return the global ``vdot`` of the data_objects `a` and `b`.

    Each task computes ``np.vdot`` of its local data; the partial results
    are summed over all tasks, so every task returns the same scalar.
    This is a collective operation.
    """
    tmp = np.array(np.vdot(a._data, b._data))
    res = np.empty((), dtype=tmp.dtype)
    _comm.Allreduce(tmp, res, MPI.SUM)
    return res[()]
267
268
269
270
271
272
273


def _math_helper(x, function, out):
    if out is not None:
        function(x._data, out=out._data)
        return out
    else:
Martin Reinecke's avatar
Martin Reinecke committed
274
        return data_object(x.shape, function(x._data), x._distaxis)
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300


def abs(a, out=None):
    """Element-wise absolute value of `a`, delegated to `_math_helper`.

    `out` is forwarded unchanged as the optional output object.
    """
    return _math_helper(x=a, function=np.abs, out=out)


def exp(a, out=None):
    """Element-wise exponential of `a`, delegated to `_math_helper`.

    `out` is forwarded unchanged as the optional output object.
    """
    return _math_helper(x=a, function=np.exp, out=out)


def log(a, out=None):
    """Element-wise natural logarithm of `a`, delegated to `_math_helper`.

    `out` is forwarded unchanged as the optional output object.
    """
    return _math_helper(x=a, function=np.log, out=out)


def sqrt(a, out=None):
    """Element-wise square root of `a`, delegated to `_math_helper`.

    `out` is forwarded unchanged as the optional output object.
    """
    return _math_helper(x=a, function=np.sqrt, out=out)


def bincount(x, weights=None, minlength=None):
    """Count occurrences of each value in the global array `x`.

    Parameters
    ----------
    x : data_object
        One-dimensional array of non-negative integers.
    weights : data_object, optional
        Weights with the same shape and distribution as `x`.
    minlength : int, optional
        Minimum number of bins of the result.

    Returns
    -------
    data_object holding the global histogram, distributed along axis 0.

    This is a collective operation when `x` is distributed.
    """
    local_weights = None if weights is None else weights._data
    # np.bincount rejects minlength=None; 0 means "no minimum".
    nbins = 0 if minlength is None else int(minlength)
    if x._distaxis == -1:
        # Every task already holds the complete input.
        res = np.bincount(x._data, local_weights, nbins)
        return from_global_data(res, distaxis=0)
    # The per-task histograms can only be summed if all tasks use the same
    # number of bins, so agree on the global maximum first.
    if x._data.size > 0:
        nbins = max(nbins, int(x._data.max()) + 1)
    nbins = _comm.allreduce(nbins, op=MPI.MAX)
    local = np.bincount(x._data, local_weights, nbins)
    res = np.empty_like(local)
    _comm.Allreduce(local, res, MPI.SUM)
    return from_global_data(res, distaxis=0)


def from_object(object, dtype=None, copy=True):
    """Create a `data_object` from the existing data_object `object`.

    The local data is passed through ``np.array`` with the given `dtype`
    and `copy` arguments; global shape and distribution axis are taken
    over unchanged.
    """
    local = np.array(object._data, dtype=dtype, copy=copy)
    return data_object(object._shape, local, distaxis=object._distaxis)
304
305


Martin Reinecke's avatar
Martin Reinecke committed
306
307
308
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a random `data_object` of global `shape`, split along axis 0.

    Every task draws the random numbers for the slabs of *all* tasks and
    keeps only its own slab. This way the resulting global array is
    independent of the number of tasks.

    Parameters
    ----------
    random_type : str
        Name of the generator method to look up on `Random`.
    shape : sequence of int
        Global shape of the array.
    dtype : data type
        Passed on to the generator.
    **kwargs
        Passed on to the generator.
    """
    generator_function = getattr(Random, random_type)
    # Do not stop once the own slab has been drawn: the remaining slabs are
    # generated (and discarded) as well, so that all tasks draw the same
    # sequence of random numbers.
    for i in range(ntask):
        lshape = list(shape)
        lshape[0] = _shareSize(shape[0], ntask, i)
        ldat = generator_function(dtype=dtype, shape=lshape, **kwargs)
        if i == rank:
            outdat = ldat
    return from_local_data(shape, outdat, distaxis=0)
317

Martin Reinecke's avatar
Martin Reinecke committed
318

Martin Reinecke's avatar
Martin Reinecke committed
319
320
321
322
def local_data(arr):
    """Return the local data array stored in the data_object `arr`."""
    local = arr._data
    return local


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
323
324
def ibegin(arr):
    """Return the global index at which the local data of `arr` starts.

    The result is a tuple with one entry per dimension. All entries are 0
    except the one for the distribution axis, which holds the first global
    index owned by this task. For a non-distributed array (distribution
    axis -1) all entries are 0.
    """
    res = [0] * arr._data.ndim
    # Without this guard, the "not distributed" marker -1 would be used as
    # an index and put a bogus offset into the last axis.
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
327
328


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
329
330
def np_allreduce_sum(arr):
    """Return the element-wise sum of the numpy array `arr` over all tasks.

    This is a collective operation; every task receives the same result,
    stored in a new array shaped like `arr`.
    """
    res = np.empty_like(arr)
    _comm.Allreduce(arr, res, MPI.SUM)
    return res
Martin Reinecke's avatar
Martin Reinecke committed
333
334
335
336
337
338


def distaxis(arr):
    """Return the distribution axis stored in the data_object `arr`."""
    axis = arr._distaxis
    return axis


Martin Reinecke's avatar
Martin Reinecke committed
339
def from_local_data(shape, arr, distaxis):
    """Wrap the local array `arr` into a `data_object`.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the resulting array.
    arr : numpy array
        This task's portion of the data; it is stored as-is, not copied.
    distaxis : int
        Axis along which the global array is split (-1: not distributed).
    """
    return data_object(shape, arr, distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
343
344
def from_global_data(arr, distaxis=0):
    """Create a `data_object` from the complete array `arr`.

    `arr` must hold the full global data on every task. For ``distaxis ==
    -1`` (or a zero-dimensional `arr`, which cannot be split) the array is
    wrapped as a whole; otherwise only this task's slab along `distaxis` is
    kept.
    """
    if distaxis == -1 or len(arr.shape) == 0:
        return data_object(arr.shape, arr, -1)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # NumPy requires a tuple for multi-dimensional indexing; a list of
    # slices is no longer accepted.
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
352
353
def to_global_data(arr):
    """Return the complete global data of `arr` as a local array.

    If `arr` is not distributed, its local data is returned directly (no
    copy). Otherwise the data is gathered on all tasks via `redistribute`,
    which is a collective operation.
    """
    if arr._distaxis == -1:
        return arr._data
    return redistribute(arr, dist=-1)._data


Martin Reinecke's avatar
Martin Reinecke committed
359
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with its data distributed along a different axis.

    Exactly one of `dist` and `nodist` must be given.

    Parameters
    ----------
    arr : data_object
        The array to redistribute.
    dist : int, optional
        Requested distribution axis; -1 requests the full array on every
        task.
    nodist : collection of int, optional
        Axes along which the result must not be distributed. The first axis
        not contained in `nodist` becomes the distribution axis; if there is
        none, the full array is gathered on every task.

    Returns
    -------
    `arr` itself if it already satisfies the request, otherwise a new
    `data_object`.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.

    Unless `arr` is returned unchanged or is not distributed, this is a
    collective operation.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError("only one of dist and nodist may be given")
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError("one of dist and nodist must be given")
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, dist)

    if dist == -1:  # gather all data on all tasks
        # Move the distributed axis to the front, so that the slabs of the
        # individual tasks are contiguous in the gathered buffer.
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        slabsize = np.prod(tmp.shape[1:])*tmp.itemsize
        # byte counts and byte offsets of the contribution of every task
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly distributed axis in front
        d = arr._distaxis
        shp = (arr._shape[d],) + arr._shape[:d] + arr._shape[d+1:]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    # temporary slow, but simple solution for comparison purposes:
    # return redistribute(redistribute(arr,dist=-1),dist=dist)
    itemsize = arr._data.itemsize
    # number of local elements per index along the new distribution axis
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    # total number of elements this task holds after the redistribution
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    # number of received elements per index along the old distribution axis
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        # The blocks for the individual tasks are already contiguous.
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # Pack the block destined for each task contiguously into sbuf.
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    # The transfer is done in units of bytes.
    ssz *= itemsize
    rsz *= itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)

    if arr._distaxis == 0:
        # The received blocks are already in the right order.
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # Unpack the block received from each task into its place along the
        # old distribution axis.
        arrnew = empty(arr.shape, dtype=arr.dtype, distaxis=dist)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//itemsize
            arrnew._data[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
445
446
447
448


def default_distaxis():
    """Return the default distribution axis, which is always 0."""
    axis = 0
    return axis