# distributed_do.py
import numpy as np
from .random import Random
from mpi4py import MPI

Martin Reinecke's avatar
Martin Reinecke committed
5
6
7
8
9
10
11
# Public names exported by this module.
__all__ = [
    "ntask", "rank", "master", "local_shape", "data_object", "full",
    "empty", "zeros", "ones", "empty_like", "vdot", "abs", "exp",
    "log", "sqrt", "bincount", "from_object", "from_random",
    "local_data", "ibegin", "np_allreduce_sum", "distaxis",
    "from_local_data", "from_global_data", "to_global_data",
    "redistribute", "default_distaxis",
]

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
12
13
14
# Communicator used for every collective operation in this module.
_comm = MPI.COMM_WORLD
# Number of MPI tasks in the communicator.
ntask = _comm.Get_size()
# Rank of the calling task within the communicator.
rank = _comm.Get_rank()
# True only on the task with rank 0.
master = rank == 0
16
17


Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
18
def _shareSize(nwork, nshares, myshare):
19
    nbase = nwork//nshares
Martin Reinecke's avatar
Martin Reinecke committed
20
21
    return nbase if myshare >= nwork % nshares else nbase+1

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
22
23

def _shareRange(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
24
25
    nbase = nwork//nshares
    additional = nwork % nshares
Martin Reinecke's avatar
Martin Reinecke committed
26
    lo = myshare*nbase + min(myshare, additional)
Martin Reinecke's avatar
Martin Reinecke committed
27
28
29
    hi = lo + nbase + (1 if myshare < additional else 0)
    return lo, hi

30

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
31
def local_shape(shape, distaxis):
    """Return the shape of the part of a global array held by this task.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    distaxis : int
        Axis along which the array is distributed; -1 means "not
        distributed".  An empty `shape` is always treated as not distributed.

    Returns
    -------
    tuple of int
        `shape` itself (as a tuple) if the array is not distributed, otherwise
        `shape` with the entry at `distaxis` replaced by this task's share.
    """
    if len(shape) == 0 or distaxis == -1:
        return tuple(shape)
    lshape = list(shape)
    lshape[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(lshape)

Martin Reinecke's avatar
Martin Reinecke committed
40

41
42
43
class data_object(object):
    """Array whose data may be distributed over the MPI tasks along one axis.

    Every task keeps a local numpy array in ``_data``.  ``_shape`` is the
    global shape and ``_distaxis`` the distributed axis; a ``_distaxis`` of -1
    means that every task holds the complete array.
    """

    def __init__(self, shape, data, distaxis):
        """Must not be called directly by users"""
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # zero-dimensional objects are never distributed
            distaxis = -1
        self._distaxis = distaxis
        self._data = data

    def _sanity_checks(self):
        """Verify that this object is consistent on and across all tasks.

        This is a collective operation (it uses ``Allgather``), so it must be
        called on every task.

        Raises
        ------
        ValueError
            If the distributed axis is out of range, if the distributed axis
            or the global shape differ between tasks, or if the local data
            does not have the shape expected on this task.
        """
        # check whether the distaxis is consistent
        if self._distaxis < -1 or self._distaxis >= len(self._shape):
            raise ValueError("distaxis out of range")
        # explicit, identical dtypes for send and receive buffers
        itmp = np.array(self._distaxis, dtype=np.int64)
        otmp = np.empty(ntask, dtype=np.int64)
        _comm.Allgather(itmp, otmp)
        if np.any(otmp != self._distaxis):
            raise ValueError("distaxis differs between tasks")
        # check whether the global shape is consistent
        itmp = np.array(self._shape, dtype=np.int64)
        otmp = np.empty((ntask, len(self._shape)), dtype=np.int64)
        _comm.Allgather(itmp, otmp)
        if np.any(otmp != itmp):
            raise ValueError("global shape differs between tasks")
        # check shape of local data
        if tuple(self._data.shape) != local_shape(self._shape,
                                                  self._distaxis):
            raise ValueError("local data has an unexpected shape")

    @property
    def dtype(self):
        """Data type of the local array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape as a tuple."""
        return self._shape

    @property
    def size(self):
        """Total number of elements of the global array."""
        # an explicit integer dtype keeps the result integral for shape ()
        return int(np.prod(self._shape, dtype=np.int64))

    @property
    def real(self):
        """Real part, with the same shape and distribution."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Imaginary part, with the same shape and distribution."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Apply a reduction locally and, where needed, across tasks.

        Parameters
        ----------
        op : str
            Name of the numpy array method performing the local reduction.
        mpiop : MPI operation
            Operation combining the local results of the tasks.
        axis : None, int or sequence of int
            Axes to reduce over; None (or all axes) reduces to a scalar.
            Negative entries count from the last axis.

        Returns
        -------
        scalar or data_object
            A scalar for a full reduction, otherwise a data_object holding
            the reduced array.
        """
        if axis is not None:
            if isinstance(axis, (int, np.integer)):
                axis = (axis,)
            ndim = len(self._data.shape)
            # Normalise negative axes: otherwise -1 would be confused with
            # the "not distributed" marker, and a negative reference to the
            # distributed axis would skip the reduction across tasks.
            axis = tuple(int(ax) + ndim if ax < 0 else int(ax) for ax in axis)
            if len(axis) == ndim:
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if self._distaxis == -1:
                # every task already holds the complete data
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # the distributed axis is reduced away: combine partial results
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)
        else:
            # perform the contraction on the local data
            res = getattr(self._data, op)(axis=axis)
            if self._distaxis == -1:
                return from_global_data(res, distaxis=0)
            shp = list(res.shape)
            # removed axes in front of the distributed axis shift its index
            shift = sum(1 for ax in axis if ax < self._distaxis)
            shp[self._distaxis-shift] = self.shape[self._distaxis]
            return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (all elements if `axis` is None)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def min(self, axis=None):
        """Minimum over `axis` (all elements if `axis` is None)."""
        return self._contraction_helper("min", MPI.MIN, axis)

    def max(self, axis=None):
        """Maximum over `axis` (all elements if `axis` is None)."""
        return self._contraction_helper("max", MPI.MAX, axis)

    # FIXME: to be improved!
    def mean(self):
        """Mean of all elements of the global array."""
        return self.sum()/self.size

    def std(self):
        """Standard deviation of all elements of the global array."""
        return np.sqrt(self.var())

    def var(self):
        """Variance of all elements of the global array."""
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the numpy special method `op` to the local data and `other`.

        `other` may be a data_object with identical shape and distribution,
        or anything the local numpy array accepts as operand.

        Raises
        ------
        ValueError
            If `other` is a data_object whose shape or distribution differs.
        """
        a = self._data
        if isinstance(other, data_object):
            if self._shape != other._shape:
                raise ValueError("shapes are incompatible.")
            if self._distaxis != other._distaxis:
                raise ValueError("distributions are incompatible.")
            b = other._data
        else:
            b = other

        tval = getattr(a, op)(b)
        if tval is NotImplemented:
            # let Python try the reflected operation of `other`
            return NotImplemented
        if tval is a:
            # in-place operation: the local array was modified directly
            return self
        return data_object(self._shape, tval, self._distaxis)

    def __add__(self, other):
        return self._binary_helper(other, op='__add__')

    def __radd__(self, other):
        return self._binary_helper(other, op='__radd__')

    def __iadd__(self, other):
        return self._binary_helper(other, op='__iadd__')

    def __sub__(self, other):
        return self._binary_helper(other, op='__sub__')

    def __rsub__(self, other):
        return self._binary_helper(other, op='__rsub__')

    def __isub__(self, other):
        return self._binary_helper(other, op='__isub__')

    def __mul__(self, other):
        return self._binary_helper(other, op='__mul__')

    def __rmul__(self, other):
        return self._binary_helper(other, op='__rmul__')

    def __imul__(self, other):
        return self._binary_helper(other, op='__imul__')

    # __div__, __rdiv__ and __idiv__ are only invoked by Python 2
    def __div__(self, other):
        return self._binary_helper(other, op='__div__')

    def __rdiv__(self, other):
        return self._binary_helper(other, op='__rdiv__')

    def __idiv__(self, other):
        return self._binary_helper(other, op='__idiv__')

    def __truediv__(self, other):
        return self._binary_helper(other, op='__truediv__')

    def __rtruediv__(self, other):
        return self._binary_helper(other, op='__rtruediv__')

    def __pow__(self, other):
        return self._binary_helper(other, op='__pow__')

    def __rpow__(self, other):
        return self._binary_helper(other, op='__rpow__')

    def __ipow__(self, other):
        return self._binary_helper(other, op='__ipow__')

    def __eq__(self, other):
        return self._binary_helper(other, op='__eq__')

    def __ne__(self, other):
        return self._binary_helper(other, op='__ne__')

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, np.abs(self._data), self._distaxis)

    def all(self):
        """Return True if all elements of the global array are true.

        For distributed data the local results are combined over all tasks,
        so every task obtains the same answer.
        """
        return self._contraction_helper("all", MPI.LAND, None)

    def any(self):
        """Return True if any element of the global array is true.

        For distributed data the local results are combined over all tasks,
        so every task obtains the same answer.
        """
        return self._contraction_helper("any", MPI.LOR, None)


Martin Reinecke's avatar
Martin Reinecke committed
242
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with `fill_value`.

    Only the part belonging to the calling task is allocated; `distaxis` is
    the axis along which the array is distributed (-1: not distributed).
    """
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
245
246


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
247
def empty(shape, dtype=None, distaxis=0):
    """Return an uninitialised data_object of global shape `shape`.

    Only the part belonging to the calling task is allocated; `distaxis` is
    the axis along which the array is distributed (-1: not distributed).
    """
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
250
251


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
252
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with zeros.

    Only the part belonging to the calling task is allocated; `distaxis` is
    the axis along which the array is distributed (-1: not distributed).
    """
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
255
256


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
257
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with ones.

    Only the part belonging to the calling task is allocated; `distaxis` is
    the axis along which the array is distributed (-1: not distributed).
    """
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
260
261
262
263
264
265
266


def empty_like(a, dtype=None):
    """Return an uninitialised data_object shaped and distributed like `a`.

    `dtype` overrides the data type of `a` if it is not None.
    """
    # data_object needs the global shape and the distribution axis in
    # addition to the local array
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return the scalar product of `a` and `b`, with `a` conjugated.

    The local contributions are computed with ``np.vdot`` and, for distributed
    operands, summed over all tasks (a collective operation).
    """
    tmp = np.array(np.vdot(a._data, b._data))
    if a._distaxis == -1:
        # every task already holds the full data; summing over the tasks
        # would multiply the result by the number of tasks
        return tmp[()]
    res = np.empty((), dtype=tmp.dtype)
    _comm.Allreduce(tmp, res, MPI.SUM)
    return res[()]
271
272
273
274
275
276
277


def _math_helper(x, function, out):
    if out is not None:
        function(x._data, out=out._data)
        return out
    else:
Martin Reinecke's avatar
Martin Reinecke committed
278
        return data_object(x.shape, function(x._data), x._distaxis)
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304


def abs(a, out=None):
    """Element-wise absolute value of `a`, optionally stored in `out`."""
    return _math_helper(x=a, function=np.abs, out=out)


def exp(a, out=None):
    """Element-wise exponential of `a`, optionally stored in `out`."""
    return _math_helper(x=a, function=np.exp, out=out)


def log(a, out=None):
    """Element-wise natural logarithm of `a`, optionally stored in `out`."""
    return _math_helper(x=a, function=np.log, out=out)


def sqrt(a, out=None):
    """Element-wise square root of `a`, optionally stored in `out`."""
    return _math_helper(x=a, function=np.sqrt, out=out)


def bincount(x, weights=None, minlength=None):
    """Count the occurrences of each value in the global array `x`.

    Parameters
    ----------
    x : data_object
        One-dimensional array of non-negative integers.
    weights : data_object, optional
        Weights, distributed like `x`.
    minlength : int, optional
        Minimum number of bins; None is treated as 0.

    Returns
    -------
    data_object
        The histogram over the data of all tasks, distributed along axis 0.

    For distributed `x` this is a collective operation.
    """
    local_x = x._data
    local_w = None if weights is None else weights._data
    distributed = x._distaxis != -1

    # All tasks must use the same number of bins so that the local
    # histograms can be summed element by element.
    nbins = 0 if minlength is None else int(minlength)
    if local_x.size > 0:
        nbins = max(nbins, int(local_x.max()) + 1)
    if distributed:
        nbins = _comm.allreduce(nbins, op=MPI.MAX)

    res = np.bincount(local_x, local_w, nbins)
    if distributed:
        total = np.empty_like(res)
        _comm.Allreduce(res, total, MPI.SUM)
        res = total
    return from_global_data(res, distaxis=0)


def from_object(object, dtype=None, copy=True):
    """Return a data_object built from the data_object `object`.

    Parameters
    ----------
    object : data_object
        Source object; shape and distribution are taken over unchanged.
    dtype : data type, optional
        Data type of the result; None keeps the one of `object`.
    copy : bool
        If True the local data is always copied; if False it is copied only
        when the requested `dtype` makes that necessary.
    """
    if copy:
        data = np.array(object._data, dtype=dtype, copy=True)
    else:
        # np.array(..., copy=False) raises under NumPy 2 whenever a copy is
        # unavoidable; asarray copies only when it has to on all versions
        data = np.asarray(object._data, dtype=dtype)
    return data_object(object._shape, data, distaxis=object._distaxis)
308
309


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
310
def from_random(random_type, shape, dtype=np.float64, distaxis=0, **kwargs):
    """Return a data_object of global shape `shape` filled with random data.

    `random_type` is the name of the generator method looked up on `Random`;
    `dtype`, `shape` and any additional keyword arguments are forwarded to it.
    """
    generator_function = getattr(Random, random_type)
    # NOTE: the complete global array is generated on every task and only the
    # local part is kept.  Generating just the local shape would be an
    # alternative:
    #   generator_function(dtype=dtype,
    #                      shape=local_shape(shape, distaxis), **kwargs)
    global_data = generator_function(dtype=dtype, shape=shape, **kwargs)
    return from_global_data(global_data, distaxis=distaxis)
315

Martin Reinecke's avatar
Martin Reinecke committed
316

Martin Reinecke's avatar
Martin Reinecke committed
317
318
319
320
def local_data(arr):
    """Return the array stored for `arr` on the calling task."""
    local = arr._data
    return local


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
321
322
def ibegin(arr):
    """Return the global index at which the local data of `arr` begins.

    The result is a tuple with one entry per dimension of the local array.
    All entries are 0 except the one of the distributed axis, which holds the
    first global index owned by the calling task.
    """
    res = [0] * arr._data.ndim
    # For non-distributed data (_distaxis == -1) every task holds the full
    # array, so nothing is offset; indexing with -1 would wrongly modify the
    # last axis.
    if arr._distaxis != -1:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
325
326


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
327
328
def np_allreduce_sum(arr):
    """Return the element-wise sum of the numpy array `arr` over all tasks.

    This is a collective operation; every task receives the full result in a
    new array with the shape and dtype of `arr`.
    """
    total = np.empty_like(arr)
    _comm.Allreduce(arr, total, MPI.SUM)
    return total
Martin Reinecke's avatar
Martin Reinecke committed
331
332
333
334
335
336


def distaxis(arr):
    """Return the axis along which `arr` is distributed (-1 if none)."""
    axis = arr._distaxis
    return axis


Martin Reinecke's avatar
Martin Reinecke committed
337
def from_local_data(shape, arr, distaxis):
    """Wrap the local array `arr` in a data_object.

    `shape` is the global shape and `distaxis` the distributed axis; `arr`
    is stored as the local data without being copied or checked.
    """
    return data_object(shape, arr, distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
341
342
def from_global_data(arr, distaxis=0):
    """Build a data_object from an array that is present on every task.

    Parameters
    ----------
    arr : numpy.ndarray
        The complete global array.
    distaxis : int
        Axis along which the result is distributed; -1 keeps the full array
        on every task.  Zero-dimensional arrays are never distributed.

    Returns
    -------
    data_object
        Object whose local data is the part of `arr` owned by the calling
        task (a slice of `arr`, not a copy).
    """
    if distaxis == -1 or len(arr.shape) == 0:
        return data_object(arr.shape, arr, -1)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # numpy requires a tuple here; a list of slices is not a valid
    # multi-dimensional index
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
350
351
def to_global_data(arr):
    """Return the complete global array of `arr` as a numpy array.

    For non-distributed objects the local data is returned directly;
    otherwise the data is gathered from all tasks via `redistribute`.
    """
    if arr._distaxis == -1:
        return arr._data
    return redistribute(arr, dist=-1)._data


Martin Reinecke's avatar
Martin Reinecke committed
357
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with a different distribution over the tasks.

    Exactly one of `dist` and `nodist` must be given.

    Parameters
    ----------
    arr : data_object
        The object to redistribute.
    dist : int, optional
        Axis along which the result is distributed; -1 gathers the complete
        array on every task.
    nodist : sequence of int, optional
        Axes along which the result must not be distributed.  If `arr` is
        distributed along one of them, the first axis not listed becomes the
        new distributed axis (or -1 if all axes are listed).

    Returns
    -------
    data_object
        `arr` itself if its distribution already satisfies the request,
        otherwise a new object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are specified.

    Unless `arr` is returned unchanged or is not distributed, this is a
    collective operation.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError("specify either 'dist' or 'nodist', not both")
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError("one of 'dist' and 'nodist' must be specified")
        if arr._distaxis not in nodist:
            return arr
        # first axis that is allowed to be distributed, -1 if there is none
        dist = next((i for i in range(len(arr.shape)) if i not in nodist), -1)

    old = arr._distaxis
    if old == -1:  # just pick the proper subset
        return from_global_data(arr._data, dist)

    if dist == -1:  # gather data
        # with the distributed axis in front, the flattened local arrays of
        # the tasks simply have to be concatenated
        tmp = np.moveaxis(arr._data, old, 0)
        slabsize = int(np.prod(tmp.shape[1:], dtype=np.int64))*tmp.itemsize
        # sizes and displacements are in bytes (MPI.BYTE)
        sz = np.array([slabsize*_shareSize(arr.shape[old], ntask, i)
                       for i in range(ntask)], dtype=int)
        disp = np.zeros(ntask, dtype=int)
        disp[1:] = np.cumsum(sz[:-1])
        tmp = tmp.flatten()
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly distributed axis moved to the front
        shp = [arr.shape[old]] + \
            [n for ax, n in enumerate(arr.shape) if ax != old]
        out = np.moveaxis(out.reshape(shp), 0, old)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    # temporary slow, but simple solution for comparison purposes:
    # return redistribute(redistribute(arr,dist=-1),dist=dist)

    # axis 0: new distributed axis (still complete locally),
    # axis 1: old distributed axis (only the local share)
    tmp = np.moveaxis(arr._data, (dist, old), (0, 1))
    tail = list(tmp.shape[2:])
    itemsize = tmp.itemsize
    slabsize = int(np.prod(tail, dtype=np.int64))*itemsize
    nold_local = tmp.shape[1]
    nnew_local = _shareSize(arr.shape[dist], ntask, rank)
    new_shares = [_shareSize(arr.shape[dist], ntask, i) for i in range(ntask)]
    old_shares = [_shareSize(arr.shape[old], ntask, i) for i in range(ntask)]

    # byte counts sent to / received from every task
    ssz = np.array([n*nold_local*slabsize for n in new_shares], dtype=int)
    rsz = np.array([nnew_local*n*slabsize for n in old_shares], dtype=int)
    sdisp = np.zeros(ntask, dtype=int)
    rdisp = np.zeros(ntask, dtype=int)
    sdisp[1:] = np.cumsum(ssz[:-1])
    rdisp[1:] = np.cumsum(rsz[:-1])

    tmp = tmp.flatten()
    out = np.empty(int(np.prod(local_shape(arr.shape, dist))),
                   dtype=arr.dtype)
    s_msg = [tmp, (ssz, sdisp), MPI.BYTE]
    r_msg = [out, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)

    # the block received from task i covers that task's index range along
    # the old distributed axis
    out2 = np.empty([nnew_local, arr.shape[old]] + tail, dtype=arr.dtype)
    ofs = 0
    for i in range(ntask):
        lsize = rsz[i]//itemsize
        lo, hi = _shareRange(arr.shape[old], ntask, i)
        out2[:, lo:hi] = \
            out[ofs:ofs+lsize].reshape([nnew_local, old_shares[i]] + tail)
        ofs += lsize
    out2 = np.moveaxis(out2, (0, 1), (dist, old))
    return from_local_data(arr.shape, out2, dist)
Martin Reinecke's avatar
Martin Reinecke committed
433
434
435
436


def default_distaxis():
    """Return the axis along which arrays are distributed by default."""
    axis = 0
    return axis