distributed_do.py 18.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
14
# Copyright(C) 2013-2019 Max-Planck-Society
15
#
16
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik.
Philipp Arras's avatar
Philipp Arras committed
17
18
19

import sys

20
21
import numpy as np
from mpi4py import MPI
Philipp Arras's avatar
Philipp Arras committed
22
23

from .random import Random
24

Martin Reinecke's avatar
Martin Reinecke committed
25
26
27
28
29
30
# Public names exported by ``from <module> import *``.
__all__ = [
    "ntask",
    "rank",
    "master",
    "local_shape",
    "data_object",
    "full",
    "empty",
    "zeros",
    "ones",
    "empty_like",
    "vdot",
    "exp",
    "log",
    "tanh",
    "sqrt",
    "from_object",
    "from_random",
    "local_data",
    "ibegin",
    "ibegin_from_shape",
    "np_allreduce_sum",
    "np_allreduce_min",
    "np_allreduce_max",
    "distaxis",
    "from_local_data",
    "from_global_data",
    "to_global_data",
    "redistribute",
    "default_distaxis",
    "is_numpy",
    "absmax",
    "norm",
    "lock",
    "locked",
    "uniform_full",
    "transpose",
    "to_global_data_rw",
    "ensure_not_distributed",
    "ensure_default_distributed",
    "clipped_exp",
]
Martin Reinecke's avatar
Martin Reinecke committed
35

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
36
37
38
# Module-wide MPI state: the world communicator, the number of tasks in it,
# and the rank of the current task.
_comm = MPI.COMM_WORLD
ntask = _comm.Get_size()
rank = _comm.Get_rank()
Martin Reinecke's avatar
Martin Reinecke committed
39
# True only on the task whose rank is 0.
master = (rank == 0)
40
41


Martin Reinecke's avatar
Martin Reinecke committed
42
43
44
45
def is_numpy():
    """Return ``False``; this MPI-based module always answers ``False``."""
    return False


Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
46
def _shareSize(nwork, nshares, myshare):
    """Return how many of `nwork` items belong to share number `myshare`.

    The items are split into `nshares` parts; the first ``nwork % nshares``
    shares each hold one item more than the remaining ones.
    """
    base, extra = divmod(nwork, nshares)
    if myshare < extra:
        return base + 1
    return base
Martin Reinecke's avatar
Martin Reinecke committed
48

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
49
50

def _shareRange(nwork, nshares, myshare):
    """Return the half-open index range ``(lo, hi)`` of share `myshare`.

    `nwork` items are split into `nshares` consecutive ranges; the first
    ``nwork % nshares`` ranges are one item longer than the others.
    """
    base, extra = divmod(nwork, nshares)
    start = base*myshare + min(extra, myshare)
    length = base + (1 if myshare < extra else 0)
    return start, start + length

57

58
def local_shape(shape, distaxis=0):
    """Return the shape of the part of a global array held by this task.

    Parameters
    ----------
    shape : sequence of int
        Global array shape.
    distaxis : int
        Axis along which the array is distributed; -1 means "not distributed".

    Returns
    -------
    `shape` itself for empty shapes or ``distaxis == -1``; otherwise a tuple
    in which entry `distaxis` is replaced by this task's share size.
    """
    if not (len(shape) == 0 or distaxis == -1):
        mine = list(shape)
        mine[distaxis] = _shareSize(shape[distaxis], ntask, rank)
        return tuple(mine)
    return shape

Martin Reinecke's avatar
Martin Reinecke committed
65

66
67
class data_object(object):
    """Array whose entries may be spread over the MPI tasks along one axis.

    Every task stores a local ndarray of shape
    ``local_shape(shape, distaxis)``. ``distaxis == -1`` marks an object
    that is not distributed (each task holds the full array).
    """

    def __init__(self, shape, data, distaxis):
        """Wrap the local array `data` as part of a global array of `shape`.

        Parameters
        ----------
        shape : sequence of int
            Global shape.
        data : numpy.ndarray (or scalar if `shape` is empty)
            Local data of this task.
        distaxis : int
            Distributed axis, or -1 for "not distributed".

        Raises
        ------
        ValueError
            If the shape of `data` differs from the expected local shape.
        """
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # zero-dimensional objects are never distributed
            distaxis = -1
            if not isinstance(data, np.ndarray):
                data = np.full((), data)
        self._distaxis = distaxis
        self._data = data
        if local_shape(self._shape, self._distaxis) != self._data.shape:
            raise ValueError("shape mismatch")

    def copy(self):
        """Return a new object holding a copy of the local data."""
        return data_object(self._shape, self._data.copy(), self._distaxis)

#     def _sanity_checks(self):
#         # check whether the distaxis is consistent
#         if self._distaxis < -1 or self._distaxis >= len(self._shape):
#             raise ValueError
#         itmp = np.array(self._distaxis)
#         otmp = np.empty(ntask, dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         if np.any(otmp != self._distaxis):
#             raise ValueError
#         # check whether the global shape is consistent
#         itmp = np.array(self._shape)
#         otmp = np.empty((ntask, len(self._shape)), dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         for i in range(ntask):
#             if np.any(otmp[i, :] != self._shape):
#                 raise ValueError
#         # check shape of local data
#         if self._distaxis < 0:
#             if self._data.shape != self._shape:
#                 raise ValueError
#         else:
#             itmp = np.array(self._shape)
#             itmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
#                                               ntask, rank)
#             if np.any(self._data.shape != itmp):
#                 raise ValueError

    @property
    def dtype(self):
        """Data type of the local array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape as a tuple."""
        return self._shape

    @property
    def size(self):
        """Total number of entries of the global array."""
        return np.prod(self._shape)

    @property
    def real(self):
        """Real part, with the same shape and distribution."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Imaginary part, with the same shape and distribution."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Reduce over `axis` with ndarray method `op` and MPI operation `mpiop`.

        `axis` is ``None`` (reduce everything) or a sequence of axes.
        A full reduction returns a scalar; a partial one returns a
        data_object.
        """
        if axis is not None:
            # reducing over every axis is the same as a full reduction
            if len(axis) == len(self._data.shape):
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if (self._distaxis == -1):
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # the distributed axis is reduced away: combine the local
            # partial results of all tasks
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)
        else:
            # perform the contraction on the local data
            res = getattr(self._data, op)(axis=axis)
            if self._distaxis == -1:
                return from_global_data(res, distaxis=0)
            shp = list(res.shape)
            # removed axes in front of the distributed axis shift its index
            shift = 0
            for ax in axis:
                if ax < self._distaxis:
                    shift += 1
            shp[self._distaxis-shift] = self.shape[self._distaxis]
            return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (all axes if ``None``)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def prod(self, axis=None):
        """Product over `axis` (all axes if ``None``)."""
        return self._contraction_helper("prod", MPI.PROD, axis)

#    def min(self, axis=None):
#        return self._contraction_helper("min", MPI.MIN, axis)

#    def max(self, axis=None):
#        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self, axis=None):
        """Arithmetic mean over `axis` (all axes if ``None``)."""
        if axis is None:
            sz = self.size
        else:
            # Number of entries along the reduced axes. A plain loop is used
            # because ``reduce`` is not a builtin on Python 3.
            sz = 1
            for i in axis:
                sz *= self.shape[i]
        return self.sum(axis)/sz

    def std(self, axis=None):
        """Standard deviation; see `var` for the supported `axis` values."""
        return np.sqrt(self.var(axis))

    # FIXME: to be improved!
    def var(self, axis=None):
        """Variance over all entries.

        Raises ValueError unless `axis` is ``None`` or names every axis.
        """
        if axis is not None and len(axis) != len(self.shape):
            raise ValueError("functionality not yet supported")
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the ndarray special method `op` to the local data and `other`.

        `other` must be a data_object with identical shape and distribution,
        or a scalar; anything else yields ``NotImplemented``.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        # in-place operators hand back the very same array: return self then
        if tval is a:
            return self
        else:
            return data_object(self._shape, tval, self._distaxis)

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, abs(self._data), self._distaxis)

    def all(self):
        """Return whether the sum of all entries equals the entry count."""
        return self.sum() == self.size

    def any(self):
        """Return whether the sum of all entries is non-zero."""
        return self.sum() != 0

    def fill(self, value):
        """Set every local entry to `value` (in place)."""
        self._data.fill(value)
228

229

230
231
232
233
234
235
236
237
238
239
240
241
242
243
def _make_binary_method(opname):
    """Build a method that forwards to ``_binary_helper`` with `opname`."""
    def method(self, other):
        return self._binary_helper(other, op=opname)
    return method


# Arithmetic operators are installed in their forward, reflected and in-place
# variants; comparison operators only in their forward variant.
_binary_ops = [pattern.format(base)
               for base in ("add", "sub", "mul", "div", "truediv",
                            "floordiv", "pow")
               for pattern in ("__{}__", "__r{}__", "__i{}__")]
_binary_ops += ["__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"]
for op in _binary_ops:
    setattr(data_object, op, _make_binary_method(op))

Martin Reinecke's avatar
Martin Reinecke committed
244

Martin Reinecke's avatar
Martin Reinecke committed
245
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with `fill_value`.

    The local part is created with ``np.full`` using `dtype`; `distaxis`
    selects the distributed axis.
    """
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
248
249


Martin Reinecke's avatar
Martin Reinecke committed
250
251
252
253
254
255
def uniform_full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object whose local data is `fill_value` broadcast.

    The local array is produced by ``np.broadcast_to``. The `dtype`
    argument is accepted but not used.
    """
    lshape = local_shape(shape, distaxis)
    local = np.broadcast_to(fill_value, lshape)
    return data_object(shape, local, distaxis)


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
256
def empty(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` with uninitialized local data."""
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
259
260


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
261
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with zeros."""
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
264
265


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
266
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with ones."""
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
269
270
271
272
273
274
275


def empty_like(a, dtype=None):
    """Return an uninitialized data_object shaped and distributed like `a`.

    Parameters
    ----------
    a : data_object
        Template object.
    dtype : data type, optional
        Overrides the data type of `a` when given.
    """
    # data_object needs the global shape and the distributed axis in
    # addition to the local array.
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return ``np.vdot`` of `a` and `b` as a scalar.

    If `a` is distributed, the local results of all tasks are summed.
    """
    local = np.array(np.vdot(a._data, b._data))
    if a._distaxis != -1:
        total = np.empty((), dtype=local.dtype)
        _comm.Allreduce(local, total, MPI.SUM)
        return total[()]
    return local[()]
282
283
284


def _math_helper(x, function, out):
    """Apply the NumPy function named `function` to the local data of `x`.

    If `out` is given, the result is written into its local data and `out`
    is returned; otherwise a new data_object is returned.
    """
    ufunc = getattr(np, function)
    if out is None:
        return data_object(x.shape, ufunc(x._data), x._distaxis)
    ufunc(x._data, out=out._data)
    return out
291
292


293
_current_module = sys.modules[__name__]


def _unary_wrapper(name):
    """Return ``f(x, out=None)`` that applies NumPy's `name` via _math_helper."""
    def wrapped(x, out=None):
        return _math_helper(x, name, out)
    return wrapped


# Install the element-wise math functions as module-level names.
for f in ("sqrt", "exp", "log", "tanh", "conjugate"):
    setattr(_current_module, f, _unary_wrapper(f))
301
302


Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
303
def clipped_exp(a):
    """Return ``exp(a)`` with the argument clipped to ``[-300, 300]``.

    Parameters
    ----------
    a : data_object
        Input values.

    Returns
    -------
    data_object
        Same shape and distribution as `a`.
    """
    clipped = np.clip(a._data, -300, 300)
    return data_object(a.shape, np.exp(clipped), a._distaxis)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
305
306


Martin Reinecke's avatar
Martin Reinecke committed
307
308
309
310
311
312
313
314
315
316
317
318
def from_object(object, dtype, copy, set_locked):
    """Build a data_object from another one, optionally converting or locking.

    Parameters
    ----------
    object : data_object
        Source object.
    dtype : data type or None
        Target type; ``None`` keeps the type of `object`.
    copy : bool
        Whether the data is copied.
    set_locked : bool
        Whether the result's data is made read-only.

    Raises
    ------
    ValueError
        If a type change or locking is requested without copying.
    """
    if dtype is None:
        dtype = object.dtype
    same_dtype = dtype == object.dtype
    # an already locked object of the right type can be returned as it is
    if set_locked and same_dtype and locked(object):
        return object
    if not (same_dtype or copy):
        raise ValueError("cannot change data type without copying")
    if set_locked and not copy:
        raise ValueError("cannot lock object without copying")
    new_data = np.array(object._data, dtype=dtype, copy=copy)
    if set_locked:
        new_data.flags.writeable = False
    return data_object(object._shape, new_data, distaxis=object._distaxis)
321
322


Martin Reinecke's avatar
Martin Reinecke committed
323
324
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks.
Martin Reinecke's avatar
Martin Reinecke committed
325
326
327
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
328
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a data_object filled by the `Random` generator `random_type`.

    Zero-dimensional shapes yield an undistributed object whose value is
    broadcast over the tasks. Otherwise the result is distributed along
    axis 0; every task draws the slabs of all tasks in rank order and keeps
    only its own one.
    """
    draw = getattr(Random, random_type)
    if len(shape) == 0:
        sample = draw(dtype=dtype, shape=shape, **kwargs)
        sample = _comm.bcast(sample)
        return from_local_data(shape, sample, distaxis=-1)
    # No early exit: every task performs all draws.
    for task in range(ntask):
        task_shape = list(shape)
        task_shape[0] = _shareSize(shape[0], ntask, task)
        sample = draw(dtype=dtype, shape=task_shape, **kwargs)
        if task == rank:
            mine = sample
    return from_local_data(shape, mine, distaxis=0)
341

Martin Reinecke's avatar
Martin Reinecke committed
342

Martin Reinecke's avatar
Martin Reinecke committed
343
344
345
346
def local_data(arr):
    """Return the local ndarray stored in `arr` (no copy)."""
    data = arr._data
    return data


347
348
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index at which this task's local data starts.

    Parameters
    ----------
    glob_shape : sequence of int
        Global array shape.
    distaxis : int
        Distributed axis; a negative value means "not distributed".

    Returns
    -------
    tuple of int
        One offset per axis; only the entry for `distaxis` can be non-zero.
    """
    res = [0] * len(glob_shape)
    if distaxis >= 0:
        res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    # always a tuple, also in the undistributed case
    return tuple(res)


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
355
356
def ibegin(arr):
    """Return the global index at which the local data of `arr` starts.

    Returns a tuple with one offset per axis. For an undistributed object
    (``arr._distaxis == -1``) all offsets are zero.
    """
    res = [0] * arr._data.ndim
    # Without this guard, distaxis == -1 would index the last axis (or fail
    # for zero-dimensional data).
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
359
360


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
361
362
def np_allreduce_sum(arr):
    """Return the element-wise sum of the ndarray `arr` over all tasks."""
    out = np.empty_like(arr)
    _comm.Allreduce(arr, out, op=MPI.SUM)
    return out
Martin Reinecke's avatar
Martin Reinecke committed
365
366


367
368
369
370
371
372
def np_allreduce_min(arr):
    """Return the element-wise minimum of the ndarray `arr` over all tasks."""
    out = np.empty_like(arr)
    _comm.Allreduce(arr, out, op=MPI.MIN)
    return out


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
373
374
375
376
377
378
def np_allreduce_max(arr):
    """Return the element-wise maximum of the ndarray `arr` over all tasks."""
    out = np.empty_like(arr)
    _comm.Allreduce(arr, out, op=MPI.MAX)
    return out


Martin Reinecke's avatar
Martin Reinecke committed
379
380
381
382
def distaxis(arr):
    """Return the distributed axis stored in `arr`."""
    axis = arr._distaxis
    return axis


Martin Reinecke's avatar
Martin Reinecke committed
383
def from_local_data(shape, arr, distaxis=0):
    """Wrap the local array `arr` as a data_object of global `shape`."""
    wrapped = data_object(shape, arr, distaxis)
    return wrapped


387
388
389
def from_global_data(arr, sum_up=False, distaxis=0):
    """Build a data_object from an ndarray holding the full global array.

    Parameters
    ----------
    arr : numpy.ndarray
        Global array.
    sum_up : bool
        If true, `arr` is first summed over all tasks.
    distaxis : int
        Axis to distribute along; -1 keeps the full array on every task.
    """
    if sum_up:
        arr = np_allreduce_sum(arr)
    if distaxis != -1:
        # keep only the slab of the distributed axis owned by this task
        first, last = _shareRange(arr.shape[distaxis], ntask, rank)
        window = [slice(None) for _ in arr.shape]
        window[distaxis] = slice(first, last)
        local = arr[tuple(window)]
    else:
        local = arr
    return data_object(arr.shape, local, distaxis)
Martin Reinecke's avatar
Martin Reinecke committed
396
397


Martin Reinecke's avatar
Martin Reinecke committed
398
399
def to_global_data(arr):
    """Return the full global array of `arr` as an ndarray.

    For an undistributed object the stored array itself is returned, not a
    copy.
    """
    if arr._distaxis != -1:
        arr = redistribute(arr, dist=-1)
    return arr._data


405
406
407
408
409
410
411
def to_global_data_rw(arr):
    """Return the full global array of `arr` as an ndarray.

    For an undistributed object a copy of the stored array is returned.
    """
    if arr._distaxis != -1:
        gathered = redistribute(arr, dist=-1)
        return gathered._data
    return arr._data.copy()


Martin Reinecke's avatar
Martin Reinecke committed
412
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with a different distribution over the tasks.

    Exactly one of `dist` and `nodist` must be given.

    Parameters
    ----------
    arr : data_object
        Object to redistribute.
    dist : int, optional
        Requested distributed axis (-1: full array on every task).
    nodist : sequence of int, optional
        Axes that must not be distributed. The first axis not listed is
        chosen as the new distributed axis (-1 if there is none).

    Returns
    -------
    data_object
        `arr` itself if it already satisfies the request, else a new object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError("only one of dist and nodist may be given")
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError("one of dist and nodist must be given")
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, distaxis=dist)
    if dist == -1:  # gather all data on all tasks
        # bring the distributed axis to the front so that the slabs of the
        # individual tasks can simply be concatenated
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        slabsize = np.prod(tmp.shape[1:])*tmp.itemsize
        # builtin int replaces the removed alias np.int
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        # sizes and displacements are given in bytes
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # shape of the gathered data with the distributed axis in front
        shp = np.array(arr._shape)
        shp[1:arr._distaxis+1] = shp[0:arr._distaxis]
        shp[0] = arr.shape[arr._distaxis]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # pack the pieces destined for the individual tasks one after another
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    # the message is described in bytes, so convert element counts
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # unpack the pieces received from the individual tasks
        arrnew = np.empty(local_shape(arr.shape, dist), dtype=arr.dtype)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
        arrnew = from_local_data(arr.shape, arrnew, distaxis=dist)
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
497
498


Martin Reinecke's avatar
Martin Reinecke committed
499
500
def transpose(arr):
    """Return the transpose of a two-dimensional data_object.

    Parameters
    ----------
    arr : data_object
        Two-dimensional object distributed along axis 0.

    Returns
    -------
    data_object
        Object of shape ``(arr.shape[1], arr.shape[0])`` distributed along
        axis 0.

    Raises
    ------
    ValueError
        If `arr` is not two-dimensional or not distributed along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    ssz0 = arr._data.size//arr.shape[1]
    # builtin int replaces the removed alias np.int
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    # pack the column blocks destined for the individual tasks
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    # the message is described in bytes, so convert element counts
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    arrnew = np.empty((sz2, arr.shape[0]), dtype=arr.dtype)
    ofs = 0
    # transpose each received block into its place in the result
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        arrnew[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return from_local_data((arr.shape[1], arr.shape[0]), arrnew, 0)
Martin Reinecke's avatar
Martin Reinecke committed
533
534


Martin Reinecke's avatar
Martin Reinecke committed
535
536
def default_distaxis():
    """Return the default distributed axis, which is 0."""
    return 0
537
538
539
540
541
542
543
544


def lock(arr):
    """Make the local data of `arr` read-only (in place)."""
    arr._data.setflags(write=False)


def locked(arr):
    """Return ``True`` if the local data of `arr` is read-only."""
    writeable = arr._data.flags.writeable
    return not writeable
Martin Reinecke's avatar
Martin Reinecke committed
545
546
547
548
549
550
551
552
553
554
555
556


def ensure_not_distributed(arr, axes):
    """Return `arr`, redistributed if needed so no axis in `axes` is distributed.

    Returns the pair ``(object, local ndarray of that object)``.
    """
    result = arr
    if result._distaxis in axes:
        result = redistribute(result, nodist=axes)
    return result, result._data


def ensure_default_distributed(arr):
    """Return `arr` distributed along axis 0, redistributing if necessary."""
    if arr._distaxis == 0:
        return arr
    return redistribute(arr, dist=0)
Martin Reinecke's avatar
Martin Reinecke committed
557
558
559
560
561
562


def absmax(arr):
    """Return the maximum absolute value of `arr` over all tasks.

    A task without local data contributes 0.
    """
    local = arr._data
    if local.size != 0:
        tmp = np.asarray(np.linalg.norm(local.reshape(-1), ord=np.inf))
    else:
        # NOTE(review): this zero has the data's dtype while the norm above
        # is real-valued; confirm the mix is fine for complex data.
        tmp = np.array(0, dtype=local.dtype)
    res = np.empty_like(tmp)
    _comm.Allreduce(tmp, res, MPI.MAX)
    return res[()]


def norm(arr, ord=2):
    """Return the `ord`-norm of all entries of `arr`.

    Parameters
    ----------
    arr : data_object
        Input object; its local data is treated as a flat vector.
    ord : number
        Order of the norm; ``np.inf`` is delegated to `absmax`.

    Returns
    -------
    scalar
        ``(sum of |x|**ord) ** (1/ord)`` over the entries; the sum runs
        over all tasks if `arr` is distributed.
    """
    if ord == np.inf:
        return absmax(arr)
    tmp = np.asarray(np.linalg.norm(arr._data.reshape(-1), ord=ord) ** ord)
    if arr._distaxis == -1:
        # Every task already holds the full array (this includes
        # zero-dimensional data), so a sum over tasks would over-count.
        res = tmp
    else:
        res = np.empty_like(tmp)
        _comm.Allreduce(tmp, res, MPI.SUM)
    return res[()] ** (1./ord)