distributed_do.py 16.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

19
20
21
from __future__ import (absolute_import, division, print_function)
from builtins import *
from functools import reduce
22
23
24
import numpy as np
from .random import Random
from mpi4py import MPI
25
import sys
26

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
27
28
29
# Communicator used for all collective operations in this module.
_comm = MPI.COMM_WORLD
# Number of MPI tasks in the communicator.
ntask = _comm.Get_size()
# Index of the calling task within the communicator.
rank = _comm.Get_rank()
Martin Reinecke's avatar
Martin Reinecke committed
30
# True only on the task whose rank is 0.
master = (rank == 0)
31
32


Martin Reinecke's avatar
Martin Reinecke committed
33
34
35
36
def is_numpy():
    """Report whether this backend is the plain numpy one (it is not)."""
    uses_plain_numpy = False
    return uses_plain_numpy


Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
37
def _shareSize(nwork, nshares, myshare):
    """Return the number of work items belonging to share `myshare`.

    `nwork` items are split as evenly as possible over `nshares` shares;
    the first ``nwork % nshares`` shares receive one item more than the
    remaining ones.
    """
    return (nwork//nshares) + int(myshare < nwork % nshares)
Martin Reinecke's avatar
Martin Reinecke committed
39

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
40
41

def _shareRange(nwork, nshares, myshare):
    """Return the half-open index range ``(lo, hi)`` of share `myshare`.

    `nwork` items are split as evenly as possible over `nshares` shares;
    the first ``nwork % nshares`` shares are one item longer.
    """
    nbase = nwork//nshares
    additional = nwork % nshares
    # every preceding share contributes nbase items, plus one extra item for
    # each preceding share that is among the first `additional` ones
    lo = myshare*nbase + min(myshare, additional)
    hi = lo + nbase + int(myshare < additional)
    return lo, hi

48

49
def local_shape(shape, distaxis=0):
    """Return the shape of the calling task's part of a distributed array.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    distaxis : int
        Axis along which the array is distributed; -1 means that the array
        is not distributed.

    Returns
    -------
    tuple of int
        `shape` with the entry at `distaxis` replaced by the size of this
        task's share.  For zero-dimensional shapes or ``distaxis == -1`` the
        global shape itself (always as a tuple).
    """
    if len(shape) == 0 or distaxis == -1:
        # always hand back a tuple, like the distributed branch does
        return tuple(shape)
    shape2 = list(shape)
    shape2[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(shape2)

Martin Reinecke's avatar
Martin Reinecke committed
56

57
58
class data_object(object):
    """N-dimensional array whose entries are spread over the MPI tasks.

    Every task stores the part of the global array selected by
    ``local_shape(shape, distaxis)`` in a local numpy array.
    ``distaxis == -1`` means that the array is not distributed.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    data : numpy.ndarray
        Local part of the array held by the calling task.
    distaxis : int
        Axis along which the array is distributed, or -1 for none.
        Forced to -1 for zero-dimensional arrays.

    Raises
    ------
    ValueError
        If the shape of `data` differs from the local shape implied by
        `shape` and `distaxis`.
    """

    def __init__(self, shape, data, distaxis):
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # a scalar cannot be split along an axis
            distaxis = -1
        self._distaxis = distaxis
        self._data = data
        if local_shape(self._shape, self._distaxis) != self._data.shape:
            raise ValueError("shape mismatch")

    def copy(self):
        """Return a new data_object holding a copy of the local data."""
        return data_object(self._shape, self._data.copy(), self._distaxis)

    # NOTE: disabled consistency checks, kept for reference.
#     def _sanity_checks(self):
#         # check whether the distaxis is consistent
#         if self._distaxis < -1 or self._distaxis >= len(self._shape):
#             raise ValueError
#         itmp = np.array(self._distaxis)
#         otmp = np.empty(ntask, dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         if np.any(otmp != self._distaxis):
#             raise ValueError
#         # check whether the global shape is consistent
#         itmp = np.array(self._shape)
#         otmp = np.empty((ntask, len(self._shape)), dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         for i in range(ntask):
#             if np.any(otmp[i, :] != self._shape):
#                 raise ValueError
#         # check shape of local data
#         if self._distaxis < 0:
#             if self._data.shape != self._shape:
#                 raise ValueError
#         else:
#             itmp = np.array(self._shape)
#             itmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
#                                               ntask, rank)
#             if np.any(self._data.shape != itmp):
#                 raise ValueError

    @property
    def dtype(self):
        """Data type of the local numpy array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape as a tuple."""
        return self._shape

    @property
    def size(self):
        """Total number of entries of the global array, as a Python int."""
        # np.prod(()) would yield the float 1.0; force an integer result
        return int(np.prod(self._shape, dtype=np.int64))

    @property
    def real(self):
        """data_object wrapping the real part of the local data."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """data_object wrapping the imaginary part of the local data."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate as a new data_object."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate as a new data_object."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Apply the reduction `op` locally and combine results via MPI.

        Parameters
        ----------
        op : str
            Name of the numpy array method performing the local reduction
            (e.g. "sum").
        mpiop : MPI.Op
            MPI operation combining the per-task results.
        axis : tuple of int or None
            Axes to reduce over; None (or all axes) reduces to a scalar.

        Returns
        -------
        scalar or data_object
            A scalar for a full reduction, otherwise a data_object.
        """
        if axis is not None:
            if len(axis) == len(self._data.shape):
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if (self._distaxis == -1):
                # every task already holds the full array
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # the distributed axis is reduced away: combine the local partial
            # results of all tasks, then distribute the result along axis 0
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)
        else:
            # perform the contraction on the local data
            res = getattr(self._data, op)(axis=axis)
            if self._distaxis == -1:
                return from_global_data(res, distaxis=0)
            shp = list(res.shape)
            # removed axes in front of the distributed axis shift its index
            shift = 0
            for ax in axis:
                if ax < self._distaxis:
                    shift += 1
            shp[self._distaxis-shift] = self.shape[self._distaxis]
            return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (tuple of int), or over all entries if None."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def prod(self, axis=None):
        """Product over `axis` (tuple of int), or all entries if None."""
        return self._contraction_helper("prod", MPI.PROD, axis)

    def min(self, axis=None):
        """Minimum over `axis` (tuple of int), or all entries if None."""
        return self._contraction_helper("min", MPI.MIN, axis)

    def max(self, axis=None):
        """Maximum over `axis` (tuple of int), or all entries if None."""
        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self, axis=None):
        """Arithmetic mean over `axis` (tuple of int), or all entries."""
        if axis is None:
            sz = self.size
        else:
            # the initial value 1 keeps an empty axis tuple from raising
            sz = reduce(lambda x, y: x*y, [self.shape[i] for i in axis], 1)
        return self.sum(axis)/sz

    def std(self, axis=None):
        """Standard deviation; see `var` for the supported `axis` values."""
        return np.sqrt(self.var(axis))

    # FIXME: to be improved!
    def var(self, axis=None):
        """Variance over all entries.

        Raises
        ------
        ValueError
            If `axis` is given and does not contain all axes.
        """
        if axis is not None and len(axis) != len(self.shape):
            raise ValueError("functionality not yet supported")
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the numpy special method `op` to the local data and `other`.

        `other` may be a data_object with identical shape and distribution,
        a scalar, or a numpy array.  For any other type NotImplemented is
        returned.  If the operation worked in place, `self` is returned.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        elif isinstance(other, np.ndarray):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        if tval is a:
            # in-place operation: the local data was modified directly
            return self
        else:
            return data_object(self._shape, tval, self._distaxis)

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, abs(self._data), self._distaxis)

    def all(self):
        """Return True if every entry of the global array is nonzero."""
        # count the nonzero entries instead of summing raw values, so that
        # non-boolean data such as [2, 0] is judged correctly
        return self._binary_helper(0, "__ne__").sum() == self.size

    def any(self):
        """Return True if at least one entry of the global array is nonzero."""
        # count the nonzero entries instead of summing raw values, so that
        # entries of opposite sign such as [1, -1] cannot cancel
        return self._binary_helper(0, "__ne__").sum() != 0

    def fill(self, value):
        """Set every entry of the local data to `value`."""
        self._data.fill(value)
220

221

222
223
224
225
226
227
228
229
230
231
232
233
234
235
def func(op):
    """Build a method that forwards to ``_binary_helper`` with `op`."""
    def func2(self, other):
        return self._binary_helper(other, op=op)
    return func2


# Attach arithmetic, in-place and comparison operators to data_object.
for op in ("__add__", "__radd__", "__iadd__",
           "__sub__", "__rsub__", "__isub__",
           "__mul__", "__rmul__", "__imul__",
           "__div__", "__rdiv__", "__idiv__",
           "__truediv__", "__rtruediv__", "__itruediv__",
           "__floordiv__", "__rfloordiv__", "__ifloordiv__",
           "__pow__", "__rpow__", "__ipow__",
           "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"):
    setattr(data_object, op, func(op))

Martin Reinecke's avatar
Martin Reinecke committed
236

Martin Reinecke's avatar
Martin Reinecke committed
237
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with `fill_value`.

    Only the local part of the array is allocated on each task.
    """
    return data_object(shape, np.full(local_shape(shape, distaxis),
                                      fill_value, dtype), distaxis)
240
241


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
242
def empty(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` with uninitialized data.

    Only the local part of the array is allocated on each task.
    """
    return data_object(shape, np.empty(local_shape(shape, distaxis),
                                       dtype), distaxis)
245
246


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
247
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with zeros.

    Only the local part of the array is allocated on each task.
    """
    return data_object(shape, np.zeros(local_shape(shape, distaxis), dtype),
                       distaxis)
250
251


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
252
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with ones.

    Only the local part of the array is allocated on each task.
    """
    return data_object(shape, np.ones(local_shape(shape, distaxis), dtype),
                       distaxis)
255
256
257
258
259
260
261


def empty_like(a, dtype=None):
    """Return an uninitialized data_object shaped and distributed like `a`.

    Parameters
    ----------
    a : data_object
        Template providing global shape, distribution axis and (unless
        `dtype` is given) the data type.
    dtype : data-type, optional
        Overrides the data type of the result.
    """
    # data_object needs the global shape and the distribution axis in
    # addition to the local array
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return the dot product of `a` and `b`, summed over all MPI tasks.

    The local contribution is computed with ``np.vdot`` on the local data
    of both operands and then combined with an MPI sum reduction.
    """
    tmp = np.array(np.vdot(a._data, b._data))
    res = np.empty((), dtype=tmp.dtype)
    _comm.Allreduce(tmp, res, MPI.SUM)
    # unwrap the zero-dimensional array into a scalar
    return res[()]
266
267
268


def _math_helper(x, function, out):
    """Apply the numpy function named `function` to the local data of `x`.

    Parameters
    ----------
    x : data_object
        Input array.
    function : str
        Name of a function in the numpy namespace (e.g. "sqrt").
    out : data_object or None
        If given, the result is written into its local data and `out` is
        returned; otherwise a new data_object is created.
    """
    function = getattr(np, function)
    if out is not None:
        function(x._data, out=out._data)
        return out
    else:
        return data_object(x.shape, function(x._data), x._distaxis)
275
276


277
_current_module = sys.modules[__name__]


def func(f):
    """Build a module-level function applying numpy's `f` via _math_helper."""
    def func2(x, out=None):
        return _math_helper(x, f, out)
    return func2


# Export element-wise math functions under the same names numpy uses.
for f in ["sqrt", "exp", "log", "tanh", "conjugate"]:
    setattr(_current_module, f, func(f))
285
286


Martin Reinecke's avatar
Martin Reinecke committed
287
288
289
290
291
292
293
294
295
296
297
298
def from_object(object, dtype, copy, set_locked):
    """Create a data_object from another one, optionally converted or locked.

    Parameters
    ----------
    object : data_object
        Source array.
    dtype : data-type or None
        Requested data type; None keeps the data type of `object`.
    copy : bool
        Whether the local data is copied.
    set_locked : bool
        Whether the local data of the result is made read-only.

    Returns
    -------
    data_object
        `object` itself if it is already locked, locking was requested and
        no type change is needed; otherwise a new data_object.

    Raises
    ------
    ValueError
        If a type change or locking is requested without copying.
    """
    if dtype is None:
        dtype = object.dtype
    dtypes_equal = dtype == object.dtype
    if set_locked and dtypes_equal and locked(object):
        # already in the requested state, nothing to do
        return object
    if not dtypes_equal and not copy:
        raise ValueError("cannot change data type without copying")
    if set_locked and not copy:
        raise ValueError("cannot lock object without copying")
    data = np.array(object._data, dtype=dtype, copy=copy)
    if set_locked:
        data.flags.writeable = False
    return data_object(object._shape, data, distaxis=object._distaxis)
301
302


Martin Reinecke's avatar
Martin Reinecke committed
303
304
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks.
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a data_object of random numbers, distributed along axis 0.

    Parameters
    ----------
    random_type : str
        Name of the generator method looked up on `Random`.
    shape : sequence of int
        Global shape of the array.
    dtype : data-type
        Passed on to the generator.
    **kwargs
        Passed on to the generator.
    """
    generator_function = getattr(Random, random_type)
    # generate the slab of every task in turn and keep only the own one
    for i in range(ntask):
        lshape = list(shape)
        lshape[0] = _shareSize(shape[0], ntask, i)
        ldat = generator_function(dtype=dtype, shape=lshape, **kwargs)
        if i == rank:
            outdat = ldat
    return from_local_data(shape, outdat, distaxis=0)
317

Martin Reinecke's avatar
Martin Reinecke committed
318

Martin Reinecke's avatar
Martin Reinecke committed
319
320
321
322
def local_data(arr):
    """Return the numpy array holding this task's part of `arr`."""
    own_part = arr._data
    return own_part


323
324
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index at which this task's local data starts.

    Parameters
    ----------
    glob_shape : sequence of int
        Global shape of the array.
    distaxis : int
        Axis along which the array is distributed; negative for none.

    Returns
    -------
    tuple of int
        One start index per axis; zero everywhere except (possibly) at
        `distaxis`.
    """
    res = [0] * len(glob_shape)
    if distaxis < 0:
        # same return type as the distributed case
        return tuple(res)
    res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    return tuple(res)


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
331
332
def ibegin(arr):
    """Return the global index at which the local data of `arr` starts.

    Returns
    -------
    tuple of int
        One start index per axis; zero everywhere except (possibly) at the
        distribution axis of `arr`.
    """
    res = [0] * arr._data.ndim
    # a negative distaxis means "not distributed"; it must not be used as an
    # index, since that would address the last axis
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
335
336


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
337
338
def np_allreduce_sum(arr):
    """Return the sum of the numpy array `arr` over all MPI tasks.

    The result has the same shape and dtype as `arr`.
    """
    res = np.empty_like(arr)
    _comm.Allreduce(arr, res, MPI.SUM)
    return res
Martin Reinecke's avatar
Martin Reinecke committed
341
342


343
344
345
346
347
348
def np_allreduce_min(arr):
    """Return the minimum of the numpy array `arr` over all MPI tasks.

    The result has the same shape and dtype as `arr`.
    """
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, MPI.MIN)
    return reduced


Martin Reinecke's avatar
Martin Reinecke committed
349
350
351
352
def distaxis(arr):
    """Return the axis along which `arr` is distributed."""
    axis = arr._distaxis
    return axis


Martin Reinecke's avatar
Martin Reinecke committed
353
def from_local_data(shape, arr, distaxis=0):
    """Wrap the local numpy array `arr` into a data_object.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the resulting array.
    arr : numpy.ndarray
        This task's part of the array.
    distaxis : int
        Axis along which the array is distributed.
    """
    return data_object(shape, arr, distaxis)


357
358
359
def from_global_data(arr, sum_up=False, distaxis=0):
    """Create a data_object from a numpy array present on every task.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with the global shape.
    sum_up : bool
        If True, `arr` is first summed over all MPI tasks.
    distaxis : int
        Axis along which the result is distributed; -1 keeps the complete
        array on every task.
    """
    if sum_up:
        arr = np_allreduce_sum(arr)
    if distaxis == -1:
        return data_object(arr.shape, arr, distaxis)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # numpy requires a tuple (not a list) for multidimensional indexing
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
368
369
def to_global_data(arr):
    """Return the complete global array of `arr` as a numpy array.

    If `arr` is not distributed, its local data is returned directly;
    otherwise the data is first redistributed with ``dist=-1``.
    """
    if arr._distaxis == -1:
        return arr._data
    tmp = redistribute(arr, dist=-1)
    return tmp._data


Martin Reinecke's avatar
Martin Reinecke committed
375
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with a different distribution axis.

    Exactly one of `dist` and `nodist` must be given.

    Parameters
    ----------
    arr : data_object
        Array to redistribute.
    dist : int, optional
        Requested distribution axis; -1 gathers the complete array on every
        task.
    nodist : sequence of int, optional
        Axes that must not be distributed.  The first axis not contained in
        it becomes the new distribution axis (-1 if there is none).

    Returns
    -------
    data_object
        `arr` itself if its distribution already satisfies the request,
        otherwise a new data_object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError("dist and nodist must not both be given")
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError("either dist or nodist must be given")
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, distaxis=dist)
    if dist == -1:  # gather all data on all tasks
        # move the distributed axis to the front, so that the slabs of the
        # individual tasks can simply be concatenated
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        # bytes per index along the distributed axis
        slabsize = int(np.prod(tmp.shape[1:], dtype=np.int64))*tmp.itemsize
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly distributed axis in front
        shp = np.array(arr._shape)
        shp[1:arr._distaxis+1] = shp[0:arr._distaxis]
        shp[0] = arr.shape[arr._distaxis]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    # ssz*/rsz* count array entries first and are converted to bytes below
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # pack the pieces destined for the individual tasks one after another
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            # numpy requires a tuple (not a list) of slices for indexing
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # unpack the piece received from every task into its place
        arrnew = empty(arr.shape, dtype=arr.dtype, distaxis=dist)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew._data[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
459
460


Martin Reinecke's avatar
Martin Reinecke committed
461
462
def transpose(arr):
    """Return the transpose of a 2D data_object distributed along axis 0.

    The result has global shape ``(arr.shape[1], arr.shape[0])`` and is
    again distributed along axis 0.

    Raises
    ------
    ValueError
        If `arr` is not two-dimensional or not distributed along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    # ssz*/rsz* count array entries first and are converted to bytes below
    ssz0 = arr._data.size//arr.shape[1]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    # pack the column blocks destined for the individual tasks
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    arrnew = empty((arr.shape[1], arr.shape[0]), dtype=arr.dtype, distaxis=0)
    ofs = 0
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    # every received block is transposed and stored in its column range
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        arrnew._data[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return arrnew


Martin Reinecke's avatar
Martin Reinecke committed
497
498
def default_distaxis():
    """Return the axis along which arrays are distributed by default."""
    first_axis = 0
    return first_axis
499
500
501
502
503
504
505
506


def lock(arr):
    """Make the local data of `arr` read-only."""
    arr._data.setflags(write=False)


def locked(arr):
    """Return True if the local data of `arr` is read-only."""
    writeable = arr._data.flags.writeable
    return not writeable