distributed_do.py 16.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

19 20
from __future__ import absolute_import, division, print_function
from ..compat import *
21 22 23
import numpy as np
from .random import Random
from mpi4py import MPI
24
import sys
25

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
26 27 28
# Communicator used by every collective operation in this module.
_comm = MPI.COMM_WORLD
# Size of the communicator, i.e. the number of shares data is split into.
ntask = _comm.Get_size()
# Rank of the current task inside the communicator.
rank = _comm.Get_rank()
# True only on the task with rank 0.
master = rank == 0
30 31


Martin Reinecke's avatar
Martin Reinecke committed
32 33 34 35
def is_numpy():
    """Return False: this backend wraps its arrays in `data_object`."""
    return False


Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
36
def _shareSize(nwork, nshares, myshare):
    """Return the number of work items in share `myshare`.

    `nwork` items are split over `nshares` shares; the first
    `nwork % nshares` shares receive one item more than the others.
    """
    base, surplus = divmod(nwork, nshares)
    return base + int(myshare < surplus)
Martin Reinecke's avatar
Martin Reinecke committed
38

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
39 40

def _shareRange(nwork, nshares, myshare):
    """Return the half-open index range `(lo, hi)` of share `myshare`.

    `nwork` items are split over `nshares` shares; the first
    `nwork % nshares` shares receive one item more than the others.
    """
    base, surplus = divmod(nwork, nshares)
    # Every preceding share contributes `base` items, and each preceding
    # share that is among the first `surplus` ones contributes one more.
    start = myshare*base + min(myshare, surplus)
    stop = start + base + int(myshare < surplus)
    return start, stop

47

48
def local_shape(shape, distaxis=0):
    """Return the shape of this task's part of an array of global `shape`.

    For an empty `shape` or `distaxis == -1` the input is returned
    unchanged.  Otherwise the entry at `distaxis` is replaced by the size
    of the share belonging to the current rank, and a tuple is returned.
    """
    not_distributed = len(shape) == 0 or distaxis == -1
    if not_distributed:
        return shape
    local = list(shape)
    local[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(local)

Martin Reinecke's avatar
Martin Reinecke committed
55

56 57
class data_object(object):
    """Array that is split over the MPI tasks along at most one axis.

    Attributes
    ----------
    _shape : tuple
        Shape of the global array.
    _data : numpy.ndarray
        The part of the array held by the current task; its shape must
        equal `local_shape(_shape, _distaxis)`.
    _distaxis : int
        Axis along which the array is split.  The value -1 means that the
        array is not split (see `local_shape`); it is enforced for
        zero-dimensional arrays.
    """

    def __init__(self, shape, data, distaxis):
        """Wrap the local array `data` as part of a global array.

        Raises
        ------
        ValueError
            If `data.shape` differs from the local shape expected for the
            given global `shape` and `distaxis`.
        """
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # there is no axis to split a zero-dimensional array along
            distaxis = -1
        self._distaxis = distaxis
        self._data = data
        if local_shape(self._shape, self._distaxis) != self._data.shape:
            raise ValueError("shape mismatch")

    def copy(self):
        """Return a new object holding a copy of the local data."""
        return data_object(self._shape, self._data.copy(), self._distaxis)

#     def _sanity_checks(self):
#         # check whether the distaxis is consistent
#         if self._distaxis < -1 or self._distaxis >= len(self._shape):
#             raise ValueError
#         itmp = np.array(self._distaxis)
#         otmp = np.empty(ntask, dtype=int)
#         _comm.Allgather(itmp, otmp)
#         if np.any(otmp != self._distaxis):
#             raise ValueError
#         # check whether the global shape is consistent
#         itmp = np.array(self._shape)
#         otmp = np.empty((ntask, len(self._shape)), dtype=int)
#         _comm.Allgather(itmp, otmp)
#         for i in range(ntask):
#             if np.any(otmp[i, :] != self._shape):
#                 raise ValueError
#         # check shape of local data
#         if self._distaxis < 0:
#             if self._data.shape != self._shape:
#                 raise ValueError
#         else:
#             itmp = np.array(self._shape)
#             itmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
#                                               ntask, rank)
#             if np.any(self._data.shape != itmp):
#                 raise ValueError

    @property
    def dtype(self):
        """Data type of the local array."""
        return self._data.dtype

    @property
    def shape(self):
        """Shape of the global array."""
        return self._shape

    @property
    def size(self):
        """Number of elements of the global array, as an integer."""
        # An explicit integer dtype is required: the product over an empty
        # shape would otherwise be the float 1.0.
        return np.prod(self._shape, dtype=np.int64)

    @property
    def real(self):
        """Object wrapping the real part of the local data."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Object wrapping the imaginary part of the local data."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate as a new object."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate as a new object."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Apply the reduction `op` locally and combine the task results.

        Parameters
        ----------
        op : str
            Name of the ndarray reduction method applied to the local data.
        mpiop : MPI operation
            Operation used by `Allreduce` to combine the local results.
        axis : sequence of int or None
            Axes to reduce over; None reduces over all axes.

        Returns
        -------
        A scalar if all axes are reduced, otherwise a `data_object`.
        """
        if axis is not None:
            if len(axis) == len(self._data.shape):
                # reducing over every axis is the same as a full reduction
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if (self._distaxis == -1):
                # the local data is the full array, nothing to combine
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # The distributed axis is reduced away, so the local results
            # have to be combined over all tasks.
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)
        else:
            # perform the contraction on the local data
            res = getattr(self._data, op)(axis=axis)
            if self._distaxis == -1:
                return from_global_data(res, distaxis=0)
            shp = list(res.shape)
            # Each reduced axis in front of the distributed axis moves the
            # latter one position towards the front.
            shift = 0
            for ax in axis:
                if ax < self._distaxis:
                    shift += 1
            shp[self._distaxis-shift] = self.shape[self._distaxis]
            return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (all axes if None)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def prod(self, axis=None):
        """Product over `axis` (all axes if None)."""
        return self._contraction_helper("prod", MPI.PROD, axis)

    def min(self, axis=None):
        """Minimum over `axis` (all axes if None)."""
        return self._contraction_helper("min", MPI.MIN, axis)

    def max(self, axis=None):
        """Maximum over `axis` (all axes if None)."""
        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self, axis=None):
        """Mean over `axis` (all axes if None)."""
        if axis is None:
            sz = self.size
        else:
            sz = reduce(lambda x, y: x*y, [self.shape[i] for i in axis])
        return self.sum(axis)/sz

    def std(self, axis=None):
        """Standard deviation; see `var` for the supported `axis` values."""
        return np.sqrt(self.var(axis))

    # FIXME: to be improved!
    def var(self, axis=None):
        """Variance over all elements.

        Raises
        ------
        ValueError
            If `axis` is given and does not list every axis.
        """
        if axis is not None and len(axis) != len(self.shape):
            raise ValueError("functionality not yet supported")
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the ndarray special method `op` to the local data.

        `other` may be a `data_object` with identical shape and
        distribution, a scalar, or a numpy array; any other type yields
        NotImplemented.  If the operation returns the local array itself
        (in-place operators), `self` is returned.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        elif isinstance(other, np.ndarray):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        if tval is a:
            return self
        else:
            return data_object(self._shape, tval, self._distaxis)

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, abs(self._data), self._distaxis)

    def _count_nonzero(self):
        """Return the number of non-zero elements of the global array."""
        local = np.array(np.count_nonzero(self._data), dtype=np.int64)
        if self._distaxis == -1:
            # the local data is the full array, nothing to combine
            return local[()]
        total = np.empty((), dtype=np.int64)
        _comm.Allreduce(local, total, MPI.SUM)
        return total[()]

    def all(self):
        """Return True if every element is non-zero."""
        # Counting non-zero entries (instead of summing the values) keeps
        # the answer correct for data that is not boolean.
        return self._count_nonzero() == self.size

    def any(self):
        """Return True if at least one element is non-zero."""
        # A plain sum would miss entries that cancel each other.
        return self._count_nonzero() != 0

    def fill(self, value):
        """Set every element of the local data to `value`."""
        self._data.fill(value)
219

220

221 222 223 224 225 226 227 228 229 230 231 232 233 234
# Install the arithmetic and comparison special methods on data_object.
# Every one of them forwards to data_object._binary_helper, passing on the
# name of the special method.
def func(op):
    def func2(self, other):
        return self._binary_helper(other, op=op)
    return func2


for op in ("__add__", "__radd__", "__iadd__",
           "__sub__", "__rsub__", "__isub__",
           "__mul__", "__rmul__", "__imul__",
           "__div__", "__rdiv__", "__idiv__",
           "__truediv__", "__rtruediv__", "__itruediv__",
           "__floordiv__", "__rfloordiv__", "__ifloordiv__",
           "__pow__", "__rpow__", "__ipow__",
           "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"):
    setattr(data_object, op, func(op))

Martin Reinecke's avatar
Martin Reinecke committed
235

Martin Reinecke's avatar
Martin Reinecke committed
236
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with `fill_value`."""
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
239 240


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
241
def empty(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` with uninitialized data."""
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
244 245


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
246
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with zeros."""
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
249 250


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
251
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with ones."""
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
254 255 256 257 258 259 260


def empty_like(a, dtype=None):
    """Return an uninitialized data_object shaped and distributed like `a`.

    Parameters
    ----------
    a : data_object
        Object whose global shape, local shape and distribution axis are
        reused.
    dtype : data type, optional
        Data type of the result; passed on to `np.empty_like`.
    """
    # data_object needs the global shape and the distribution axis in
    # addition to the local array.
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return the global `np.vdot` of two data_objects as a scalar.

    The local contributions are computed with `np.vdot` and summed over
    all tasks.  If `a` is not distributed (`_distaxis == -1`) the local
    result is returned directly.
    """
    tmp = np.array(np.vdot(a._data, b._data))
    if a._distaxis == -1:
        # Every task already holds the full result; summing over the tasks
        # would multiply it by the number of tasks.
        return tmp[()]
    res = np.empty((), dtype=tmp.dtype)
    _comm.Allreduce(tmp, res, MPI.SUM)
    return res[()]
265 266 267


def _math_helper(x, function, out):
    """Apply the numpy function named `function` to the local data of `x`.

    If `out` is None a new data_object with the shape and distribution of
    `x` is returned; otherwise the result is written into the local data
    of `out`, and `out` is returned.
    """
    np_func = getattr(np, function)
    if out is None:
        return data_object(x.shape, np_func(x._data), x._distaxis)
    np_func(x._data, out=out._data)
    return out
274 275


276
_current_module = sys.modules[__name__]


# Create module-level functions that forward to the numpy function of the
# same name via _math_helper.
def func(f):
    def func2(x, out=None):
        return _math_helper(x, f, out)
    return func2


for f in ("sqrt", "exp", "log", "tanh", "conjugate"):
    setattr(_current_module, f, func(f))
284 285


Martin Reinecke's avatar
Martin Reinecke committed
286 287 288 289 290 291 292 293 294 295 296 297
def from_object(object, dtype, copy, set_locked):
    """Create a data_object from another one.

    Parameters
    ----------
    object : data_object
        The source object.
    dtype : data type or None
        Requested data type; None keeps the type of `object`.
    copy : bool
        Passed to `np.array` as its `copy` argument.
    set_locked : bool
        If True, the data of the result is made read-only.

    Returns
    -------
    `object` itself if locking is requested, the data type is unchanged
    and `object` is already locked; otherwise a new data_object.

    Raises
    ------
    ValueError
        If a type change or locking is requested with `copy` set to False.
    """
    source_dtype = object.dtype
    if dtype is None:
        dtype = source_dtype
    same_dtype = dtype == source_dtype
    if set_locked and same_dtype and locked(object):
        return object
    if not (same_dtype or copy):
        raise ValueError("cannot change data type without copying")
    if set_locked and not copy:
        raise ValueError("cannot lock object without copying")
    new_data = np.array(object._data, dtype=dtype, copy=copy)
    if set_locked:
        new_data.flags.writeable = False
    return data_object(object._shape, new_data, distaxis=object._distaxis)
300 301


Martin Reinecke's avatar
Martin Reinecke committed
302 303
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks.
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a data_object of global `shape`, split along axis 0.

    The generator is the attribute of `Random` named `random_type`.  It is
    called once for the share of every task, in task order, and only the
    result drawn for the current rank is kept.
    """
    draw = getattr(Random, random_type)
    for task in range(ntask):
        task_shape = list(shape)
        task_shape[0] = _shareSize(shape[0], ntask, task)
        chunk = draw(dtype=dtype, shape=task_shape, **kwargs)
        if task == rank:
            own_chunk = chunk
    return from_local_data(shape, own_chunk, distaxis=0)
316

Martin Reinecke's avatar
Martin Reinecke committed
317

Martin Reinecke's avatar
Martin Reinecke committed
318 319 320 321
def local_data(arr):
    """Return the array stored in `arr._data` (no copy is made)."""
    return arr._data


322 323
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index at which this task's local data begins.

    Parameters
    ----------
    glob_shape : sequence of int
        Shape of the global array.
    distaxis : int
        Axis along which the array is split; a negative value means that
        it is not split.

    Returns
    -------
    tuple of int
        One offset per axis.  All entries are zero except the one at
        `distaxis`, which holds the start of the share of the current rank.
    """
    res = [0] * len(glob_shape)
    # Without a distributed axis (or without any axis at all) every offset
    # is zero.  The result is a tuple in every case.
    if distaxis >= 0 and len(glob_shape) > 0:
        res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    return tuple(res)


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
330 331
def ibegin(arr):
    """Return the global index at which the local data of `arr` begins.

    Returns
    -------
    tuple of int
        One offset per axis of the local data.  All entries are zero
        except the one at the distribution axis of `arr`, which holds the
        start of the share of the current rank.
    """
    res = [0] * arr._data.ndim
    # A distribution axis of -1 means that the data is not split; writing
    # to res[-1] would wrongly shift the last axis (and fail for 0-d data).
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
334 335


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
336 337
def np_allreduce_sum(arr):
    """Return the element-wise sum of `arr` over all tasks as a new array."""
    total = np.empty_like(arr)
    _comm.Allreduce(arr, total, MPI.SUM)
    return total
Martin Reinecke's avatar
Martin Reinecke committed
340 341


342 343 344 345 346 347
def np_allreduce_min(arr):
    """Return the element-wise minimum of `arr` over all tasks."""
    smallest = np.empty_like(arr)
    _comm.Allreduce(arr, smallest, MPI.MIN)
    return smallest


Martin Reinecke's avatar
Martin Reinecke committed
348 349 350 351
def distaxis(arr):
    """Return the distribution axis stored in `arr._distaxis`."""
    return arr._distaxis


Martin Reinecke's avatar
Martin Reinecke committed
352
def from_local_data(shape, arr, distaxis=0):
    """Wrap the local array `arr` in a data_object of global `shape`."""
    return data_object(shape=shape, data=arr, distaxis=distaxis)


356 357 358
def from_global_data(arr, sum_up=False, distaxis=0):
    """Create a data_object from an array holding the complete data.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with the global shape.
    sum_up : bool
        If True, `arr` is first summed over all tasks.
    distaxis : int
        Axis along which the result is split; -1 keeps the whole array on
        every task.

    Returns
    -------
    data_object
        For `distaxis != -1` its local data is the slice of `arr`
        belonging to the current rank.
    """
    if sum_up:
        arr = np_allreduce_sum(arr)
    # A zero-dimensional array has no axis to split along; data_object
    # treats it as not distributed as well.
    if distaxis == -1 or len(arr.shape) == 0:
        return data_object(arr.shape, arr, -1)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # The index must be a tuple: a list of slices is no longer interpreted
    # as a multidimensional index by NumPy.
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
367 368
def to_global_data(arr):
    """Return the local data of `arr` after redistributing it to -1.

    If `arr` already has distribution axis -1 its data is returned as is.
    """
    if arr._distaxis != -1:
        arr = redistribute(arr, dist=-1)
    return arr._data


Martin Reinecke's avatar
Martin Reinecke committed
374
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with its data split along a different axis.

    Exactly one of `dist` and `nodist` must be given.

    Parameters
    ----------
    arr : data_object
        The object to redistribute.
    dist : int, optional
        The new distribution axis; -1 gathers the full array on every task.
    nodist : sequence of int, optional
        Axes along which the result must not be split.  The first axis not
        listed becomes the new distribution axis (-1 if all are listed).

    Returns
    -------
    data_object
        `arr` itself if it already fulfils the request, otherwise a new
        object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, distaxis=dist)
    if dist == -1:  # gather all data on all tasks
        # Move the distributed axis to the front, so that the parts of the
        # individual tasks are contiguous slabs of the gathered buffer.
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        slabsize = np.prod(tmp.shape[1:])*tmp.itemsize
        # counts and displacements are measured in bytes (MPI.BYTE)
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly distributed axis in front
        shp = ((arr._shape[arr._distaxis],) + arr._shape[:arr._distaxis]
               + arr._shape[arr._distaxis+1:])
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    # ssz0/rsz0: number of elements sent/received per index along the new/
    # old distribution axis
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        # The parts for the individual tasks are slices along axis 0 and
        # thus already consecutive in a C-contiguous array.
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # pack the part destined for each task into the send buffer
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            # index with a tuple: NumPy does not accept a list of slices
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    # convert element counts to byte counts
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        # the received parts are slices along axis 0, already in order
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # unpack the part received from each task into its slice
        arrnew = empty(arr.shape, dtype=arr.dtype, distaxis=dist)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew._data[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
458 459


Martin Reinecke's avatar
Martin Reinecke committed
460 461
def transpose(arr):
    """Return the transpose of a two-dimensional data_object.

    Parameters
    ----------
    arr : data_object
        Two-dimensional object that is split along axis 0.

    Returns
    -------
    data_object
        Object of shape `(arr.shape[1], arr.shape[0])`, split along axis 0.

    Raises
    ------
    ValueError
        If `arr` is not two-dimensional or not split along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    # ssz0/rsz0: number of elements sent per column / received per row
    ssz0 = arr._data.size//arr.shape[1]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    # pack the block of columns destined for each task into the send buffer
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    # convert element counts to byte counts (the messages use MPI.BYTE)
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    arrnew = empty((arr.shape[1], arr.shape[0]), dtype=arr.dtype, distaxis=0)
    ofs = 0
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    # transpose the block received from each task into its column range
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        arrnew._data[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return arrnew


Martin Reinecke's avatar
Martin Reinecke committed
496 497
def default_distaxis():
    """Return 0, the distribution axis used by default in this module."""
    return 0
498 499 500 501 502 503 504 505


def lock(arr):
    """Make the local data of `arr` read-only."""
    local = arr._data
    local.flags.writeable = False


def locked(arr):
    """Return True if the local data of `arr` is read-only."""
    writeable = arr._data.flags.writeable
    return not writeable