distributed_do.py 16.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

19
from __future__ import absolute_import, division, print_function
Philipp Arras's avatar
Philipp Arras committed
20 21 22

import sys

23 24
import numpy as np
from mpi4py import MPI
Philipp Arras's avatar
Philipp Arras committed
25 26 27

from ..compat import *
from .random import Random
28

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
29 30 31
# Module-wide MPI state: all tasks of MPI.COMM_WORLD take part in every
# collective operation issued by this module.
_comm = MPI.COMM_WORLD
ntask = _comm.Get_size()  # total number of MPI tasks
rank = _comm.Get_rank()  # index of this task within _comm
master = (rank == 0)  # True only on the task with rank 0
33 34


Martin Reinecke's avatar
Martin Reinecke committed
35 36 37 38
def is_numpy():
    """Return False (this module works on MPI-distributed data objects)."""
    return False


Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
39
def _shareSize(nwork, nshares, myshare):
    """Return the number of work items belonging to share `myshare`.

    `nwork` items are split over `nshares` shares; every share gets
    ``nwork//nshares`` items and the first ``nwork % nshares`` shares get one
    additional item.
    """
    return (nwork//nshares) + int(myshare < nwork % nshares)
Martin Reinecke's avatar
Martin Reinecke committed
41

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
42 43

def _shareRange(nwork, nshares, myshare):
    """Return the half-open index range ``(lo, hi)`` of share `myshare`.

    `nwork` items are split over `nshares` shares; the first
    ``nwork % nshares`` shares are one item longer than the remaining ones.
    """
    nbase = nwork//nshares
    additional = nwork % nshares
    # every preceding share contributes nbase items, and each of the first
    # `additional` shares one more
    lo = myshare*nbase + min(myshare, additional)
    hi = lo + nbase + int(myshare < additional)
    return lo, hi

50

51
def local_shape(shape, distaxis=0):
    """Return the shape of this task's part of an array of global `shape`.

    Parameters
    ----------
    shape : sequence of int
        Global array shape.
    distaxis : int
        Axis along which the array is distributed; -1 means the array is not
        distributed.

    Returns
    -------
    tuple of int
        `shape` itself for zero-dimensional or non-distributed arrays,
        otherwise `shape` with the entry at `distaxis` replaced by the size
        of this task's share.
    """
    if len(shape) == 0 or distaxis == -1:
        # always hand back a tuple so that both branches agree
        return tuple(shape)
    shape2 = list(shape)
    shape2[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(shape2)

Martin Reinecke's avatar
Martin Reinecke committed
58

59 60
class data_object(object):
    """Array that may be distributed along one axis over all MPI tasks.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    data : numpy.ndarray
        The part of the array stored on this task.
    distaxis : int
        Axis along which the array is distributed; -1 means that every task
        stores the full array. Forced to -1 for zero-dimensional arrays.

    Raises
    ------
    ValueError
        If the shape of `data` is not the local shape that follows from
        `shape` and `distaxis`.
    """

    def __init__(self, shape, data, distaxis):
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # there is no axis along which a scalar could be distributed
            distaxis = -1
        self._distaxis = distaxis
        self._data = data
        if local_shape(self._shape, self._distaxis) != self._data.shape:
            raise ValueError("shape mismatch")

    def copy(self):
        """Return a new data_object holding a copy of the local data."""
        return data_object(self._shape, self._data.copy(), self._distaxis)

#     def _sanity_checks(self):
#         # check whether the distaxis is consistent
#         if self._distaxis < -1 or self._distaxis >= len(self._shape):
#             raise ValueError
#         itmp = np.array(self._distaxis)
#         otmp = np.empty(ntask, dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         if np.any(otmp != self._distaxis):
#             raise ValueError
#         # check whether the global shape is consistent
#         itmp = np.array(self._shape)
#         otmp = np.empty((ntask, len(self._shape)), dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         for i in range(ntask):
#             if np.any(otmp[i, :] != self._shape):
#                 raise ValueError
#         # check shape of local data
#         if self._distaxis < 0:
#             if self._data.shape != self._shape:
#                 raise ValueError
#         else:
#             itmp = np.array(self._shape)
#             itmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
#                                               ntask, rank)
#             if np.any(self._data.shape != itmp):
#                 raise ValueError

    @property
    def dtype(self):
        """Data type of the local array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape of the array."""
        return self._shape

    @property
    def size(self):
        """Product of the entries of the global shape."""
        return np.prod(self._shape)

    @property
    def real(self):
        """data_object wrapping the real part of the local data."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """data_object wrapping the imaginary part of the local data."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate as a new data_object."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate as a new data_object."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Contract the array over `axis`, combining the tasks if necessary.

        Parameters
        ----------
        op : str
            Name of the ndarray method doing the local contraction
            (e.g. "sum").
        mpiop : MPI reduction operation
            Operation passed to Allreduce when partial results of different
            tasks have to be combined.
        axis : None or tuple of int
            Axes to contract; None, or a tuple naming as many axes as the
            array has, contracts everything.

        Returns
        -------
        scalar or data_object
            A scalar for a full contraction, otherwise a data_object.
        """
        if axis is not None:
            if len(axis) == len(self._data.shape):
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if (self._distaxis == -1):
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # the distributed axis is contracted: every task only holds a
            # partial result, which has to be combined across tasks
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)
        else:
            # perform the contraction on the local data
            res = getattr(self._data, op)(axis=axis)
            if self._distaxis == -1:
                return from_global_data(res, distaxis=0)
            shp = list(res.shape)
            # every contracted axis in front of the distributed axis moves
            # the distributed axis one position to the left
            shift = 0
            for ax in axis:
                if ax < self._distaxis:
                    shift += 1
            shp[self._distaxis-shift] = self.shape[self._distaxis]
            return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (a tuple of ints), or over everything if None."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def prod(self, axis=None):
        """Product over `axis` (a tuple of ints), or over everything if None."""
        return self._contraction_helper("prod", MPI.PROD, axis)

#    def min(self, axis=None):
#        return self._contraction_helper("min", MPI.MIN, axis)

#    def max(self, axis=None):
#        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self, axis=None):
        """Mean over `axis` (a tuple of ints), or over everything if None."""
        if axis is None:
            sz = self.size
        else:
            sz = reduce(lambda x, y: x*y, [self.shape[i] for i in axis])
        return self.sum(axis)/sz

    def std(self, axis=None):
        """Standard deviation, i.e. the square root of `var(axis)`."""
        return np.sqrt(self.var(axis))

    # FIXME: to be improved!
    def var(self, axis=None):
        """Variance over the whole array.

        Raises
        ------
        ValueError
            If `axis` is given and does not name as many axes as the array
            has; partial variances are not supported.
        """
        if axis is not None and len(axis) != len(self.shape):
            raise ValueError("functionality not yet supported")
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the ndarray operator method named `op` to self and `other`.

        `other` may be a data_object with identical global shape and
        distribution, a scalar, or a numpy.ndarray (combined directly with
        the local data). For any other type NotImplemented is returned.

        Raises
        ------
        ValueError
            If `other` is a data_object with a different shape or
            distribution axis.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        elif isinstance(other, np.ndarray):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        if tval is NotImplemented:
            # let Python try the reflected operation instead of wrapping the
            # NotImplemented singleton in a data_object
            return NotImplemented
        if tval is a:
            # in-place operators return the local array itself
            return self
        return data_object(self._shape, tval, self._distaxis)

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, abs(self._data), self._distaxis)

    def all(self):
        """Return whether the sum of all entries equals the global size.

        For a boolean array this means that every entry is True.
        """
        return self.sum() == self.size

    def any(self):
        """Return whether the sum of all entries is nonzero."""
        return self.sum() != 0

    def fill(self, value):
        """Set every entry of the local data to `value`."""
        self._data.fill(value)
222

223

224 225 226 227 228 229 230 231 232 233 234 235 236 237
def func(op):
    """Return a method forwarding the operator named `op` to _binary_helper."""
    def func2(self, other):
        return self._binary_helper(other, op=op)
    return func2


# Install all arithmetic and comparison operators on data_object; every one
# of them simply delegates to data_object._binary_helper.
for op in ("__add__", "__radd__", "__iadd__",
           "__sub__", "__rsub__", "__isub__",
           "__mul__", "__rmul__", "__imul__",
           "__div__", "__rdiv__", "__idiv__",
           "__truediv__", "__rtruediv__", "__itruediv__",
           "__floordiv__", "__rfloordiv__", "__ifloordiv__",
           "__pow__", "__rpow__", "__ipow__",
           "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"):
    setattr(data_object, op, func(op))

Martin Reinecke's avatar
Martin Reinecke committed
238

Martin Reinecke's avatar
Martin Reinecke committed
239
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with `fill_value`.

    Only this task's local part (see `local_shape`) is allocated.
    """
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
242 243


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
244
def empty(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` with uninitialized entries.

    Only this task's local part (see `local_shape`) is allocated.
    """
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
247 248


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
249
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with zeros.

    Only this task's local part (see `local_shape`) is allocated.
    """
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
252 253


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
254
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with ones.

    Only this task's local part (see `local_shape`) is allocated.
    """
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
257 258 259 260 261 262 263


def empty_like(a, dtype=None):
    """Return an uninitialized data_object shaped and distributed like `a`.

    Parameters
    ----------
    a : data_object
        Template providing global shape, distribution axis and, if `dtype`
        is None, the data type.
    dtype : data-type, optional
        Overrides the data type of the result.
    """
    # data_object needs the global shape and the distribution axis in
    # addition to the local data
    return data_object(a._shape, np.empty_like(a._data, dtype=dtype),
                       a._distaxis)


def vdot(a, b):
    """Return the scalar `numpy.vdot` product of two data_objects.

    The product is computed on the local data of each task and the partial
    results are summed over all tasks.
    """
    tmp = np.array(np.vdot(a._data, b._data))
    res = np.empty((), dtype=tmp.dtype)
    _comm.Allreduce(tmp, res, MPI.SUM)
    # index with an empty tuple to turn the 0-d array into a scalar
    return res[()]
268 269 270


def _math_helper(x, function, out):
    """Apply the NumPy function named `function` to the local data of `x`.

    Parameters
    ----------
    x : data_object
        Input array.
    function : str
        Name of a function in the numpy namespace.
    out : data_object or None
        If given, the result is written into its local data and `out` itself
        is returned; otherwise a new data_object is returned.
    """
    function = getattr(np, function)
    if out is not None:
        function(x._data, out=out._data)
        return out
    else:
        return data_object(x.shape, function(x._data), x._distaxis)
277 278


279
_current_module = sys.modules[__name__]

# Create module-level functions sqrt, exp, log, tanh and conjugate which apply
# the NumPy function of the same name via _math_helper.
for f in ["sqrt", "exp", "log", "tanh", "conjugate"]:
    def func(f):
        # the extra function layer binds the current value of `f`
        def func2(x, out=None):
            return _math_helper(x, f, out)
        return func2
    setattr(_current_module, f, func(f))
287 288


Martin Reinecke's avatar
Martin Reinecke committed
289 290 291 292 293 294 295 296 297 298 299 300
def from_object(object, dtype, copy, set_locked):
    """Build a data_object from an existing one.

    Parameters
    ----------
    object : data_object
        Source array.
    dtype : data-type or None
        Requested data type; None keeps the data type of `object`.
    copy : bool
        Whether the local data is copied.
    set_locked : bool
        Whether the local data of the result is made read-only.

    Returns
    -------
    data_object
        `object` itself if a locked result is requested and `object` is
        already locked with the requested data type, otherwise a new
        data_object.

    Raises
    ------
    ValueError
        If a data type change or locking is requested without copying.
    """
    if dtype is None:
        dtype = object.dtype
    dtypes_equal = dtype == object.dtype
    if set_locked and dtypes_equal and locked(object):
        return object
    if not dtypes_equal and not copy:
        raise ValueError("cannot change data type without copying")
    if set_locked and not copy:
        raise ValueError("cannot lock object without copying")
    data = np.array(object._data, dtype=dtype, copy=copy)
    if set_locked:
        data.flags.writeable = False
    return data_object(object._shape, data, distaxis=object._distaxis)
303 304


Martin Reinecke's avatar
Martin Reinecke committed
305 306
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks.
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
310
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a data_object of random numbers, distributed along axis 0.

    Parameters
    ----------
    random_type : str
        Name of the generator method looked up on `Random`.
    shape : sequence of int
        Global shape of the array.
    dtype : data-type
        Data type passed to the generator.
    **kwargs
        Passed on to the generator.
    """
    generator_function = getattr(Random, random_type)
    # Every task generates the chunks of all tasks in order and keeps only
    # its own one.
    for i in range(ntask):
        lshape = list(shape)
        lshape[0] = _shareSize(shape[0], ntask, i)
        ldat = generator_function(dtype=dtype, shape=lshape, **kwargs)
        if i == rank:
            outdat = ldat
    return from_local_data(shape, outdat, distaxis=0)
319

Martin Reinecke's avatar
Martin Reinecke committed
320

Martin Reinecke's avatar
Martin Reinecke committed
321 322 323 324
def local_data(arr):
    """Return the array stored in `arr._data`, i.e. this task's local data."""
    local = arr._data
    return local


325 326
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index at which this task's local data starts.

    Parameters
    ----------
    glob_shape : sequence of int
        Global array shape.
    distaxis : int
        Distribution axis; a negative value means "not distributed".

    Returns
    -------
    tuple of int
        One offset per axis; all zero except possibly the entry at
        `distaxis`.
    """
    res = [0] * len(glob_shape)
    if distaxis >= 0:
        res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    # return a tuple in both cases
    return tuple(res)


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
333 334
def ibegin(arr):
    """Return the global index at which the local data of `arr` starts.

    Returns
    -------
    tuple of int
        One offset per axis; all zero except possibly the entry at the
        distribution axis of `arr`.
    """
    res = [0] * arr._data.ndim
    # A distribution axis of -1 means "not distributed"; it must not be used
    # as an index, since that would address the last axis.
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
337 338


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
339 340
def np_allreduce_sum(arr):
    """Return the element-wise sum of the NumPy array `arr` over all tasks."""
    res = np.empty_like(arr)
    _comm.Allreduce(arr, res, MPI.SUM)
    return res
Martin Reinecke's avatar
Martin Reinecke committed
343 344


345 346 347 348 349 350
def np_allreduce_min(arr):
    """Return the element-wise minimum of the NumPy array `arr` over all tasks."""
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, op=MPI.MIN)
    return reduced


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
351 352 353 354 355 356
def np_allreduce_max(arr):
    """Return the element-wise maximum of the NumPy array `arr` over all tasks."""
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, op=MPI.MAX)
    return reduced


Martin Reinecke's avatar
Martin Reinecke committed
357 358 359 360
def distaxis(arr):
    """Return the distribution axis stored in `arr._distaxis`."""
    axis = arr._distaxis
    return axis


Martin Reinecke's avatar
Martin Reinecke committed
361
def from_local_data(shape, arr, distaxis=0):
    """Wrap this task's local array `arr` in a data_object.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    arr : numpy.ndarray
        Local data of this task.
    distaxis : int
        Distribution axis.
    """
    return data_object(shape, arr, distaxis)


365 366 367
def from_global_data(arr, sum_up=False, distaxis=0):
    """Build a data_object from an array that holds the full global data.

    Parameters
    ----------
    arr : numpy.ndarray
        Array of global shape, available on every task.
    sum_up : bool
        If True, `arr` is first summed over all tasks.
    distaxis : int
        Distribution axis of the result; -1 keeps the full array on every
        task.

    Returns
    -------
    data_object
        Wraps the part of `arr` that belongs to this task.
    """
    if sum_up:
        arr = np_allreduce_sum(arr)
    if distaxis == -1:
        return data_object(arr.shape, arr, distaxis)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # NumPy requires a tuple (not a list) for multi-dimensional slicing
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
376 377
def to_global_data(arr):
    """Return the full global data of `arr` as a NumPy array.

    If `arr` is distributed, its data is first gathered on all tasks via
    `redistribute`.
    """
    if arr._distaxis == -1:
        return arr._data
    tmp = redistribute(arr, dist=-1)
    return tmp._data


Martin Reinecke's avatar
Martin Reinecke committed
383
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with a different distribution over the MPI tasks.

    Exactly one of `dist` and `nodist` has to be given.

    Parameters
    ----------
    arr : data_object
        Input array.
    dist : int, optional
        Requested distribution axis; -1 gathers the full array on all tasks.
    nodist : collection of int, optional
        Axes along which the result must not be distributed. The first axis
        not contained in it becomes the distribution axis; if there is none,
        the array is gathered on all tasks.

    Returns
    -------
    data_object
        `arr` itself if it already fulfils the request, otherwise a new
        data_object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError("dist and nodist must not be given together")
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError("either dist or nodist must be given")
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, distaxis=dist)
    if dist == -1:  # gather all data on all tasks
        # bring the distributed axis to the front, so that the chunks of the
        # individual tasks are contiguous in the gathered buffer
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        slabsize = np.prod(tmp.shape[1:])*tmp.itemsize
        # sizes and displacements are measured in bytes (MPI.BYTE)
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly distributed axis moved to the front
        shp = np.array(arr._shape)
        shp[1:arr._distaxis+1] = shp[0:arr._distaxis]
        shp[0] = arr.shape[arr._distaxis]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # pack the chunks for the individual tasks one after another
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    # convert element counts to byte counts
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # unpack the received chunks into their place along the formerly
        # distributed axis
        arrnew = empty(arr.shape, dtype=arr.dtype, distaxis=dist)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew._data[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
467 468


Martin Reinecke's avatar
Martin Reinecke committed
469 470
def transpose(arr):
    """Return the transpose of a 2D data_object distributed along axis 0.

    The result has the global shape ``(arr.shape[1], arr.shape[0])`` and is
    again distributed along axis 0.

    Raises
    ------
    ValueError
        If `arr` is not two-dimensional or not distributed along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    ssz0 = arr._data.size//arr.shape[1]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    # pack the column blocks for the individual tasks one after another
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    # convert element counts to byte counts
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    arrnew = empty((arr.shape[1], arr.shape[0]), dtype=arr.dtype, distaxis=0)
    ofs = 0
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    # each received chunk is a (rows of task i) x (local columns) block,
    # which is stored transposed in the result
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        arrnew._data[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return arrnew


Martin Reinecke's avatar
Martin Reinecke committed
505 506
def default_distaxis():
    """Return the default distribution axis, which is 0."""
    return 0
507 508 509 510 511 512 513 514


def lock(arr):
    """Make the local data of `arr` read-only."""
    arr._data.setflags(write=False)


def locked(arr):
    """Return True if the local data of `arr` is read-only."""
    return not arr._data.flags["WRITEABLE"]