distributed_do.py 17.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

19
from __future__ import absolute_import, division, print_function
Philipp Arras's avatar
Philipp Arras committed
20 21 22

import sys

23 24
import numpy as np
from mpi4py import MPI
Philipp Arras's avatar
Philipp Arras committed
25 26 27

from ..compat import *
from .random import Random
28

Martin Reinecke's avatar
Martin Reinecke committed
29 30 31 32 33 34 35
# Public names exported by a star-import of this module.
__all__ = [
    "ntask", "rank", "master", "local_shape", "data_object", "full",
    "empty", "zeros", "ones", "empty_like", "vdot", "exp",
    "log", "tanh", "sqrt", "from_object", "from_random",
    "local_data", "ibegin", "ibegin_from_shape", "np_allreduce_sum",
    "np_allreduce_min", "np_allreduce_max",
    "distaxis", "from_local_data", "from_global_data", "to_global_data",
    "redistribute", "default_distaxis", "is_numpy",
    "lock", "locked", "uniform_full", "transpose", "to_global_data_rw",
]
Martin Reinecke's avatar
Martin Reinecke committed
37

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
38 39 40
# Communicator used by every collective operation in this module.
_comm = MPI.COMM_WORLD
# Number of MPI tasks in the communicator and the index of this task.
ntask, rank = _comm.Get_size(), _comm.Get_rank()
# True only on the task with rank 0.
master = rank == 0
42 43


Martin Reinecke's avatar
Martin Reinecke committed
44 45 46 47
def is_numpy():
    """Tell callers whether this backend is the plain NumPy one.

    This MPI-based implementation always answers ``False``.
    """
    answer = False
    return answer


Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
48
def _shareSize(nwork, nshares, myshare):
    """Return how many of `nwork` items belong to share number `myshare`.

    The items are split into `nshares` nearly equal parts; the first
    ``nwork % nshares`` shares receive one extra item.
    """
    base, surplus = divmod(nwork, nshares)
    return base + int(myshare < surplus)
Martin Reinecke's avatar
Martin Reinecke committed
50

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
51 52

def _shareRange(nwork, nshares, myshare):
    """Return the half-open index range ``(lo, hi)`` of share `myshare`.

    `nwork` items are split into `nshares` contiguous parts; the first
    ``nwork % nshares`` parts are one item longer than the rest.
    """
    base, surplus = divmod(nwork, nshares)
    # Every preceding share contributes `base` items, and the preceding
    # "long" shares contribute one more each.
    start = myshare*base + min(myshare, surplus)
    stop = start + base + int(myshare < surplus)
    return start, stop

59

60
def local_shape(shape, distaxis=0):
    """Return the shape of this task's part of an array of global `shape`.

    For a zero-dimensional shape or ``distaxis == -1`` the input `shape`
    is returned unchanged; otherwise a tuple is returned in which the
    extent along `distaxis` is replaced by this task's share.
    """
    is_distributed = len(shape) != 0 and distaxis != -1
    if not is_distributed:
        return shape
    local = list(shape)
    local[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(local)

Martin Reinecke's avatar
Martin Reinecke committed
67

68 69
class data_object(object):
    """Array whose data may be split across the MPI tasks along one axis.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    data : numpy.ndarray
        The part of the array held by this task. Its shape must equal
        ``local_shape(shape, distaxis)``. For a zero-dimensional `shape`
        a non-array value is accepted and wrapped into a 0-d array.
    distaxis : int
        Axis along which the array is distributed; -1 means that every
        task holds the complete array. Forced to -1 for 0-d arrays.

    Raises
    ------
    ValueError
        If the shape of `data` does not match the expected local shape.
    """

    def __init__(self, shape, data, distaxis):
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # Scalars cannot be distributed.
            distaxis = -1
            if not isinstance(data, np.ndarray):
                data = np.full((), data)
        self._distaxis = distaxis
        self._data = data
        if local_shape(self._shape, self._distaxis) != self._data.shape:
            raise ValueError("shape mismatch")

    def copy(self):
        """Return a new object holding a copy of the local data."""
        return data_object(self._shape, self._data.copy(), self._distaxis)

    # Disabled consistency checks, kept for reference.
#     def _sanity_checks(self):
#         # check whether the distaxis is consistent
#         if self._distaxis < -1 or self._distaxis >= len(self._shape):
#             raise ValueError
#         itmp = np.array(self._distaxis)
#         otmp = np.empty(ntask, dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         if np.any(otmp != self._distaxis):
#             raise ValueError
#         # check whether the global shape is consistent
#         itmp = np.array(self._shape)
#         otmp = np.empty((ntask, len(self._shape)), dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         for i in range(ntask):
#             if np.any(otmp[i, :] != self._shape):
#                 raise ValueError
#         # check shape of local data
#         if self._distaxis < 0:
#             if self._data.shape != self._shape:
#                 raise ValueError
#         else:
#             itmp = np.array(self._shape)
#             itmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
#                                               ntask, rank)
#             if np.any(self._data.shape != itmp):
#                 raise ValueError

    @property
    def dtype(self):
        """Data type of the local array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape as a tuple."""
        return self._shape

    @property
    def size(self):
        """Total number of elements of the global array, as an int."""
        # np.prod(()) is the float 1.0, so convert explicitly.
        return int(np.prod(self._shape))

    @property
    def real(self):
        """Real part, with the same global shape and distribution."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Imaginary part, with the same global shape and distribution."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _normalize_axis(self, axis):
        """Return `axis` as a tuple of non-negative ints, or None.

        A single integer is wrapped into a 1-tuple and negative entries
        are counted from the end, so that they can be compared with
        ``self._distaxis``.
        """
        if axis is None:
            return None
        if np.isscalar(axis):
            axis = (axis,)
        ndim = len(self._shape)
        return tuple(ax + ndim if ax < 0 else ax for ax in axis)

    def _contraction_helper(self, op, mpiop, axis):
        """Apply the reduction `op` over `axis`, combining tasks as needed.

        Parameters
        ----------
        op : str
            Name of the ndarray method performing the local reduction.
        mpiop : MPI operation
            Operation handed to ``Allreduce`` to combine the partial
            results of the tasks.
        axis : None, int or sequence of int
            Axes to reduce over; None (or all axes) reduces everything.

        Returns
        -------
        A scalar for a full reduction, otherwise a data_object.
        """
        axis = self._normalize_axis(axis)
        if axis is not None and len(axis) == len(self._data.shape):
            axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if self._distaxis == -1:
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # The distributed axis is reduced away: combine the partial
            # results of all tasks, then distribute the result again.
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)

        # perform the contraction on the local data
        res = getattr(self._data, op)(axis=axis)
        if self._distaxis == -1:
            return from_global_data(res, distaxis=0)
        shp = list(res.shape)
        # Every removed axis in front of the distributed one shifts its
        # position in the result by one.
        shift = sum(1 for ax in axis if ax < self._distaxis)
        shp[self._distaxis-shift] = self.shape[self._distaxis]
        return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (all elements if None)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def prod(self, axis=None):
        """Product over `axis` (all elements if None)."""
        return self._contraction_helper("prod", MPI.PROD, axis)

#    def min(self, axis=None):
#        return self._contraction_helper("min", MPI.MIN, axis)

#    def max(self, axis=None):
#        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self, axis=None):
        """Arithmetic mean over `axis` (all elements if None)."""
        axis = self._normalize_axis(axis)
        if axis is None:
            sz = self.size
        else:
            sz = 1
            for i in axis:
                sz *= self._shape[i]
        return self.sum(axis)/sz

    def std(self, axis=None):
        """Standard deviation; see `var` for the supported `axis` values."""
        return np.sqrt(self.var(axis))

    # FIXME: to be improved!
    def var(self, axis=None):
        """Variance over all elements.

        Raises
        ------
        ValueError
            If `axis` is given and does not cover all axes.
        """
        axis = self._normalize_axis(axis)
        if axis is not None and len(axis) != len(self.shape):
            raise ValueError("functionality not yet supported")
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the ndarray special method `op` to the local data.

        `other` may be a data_object with identical global shape and
        distribution, or a scalar; anything else yields NotImplemented.
        In-place operations return `self`.

        Raises
        ------
        ValueError
            If `other` is a data_object with different shape or distaxis.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        if tval is NotImplemented:
            # Let Python try the reflected operation or raise TypeError.
            return NotImplemented
        if tval is a:
            # In-place operation: the local array was modified directly.
            return self
        return data_object(self._shape, tval, self._distaxis)

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, abs(self._data), self._distaxis)

    def all(self):
        """Return whether the sum of all elements equals the element count.

        NOTE(review): this matches "all elements are true" only for
        boolean-like data -- confirm callers never pass other values.
        """
        return self.sum() == self.size

    def any(self):
        """Return whether the sum of all elements is nonzero."""
        return self.sum() != 0

    def fill(self, value):
        """Set every local element to `value` in place."""
        self._data.fill(value)
230

231

232 233 234 235 236 237 238 239 240 241 242 243 244 245
def _make_binary_operator(name):
    """Build the data_object method that forwards to ``_binary_helper``.

    `name` is bound by this factory call, so each generated method keeps
    its own operator name instead of the loop variable's final value.
    """
    def method(self, other):
        return self._binary_helper(other, op=name)
    # Give the generated method a meaningful name for tracebacks.
    method.__name__ = name
    return method


# Attach arithmetic and comparison operators to data_object; each one
# delegates to the ndarray special method of the same name.
for op in ["__add__", "__radd__", "__iadd__",
           "__sub__", "__rsub__", "__isub__",
           "__mul__", "__rmul__", "__imul__",
           "__div__", "__rdiv__", "__idiv__",
           "__truediv__", "__rtruediv__", "__itruediv__",
           "__floordiv__", "__rfloordiv__", "__ifloordiv__",
           "__pow__", "__rpow__", "__ipow__",
           "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"]:
    setattr(data_object, op, _make_binary_operator(op))

Martin Reinecke's avatar
Martin Reinecke committed
246

Martin Reinecke's avatar
Martin Reinecke committed
247
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with `fill_value`.

    Only this task's local part is allocated; `distaxis` selects the
    distributed axis.
    """
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
250 251


Martin Reinecke's avatar
Martin Reinecke committed
252 253 254 255 256 257
def uniform_full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with one value.

    The local data is a broadcast view of `fill_value` rather than an
    allocated array of the local shape. If `dtype` is given, the value
    is converted to it first (previously the argument was ignored).
    """
    value = np.asarray(fill_value, dtype=dtype)
    return data_object(
        shape, np.broadcast_to(value, local_shape(shape, distaxis)),
        distaxis)


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
258
def empty(shape, dtype=None, distaxis=0):
    """Return an uninitialised data_object of global `shape`.

    Only this task's local part is allocated.
    """
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
261 262


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
263
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with zeros.

    Only this task's local part is allocated.
    """
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
266 267


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
268
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global `shape` filled with ones.

    Only this task's local part is allocated.
    """
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
271 272 273 274 275 276 277


def empty_like(a, dtype=None):
    """Return an uninitialised data_object shaped and distributed like `a`.

    `dtype` overrides the data type of `a` when given.
    """
    # data_object needs the global shape and the distribution axis in
    # addition to the local array.
    return data_object(a._shape, np.empty_like(a._data, dtype=dtype),
                       a._distaxis)


def vdot(a, b):
    """Return the scalar ``np.vdot`` of two data_objects.

    The local contributions are summed over all tasks unless `a` is not
    distributed (``a._distaxis == -1``), in which case the local result
    is returned directly.
    """
    partial = np.array(np.vdot(a._data, b._data))
    if a._distaxis != -1:
        total = np.empty((), dtype=partial.dtype)
        _comm.Allreduce(partial, total, MPI.SUM)
        return total[()]
    return partial[()]
284 285 286


def _math_helper(x, function, out):
    """Apply the NumPy function named `function` to the local data of `x`.

    When `out` is None a new data_object with the shape and distribution
    of `x` is returned; otherwise the result is written into the local
    data of `out`, and `out` itself is returned.
    """
    ufunc = getattr(np, function)
    if out is None:
        return data_object(x.shape, ufunc(x._data), x._distaxis)
    ufunc(x._data, out=out._data)
    return out
293 294


295
_current_module = sys.modules[__name__]


def _make_unary_function(name):
    """Build the module-level wrapper for the NumPy function `name`.

    `name` is bound by this factory call, so each generated function
    keeps its own target instead of the loop variable's final value.
    """
    def wrapper(x, out=None):
        return _math_helper(x, name, out)
    # Give the generated function a meaningful name for tracebacks.
    wrapper.__name__ = name
    return wrapper


# Expose element-wise math functions as attributes of this module.
for f in ["sqrt", "exp", "log", "tanh", "conjugate"]:
    setattr(_current_module, f, _make_unary_function(f))
303 304


Martin Reinecke's avatar
Martin Reinecke committed
305 306 307 308 309 310 311 312 313 314 315 316
def from_object(object, dtype, copy, set_locked):
    """Build a data_object from an existing one.

    Parameters
    ----------
    object : data_object
        Source object.
    dtype : data type or None
        Requested data type; None keeps the data type of `object`.
    copy : bool
        Passed on to ``np.array``; must be true when the data type
        changes or when the result is to be locked.
    set_locked : bool
        If true, the returned object's data is made read-only. A source
        that is already locked and has the requested data type is
        returned as is.

    Raises
    ------
    ValueError
        If a data type change or locking is requested without copying.
    """
    if dtype is None:
        dtype = object.dtype
    same_dtype = dtype == object.dtype
    if set_locked and same_dtype and locked(object):
        return object
    if not (same_dtype or copy):
        raise ValueError("cannot change data type without copying")
    if set_locked and not copy:
        raise ValueError("cannot lock object without copying")
    converted = np.array(object._data, dtype=dtype, copy=copy)
    if set_locked:
        converted.flags.writeable = False
    return data_object(object._shape, converted, distaxis=object._distaxis)
319 320


Martin Reinecke's avatar
Martin Reinecke committed
321 322
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks.
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a data_object of global `shape` filled with random numbers.

    `random_type` names the generator method looked up on ``Random``;
    `dtype` and any extra keyword arguments are passed on to it. A
    zero-dimensional result is broadcast so that all tasks agree; all
    other results are distributed along axis 0.
    """
    draw = getattr(Random, random_type)
    if len(shape) == 0:
        sample = draw(dtype=dtype, shape=shape, **kwargs)
        sample = _comm.bcast(sample)
        return from_local_data(shape, sample, distaxis=-1)
    # Draw the slab of every task in turn and keep only our own; the
    # loop deliberately does not stop early (see the comment above).
    for task in range(ntask):
        slab_shape = list(shape)
        slab_shape[0] = _shareSize(shape[0], ntask, task)
        sample = draw(dtype=dtype, shape=slab_shape, **kwargs)
        if task == rank:
            mine = sample
    return from_local_data(shape, mine, distaxis=0)
339

Martin Reinecke's avatar
Martin Reinecke committed
340

Martin Reinecke's avatar
Martin Reinecke committed
341 342 343 344
def local_data(arr):
    """Return the NumPy array held locally by `arr` (no copy is made)."""
    data = arr._data
    return data


345 346
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index at which this task's local data begins.

    Parameters
    ----------
    glob_shape : sequence of int
        Global shape of the array.
    distaxis : int
        Distributed axis; a negative value means "not distributed".

    Returns
    -------
    tuple of int
        One offset per axis; all zero except possibly along `distaxis`.
    """
    res = [0] * len(glob_shape)
    if distaxis >= 0:
        res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    # Always return a tuple, also in the non-distributed case.
    return tuple(res)


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
353 354
def ibegin(arr):
    """Return the global index at which the local data of `arr` begins.

    Returns a tuple with one offset per axis; all entries are zero
    except possibly the one along the distributed axis.
    """
    res = [0] * arr._data.ndim
    # A non-distributed array (distaxis -1) starts at the origin on every
    # task; indexing with -1 would wrongly offset the last axis and fail
    # for zero-dimensional arrays.
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
357 358


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
359 360
def np_allreduce_sum(arr):
    """Return the element-wise sum of `arr` over all MPI tasks."""
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, op=MPI.SUM)
    return reduced
Martin Reinecke's avatar
Martin Reinecke committed
363 364


365 366 367 368 369 370
def np_allreduce_min(arr):
    """Return the element-wise minimum of `arr` over all MPI tasks."""
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, op=MPI.MIN)
    return reduced


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
371 372 373 374 375 376
def np_allreduce_max(arr):
    """Return the element-wise maximum of `arr` over all MPI tasks."""
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, op=MPI.MAX)
    return reduced


Martin Reinecke's avatar
Martin Reinecke committed
377 378 379 380
def distaxis(arr):
    """Return the axis along which `arr` is distributed (-1 if none)."""
    axis = arr._distaxis
    return axis


Martin Reinecke's avatar
Martin Reinecke committed
381
def from_local_data(shape, arr, distaxis=0):
    """Wrap this task's local array `arr` into a data_object.

    `shape` is the global shape and `distaxis` the distributed axis.
    """
    return data_object(shape=shape, data=arr, distaxis=distaxis)


385 386 387
def from_global_data(arr, sum_up=False, distaxis=0):
    """Build a data_object from an array that is complete on every task.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with the global shape.
    sum_up : bool
        If true, `arr` is first summed over all tasks.
    distaxis : int
        Axis along which the result is distributed; -1 keeps the full
        array on every task.
    """
    if sum_up:
        arr = np_allreduce_sum(arr)
    if distaxis == -1:
        return data_object(arr.shape, arr, distaxis)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # NumPy requires a tuple (not a list) for multi-axis indexing.
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
396 397
def to_global_data(arr):
    """Return the complete array of `arr` as a NumPy array.

    For a non-distributed object the internal array itself is returned;
    otherwise the data is gathered from all tasks first.
    """
    if arr._distaxis != -1:
        arr = redistribute(arr, dist=-1)
    return arr._data


403 404 405 406 407 408 409
def to_global_data_rw(arr):
    """Return the complete array of `arr` without aliasing its storage.

    A non-distributed object yields a copy of its internal array;
    otherwise the array gathered from all tasks is returned.
    """
    if arr._distaxis != -1:
        return redistribute(arr, dist=-1)._data
    return arr._data.copy()


Martin Reinecke's avatar
Martin Reinecke committed
410
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with its data distributed along a different axis.

    Exactly one of `dist` and `nodist` must be given.

    Parameters
    ----------
    arr : data_object
        Object to redistribute.
    dist : int, optional
        Axis along which the result is distributed; -1 gathers the
        complete array on every task.
    nodist : collection of int, optional
        Axes along which the result must not be distributed. The first
        axis not contained in it is chosen, or -1 if there is none.

    Returns
    -------
    data_object
        `arr` itself if it already satisfies the request, otherwise a
        new object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError("only one of 'dist' and 'nodist' may be given")
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError("one of 'dist' and 'nodist' must be given")
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, distaxis=dist)
    if dist == -1:  # gather all data on all tasks
        # Move the distributed axis to the front so that every task's
        # part is one contiguous slab of the gathered buffer.
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        slabsize = np.prod(tmp.shape[1:])*tmp.itemsize
        # Sizes and displacements are in bytes (the transfer uses MPI.BYTE).
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # Shape of the gathered data with the distributed axis in front.
        shp = np.array(arr._shape)
        shp[1:arr._distaxis+1] = shp[0:arr._distaxis]
        shp[0] = arr.shape[arr._distaxis]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # Pack the part destined for each task contiguously into sbuf.
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            # NumPy requires a tuple (not a list) for multi-axis indexing.
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    # Convert element counts to byte counts (the transfer uses MPI.BYTE).
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # Unpack the chunk received from each task into its slice along
        # the formerly distributed axis.
        arrnew = np.empty(local_shape(arr.shape, dist), dtype=arr.dtype)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
        arrnew = from_local_data(arr.shape, arrnew, distaxis=dist)
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
495 496


Martin Reinecke's avatar
Martin Reinecke committed
497 498
def transpose(arr):
    """Return the transpose of a 2-D data_object distributed along axis 0.

    The result has global shape ``(arr.shape[1], arr.shape[0])`` and is
    again distributed along axis 0.

    Raises
    ------
    ValueError
        If `arr` is not two-dimensional or not distributed along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    ssz0 = arr._data.size//arr.shape[1]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    # Pack the columns destined for each task contiguously into sbuf.
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    # Convert element counts to byte counts (the transfer uses MPI.BYTE).
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    arrnew = np.empty((sz2, arr.shape[0]), dtype=arr.dtype)
    ofs = 0
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        # The chunk from task i holds its rows of our columns; transpose
        # it into columns lo:hi of the result.
        arrnew[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return from_local_data((arr.shape[1], arr.shape[0]), arrnew, 0)
Martin Reinecke's avatar
Martin Reinecke committed
531 532


Martin Reinecke's avatar
Martin Reinecke committed
533 534
def default_distaxis():
    """Return the axis along which new arrays are distributed by default."""
    axis = 0
    return axis
535 536 537 538 539 540 541 542


def lock(arr):
    """Make the local data of `arr` read-only."""
    arr._data.setflags(write=False)


def locked(arr):
    """Return True if the local data of `arr` is read-only."""
    writable = arr._data.flags["WRITEABLE"]
    return not writable