distributed_do.py 18.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

19
from __future__ import absolute_import, division, print_function
Philipp Arras's avatar
Philipp Arras committed
20 21 22

import sys

23 24
import numpy as np
from mpi4py import MPI
Philipp Arras's avatar
Philipp Arras committed
25 26 27

from ..compat import *
from .random import Random
28

Martin Reinecke's avatar
Martin Reinecke committed
29 30 31 32 33 34
# Names exported by ``from <this module> import *``.
__all__ = ["ntask", "rank", "master", "local_shape", "data_object", "full",
           "empty", "zeros", "ones", "empty_like", "vdot", "exp",
           "log", "tanh", "sqrt", "from_object", "from_random",
           "local_data", "ibegin", "ibegin_from_shape", "np_allreduce_sum",
           "np_allreduce_min", "np_allreduce_max",
           "distaxis", "from_local_data", "from_global_data", "to_global_data",
           "redistribute", "default_distaxis", "is_numpy", "absmax", "norm",
           "lock", "locked", "uniform_full", "transpose", "to_global_data_rw",
           "ensure_not_distributed", "ensure_default_distributed",
           "clipped_exp"]
Martin Reinecke's avatar
Martin Reinecke committed
39

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
40 41 42
# Communicator used by every collective operation in this module.
_comm = MPI.COMM_WORLD
# Number of MPI tasks in the communicator.
ntask = _comm.Get_size()
# Index of the calling task within the communicator.
rank = _comm.Get_rank()
# True only on the task with rank 0.
master = (rank == 0)
44 45


Martin Reinecke's avatar
Martin Reinecke committed
46 47 48 49
def is_numpy():
    """Return ``False`` unconditionally.

    NOTE(review): presumably lets callers tell this MPI-based backend apart
    from a NumPy-only one -- confirm against the sibling backend module.
    """
    return False


Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
50
def _shareSize(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
51
    return (nwork//nshares) + int(myshare < nwork % nshares)
Martin Reinecke's avatar
Martin Reinecke committed
52

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
53 54

def _shareRange(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
55 56
    nbase = nwork//nshares
    additional = nwork % nshares
Martin Reinecke's avatar
Martin Reinecke committed
57
    lo = myshare*nbase + min(myshare, additional)
Martin Reinecke's avatar
Martin Reinecke committed
58
    hi = lo + nbase + int(myshare < additional)
Martin Reinecke's avatar
Martin Reinecke committed
59 60
    return lo, hi

61

62
def local_shape(shape, distaxis=0):
    """Return the shape of the calling task's part of a distributed array.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    distaxis : int
        Axis along which the array is distributed; -1 means "not distributed".

    Returns
    -------
    `shape` itself if it is empty or `distaxis` is -1; otherwise a tuple in
    which the entry at `distaxis` is replaced by this task's share.
    """
    if len(shape) == 0 or distaxis == -1:
        return shape
    shape2 = list(shape)
    shape2[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(shape2)

Martin Reinecke's avatar
Martin Reinecke committed
69

70 71
class data_object(object):
    """Array whose data may be split along one axis over the MPI tasks.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    data : numpy.ndarray
        The part of the array held by the calling task. For an empty `shape`
        a plain scalar is accepted as well and wrapped into a rank-0 array.
    distaxis : int
        Axis along which the array is distributed; -1 means that the array
        is not distributed and the local data has the global shape.

    Raises
    ------
    ValueError
        If the shape of `data` differs from ``local_shape(shape, distaxis)``.
    """

    def __init__(self, shape, data, distaxis):
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # rank-0 objects are always treated as non-distributed
            distaxis = -1
            if not isinstance(data, np.ndarray):
                data = np.full((), data)
        self._distaxis = distaxis
        self._data = data
        if local_shape(self._shape, self._distaxis) != self._data.shape:
            raise ValueError("shape mismatch")

    def copy(self):
        """Return a new object holding a copy of the local data."""
        return data_object(self._shape, self._data.copy(), self._distaxis)

#     def _sanity_checks(self):
#         # check whether the distaxis is consistent
#         if self._distaxis < -1 or self._distaxis >= len(self._shape):
#             raise ValueError
#         itmp = np.array(self._distaxis)
#         otmp = np.empty(ntask, dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         if np.any(otmp != self._distaxis):
#             raise ValueError
#         # check whether the global shape is consistent
#         itmp = np.array(self._shape)
#         otmp = np.empty((ntask, len(self._shape)), dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         for i in range(ntask):
#             if np.any(otmp[i, :] != self._shape):
#                 raise ValueError
#         # check shape of local data
#         if self._distaxis < 0:
#             if self._data.shape != self._shape:
#                 raise ValueError
#         else:
#             itmp = np.array(self._shape)
#             itmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
#                                               ntask, rank)
#             if np.any(self._data.shape != itmp):
#                 raise ValueError

    @property
    def dtype(self):
        """Data type of the local array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape as a tuple."""
        return self._shape

    @property
    def size(self):
        """Total number of elements of the global array, as a Python int."""
        # np.prod(()) is the float 1.0; coerce so that rank-0 objects report
        # an integer size as well
        return int(np.prod(self._shape))

    @property
    def real(self):
        """Object wrapping the real part of the local data."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Object wrapping the imaginary part of the local data."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate as a new object."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate as a new object."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Reduce the array with the ndarray method `op` over `axis`.

        Parameters
        ----------
        op : str
            Name of the ndarray reduction method applied to the local data.
        mpiop : MPI reduction operation
            Operation used to combine the per-task results.
        axis : None or sequence of int
            Axes to contract; None (or a sequence naming as many axes as the
            local data has) requests a contraction over all axes.

        Returns
        -------
        A scalar for a contraction over all axes, otherwise a data_object.
        """
        if axis is not None:
            if len(axis) == len(self._data.shape):
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if (self._distaxis == -1):
                # the local result is already the global one
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # the distributed axis is contracted: reduce locally, then
            # combine the partial results of all tasks
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)
        else:
            # perform the contraction on the local data
            res = getattr(self._data, op)(axis=axis)
            if self._distaxis == -1:
                return from_global_data(res, distaxis=0)
            shp = list(res.shape)
            # contracted axes in front of the distributed axis move it
            # towards the front of the result
            shift = 0
            for ax in axis:
                if ax < self._distaxis:
                    shift += 1
            shp[self._distaxis-shift] = self.shape[self._distaxis]
            return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (all axes if None)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def prod(self, axis=None):
        """Product over `axis` (all axes if None)."""
        return self._contraction_helper("prod", MPI.PROD, axis)

#    def min(self, axis=None):
#        return self._contraction_helper("min", MPI.MIN, axis)

#    def max(self, axis=None):
#        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self, axis=None):
        """Arithmetic mean over `axis` (all axes if None)."""
        if axis is None:
            sz = self.size
        else:
            # number of elements contracted for every output element
            sz = int(np.prod([self.shape[i] for i in axis]))
        return self.sum(axis)/sz

    def std(self, axis=None):
        """Standard deviation; see `var` for the supported `axis` values."""
        return np.sqrt(self.var(axis))

    # FIXME: to be improved!
    def var(self, axis=None):
        """Variance over all axes.

        Raises
        ------
        ValueError
            If `axis` is given and does not name as many axes as the array
            has dimensions.
        """
        if axis is not None and len(axis) != len(self.shape):
            raise ValueError("functionality not yet supported")
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the ndarray special method `op` to self and `other`.

        `other` must be a data_object with identical shape and distribution,
        or a scalar; for anything else NotImplemented is returned.

        Raises
        ------
        ValueError
            If `other` is a data_object whose shape or distribution axis
            differs from that of self.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        if tval is a:
            # in-place operation: the local array was modified, keep self
            return self
        else:
            return data_object(self._shape, tval, self._distaxis)

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, abs(self._data), self._distaxis)

    def all(self):
        """Return whether the sum of all elements equals the element count."""
        return self.sum() == self.size

    def any(self):
        """Return whether the sum of all elements is non-zero."""
        return self.sum() != 0

    def fill(self, value):
        """Set every local element to `value` (in place)."""
        self._data.fill(value)
232

233

234 235 236 237 238 239 240 241 242 243 244 245 246 247
def _make_binary_method(opname):
    """Build a method forwarding to ``_binary_helper`` with op `opname`."""
    def method(self, other):
        return self._binary_helper(other, op=opname)
    return method


# Attach arithmetic, in-place and comparison operators to data_object; each
# one delegates to the ndarray special method of the same name.
for _opname in ("__add__", "__radd__", "__iadd__",
                "__sub__", "__rsub__", "__isub__",
                "__mul__", "__rmul__", "__imul__",
                "__div__", "__rdiv__", "__idiv__",
                "__truediv__", "__rtruediv__", "__itruediv__",
                "__floordiv__", "__rfloordiv__", "__ifloordiv__",
                "__pow__", "__rpow__", "__ipow__",
                "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"):
    setattr(data_object, _opname, _make_binary_method(_opname))

Martin Reinecke's avatar
Martin Reinecke committed
248

Martin Reinecke's avatar
Martin Reinecke committed
249
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with `fill_value`.

    The local array is created with ``np.full`` using this task's local
    shape for the distribution axis `distaxis`.
    """
    return data_object(shape, np.full(local_shape(shape, distaxis),
                                      fill_value, dtype), distaxis)
252 253


Martin Reinecke's avatar
Martin Reinecke committed
254 255 256 257 258 259
def uniform_full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object whose local data is a broadcast of `fill_value`.

    The local array is produced by ``np.broadcast_to``, i.e. no storage is
    allocated per element.

    Parameters
    ----------
    shape : sequence of int
        Global shape.
    fill_value : scalar or array-like
        Value broadcast to the local shape.
    dtype : data type, optional
        If given, `fill_value` is converted to this type first; with None
        the type is inferred from `fill_value`.
    distaxis : int
        Distribution axis of the result.
    """
    # honour `dtype`; with dtype=None this is the conversion that
    # np.broadcast_to would perform itself
    value = np.asarray(fill_value, dtype=dtype)
    return data_object(
        shape, np.broadcast_to(value, local_shape(shape, distaxis)),
        distaxis)


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
260
def empty(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` with uninitialised data.

    The local array is created with ``np.empty`` using this task's local
    shape for the distribution axis `distaxis`.
    """
    return data_object(shape, np.empty(local_shape(shape, distaxis),
                                       dtype), distaxis)
263 264


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
265
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with zeros.

    The local array is created with ``np.zeros`` using this task's local
    shape for the distribution axis `distaxis`.
    """
    return data_object(shape, np.zeros(local_shape(shape, distaxis), dtype),
                       distaxis)
268 269


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
270
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with ones.

    The local array is created with ``np.ones`` using this task's local
    shape for the distribution axis `distaxis`.
    """
    return data_object(shape, np.ones(local_shape(shape, distaxis), dtype),
                       distaxis)
273 274 275 276 277 278 279


def empty_like(a, dtype=None):
    """Return an uninitialised data_object shaped and distributed like `a`.

    Parameters
    ----------
    a : data_object
        Template providing global shape, distribution axis and, unless
        `dtype` is given, the data type.
    dtype : data type, optional
        Overrides the data type of the result.
    """
    # data_object needs the global shape and the distribution axis in
    # addition to the local array
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return ``np.vdot`` of two data_objects as a scalar.

    The product is computed on the local data; if `a` is distributed the
    per-task results are summed over all tasks.
    NOTE(review): `b` is assumed to have the same distribution as `a`; this
    is not checked here.
    """
    tmp = np.array(np.vdot(a._data, b._data))
    if a._distaxis == -1:
        # non-distributed: the local result is already complete
        return tmp[()]
    res = np.empty((), dtype=tmp.dtype)
    _comm.Allreduce(tmp, res, MPI.SUM)
    return res[()]
286 287 288


def _math_helper(x, function, out):
289
    function = getattr(np, function)
290 291 292 293
    if out is not None:
        function(x._data, out=out._data)
        return out
    else:
Martin Reinecke's avatar
Martin Reinecke committed
294
        return data_object(x.shape, function(x._data), x._distaxis)
295 296


297
_current_module = sys.modules[__name__]

# Create module-level element-wise functions (sqrt, exp, ...) that forward
# to _math_helper with the NumPy function of the same name.
for f in ["sqrt", "exp", "log", "tanh", "conjugate"]:
    def func(f):
        # the factory binds the current name; a closure created directly in
        # the loop would see only the last value of `f`
        def func2(x, out=None):
            return _math_helper(x, f, out)
        return func2
    setattr(_current_module, f, func(f))
305 306


Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
307 308 309 310
def clipped_exp(a):
    """Return ``exp(a)`` after clipping the local data to [-300, 300].

    The result has the same global shape and distribution axis as `a`.
    """
    return data_object(a._shape, np.exp(np.clip(a._data, -300, 300)),
                       a._distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
311 312 313 314 315 316 317 318 319 320 321 322
def from_object(object, dtype, copy, set_locked):
    """Build a data_object from an existing one.

    Parameters
    ----------
    object : data_object
        Source object.
    dtype : data type or None
        Requested data type; None keeps the type of `object`.
    copy : bool
        Passed to ``np.array`` as its `copy` argument.
    set_locked : bool
        If True, the data of the result is made read-only.

    Returns
    -------
    `object` itself if locking is requested, the data type is unchanged and
    `object` is already locked; otherwise a new data_object.

    Raises
    ------
    ValueError
        If a different data type or locking is requested while `copy` is
        false.
    """
    if dtype is None:
        dtype = object.dtype
    dtypes_equal = dtype == object.dtype
    if set_locked and dtypes_equal and locked(object):
        return object
    if not dtypes_equal and not copy:
        raise ValueError("cannot change data type without copying")
    if set_locked and not copy:
        raise ValueError("cannot lock object without copying")
    data = np.array(object._data, dtype=dtype, copy=copy)
    if set_locked:
        data.flags.writeable = False
    return data_object(object._shape, data, distaxis=object._distaxis)
325 326


Martin Reinecke's avatar
Martin Reinecke committed
327 328
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks
Martin Reinecke's avatar
Martin Reinecke committed
329 330 331
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
332
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a data_object filled with random numbers.

    Parameters
    ----------
    random_type : str
        Name of the generator method looked up on ``Random``.
    shape : sequence of int
        Global shape of the result.
    dtype : data type
        Passed on to the generator.
    **kwargs
        Passed on to the generator.

    Returns
    -------
    For an empty `shape` a non-distributed object whose value is shared via
    ``_comm.bcast``; otherwise an object distributed along axis 0.
    """
    generator_function = getattr(Random, random_type)
    if len(shape) == 0:
        ldat = generator_function(dtype=dtype, shape=shape, **kwargs)
        ldat = _comm.bcast(ldat)
        return from_local_data(shape, ldat, distaxis=-1)
    # Every task draws the slabs of all tasks in turn and keeps only its own
    # one, so that the generator is advanced identically everywhere.
    for i in range(ntask):
        lshape = list(shape)
        lshape[0] = _shareSize(shape[0], ntask, i)
        ldat = generator_function(dtype=dtype, shape=lshape, **kwargs)
        if i == rank:
            outdat = ldat
    return from_local_data(shape, outdat, distaxis=0)
345

Martin Reinecke's avatar
Martin Reinecke committed
346

Martin Reinecke's avatar
Martin Reinecke committed
347 348 349 350
def local_data(arr):
    """Return the array stored in `arr` for the calling task (no copy)."""
    chunk = arr._data
    return chunk


351 352
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index at which this task's local data starts.

    Parameters
    ----------
    glob_shape : sequence of int
        Global shape of the array.
    distaxis : int
        Distribution axis; a negative value means "not distributed".

    Returns
    -------
    tuple of int with one entry per axis; all entries are 0 except the one
    at `distaxis`, which holds the start of this task's range.
    """
    res = [0] * len(glob_shape)
    if distaxis >= 0:
        res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    # always a tuple, also for the non-distributed case
    return tuple(res)


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
359 360
def ibegin(arr):
    """Return the global index at which the local data of `arr` starts.

    Returns
    -------
    tuple of int with one entry per axis of the local data; all entries are
    0 except the one at the distribution axis. For a non-distributed object
    every entry is 0.
    """
    res = [0] * arr._data.ndim
    # a distribution axis of -1 marks a non-distributed object; using it as
    # an index would address the last axis (or fail for rank-0 data)
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
363 364


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
365 366
def np_allreduce_sum(arr):
    """Return the element-wise sum of `arr` over all MPI tasks.

    `arr` is a NumPy array; the result is a new array of the same shape and
    type, obtained with ``Allreduce`` and ``MPI.SUM``.
    """
    res = np.empty_like(arr)
    _comm.Allreduce(arr, res, MPI.SUM)
    return res
Martin Reinecke's avatar
Martin Reinecke committed
369 370


371 372 373 374 375 376
def np_allreduce_min(arr):
    """Return the element-wise minimum of `arr` over all MPI tasks.

    `arr` is a NumPy array; the result is a new array of the same shape and
    type, obtained with ``Allreduce`` and ``MPI.MIN``.
    """
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, MPI.MIN)
    return reduced


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
377 378 379 380 381 382
def np_allreduce_max(arr):
    """Return the element-wise maximum of `arr` over all MPI tasks.

    `arr` is a NumPy array; the result is a new array of the same shape and
    type, obtained with ``Allreduce`` and ``MPI.MAX``.
    """
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, MPI.MAX)
    return reduced


Martin Reinecke's avatar
Martin Reinecke committed
383 384 385 386
def distaxis(arr):
    """Return the axis along which `arr` is distributed (-1 if none)."""
    axis = arr._distaxis
    return axis


Martin Reinecke's avatar
Martin Reinecke committed
387
def from_local_data(shape, arr, distaxis=0):
    """Wrap this task's local array `arr` into a data_object.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the resulting object.
    arr : numpy.ndarray
        Local part of the data.
    distaxis : int
        Axis along which the data is distributed (-1: not distributed).
    """
    return data_object(shape, arr, distaxis)


391 392 393
def from_global_data(arr, sum_up=False, distaxis=0):
    """Build a data_object from an array of global shape held on every task.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with the full global shape.
    sum_up : bool
        If True, `arr` is first summed over all tasks.
    distaxis : int
        Distribution axis of the result; with -1 the whole array is kept,
        otherwise only this task's slice along `distaxis`.
    """
    if sum_up:
        arr = np_allreduce_sum(arr)
    if distaxis == -1:
        return data_object(arr.shape, arr, distaxis)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    # select this task's range along distaxis and everything along the rest
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    return data_object(arr.shape, arr[tuple(sl)], distaxis)
Martin Reinecke's avatar
Martin Reinecke committed
400 401


Martin Reinecke's avatar
Martin Reinecke committed
402 403
def to_global_data(arr):
    """Return the complete global array of `arr` as a NumPy array.

    For a non-distributed object the stored array itself is returned (no
    copy); otherwise the data is gathered via ``redistribute``.
    """
    if arr._distaxis == -1:
        return arr._data
    tmp = redistribute(arr, dist=-1)
    return tmp._data


409 410 411 412 413 414 415
def to_global_data_rw(arr):
    """Return the complete global array of `arr` as a NumPy array.

    Unlike `to_global_data`, a non-distributed object yields a copy of its
    stored array rather than the array itself.
    """
    if arr._distaxis != -1:
        gathered = redistribute(arr, dist=-1)
        return gathered._data
    return arr._data.copy()


Martin Reinecke's avatar
Martin Reinecke committed
416
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with a different distribution axis.

    Exactly one of `dist` and `nodist` must be given.

    Parameters
    ----------
    arr : data_object
        Input object.
    dist : int, optional
        Requested distribution axis (-1: every task holds all data).
    nodist : sequence of int, optional
        Axes that must not be distributed. The first axis not contained in
        it becomes the distribution axis; if there is none, the result is
        not distributed.

    Returns
    -------
    `arr` itself if it already satisfies the request, otherwise a new
    data_object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, distaxis=dist)
    if dist == -1:  # gather all data on all tasks
        # bring the distributed axis to the front so that the slabs of the
        # individual tasks are contiguous in the gathered buffer
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        slabsize = np.prod(tmp.shape[1:])*tmp.itemsize
        # byte counts and displacements of the contribution of every task
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly distributed axis moved to the front
        shp = np.array(arr._shape)
        shp[1:arr._distaxis+1] = shp[0:arr._distaxis]
        shp[0] = arr.shape[arr._distaxis]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    # ssz/rsz: number of elements (later bytes) sent to / received from
    # every task
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # pack the part destined for every task contiguously into sbuf
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    # the buffers are transferred as raw bytes
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # unpack the pieces received from the individual tasks
        arrnew = np.empty(local_shape(arr.shape, dist), dtype=arr.dtype)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
        arrnew = from_local_data(arr.shape, arrnew, distaxis=dist)
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
501 502


Martin Reinecke's avatar
Martin Reinecke committed
503 504
def transpose(arr):
    """Return the transpose of a 2D data_object distributed along axis 0.

    The result has global shape ``(arr.shape[1], arr.shape[0])`` and is
    again distributed along axis 0.

    Raises
    ------
    ValueError
        If `arr` is not two-dimensional or not distributed along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    # ssz/rsz: number of elements (later bytes) sent to / received from
    # every task
    ssz0 = arr._data.size//arr.shape[1]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    # pack the columns destined for every task contiguously into sbuf
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    # the buffers are transferred as raw bytes
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    arrnew = np.empty((sz2, arr.shape[0]), dtype=arr.dtype)
    ofs = 0
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        # the piece received from task i holds its rows of the original
        # array; transposed, they become columns lo:hi of the result
        arrnew[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return from_local_data((arr.shape[1], arr.shape[0]), arrnew, 0)
Martin Reinecke's avatar
Martin Reinecke committed
537 538


Martin Reinecke's avatar
Martin Reinecke committed
539 540
def default_distaxis():
    """Return the default distribution axis, which is 0."""
    axis = 0
    return axis
541 542 543 544 545 546 547 548


def lock(arr):
    """Make the local data of `arr` read-only (in place)."""
    flags = arr._data.flags
    flags.writeable = False


def locked(arr):
    """Return True if the local data of `arr` is read-only."""
    writeable = arr._data.flags.writeable
    return not writeable
Martin Reinecke's avatar
Martin Reinecke committed
549 550 551 552 553 554 555 556 557 558 559 560


def ensure_not_distributed(arr, axes):
    """Return `arr` in a form that is not distributed along any of `axes`.

    Returns
    -------
    A pair ``(obj, data)``: `obj` is `arr` itself if its distribution axis
    is not in `axes`, else a redistributed object; `data` is the local array
    of `obj`.
    """
    result = redistribute(arr, nodist=axes) if arr._distaxis in axes else arr
    return result, result._data


def ensure_default_distributed(arr):
    """Return `arr` distributed along axis 0.

    `arr` itself is returned if it already is; otherwise it is
    redistributed.
    """
    if arr._distaxis == 0:
        return arr
    return redistribute(arr, dist=0)
Martin Reinecke's avatar
Martin Reinecke committed
561 562 563 564 565 566


def absmax(arr):
    """Return the largest absolute value in `arr` over all tasks.

    Each task computes the infinity norm of its flattened local data (0 if
    it holds no elements); the results are combined with ``MPI.MAX``.
    """
    if arr._data.size == 0:
        # NOTE(review): this placeholder carries the data's dtype, whereas
        # np.linalg.norm yields a floating-point value on the other tasks --
        # confirm the reduction copes with that for non-float data.
        tmp = np.array(0, dtype=arr._data.dtype)
    else:
        tmp = np.asarray(np.linalg.norm(arr._data.reshape(-1), ord=np.inf))
    res = np.empty_like(tmp)
    _comm.Allreduce(tmp, res, MPI.MAX)
    return res[()]


def norm(arr, ord=2):
    """Return the `ord`-norm of all elements of `arr` (treated as a vector).

    Parameters
    ----------
    arr : data_object
        Input object.
    ord : number
        Order of the norm; ``np.inf`` is delegated to `absmax`.

    Returns
    -------
    Scalar norm over the global array.
    """
    if ord == np.inf:
        return absmax(arr)
    # local contribution: sum of |x|**ord over the local elements
    tmp = np.asarray(np.linalg.norm(arr._data.reshape(-1), ord=ord) ** ord)
    if arr._distaxis == -1 or len(arr._data.shape) == 0:
        # every task already holds the complete array; summing over the
        # tasks would count each element ntask times
        res = tmp
    else:
        res = np.empty_like(tmp)
        _comm.Allreduce(tmp, res, MPI.SUM)
    return res[()] ** (1./ord)