distributed_do.py 16.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

19
from __future__ import absolute_import, division, print_function
Philipp Arras's avatar
Philipp Arras committed
20
21
22

import sys

23
24
import numpy as np
from mpi4py import MPI
Philipp Arras's avatar
Philipp Arras committed
25
26
27

from ..compat import *
from .random import Random
28

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
29
30
31
# Communicator used for every collective operation in this module.
_comm = MPI.COMM_WORLD
# Index of the calling task inside the communicator.
rank = _comm.Get_rank()
# Number of tasks taking part in the communicator.
ntask = _comm.Get_size()
# True only on the task with rank 0.
master = rank == 0
33
34


Martin Reinecke's avatar
Martin Reinecke committed
35
36
37
38
def is_numpy():
    """Report whether this backend is the plain-NumPy one (it is not)."""
    return False


Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
39
def _shareSize(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
40
    return (nwork//nshares) + int(myshare < nwork % nshares)
Martin Reinecke's avatar
Martin Reinecke committed
41

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
42
43

def _shareRange(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
44
45
    nbase = nwork//nshares
    additional = nwork % nshares
Martin Reinecke's avatar
Martin Reinecke committed
46
    lo = myshare*nbase + min(myshare, additional)
Martin Reinecke's avatar
Martin Reinecke committed
47
    hi = lo + nbase + int(myshare < additional)
Martin Reinecke's avatar
Martin Reinecke committed
48
49
    return lo, hi

50

51
def local_shape(shape, distaxis=0):
    """Return the shape of the part of an array held by the calling task.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    distaxis : int
        Axis along which the array is split; -1 means "not split".

    Returns
    -------
    `shape` itself (unchanged) if it is empty or `distaxis` is -1, otherwise
    a tuple in which the entry at `distaxis` is replaced by this task's share.
    """
    undistributed = len(shape) == 0 or distaxis == -1
    if undistributed:
        return shape
    local = list(shape)
    local[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(local)

Martin Reinecke's avatar
Martin Reinecke committed
58

59
60
class data_object(object):
    """Array whose entries may be split across the MPI tasks along one axis.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    data : numpy.ndarray
        The part of the array stored on the calling task.  Its shape must
        equal `local_shape(shape, distaxis)`.
    distaxis : int
        Axis along which the array is split, or -1 if every task holds the
        complete array.  Forced to -1 for zero-dimensional arrays.

    Raises
    ------
    ValueError
        If the shape of `data` does not match the expected local shape.
    """

    def __init__(self, shape, data, distaxis):
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # a scalar cannot be split along any axis
            distaxis = -1
        self._distaxis = distaxis
        self._data = data
        if local_shape(self._shape, self._distaxis) != self._data.shape:
            raise ValueError("shape mismatch")

    def copy(self):
        """Return a new object holding a copy of the local data."""
        return data_object(self._shape, self._data.copy(), self._distaxis)

#     def _sanity_checks(self):
#         # check whether the distaxis is consistent
#         if self._distaxis < -1 or self._distaxis >= len(self._shape):
#             raise ValueError
#         itmp = np.array(self._distaxis)
#         otmp = np.empty(ntask, dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         if np.any(otmp != self._distaxis):
#             raise ValueError
#         # check whether the global shape is consistent
#         itmp = np.array(self._shape)
#         otmp = np.empty((ntask, len(self._shape)), dtype=np.int)
#         _comm.Allgather(itmp, otmp)
#         for i in range(ntask):
#             if np.any(otmp[i, :] != self._shape):
#                 raise ValueError
#         # check shape of local data
#         if self._distaxis < 0:
#             if self._data.shape != self._shape:
#                 raise ValueError
#         else:
#             itmp = np.array(self._shape)
#             itmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
#                                               ntask, rank)
#             if np.any(self._data.shape != itmp):
#                 raise ValueError

    @property
    def dtype(self):
        """Data type of the local array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape as a tuple."""
        return self._shape

    @property
    def size(self):
        """Global number of entries, as a Python int.

        Computed with integer arithmetic: `np.prod(())` would yield the
        float 1.0 for a zero-dimensional object.
        """
        total = 1
        for extent in self._shape:
            total *= int(extent)
        return total

    @property
    def real(self):
        """Object wrapping the real part of the local data."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Object wrapping the imaginary part of the local data."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate as a new object."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate as a new object."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Apply a reduction over `axis`, combining task results if needed.

        Parameters
        ----------
        op : str
            Name of the reducing method of the local array (e.g. "sum").
        mpiop : MPI operation
            Operation handed to `Allreduce` to combine per-task results.
        axis : None or sequence of int
            Axes to contract; `None` contracts everything.
            NOTE(review): `len(axis)` and `ax < self._distaxis` are applied
            to it, so a bare int or negative indices are presumably not
            supported -- confirm against callers.

        Returns
        -------
        A scalar for a full contraction, otherwise a `data_object`.
        """
        if axis is not None:
            if len(axis) == len(self._data.shape):
                # contracting over every axis is a full contraction
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if (self._distaxis == -1):
                # every task already holds the complete data
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # the split axis disappears: combine the partial results of all
            # tasks, then hand out the result split along its first axis
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)
        else:
            # perform the contraction on the local data
            res = getattr(self._data, op)(axis=axis)
            if self._distaxis == -1:
                return from_global_data(res, distaxis=0)
            shp = list(res.shape)
            # contracted axes in front of the split axis shift its position
            shift = 0
            for ax in axis:
                if ax < self._distaxis:
                    shift += 1
            shp[self._distaxis-shift] = self.shape[self._distaxis]
            return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (all entries if `axis` is None)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def prod(self, axis=None):
        """Product over `axis` (all entries if `axis` is None)."""
        return self._contraction_helper("prod", MPI.PROD, axis)

#    def min(self, axis=None):
#        return self._contraction_helper("min", MPI.MIN, axis)

#    def max(self, axis=None):
#        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self, axis=None):
        """Arithmetic mean over `axis` (all entries if `axis` is None)."""
        if axis is None:
            sz = self.size
        else:
            # number of entries that are averaged over
            sz = 1
            for i in axis:
                sz *= self.shape[i]
        return self.sum(axis)/sz

    def std(self, axis=None):
        """Standard deviation; same `axis` restrictions as `var`."""
        return np.sqrt(self.var(axis))

    # FIXME: to be improved!
    def var(self, axis=None):
        """Variance over all entries.

        Raises
        ------
        ValueError
            If `axis` is given but does not list every axis.
        """
        if axis is not None and len(axis) != len(self.shape):
            raise ValueError("functionality not yet supported")
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the special method named `op` of the local data to `other`.

        `other` may be a `data_object` with identical shape and
        distribution, a scalar, or a NumPy array; anything else yields
        `NotImplemented`.

        Raises
        ------
        ValueError
            If `other` is a `data_object` whose shape or distribution axis
            differs from this object's.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        elif isinstance(other, np.ndarray):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        if tval is a:
            # in-place operation: the local array was modified directly
            return self
        else:
            return data_object(self._shape, tval, self._distaxis)

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, abs(self._data), self._distaxis)

    def _count_nonzero(self):
        """Return the global number of entries that are truthy (non-zero)."""
        flags = self._data.astype(bool, copy=False)
        return data_object(self._shape, flags, self._distaxis).sum()

    def all(self):
        """Return True if every entry is truthy.

        Entries are converted to bool before counting, so the result is
        also correct for non-boolean data (a plain value sum is not).
        """
        return self._count_nonzero() == self.size

    def any(self):
        """Return True if at least one entry is truthy.

        Entries are converted to bool before counting, so values that cancel
        in a plain sum (e.g. 1 and -1) are still detected.
        """
        return self._count_nonzero() != 0

    def fill(self, value):
        """Set every local entry to `value` in place."""
        self._data.fill(value)
222

223

224
225
226
227
228
229
230
231
232
233
234
235
236
237
# Install the arithmetic and comparison special methods on data_object.
# Every one of them forwards to data_object._binary_helper, passing its own
# name as the operation to perform.
for op in ("__add__", "__radd__", "__iadd__",
           "__sub__", "__rsub__", "__isub__",
           "__mul__", "__rmul__", "__imul__",
           "__div__", "__rdiv__", "__idiv__",
           "__truediv__", "__rtruediv__", "__itruediv__",
           "__floordiv__", "__rfloordiv__", "__ifloordiv__",
           "__pow__", "__rpow__", "__ipow__",
           "__lt__", "__le__", "__gt__", "__ge__", "__eq__", "__ne__"):
    def func(op):
        # Calling this factory binds the current method name; the inner
        # function would otherwise look up the loop variable when called.
        def func2(self, other):
            return self._binary_helper(other, op=op)
        return func2
    setattr(data_object, op, func(op))

Martin Reinecke's avatar
Martin Reinecke committed
238

Martin Reinecke's avatar
Martin Reinecke committed
239
def full(shape, fill_value, dtype=None, distaxis=0):
    """Create a data_object of global `shape` filled with `fill_value`.

    Only the calling task's share (see `local_shape`) is allocated.
    """
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
242
243


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
244
def empty(shape, dtype=None, distaxis=0):
    """Create a data_object of global `shape` with uninitialised entries.

    Only the calling task's share (see `local_shape`) is allocated.
    """
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
247
248


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
249
def zeros(shape, dtype=None, distaxis=0):
    """Create a data_object of global `shape` filled with zeros.

    Only the calling task's share (see `local_shape`) is allocated.
    """
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
252
253


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
254
def ones(shape, dtype=None, distaxis=0):
    """Create a data_object of global `shape` filled with ones.

    Only the calling task's share (see `local_shape`) is allocated.
    """
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
257
258
259
260
261
262
263


def empty_like(a, dtype=None):
    """Create an uninitialised data_object shaped and distributed like `a`.

    Parameters
    ----------
    a : data_object
        Template providing global shape, distribution axis and, unless
        `dtype` is given, the data type.
    dtype : data-type, optional
        Overrides the data type of the result.

    Notes
    -----
    The data_object constructor requires shape, data and distribution axis;
    all three are taken from `a` here.
    """
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return the scalar product of `a` and `b` (as computed by `np.vdot`).

    The product of the local parts is computed first.  If `a` is not
    distributed (`_distaxis == -1`) that is already the result; otherwise
    the partial products of all tasks are summed.
    """
    local = np.array(np.vdot(a._data, b._data))
    if a._distaxis == -1:
        return local[()]
    total = np.empty((), dtype=local.dtype)
    _comm.Allreduce(local, total, MPI.SUM)
    return total[()]
270
271
272


def _math_helper(x, function, out):
273
    function = getattr(np, function)
274
275
276
277
    if out is not None:
        function(x._data, out=out._data)
        return out
    else:
Martin Reinecke's avatar
Martin Reinecke committed
278
        return data_object(x.shape, function(x._data), x._distaxis)
279
280


281
# Handle on this very module, needed to add functions to it by name.
_current_module = sys.modules[__name__]

# Publish module-level element-wise functions with the same names as their
# NumPy counterparts; each forwards to _math_helper.
for f in ("sqrt", "exp", "log", "tanh", "conjugate"):
    def func(f):
        # Calling this factory binds the current function name; the inner
        # function would otherwise look up the loop variable when called.
        def func2(x, out=None):
            return _math_helper(x, f, out)
        return func2
    setattr(_current_module, f, func(f))
289
290


Martin Reinecke's avatar
Martin Reinecke committed
291
292
293
294
295
296
297
298
299
300
301
302
def from_object(object, dtype, copy, set_locked):
    """Build a data_object from another one, optionally converting/locking.

    Parameters
    ----------
    object : data_object
        The source object.
    dtype : data-type or None
        Requested data type; `None` keeps the type of `object`.
    copy : bool
        Whether the local data are copied (passed on to `np.array`).
    set_locked : bool
        Whether the result must be read-only.

    Returns
    -------
    `object` itself if a locked result of unchanged type is requested and
    `object` is already locked; otherwise a new data_object.

    Raises
    ------
    ValueError
        If a type change or locking is requested together with `copy=False`.
    """
    if dtype is None:
        dtype = object.dtype
    same_dtype = dtype == object.dtype
    if set_locked and same_dtype and locked(object):
        return object
    if not (same_dtype or copy):
        raise ValueError("cannot change data type without copying")
    if set_locked and not copy:
        raise ValueError("cannot lock object without copying")
    payload = np.array(object._data, dtype=dtype, copy=copy)
    if set_locked:
        payload.flags.writeable = False
    return data_object(object._shape, payload, distaxis=object._distaxis)
305
306


Martin Reinecke's avatar
Martin Reinecke committed
307
308
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks
Martin Reinecke's avatar
Martin Reinecke committed
309
310
311
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
312
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Create a data_object of global `shape` filled with random numbers.

    Parameters
    ----------
    random_type : str
        Name of the generator method looked up on `Random`.
    shape : tuple of int
        Global shape of the result.
    dtype : data-type
        Passed on to the generator.
    **kwargs
        Further arguments passed on to the generator.

    Notes
    -----
    For a non-scalar shape every task draws the chunks of all tasks one
    after the other and keeps only its own; the result is split along
    axis 0.  A scalar (`shape == ()`) is drawn and then broadcast.
    """
    draw = getattr(Random, random_type)
    if shape == ():
        sample = draw(dtype=dtype, shape=shape, **kwargs)
        sample = _comm.bcast(sample)
        return from_local_data(shape, sample, distaxis=-1)
    for task in range(ntask):
        task_shape = list(shape)
        task_shape[0] = _shareSize(shape[0], ntask, task)
        # drawn on every task, even if it is not the chunk that is kept
        chunk = draw(dtype=dtype, shape=task_shape, **kwargs)
        if task == rank:
            mine = chunk
    return from_local_data(shape, mine, distaxis=0)
325

Martin Reinecke's avatar
Martin Reinecke committed
326

Martin Reinecke's avatar
Martin Reinecke committed
327
328
329
330
def local_data(arr):
    """Return the array stored by `arr` on the calling task (no copy)."""
    return arr._data


331
332
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index of the first local entry, per axis.

    Parameters
    ----------
    glob_shape : sequence of int
        Global shape of the array.
    distaxis : int
        Axis along which the array is split; negative means "not split".

    Returns
    -------
    tuple of int
        All zeros except at `distaxis`, which holds the start of this
        task's share.  A tuple is returned in every case, including the
        undistributed one.
    """
    res = [0] * len(glob_shape)
    if distaxis >= 0:
        res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    return tuple(res)


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
339
340
def ibegin(arr):
    """Return the global index of the first local entry of `arr`, per axis.

    Returns
    -------
    tuple of int
        All zeros except at the distribution axis of `arr`, which holds the
        start of this task's share.  For an undistributed array
        (`_distaxis == -1`) every entry is zero.
    """
    res = [0] * arr._data.ndim
    axis = arr._distaxis
    # -1 marks "not distributed"; using it as an index would wrongly
    # address the last axis (and fail for zero-dimensional arrays).
    if axis >= 0:
        res[axis] = _shareRange(arr._shape[axis], ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
343
344


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
345
346
def np_allreduce_sum(arr):
    """Return the element-wise sum of `arr` over all tasks (MPI Allreduce).

    The input is left untouched; the result is a new array like `arr`.
    """
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, MPI.SUM)
    return reduced
Martin Reinecke's avatar
Martin Reinecke committed
349
350


351
352
353
354
355
356
def np_allreduce_min(arr):
    """Return the element-wise minimum of `arr` over all tasks.

    The input is left untouched; the result is a new array like `arr`.
    """
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, MPI.MIN)
    return reduced


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
357
358
359
360
361
362
def np_allreduce_max(arr):
    """Return the element-wise maximum of `arr` over all tasks.

    The input is left untouched; the result is a new array like `arr`.
    """
    reduced = np.empty_like(arr)
    _comm.Allreduce(arr, reduced, MPI.MAX)
    return reduced


Martin Reinecke's avatar
Martin Reinecke committed
363
364
365
366
def distaxis(arr):
    """Return the axis along which `arr` is distributed (-1 if it is not)."""
    return arr._distaxis


Martin Reinecke's avatar
Martin Reinecke committed
367
def from_local_data(shape, arr, distaxis=0):
    """Wrap the local array `arr` in a data_object of global `shape`.

    `arr` is stored as is; the data_object constructor raises ValueError
    if its shape is not the local shape expected for `shape`/`distaxis`.
    """
    return data_object(shape=shape, data=arr, distaxis=distaxis)


371
372
373
def from_global_data(arr, sum_up=False, distaxis=0):
    """Build a data_object from an array that holds the complete data.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with the global shape, available on the calling task.
    sum_up : bool
        If True, `arr` is first summed element-wise over all tasks.
    distaxis : int
        Axis along which the result is split; -1 keeps the full array on
        every task.

    Returns
    -------
    data_object
        For `distaxis != -1` it wraps the slice of `arr` belonging to this
        task.
    """
    if sum_up:
        arr = np_allreduce_sum(arr)
    if distaxis == -1:
        return data_object(arr.shape, arr, distaxis)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # NumPy requires a tuple for a multi-dimensional index; a list of
    # slices is not accepted by current versions.
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
382
383
def to_global_data(arr):
    """Return the complete data of `arr` as an array on the calling task.

    An undistributed object hands out its own array; otherwise the data
    are gathered via `redistribute(arr, dist=-1)` first.
    """
    if arr._distaxis != -1:
        arr = redistribute(arr, dist=-1)
    return arr._data


Martin Reinecke's avatar
Martin Reinecke committed
389
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with its data split along a different axis.

    Exactly one of `dist` and `nodist` must be given.

    Parameters
    ----------
    arr : data_object
        The object to redistribute.
    dist : int, optional
        Requested distribution axis; -1 gathers the full array on every
        task.
    nodist : sequence of int, optional
        Axes along which the result must not be split.  The first axis not
        listed is chosen; if every axis is listed the data are gathered.

    Returns
    -------
    data_object
        `arr` itself if it already satisfies the request, otherwise a new
        object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError("dist and nodist must not both be given")
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError("either dist or nodist must be given")
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, distaxis=dist)
    if dist == -1:  # gather all data on all tasks
        # bring the split axis to the front so that the pieces of the
        # individual tasks are contiguous in the gathered buffer
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        slabsize = np.prod(tmp.shape[1:])*tmp.itemsize
        # byte counts and displacements of every task's piece
        # (`int` replaces the alias `np.int`, removed from NumPy)
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly split axis moved to the front
        shp = np.array(arr._shape)
        shp[1:arr._distaxis+1] = shp[0:arr._distaxis]
        shp[0] = arr.shape[arr._distaxis]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # pack the slabs destined for the individual tasks back to back
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            # a multi-dimensional index has to be a tuple
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    # the messages are exchanged as raw bytes
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # unpack the slab received from each task into its place
        arrnew = empty(arr.shape, dtype=arr.dtype, distaxis=dist)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew._data[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
473
474


Martin Reinecke's avatar
Martin Reinecke committed
475
476
def transpose(arr):
    """Return the transpose of a 2D data_object distributed along axis 0.

    Parameters
    ----------
    arr : data_object
        Two-dimensional object with distribution axis 0.

    Returns
    -------
    data_object
        Object of shape `(arr.shape[1], arr.shape[0])`, again distributed
        along axis 0.

    Raises
    ------
    ValueError
        If `arr` is not two-dimensional or not distributed along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    ssz0 = arr._data.size//arr.shape[1]
    # `int` replaces the alias `np.int`, which was removed from NumPy
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    # pack the column blocks destined for the individual tasks back to back
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    # the messages are exchanged as raw bytes
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    arrnew = empty((arr.shape[1], arr.shape[0]), dtype=arr.dtype, distaxis=0)
    ofs = 0
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    # every received block is transposed into its columns of the result
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        arrnew._data[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return arrnew


Martin Reinecke's avatar
Martin Reinecke committed
511
512
def default_distaxis():
    """Return the distribution axis used by default in this module (0)."""
    return 0
513
514
515
516
517
518
519
520


def lock(arr):
    """Make the local data of `arr` read-only (in place, returns None)."""
    flags = arr._data.flags
    flags.writeable = False


def locked(arr):
    """Return True if the local data of `arr` are read-only."""
    writable = arr._data.flags.writeable
    return not writable