distributed_do.py 16.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

19
20
21
22
import numpy as np
from .random import Random
from mpi4py import MPI

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
23
24
25
# Module-wide MPI state: the world communicator, the number of MPI tasks
# taking part in it, and the index (rank) of the current task.
_comm = MPI.COMM_WORLD
ntask = _comm.Get_size()
rank = _comm.Get_rank()
Martin Reinecke's avatar
Martin Reinecke committed
26
# True only on the task whose rank is 0.
master = (rank == 0)
27
28


Martin Reinecke's avatar
Martin Reinecke committed
29
30
31
32
def is_numpy():
    """Return False; this module's answer to the `is_numpy` query."""
    return False


Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
33
def _shareSize(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
34
    return (nwork//nshares) + int(myshare < nwork % nshares)
Martin Reinecke's avatar
Martin Reinecke committed
35

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
36
37

def _shareRange(nwork, nshares, myshare):
Martin Reinecke's avatar
Martin Reinecke committed
38
39
    nbase = nwork//nshares
    additional = nwork % nshares
Martin Reinecke's avatar
Martin Reinecke committed
40
    lo = myshare*nbase + min(myshare, additional)
Martin Reinecke's avatar
Martin Reinecke committed
41
    hi = lo + nbase + int(myshare < additional)
Martin Reinecke's avatar
Martin Reinecke committed
42
43
    return lo, hi

44

45
def local_shape(shape, distaxis=0):
    """Return the shape of the part of a global array held by this task.

    `shape` is the global shape and `distaxis` the axis along which the
    array is split over the MPI tasks.  For an empty shape, or for
    `distaxis == -1`, `shape` is returned unchanged; otherwise a tuple is
    returned in which the entry at `distaxis` is replaced by this task's
    share of that axis.
    """
    if len(shape) == 0 or distaxis == -1:
        return shape
    head = tuple(shape[:distaxis])
    tail = tuple(shape[distaxis+1:])
    return head + (_shareSize(shape[distaxis], ntask, rank),) + tail

Martin Reinecke's avatar
Martin Reinecke committed
52

53
54
class data_object(object):
    """Array with a global shape whose data may be split over MPI tasks.

    Every task stores the global shape, the index of the axis along which
    the array is split (`-1` means that the local data is not split), and
    its local data.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    data : numpy.ndarray
        Local data stored on this task.
    distaxis : int
        Axis along which the data is split, or -1.  Forced to -1 when
        `shape` is empty.
    """

    def __init__(self, shape, data, distaxis):
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # a zero-dimensional object has no axis to split along
            distaxis = -1
        self._distaxis = distaxis
        self._data = data

    def _sanity_checks(self):
        """Verify that the object is consistent locally and across tasks.

        This performs MPI Allgather calls, so all tasks must call it.

        Raises
        ------
        ValueError
            If the distribution axis is out of range or differs between
            tasks, if the global shape differs between tasks, or if the
            local data does not have the expected local shape.
        """
        # check whether the distaxis is consistent
        if self._distaxis < -1 or self._distaxis >= len(self._shape):
            raise ValueError("distribution axis out of range")
        # `np.int` no longer exists in current NumPy; the builtin `int` is
        # the dtype it used to stand for.  Send and receive buffers are
        # given the same explicit dtype.
        itmp = np.array(self._distaxis, dtype=int)
        otmp = np.empty(ntask, dtype=int)
        _comm.Allgather(itmp, otmp)
        if np.any(otmp != self._distaxis):
            raise ValueError("distribution axis differs between tasks")
        # check whether the global shape is consistent
        itmp = np.array(self._shape, dtype=int)
        otmp = np.empty((ntask, len(self._shape)), dtype=int)
        _comm.Allgather(itmp, otmp)
        if np.any(otmp != itmp):
            raise ValueError("global shape differs between tasks")
        # check shape of local data
        if self._distaxis < 0:
            expected = self._shape
        else:
            tmp = list(self._shape)
            tmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
                                             ntask, rank)
            expected = tuple(tmp)
        # compare as tuples, so that a wrong number of dimensions is
        # reported as a mismatch as well
        if tuple(self._data.shape) != expected:
            raise ValueError("local data has unexpected shape")

    @property
    def dtype(self):
        """Data type of the local data."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape, as a tuple."""
        return self._shape

    @property
    def size(self):
        """Total number of elements of the global array, as an int."""
        # np.prod of an empty shape would otherwise yield the float 1.0
        return int(np.prod(self._shape, dtype=np.int64))

    @property
    def real(self):
        """Real part, with the same global shape and distribution."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Imaginary part, with the same global shape and distribution."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate as a new data_object."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate as a new data_object."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Apply the reduction `op` over `axis`, combining tasks if needed.

        Parameters
        ----------
        op : str
            Name of the ndarray reduction method, e.g. "sum".
        mpiop : MPI operation
            Operation used to combine the per-task results.
        axis : None, int or sequence of int
            Axes to reduce over; None reduces over all axes.  Negative
            values count from the last axis.

        Returns
        -------
        A scalar if all axes are reduced, otherwise a data_object.
        """
        if axis is not None:
            if isinstance(axis, (int, np.integer)):
                axis = (axis,)
            ndim = len(self._data.shape)
            # Make all axes non-negative, so that an axis of -1 cannot be
            # mistaken for the "not distributed" marker of `_distaxis`.
            axis = tuple(ax + ndim if ax < 0 else ax for ax in axis)
            if len(axis) == ndim:
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if self._distaxis == -1:
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # the distributed axis is reduced away: combine the partial
            # results of all tasks, then split the result along axis 0
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)

        # perform the contraction on the local data
        res = getattr(self._data, op)(axis=axis)
        if self._distaxis == -1:
            return from_global_data(res, distaxis=0)
        # the distributed axis survives; its index drops by the number of
        # reduced axes located in front of it
        shift = sum(1 for ax in axis if ax < self._distaxis)
        shp = list(res.shape)
        shp[self._distaxis-shift] = self.shape[self._distaxis]
        return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over `axis` (all axes if None)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def min(self, axis=None):
        """Minimum over `axis` (all axes if None)."""
        return self._contraction_helper("min", MPI.MIN, axis)

    def max(self, axis=None):
        """Maximum over `axis` (all axes if None)."""
        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self):
        """Sum of all elements divided by the global size."""
        return self.sum()/self.size

    def std(self):
        """Square root of `var()`."""
        return np.sqrt(self.var())

    # FIXME: to be improved!
    def var(self):
        """Mean of the squared absolute deviation from the mean."""
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the ndarray special method `op` to the local data.

        `other` may be a data_object with identical global shape and
        distribution axis, a scalar, or a numpy.ndarray; for any other
        type NotImplemented is returned.  When the operation returns the
        local array itself (in-place operators), `self` is returned,
        otherwise a new data_object.

        Raises
        ------
        ValueError
            If `other` is a data_object with a different shape or
            distribution axis.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other) or isinstance(other, np.ndarray):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        if tval is a:
            return self
        return data_object(self._shape, tval, self._distaxis)

    # All arithmetic and comparison operators below forward to the
    # ndarray special method of the same name via _binary_helper.

    def __add__(self, other):
        return self._binary_helper(other, op='__add__')

    def __radd__(self, other):
        return self._binary_helper(other, op='__radd__')

    def __iadd__(self, other):
        return self._binary_helper(other, op='__iadd__')

    def __sub__(self, other):
        return self._binary_helper(other, op='__sub__')

    def __rsub__(self, other):
        return self._binary_helper(other, op='__rsub__')

    def __isub__(self, other):
        return self._binary_helper(other, op='__isub__')

    def __mul__(self, other):
        return self._binary_helper(other, op='__mul__')

    def __rmul__(self, other):
        return self._binary_helper(other, op='__rmul__')

    def __imul__(self, other):
        return self._binary_helper(other, op='__imul__')

    def __div__(self, other):
        return self._binary_helper(other, op='__div__')

    def __rdiv__(self, other):
        return self._binary_helper(other, op='__rdiv__')

    def __idiv__(self, other):
        return self._binary_helper(other, op='__idiv__')

    def __truediv__(self, other):
        return self._binary_helper(other, op='__truediv__')

    def __rtruediv__(self, other):
        return self._binary_helper(other, op='__rtruediv__')

    def __pow__(self, other):
        return self._binary_helper(other, op='__pow__')

    def __rpow__(self, other):
        return self._binary_helper(other, op='__rpow__')

    def __ipow__(self, other):
        return self._binary_helper(other, op='__ipow__')

    def __lt__(self, other):
        return self._binary_helper(other, op='__lt__')

    def __le__(self, other):
        return self._binary_helper(other, op='__le__')

    def __ne__(self, other):
        return self._binary_helper(other, op='__ne__')

    def __eq__(self, other):
        return self._binary_helper(other, op='__eq__')

    def __ge__(self, other):
        return self._binary_helper(other, op='__ge__')

    def __gt__(self, other):
        return self._binary_helper(other, op='__gt__')

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, np.abs(self._data), self._distaxis)

    def all(self):
        """Return whether the sum of all elements equals the global size."""
        return self.sum() == self.size

    def any(self):
        """Return whether the sum of all elements is nonzero."""
        return self.sum() != 0

    def fill(self, value):
        """Set every element of the local data to `value`, in place."""
        self._data.fill(value)
271

Martin Reinecke's avatar
Martin Reinecke committed
272

Martin Reinecke's avatar
Martin Reinecke committed
273
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with `fill_value`.

    Only the part given by `local_shape(shape, distaxis)` is allocated on
    this task.
    """
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
276
277


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
278
def empty(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` with uninitialized data.

    Only the part given by `local_shape(shape, distaxis)` is allocated on
    this task.
    """
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
281
282


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
283
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with zeros.

    Only the part given by `local_shape(shape, distaxis)` is allocated on
    this task.
    """
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
286
287


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
288
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape `shape` filled with ones.

    Only the part given by `local_shape(shape, distaxis)` is allocated on
    this task.
    """
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
291
292
293
294
295
296
297


def empty_like(a, dtype=None):
    """Return an uninitialized data_object shaped and distributed like `a`.

    `dtype` overrides the data type of `a` when given.
    """
    # data_object needs the global shape and the distribution axis in
    # addition to the local array; passing only the array raises TypeError.
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return the scalar `numpy.vdot` product of two data_objects.

    The product of the local data is computed on each task.  If `a` is
    split over the tasks, the partial results are summed with an MPI
    Allreduce; if `a` is not split (`_distaxis == -1`) the local result
    already is the full result and is returned directly.
    """
    tmp = np.array(np.vdot(a._data, b._data))
    if a._distaxis == -1:
        # Every task holds the complete data; summing over the tasks would
        # multiply the result by the number of tasks.
        return tmp[()]
    res = np.empty((), dtype=tmp.dtype)
    _comm.Allreduce(tmp, res, MPI.SUM)
    return res[()]
302
303
304
305
306
307
308


def _math_helper(x, function, out):
    if out is not None:
        function(x._data, out=out._data)
        return out
    else:
Martin Reinecke's avatar
Martin Reinecke committed
309
        return data_object(x.shape, function(x._data), x._distaxis)
310
311
312
313
314
315
316
317
318
319
320
321
322
323


def abs(a, out=None):
    """Element-wise absolute value of `a`, optionally written into `out`."""
    return _math_helper(a, function=np.abs, out=out)


def exp(a, out=None):
    """Element-wise exponential of `a`, optionally written into `out`."""
    return _math_helper(a, function=np.exp, out=out)


def log(a, out=None):
    """Element-wise natural logarithm of `a`, optionally written to `out`."""
    return _math_helper(a, function=np.log, out=out)


Martin Reinecke's avatar
Martin Reinecke committed
324
325
326
327
def tanh(a, out=None):
    """Element-wise hyperbolic tangent of `a`, optionally written to `out`."""
    return _math_helper(a, function=np.tanh, out=out)


328
329
330
331
def sqrt(a, out=None):
    """Element-wise square root of `a`, optionally written into `out`."""
    return _math_helper(a, function=np.sqrt, out=out)


Martin Reinecke's avatar
Martin Reinecke committed
332
333
334
335
336
337
338
339
340
341
342
343
def from_object(object, dtype, copy, set_locked):
    """Build a data_object from an existing one.

    Parameters
    ----------
    object : data_object
        Source object.
    dtype : data type or None
        Requested data type; None keeps the data type of `object`.
    copy : bool
        Passed on to `numpy.array` as its `copy` argument.
    set_locked : bool
        If True, the data of the result is made read-only.

    Returns
    -------
    `object` itself if locking is requested, the data type is unchanged
    and `object` is already locked; otherwise a new data_object.

    Raises
    ------
    ValueError
        If `copy` is false and either the data type would change or
        locking is requested.
    """
    source_dtype = object.dtype
    target_dtype = source_dtype if dtype is None else dtype
    same_dtype = target_dtype == source_dtype
    if set_locked and same_dtype and locked(object):
        return object
    if not copy:
        if not same_dtype:
            raise ValueError("cannot change data type without copying")
        if set_locked:
            raise ValueError("cannot lock object without copying")
    new_data = np.array(object._data, dtype=target_dtype, copy=copy)
    if set_locked:
        new_data.flags.writeable = False
    return data_object(object._shape, new_data, distaxis=object._distaxis)
346
347


Martin Reinecke's avatar
Martin Reinecke committed
348
349
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks
Martin Reinecke's avatar
Martin Reinecke committed
350
351
352
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
353
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a random data_object of global shape `shape`, split on axis 0.

    `random_type` names the generator function looked up on `Random`;
    `dtype` and any further keyword arguments are forwarded to it.

    The generator is called once per task, in task order and with the
    local shape of that task; only the sample drawn for the current task
    is kept.
    """
    draw = getattr(Random, random_type)
    for task in range(ntask):
        task_shape = [_shareSize(shape[0], ntask, task)] + list(shape[1:])
        sample = draw(dtype=dtype, shape=task_shape, **kwargs)
        if task == rank:
            outdat = sample
    return from_local_data(shape, outdat, distaxis=0)
362

Martin Reinecke's avatar
Martin Reinecke committed
363

Martin Reinecke's avatar
Martin Reinecke committed
364
365
366
367
def local_data(arr):
    """Return the array stored in `arr._data` (no copy is made)."""
    return arr._data


368
369
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index at which this task's local data begins.

    Parameters
    ----------
    glob_shape : sequence of int
        Global shape of the array.
    distaxis : int
        Axis along which the array is split; a negative value means the
        array is not split.

    Returns
    -------
    tuple of int
        One entry per axis.  All entries are 0 except the one at
        `distaxis`, which is the start of this task's share of that axis.
    """
    res = [0] * len(glob_shape)
    if distaxis >= 0:
        res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    # always return a tuple, also when the array is not split
    return tuple(res)


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
376
377
def ibegin(arr):
    """Return the global index at which the local data of `arr` begins.

    The result is a tuple with one entry per axis of the local data.  All
    entries are 0 except the one at the distribution axis of `arr`, which
    is the start of this task's share of that axis.  If `arr` is not
    split (`_distaxis == -1`), all entries are 0.
    """
    res = [0] * arr._data.ndim
    # Without this guard an axis of -1 would be used as a list index and
    # put a share offset on the last axis of an array that is not split.
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
380
381


Martin Reinecke's avatar
fixes    
Martin Reinecke committed
382
383
def np_allreduce_sum(arr):
    """Return the element-wise sum of `arr` over all MPI tasks.

    The result is a new array shaped like `arr`; `arr` is the send buffer
    of an MPI Allreduce with the SUM operation.
    """
    total = np.empty_like(arr)
    _comm.Allreduce(arr, total, MPI.SUM)
    return total
Martin Reinecke's avatar
Martin Reinecke committed
386
387
388
389
390
391


def distaxis(arr):
    """Return the distribution axis stored in `arr._distaxis`."""
    return arr._distaxis


Martin Reinecke's avatar
Martin Reinecke committed
392
def from_local_data(shape, arr, distaxis=0):
    """Wrap the local array `arr` in a data_object.

    `shape` is the global shape and `distaxis` the axis along which the
    global array is split.
    """
    return data_object(shape=shape, data=arr, distaxis=distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
396
397
def from_global_data(arr, distaxis=0):
    """Build a data_object from an array holding the complete global data.

    Parameters
    ----------
    arr : numpy.ndarray
        The global array.
    distaxis : int
        Axis along which the result is split; -1 keeps the whole array on
        this task.

    Returns
    -------
    data_object
        Has the shape of `arr` as global shape.  Its local data is `arr`
        itself for `distaxis == -1`, otherwise this task's slice of `arr`
        along `distaxis`.
    """
    if distaxis == -1:
        return data_object(arr.shape, arr, distaxis)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # NumPy requires a tuple (not a list) of slices for multidimensional
    # indexing.
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
405
406
def to_global_data(arr):
    """Return the complete global data of `arr` as an array.

    If `arr` is not split (`_distaxis == -1`) its local data is returned
    directly; otherwise the data is first gathered with
    `redistribute(arr, dist=-1)`.
    """
    if arr._distaxis != -1:
        arr = redistribute(arr, dist=-1)
    return arr._data


Martin Reinecke's avatar
Martin Reinecke committed
412
def redistribute(arr, dist=None, nodist=None):
    """Return `arr` with its data split along a different axis.

    Exactly one of `dist` and `nodist` must be given.  This function uses
    MPI collectives (Allgatherv / Alltoallv) when data has to be moved.

    Parameters
    ----------
    arr : data_object
        The object to redistribute.
    dist : int, optional
        Requested distribution axis; -1 gathers the complete data on
        every task.
    nodist : sequence of int, optional
        Axes along which the result must not be split.  The first axis
        not contained in `nodist` becomes the new distribution axis; if
        there is none, the complete data is gathered on every task.

    Returns
    -------
    data_object
        `arr` itself if it already satisfies the request, otherwise a
        new object.

    Raises
    ------
    ValueError
        If both or neither of `dist` and `nodist` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError("specify either `dist` or `nodist`, not both")
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError("one of `dist` and `nodist` must be given")
        if arr._distaxis not in nodist:
            return arr
        # pick the first axis that may be distributed, if there is one
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, dist)
    if dist == -1:  # gather all data on all tasks
        # Move the distributed axis to the front, so that the slabs of the
        # individual tasks are contiguous in the gathered buffer.
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        # bytes per index along the distributed axis
        slabsize = int(np.prod(tmp.shape[1:]))*tmp.itemsize
        # `np.int` no longer exists in current NumPy; use the builtin int.
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly distributed axis in front
        shp = np.array(arr._shape)
        shp[1:arr._distaxis+1] = shp[0:arr._distaxis]
        shp[0] = arr.shape[arr._distaxis]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    # Element counts are computed first and converted to bytes below.
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        # Slices along axis 0 are already contiguous in C order.
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # Pack the part destined for each task contiguously into sbuf.
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            # NumPy requires a tuple (not a list) of slices for indexing.
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        # The received blocks are already in the order of the new local
        # array.
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # Unpack the block received from each task into its slice along
        # the old distribution axis.
        arrnew = empty(arr.shape, dtype=arr.dtype, distaxis=dist)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew._data[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
496
497


Martin Reinecke's avatar
Martin Reinecke committed
498
499
def transpose(arr):
    """Return the transpose of a 2-D data_object that is split on axis 0.

    The result has global shape `(arr.shape[1], arr.shape[0])` and is
    again split along axis 0.  The data is exchanged between the tasks
    with an MPI Alltoallv.

    Raises
    ------
    ValueError
        If `arr` is not two-dimensional or is not split along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    # Element counts are computed first and converted to bytes below.
    # `np.int` no longer exists in current NumPy; use the builtin int.
    ssz0 = arr._data.size//arr.shape[1]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    for i in range(ntask):
        # task i receives the columns that become its rows in the result
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    arrnew = empty((arr.shape[1], arr.shape[0]), dtype=arr.dtype, distaxis=0)
    ofs = 0
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        # the block from task i arrives untransposed, as (rows of i, sz2)
        arrnew._data[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return arrnew


Martin Reinecke's avatar
Martin Reinecke committed
534
535
def default_distaxis():
    """Return the default distribution axis, which is 0."""
    return 0
536
537
538
539
540
541
542
543


def lock(arr):
    """Make the local data of `arr` read-only."""
    arr._data.setflags(write=False)


def locked(arr):
    """Return True if the local data of `arr` is read-only."""
    return not arr._data.flags["WRITEABLE"]