distributed_do.py 15.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Copyright(C) 2013-2018 Max-Planck-Society
#
# NIFTy is being developed at the Max-Planck-Institut fuer Astrophysik
# and financially supported by the Studienstiftung des deutschen Volkes.

Martin Reinecke's avatar
Martin Reinecke committed
19
from __future__ import print_function
20 21 22 23
import numpy as np
from .random import Random
from mpi4py import MPI

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
24 25 26
# Communicator used by every collective operation in this module.
_comm = MPI.COMM_WORLD
# Number of MPI tasks taking part, and the index of the current task.
ntask = _comm.Get_size()
rank = _comm.Get_rank()
# True on task 0 only; used to restrict output to a single task.
master = (rank == 0)
28 29


Martin Reinecke's avatar
Martin Reinecke committed
30 31 32 33 34
def mprint(*args):
    """Print ``args`` on the master task; do nothing on all other tasks."""
    if not master:
        return
    print(*args)


Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
35
def _shareSize(nwork, nshares, myshare):
    """Return the number of work items that belong to share ``myshare``.

    ``nwork`` items are split over ``nshares`` shares; every share gets
    ``nwork // nshares`` items and the first ``nwork % nshares`` shares get
    one additional item.
    """
    nbase, additional = divmod(nwork, nshares)
    return nbase + int(myshare < additional)
Martin Reinecke's avatar
Martin Reinecke committed
37

Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
38 39

def _shareRange(nwork, nshares, myshare):
    """Return the half-open index range ``(lo, hi)`` of share ``myshare``.

    The split is the same one used by ``_shareSize``: the first
    ``nwork % nshares`` shares are one item longer than the others, and the
    ranges of consecutive shares are adjacent.
    """
    nbase, additional = divmod(nwork, nshares)
    # every preceding share contributes nbase items, plus one extra item for
    # each preceding share that is among the first `additional` ones
    lo = myshare*nbase + min(myshare, additional)
    hi = lo + nbase + int(myshare < additional)
    return lo, hi

46

47
def local_shape(shape, distaxis=0):
    """Return the shape of the part of a global array held by this task.

    Parameters
    ----------
    shape : sequence of int
        Global shape.
    distaxis : int
        Axis along which the array is distributed; -1 means "not
        distributed".

    Returns
    -------
    ``shape`` itself (unchanged) if it is empty or ``distaxis`` is -1;
    otherwise a tuple equal to ``shape`` except that entry ``distaxis`` is
    replaced by this task's share of that axis.
    """
    if len(shape) == 0 or distaxis == -1:
        return shape
    shape2 = list(shape)
    shape2[distaxis] = _shareSize(shape[distaxis], ntask, rank)
    return tuple(shape2)

Martin Reinecke's avatar
Martin Reinecke committed
54

55 56
class data_object(object):
    """Array that is split over the MPI tasks along at most one axis.

    Every task stores its own part of the data as a NumPy array.

    Parameters
    ----------
    shape : sequence of int
        Global shape of the array.
    data : numpy.ndarray
        The part of the array stored on this task.
    distaxis : int
        Axis along which the array is distributed, or -1 if it is not
        distributed. Forced to -1 when ``shape`` is empty.
    """

    def __init__(self, shape, data, distaxis):
        self._shape = tuple(shape)
        if len(self._shape) == 0:
            # a zero-dimensional array has no axis to distribute over
            distaxis = -1
        self._distaxis = distaxis
        self._data = data

    def _sanity_checks(self):
        """Raise ValueError if the object is inconsistent.

        Checks that the distribution axis is in range and identical on all
        tasks, that the global shape is identical on all tasks, and that the
        local data has the shape expected for this task. This is a collective
        operation (it calls ``Allgather``).
        """
        # check whether the distaxis is consistent
        if self._distaxis < -1 or self._distaxis >= len(self._shape):
            raise ValueError
        # `int` replaces the `np.int` alias, which no longer exists in
        # current NumPy; both buffers get the same dtype explicitly.
        itmp = np.array(self._distaxis, dtype=int)
        otmp = np.empty(ntask, dtype=int)
        _comm.Allgather(itmp, otmp)
        if np.any(otmp != self._distaxis):
            raise ValueError
        # check whether the global shape is consistent
        itmp = np.array(self._shape, dtype=int)
        otmp = np.empty((ntask, len(self._shape)), dtype=int)
        _comm.Allgather(itmp, otmp)
        for i in range(ntask):
            if np.any(otmp[i, :] != self._shape):
                raise ValueError
        # check shape of local data
        if self._distaxis < 0:
            if self._data.shape != self._shape:
                raise ValueError
        else:
            itmp = np.array(self._shape)
            itmp[self._distaxis] = _shareSize(self._shape[self._distaxis],
                                              ntask, rank)
            if np.any(self._data.shape != itmp):
                raise ValueError

    @property
    def dtype(self):
        """dtype of the local data array."""
        return self._data.dtype

    @property
    def shape(self):
        """Global shape as a tuple."""
        return self._shape

    @property
    def size(self):
        """Product of the entries of the global shape."""
        return np.prod(self._shape)

    @property
    def real(self):
        """Real part, with the same shape and distribution."""
        return data_object(self._shape, self._data.real, self._distaxis)

    @property
    def imag(self):
        """Imaginary part, with the same shape and distribution."""
        return data_object(self._shape, self._data.imag, self._distaxis)

    def conj(self):
        """Return the complex conjugate as a new data_object."""
        return data_object(self._shape, self._data.conj(), self._distaxis)

    def conjugate(self):
        """Return the complex conjugate as a new data_object."""
        return data_object(self._shape, self._data.conjugate(), self._distaxis)

    def _contraction_helper(self, op, mpiop, axis):
        """Contract the array over ``axis``.

        Parameters
        ----------
        op : str
            Name of the ndarray method applied to the local data
            (e.g. "sum").
        mpiop :
            MPI reduction operation used to combine the per-task results.
        axis : None, int or sequence of int
            Axes to contract over; None contracts over all axes.

        Returns
        -------
        A scalar for a full contraction, otherwise a data_object.
        """
        if axis is not None:
            if isinstance(axis, (int, np.integer)):
                axis = (axis,)
            # Negative entries must be mapped to their non-negative
            # equivalents: otherwise -1 ("last axis") is indistinguishable
            # from the "not distributed" value of _distaxis, and the axis
            # comparisons below give wrong answers.
            ndim = len(self._shape)
            axis = tuple(ax + ndim if ax < 0 else ax for ax in axis)
            if len(axis) == len(self._data.shape):
                axis = None
        if axis is None:
            res = np.array(getattr(self._data, op)())
            if (self._distaxis == -1):
                # every task already holds the complete array
                return res[()]
            res2 = np.empty((), dtype=res.dtype)
            _comm.Allreduce(res, res2, mpiop)
            return res2[()]

        if self._distaxis in axis:
            # the distributed axis is contracted away: combine the partial
            # results of all tasks, then distribute the result along axis 0
            res = getattr(self._data, op)(axis=axis)
            res2 = np.empty_like(res)
            _comm.Allreduce(res, res2, mpiop)
            return from_global_data(res2, distaxis=0)
        else:
            # perform the contraction on the local data
            res = getattr(self._data, op)(axis=axis)
            if self._distaxis == -1:
                return from_global_data(res, distaxis=0)
            shp = list(res.shape)
            # the distributed axis moves forward by one position for every
            # contracted axis that precedes it
            shift = 0
            for ax in axis:
                if ax < self._distaxis:
                    shift += 1
            shp[self._distaxis-shift] = self.shape[self._distaxis]
            return from_local_data(shp, res, self._distaxis-shift)

    def sum(self, axis=None):
        """Sum over ``axis`` (all axes if None)."""
        return self._contraction_helper("sum", MPI.SUM, axis)

    def min(self, axis=None):
        """Minimum over ``axis`` (all axes if None)."""
        return self._contraction_helper("min", MPI.MIN, axis)

    def max(self, axis=None):
        """Maximum over ``axis`` (all axes if None)."""
        return self._contraction_helper("max", MPI.MAX, axis)

    def mean(self):
        """Mean over all entries of the global array."""
        return self.sum()/self.size

    def std(self):
        """Square root of ``var()``."""
        return np.sqrt(self.var())

    # FIXME: to be improved!
    def var(self):
        """Mean of the squared absolute deviation from the mean."""
        return (abs(self-self.mean())**2).mean()

    def _binary_helper(self, other, op):
        """Apply the ndarray method named ``op`` to the local data and ``other``.

        ``other`` may be a data_object with identical shape and distribution,
        a scalar, or a NumPy array; anything else yields NotImplemented.

        Raises
        ------
        ValueError
            If ``other`` is a data_object with a different shape or
            distribution axis.
        """
        a = self
        if isinstance(other, data_object):
            b = other
            if a._shape != b._shape:
                raise ValueError("shapes are incompatible.")
            if a._distaxis != b._distaxis:
                raise ValueError("distributions are incompatible.")
            a = a._data
            b = b._data
        elif np.isscalar(other):
            a = a._data
            b = other
        elif isinstance(other, np.ndarray):
            a = a._data
            b = other
        else:
            return NotImplemented

        tval = getattr(a, op)(b)
        if tval is a:
            # in-place operation: the local array was modified and returned
            return self
        else:
            return data_object(self._shape, tval, self._distaxis)

    def __add__(self, other):
        return self._binary_helper(other, op='__add__')

    def __radd__(self, other):
        return self._binary_helper(other, op='__radd__')

    def __iadd__(self, other):
        return self._binary_helper(other, op='__iadd__')

    def __sub__(self, other):
        return self._binary_helper(other, op='__sub__')

    def __rsub__(self, other):
        return self._binary_helper(other, op='__rsub__')

    def __isub__(self, other):
        return self._binary_helper(other, op='__isub__')

    def __mul__(self, other):
        return self._binary_helper(other, op='__mul__')

    def __rmul__(self, other):
        return self._binary_helper(other, op='__rmul__')

    def __imul__(self, other):
        return self._binary_helper(other, op='__imul__')

    def __div__(self, other):
        return self._binary_helper(other, op='__div__')

    def __rdiv__(self, other):
        return self._binary_helper(other, op='__rdiv__')

    def __idiv__(self, other):
        return self._binary_helper(other, op='__idiv__')

    def __truediv__(self, other):
        return self._binary_helper(other, op='__truediv__')

    def __rtruediv__(self, other):
        return self._binary_helper(other, op='__rtruediv__')

    def __pow__(self, other):
        return self._binary_helper(other, op='__pow__')

    def __rpow__(self, other):
        return self._binary_helper(other, op='__rpow__')

    def __ipow__(self, other):
        return self._binary_helper(other, op='__ipow__')

    def __eq__(self, other):
        return self._binary_helper(other, op='__eq__')

    def __ne__(self, other):
        return self._binary_helper(other, op='__ne__')

    def __neg__(self):
        return data_object(self._shape, -self._data, self._distaxis)

    def __abs__(self):
        return data_object(self._shape, np.abs(self._data), self._distaxis)

    def all(self):
        """Return whether the sum of all entries equals the global size."""
        return self.sum() == self.size

    def any(self):
        """Return whether the sum of all entries is nonzero."""
        return self.sum() != 0

    def fill(self, value):
        """Set every entry of the local data to ``value`` in place."""
        self._data.fill(value)
261

Martin Reinecke's avatar
Martin Reinecke committed
262
def full(shape, fill_value, dtype=None, distaxis=0):
    """Return a data_object of global shape ``shape`` filled with ``fill_value``.

    Only this task's part (of shape ``local_shape(shape, distaxis)``) is
    allocated.
    """
    local = np.full(local_shape(shape, distaxis), fill_value, dtype)
    return data_object(shape, local, distaxis)
265 266


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
267
def empty(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape ``shape`` with uninitialised data.

    Only this task's part (of shape ``local_shape(shape, distaxis)``) is
    allocated.
    """
    local = np.empty(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
270 271


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
272
def zeros(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape ``shape`` filled with zeros.

    Only this task's part (of shape ``local_shape(shape, distaxis)``) is
    allocated.
    """
    local = np.zeros(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
275 276


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
277
def ones(shape, dtype=None, distaxis=0):
    """Return a data_object of global shape ``shape`` filled with ones.

    Only this task's part (of shape ``local_shape(shape, distaxis)``) is
    allocated.
    """
    local = np.ones(local_shape(shape, distaxis), dtype)
    return data_object(shape, local, distaxis)
280 281 282 283 284 285 286


def empty_like(a, dtype=None):
    """Return an uninitialised data_object shaped and distributed like ``a``.

    ``dtype`` overrides the dtype of ``a`` when it is not None.
    """
    # data_object needs the global shape and the distribution axis as well as
    # the local data; passing only the local array raised a TypeError.
    return data_object(a._shape, np.empty_like(a._data, dtype), a._distaxis)


def vdot(a, b):
    """Return the scalar product ``np.vdot`` of two data_objects.

    The product is computed on the local data of every task; for distributed
    operands the partial results are summed over all tasks.
    """
    tmp = np.array(np.vdot(a._data, b._data))
    if a._distaxis == -1:
        # Not distributed: every task already holds the complete result.
        # Summing over the tasks would multiply it by the number of tasks.
        return tmp[()]
    res = np.empty((), dtype=tmp.dtype)
    _comm.Allreduce(tmp, res, MPI.SUM)
    return res[()]
291 292 293 294 295 296 297


def _math_helper(x, function, out):
    """Apply the element-wise ``function`` to the local data of ``x``.

    If ``out`` is not None the result is written into the local data of
    ``out`` and ``out`` is returned; otherwise a new data_object with the
    shape and distribution of ``x`` is returned.
    """
    if out is None:
        return data_object(x.shape, function(x._data), x._distaxis)
    function(x._data, out=out._data)
    return out
299 300 301 302 303 304 305 306 307 308 309 310 311 312


def abs(a, out=None):
    """Element-wise absolute value of ``a``; see ``_math_helper`` for ``out``."""
    return _math_helper(x=a, function=np.abs, out=out)


def exp(a, out=None):
    """Element-wise exponential of ``a``; see ``_math_helper`` for ``out``."""
    return _math_helper(x=a, function=np.exp, out=out)


def log(a, out=None):
    """Element-wise natural logarithm of ``a``; see ``_math_helper`` for ``out``."""
    return _math_helper(x=a, function=np.log, out=out)


Martin Reinecke's avatar
Martin Reinecke committed
313 314 315 316
def tanh(a, out=None):
    """Element-wise hyperbolic tangent of ``a``; see ``_math_helper`` for ``out``."""
    return _math_helper(x=a, function=np.tanh, out=out)


317 318 319 320 321
def sqrt(a, out=None):
    """Element-wise square root of ``a``; see ``_math_helper`` for ``out``."""
    return _math_helper(x=a, function=np.sqrt, out=out)


def from_object(object, dtype=None, copy=True):
    """Build a data_object from an existing one.

    The local data of ``object`` is passed through ``np.array`` with the
    given ``dtype`` and ``copy`` arguments; global shape and distribution
    axis are taken over unchanged.
    """
    local = np.array(object._data, dtype=dtype, copy=copy)
    return data_object(object._shape, local, distaxis=object._distaxis)
325 326


Martin Reinecke's avatar
Martin Reinecke committed
327 328
# This function draws all random numbers on all tasks, to produce the same
# array independent of the number of tasks
Martin Reinecke's avatar
Martin Reinecke committed
329 330 331
# MR FIXME: depending on what is really wanted/needed (i.e. same result
# independent of number of tasks, performance etc.) we need to adjust the
# algorithm.
Martin Reinecke's avatar
Martin Reinecke committed
332
def from_random(random_type, shape, dtype=np.float64, **kwargs):
    """Return a random data_object of global shape ``shape``, split on axis 0.

    Parameters
    ----------
    random_type : str
        Name of the generator method looked up on ``Random``.
    shape : sequence of int
        Global shape. NOTE(review): an empty shape raises IndexError because
        the first axis is accessed unconditionally.
    dtype :
        Passed on to the generator.
    **kwargs :
        Passed on to the generator.
    """
    generator_function = getattr(Random, random_type)
    # Every task generates the slabs of all tasks in order and keeps only its
    # own, so the loop must not be cut short once the own slab is found.
    for i in range(ntask):
        lshape = list(shape)
        lshape[0] = _shareSize(shape[0], ntask, i)
        ldat = generator_function(dtype=dtype, shape=lshape, **kwargs)
        if i == rank:
            outdat = ldat
    return from_local_data(shape, outdat, distaxis=0)
341

Martin Reinecke's avatar
Martin Reinecke committed
342

Martin Reinecke's avatar
Martin Reinecke committed
343 344 345 346
def local_data(arr):
    """Return the local data array stored in ``arr`` (no copy is made)."""
    local = arr._data
    return local


347 348 349 350 351 352 353 354
def ibegin_from_shape(glob_shape, distaxis=0):
    """Return the global index of the first local element, as a tuple.

    All entries are 0 except entry ``distaxis``, which is the start of this
    task's range along that axis. For ``distaxis < 0`` (not distributed) all
    entries are 0.
    """
    res = [0] * len(glob_shape)
    if distaxis >= 0:
        res[distaxis] = _shareRange(glob_shape[distaxis], ntask, rank)[0]
    # always a tuple, also in the non-distributed case
    return tuple(res)


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
355 356
def ibegin(arr):
    """Return the global index of the first local element of ``arr``.

    All entries of the returned tuple are 0 except the one for the
    distribution axis, which is the start of this task's range along that
    axis.
    """
    res = [0] * arr._data.ndim
    # A distribution axis of -1 means "not distributed". Without this guard
    # it would be used as a list index and put an offset on the last axis
    # (or raise IndexError for zero-dimensional arrays).
    if arr._distaxis >= 0:
        res[arr._distaxis] = _shareRange(arr._shape[arr._distaxis],
                                         ntask, rank)[0]
    return tuple(res)
Martin Reinecke's avatar
Martin Reinecke committed
359 360


Martin Reinecke's avatar
fixes  
Martin Reinecke committed
361 362
def np_allreduce_sum(arr):
    """Return the element-wise sum of the NumPy array ``arr`` over all tasks.

    The result has the shape and dtype of ``arr``; ``arr`` itself is not
    modified.
    """
    res = np.empty_like(arr)
    _comm.Allreduce(arr, res, MPI.SUM)
    return res
Martin Reinecke's avatar
Martin Reinecke committed
365 366 367 368 369 370


def distaxis(arr):
    """Return the distribution axis stored in ``arr``."""
    axis = arr._distaxis
    return axis


Martin Reinecke's avatar
Martin Reinecke committed
371
def from_local_data(shape, arr, distaxis):
    """Wrap this task's local array ``arr`` in a data_object.

    ``shape`` is the global shape and ``distaxis`` the distribution axis;
    ``arr`` is stored as is.
    """
    return data_object(shape, arr, distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
375 376
def from_global_data(arr, distaxis=0):
    """Build a data_object from a NumPy array that holds the global data.

    With ``distaxis == -1`` the whole array is stored; otherwise only this
    task's slice along ``distaxis`` is kept.
    """
    if distaxis == -1:
        return data_object(arr.shape, arr, distaxis)
    lo, hi = _shareRange(arr.shape[distaxis], ntask, rank)
    sl = [slice(None)]*len(arr.shape)
    sl[distaxis] = slice(lo, hi)
    # NumPy requires a tuple for multidimensional indexing; a list of slices
    # is no longer accepted.
    return data_object(arr.shape, arr[tuple(sl)], distaxis)


Martin Reinecke's avatar
Martin Reinecke committed
384 385
def to_global_data(arr):
    """Return the local data array of ``arr`` after gathering it if needed.

    If ``arr`` is not distributed its local data is returned directly;
    otherwise ``arr`` is first redistributed with ``dist=-1``.
    """
    if arr._distaxis == -1:
        return arr._data
    return redistribute(arr, dist=-1)._data


Martin Reinecke's avatar
Martin Reinecke committed
391
def redistribute(arr, dist=None, nodist=None):
    """Return ``arr`` with a different distribution axis.

    Exactly one of ``dist`` and ``nodist`` must be given.

    Parameters
    ----------
    arr : data_object
        Input array.
    dist : int, optional
        Requested distribution axis; -1 gathers the full array on all tasks.
    nodist : sequence of int, optional
        Axes that must not be distributed. The first axis not listed becomes
        the distribution axis; if every axis is listed the array is gathered.

    Returns
    -------
    data_object
        ``arr`` itself if it already satisfies the request, otherwise a new
        object.

    Raises
    ------
    ValueError
        If both or neither of ``dist`` and ``nodist`` are given.
    """
    if dist is not None:
        if nodist is not None:
            raise ValueError
        if dist == arr._distaxis:
            return arr
    else:
        if nodist is None:
            raise ValueError
        if arr._distaxis not in nodist:
            return arr
        dist = -1
        for i in range(len(arr.shape)):
            if i not in nodist:
                dist = i
                break

    if arr._distaxis == -1:  # all data available, just pick the proper subset
        return from_global_data(arr._data, dist)
    if dist == -1:  # gather all data on all tasks
        # bring the distributed axis to the front
        tmp = np.moveaxis(arr._data, arr._distaxis, 0)
        slabsize = np.prod(tmp.shape[1:])*tmp.itemsize
        # byte counts and displacements of the contribution of every task
        # (`int` replaces the `np.int` alias that current NumPy lacks)
        sz = np.empty(ntask, dtype=int)
        for i in range(ntask):
            sz[i] = slabsize*_shareSize(arr.shape[arr._distaxis], ntask, i)
        disp = np.empty(ntask, dtype=int)
        disp[0] = 0
        disp[1:] = np.cumsum(sz[:-1])
        tmp = np.require(tmp, requirements="C")
        out = np.empty(arr.size, dtype=arr.dtype)
        _comm.Allgatherv(tmp, [out, sz, disp, MPI.BYTE])
        # global shape with the formerly distributed axis moved to the front
        shp = np.array(arr._shape)
        shp[1:arr._distaxis+1] = shp[0:arr._distaxis]
        shp[0] = arr.shape[arr._distaxis]
        out = out.reshape(shp)
        out = np.moveaxis(out, 0, arr._distaxis)
        return from_global_data(out, distaxis=-1)

    # real redistribution via Alltoallv
    ssz0 = arr._data.size//arr.shape[dist]
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[dist]*_shareSize(arr.shape[dist], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[arr._distaxis]
    rsz = np.empty(ntask, dtype=int)
    if dist == 0:  # shortcut possible
        sbuf = np.ascontiguousarray(arr._data)
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            ssz[i] = ssz0*(hi-lo)
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    else:
        # pack the pieces destined for the individual tasks one after another
        sbuf = np.empty(arr._data.size, dtype=arr.dtype)
        sslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[dist], ntask, i)
            sslice[dist] = slice(lo, hi)
            ssz[i] = ssz0*(hi-lo)
            # index with a tuple; NumPy no longer accepts a list of slices
            sbuf[ofs:ofs+ssz[i]] = arr._data[tuple(sslice)].flat
            ofs += ssz[i]
            rsz[i] = rsz0*_shareSize(arr.shape[arr._distaxis], ntask, i)
    # the message is described in bytes, so convert element counts
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    if arr._distaxis == 0:
        rbuf = rbuf.reshape(local_shape(arr.shape, dist))
        arrnew = from_local_data(arr.shape, rbuf, distaxis=dist)
    else:
        # unpack the pieces received from the individual tasks
        arrnew = empty(arr.shape, dtype=arr.dtype, distaxis=dist)
        rslice = [slice(None)]*arr._data.ndim
        ofs = 0
        for i in range(ntask):
            lo, hi = _shareRange(arr.shape[arr._distaxis], ntask, i)
            rslice[arr._distaxis] = slice(lo, hi)
            sz = rsz[i]//arr._data.itemsize
            arrnew._data[tuple(rslice)].flat = rbuf[ofs:ofs+sz]
            ofs += sz
    return arrnew
Martin Reinecke's avatar
Martin Reinecke committed
475 476


Martin Reinecke's avatar
Martin Reinecke committed
477 478
def transpose(arr):
    """Return the transpose of a 2D data_object distributed along axis 0.

    The result has global shape ``(arr.shape[1], arr.shape[0])`` and is
    again distributed along axis 0.

    Raises
    ------
    ValueError
        If ``arr`` is not two-dimensional or not distributed along axis 0.
    """
    if len(arr.shape) != 2 or arr._distaxis != 0:
        raise ValueError("bad input")
    ssz0 = arr._data.size//arr.shape[1]
    # `int` replaces the `np.int` alias that current NumPy lacks
    ssz = np.empty(ntask, dtype=int)
    rszall = arr.size//arr.shape[1]*_shareSize(arr.shape[1], ntask, rank)
    rbuf = np.empty(rszall, dtype=arr.dtype)
    rsz0 = rszall//arr.shape[0]
    rsz = np.empty(ntask, dtype=int)
    # pack the column blocks destined for the individual tasks one after
    # another
    sbuf = np.empty(arr._data.size, dtype=arr.dtype)
    ofs = 0
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[1], ntask, i)
        ssz[i] = ssz0*(hi-lo)
        sbuf[ofs:ofs+ssz[i]] = arr._data[:, lo:hi].flat
        ofs += ssz[i]
        rsz[i] = rsz0*_shareSize(arr.shape[0], ntask, i)
    # the message is described in bytes, so convert element counts
    ssz *= arr._data.itemsize
    rsz *= arr._data.itemsize
    sdisp = np.append(0, np.cumsum(ssz[:-1]))
    rdisp = np.append(0, np.cumsum(rsz[:-1]))
    s_msg = [sbuf, (ssz, sdisp), MPI.BYTE]
    r_msg = [rbuf, (rsz, rdisp), MPI.BYTE]
    _comm.Alltoallv(s_msg, r_msg)
    del sbuf  # free memory
    arrnew = empty((arr.shape[1], arr.shape[0]), dtype=arr.dtype, distaxis=0)
    ofs = 0
    sz2 = _shareSize(arr.shape[1], ntask, rank)
    for i in range(ntask):
        lo, hi = _shareRange(arr.shape[0], ntask, i)
        sz = rsz[i]//arr._data.itemsize
        # each received block is reshaped to (hi-lo, sz2) and stored
        # transposed
        arrnew._data[:, lo:hi] = rbuf[ofs:ofs+sz].reshape(hi-lo, sz2).T
        ofs += sz
    return arrnew


Martin Reinecke's avatar
Martin Reinecke committed
513 514
def default_distaxis():
    """Return the default distribution axis, which is always 0."""
    axis = 0
    return axis