Commit b3cb661c authored by theos's avatar theos
Browse files

Moved d2o into dedicated subpackage.

parent 320ab0c7
......@@ -37,7 +37,7 @@ from nifty_core import space,\
point_space,\
field
from nifty_mpi_data import distributed_data_object, d2o_librarian
from d2o import distributed_data_object, d2o_librarian
from nifty_random import random
from nifty_simple_math import *
from nifty_utilities import *
......
# -*- coding: utf-8 -*-
from __future__ import division
from distributed_data_object import distributed_data_object
from d2o_librarian import d2o_librarian
from strategies import STRATEGIES
\ No newline at end of file
# -*- coding: utf-8 -*-
import numpy as np
class d2o_iter(object):
def __init__(self, d2o):
self.d2o = d2o
self.i = 0
self.n = np.prod(self.d2o.shape)
self.initialize_current_local_data()
def __iter__(self):
return self
def next(self):
if self.n == 0:
raise StopIteration()
self.update_current_local_data()
if self.i < self.n:
i = self.i
self.i += 1
return self.current_local_data[i]
else:
raise StopIteration()
def initialize_current_local_data(self):
raise NotImplementedError
def update_current_local_data(self):
raise NotImplementedError
class d2o_not_iter(d2o_iter):
def initialize_current_local_data(self):
self.current_local_data = self.d2o.data.flatten()
def update_current_local_data(self):
pass
class d2o_slicing_iter(d2o_iter):
def __init__(self, d2o):
self.d2o = d2o
self.i = 0
self.n = np.prod(self.d2o.shape)
self.local_dim_offset_list = \
self.d2o.distributor.all_local_slices[:, 4]
self.active_node = None
self.initialize_current_local_data()
def initialize_current_local_data(self):
self.update_current_local_data()
def update_current_local_data(self):
new_active_node = np.searchsorted(self.local_dim_offset_list,
self.i,
side='right')-1
# new_active_node = min(new_active_node, self.d2o.comm.size-1)
if self.active_node != new_active_node:
self.active_node = new_active_node
self.current_local_data = self.d2o.comm.bcast(
self.d2o.get_local_data().flatten(),
root=self.active_node)
# -*- coding: utf-8 -*-
from weakref import WeakValueDictionary as weakdict
class _d2o_librarian(object):
def __init__(self):
self.library = weakdict()
self.counter = 0
def register(self, d2o):
self.counter += 1
self.library[self.counter] = d2o
return self.counter
def __getitem__(self, key):
return self.library[key]
d2o_librarian = _d2o_librarian()
# -*- coding: utf-8 -*-
import numpy as np
from nifty.keepers import about,\
global_configuration as gc,\
global_dependency_injector as gdi
from d2o_librarian import d2o_librarian
from strategies import STRATEGIES
MPI = gdi[gc['mpi_module']]
class distributed_data_object(object):
"""A multidimensional array with modular MPI-based distribution schemes.
The purpose of a distributed_data_object (d2o) is to provide the user
with a numpy.ndarray like interface while storing the data on an arbitrary
number of MPI nodes. The logic of a certain distribution strategy is
implemented by an associated distributor.
Parameters
----------
global_data : array-like, at least 1-dimensional
Used with global-type distribution strategies in order to fill the
d2o with data during initialization.
global_shape : tuple of ints
Used with global-type distribution strategies. If no global_data is
supplied, it will be used.
dtype : {np.dtype, type}
Used as the d2o's datatype. Overwrites the data-type of any init data.
local_data : array-like, at least 1-dimensional
Used with local-type distribution strategies in order to fill the
d2o with data during initialization.
local_shape : tuple of ints
Used with local-type distribution strategies. If no local_data is
supplied, local_shape will be used.
distribution_strategy : optional[{'fftw', 'equal', 'not', 'freeform'}]
Specifies which distributor will be created and used.
'fftw' uses the distribution strategy of pyfftw,
'equal' tries to distribute the data as uniform as possible
'not' does not distribute the data at all
'freeform' distribute the data according to the given local data/shape
hermitian : boolean
Specifies if the given init-data is hermitian or not. The
self.hermitian attribute will be set accordingly.
alias : String
Used in order to initialize the d2o from a hdf5 file.
path : String
Used in order to initialize the d2o from a hdf5 file. If no path is
given, '$working_directory/alias' is used.
comm : mpi4py.MPI.Intracomm
The MPI communicator on which the d2o lives.
copy : boolean
If true it is guaranteed that the input data will be copied. If false
copying is tried to be avoided.
*args
Although not directly used during the init process, further parameters
are stored in the self.init_args attribute.
**kwargs
Additional keyword arguments are passed to the distributor_factory and
furthermore get stored in the self.init_kwargs attribute.
skip_parsing : boolean (optional keyword argument)
If true, the distribution_factory will skip all sanity checks and
completions of the given (keyword-)arguments. It just uses what it
gets. Hence the user is fully responsible for supplying complete and
consistent parameters. This can be used in order to speed up the init
process. Also see notes section.
Attributes
----------
data : numpy.ndarray
The numpy.ndarray in which the individual node's data is stored.
dtype : type
Data type of the data object.
distribution_strategy : string
Name of the used distribution_strategy.
distributor : distributor
The distributor object which takes care of all distribution and
consolidation of the data.
shape : tuple of int
The global shape of the data.
local_shape : tuple of int
The nodes individual local shape of the stored data.
comm : mpi4py.MPI.Intracomm
The MPI communicator on which the d2o lives.
hermitian : boolean
Specfies whether the d2o's data definitely possesses hermitian
symmetry.
index : int
The d2o's registration index it got from the d2o_librarian.
init_args : list
Any additional initialization arguments are stored here.
init_kwargs : dict
Any additional initialization keyword arguments are stored here.
Raises
------
ValueError
Raised if
* the supplied distribution strategy is not known
* comm is None
* different distribution strategies where given on the
individual nodes
* different dtypes where given on the individual nodes
* neither a non-0-dimensional global_data nor global_shape nor
hdf5 file supplied
* global_shape == ()
* different global_shapes where given on the individual nodes
* neither non-0-dimensional local_data nor local_shape nor
global d2o supplied
* local_shape == ()
* the first entry of local_shape is not the same on all nodes
Notes
-----
The index is the d2o's global unique indentifier. One may use it in order
to assemble the corresponding local d2o objects on different nodes if
only one local object on a specific node is given.
In order to speed up the init process the distributor_factory checks
if the global_configuration object gc yields gc['d2o_init_checks'] == True.
If yes, all checks expensive checks are skipped; namely those which need
mpi communication. Use this in order to get a fast init speed without
loosing d2o's init parsing logic.
Examples
--------
>>> a = np.arange(16, dtype=np.float).reshape((4,4))
>>> obj = distributed_data_object(a, dtype=np.complex)
>>> obj
<distributed_data_object>
array([[ 0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j],
[ 4.+0.j, 5.+0.j, 6.+0.j, 7.+0.j],
[ 8.+0.j, 9.+0.j, 10.+0.j, 11.+0.j],
[ 12.+0.j, 13.+0.j, 14.+0.j, 15.+0.j]])
See Also
--------
distributor
"""
def __init__(self, global_data=None, global_shape=None, dtype=None,
local_data=None, local_shape=None,
distribution_strategy=None, hermitian=False,
alias=None, path=None, comm=MPI.COMM_WORLD,
copy=True, *args, **kwargs):
# TODO: allow init with empty shape
if isinstance(global_data, tuple) or isinstance(global_data, list):
global_data = np.array(global_data, copy=False)
if isinstance(local_data, tuple) or isinstance(local_data, list):
local_data = np.array(local_data, copy=False)
if distribution_strategy is None:
distribution_strategy = gc['default_distribution_strategy']
from distributor_factory import distributor_factory
self.distributor = distributor_factory.get_distributor(
distribution_strategy=distribution_strategy,
comm=comm,
global_data=global_data,
global_shape=global_shape,
local_data=local_data,
local_shape=local_shape,
alias=alias,
path=path,
dtype=dtype,
**kwargs)
self.distribution_strategy = distribution_strategy
self.dtype = self.distributor.dtype
self.shape = self.distributor.global_shape
self.local_shape = self.distributor.local_shape
self.comm = self.distributor.comm
self.init_args = args
self.init_kwargs = kwargs
(self.data, self.hermitian) = self.distributor.initialize_data(
global_data=global_data,
local_data=local_data,
alias=alias,
path=path,
hermitian=hermitian,
copy=copy)
self.index = d2o_librarian.register(self)
@property
def real(self):
""" Returns a d2o containing the real part of the d2o's elements.
Returns
-------
out : distributed_data_object
The output object. The new datatype is the one numpy yields when
taking the real part on the local data.
"""
new_data = self.get_local_data(copy=False).real
new_dtype = new_data.dtype
new_d2o = self.copy_empty(dtype=new_dtype)
new_d2o.set_local_data(data=new_data,
copy=False,
hermitian=self.hermitian)
return new_d2o
@property
def imag(self):
""" Returns a d2o containing the imaginary part of the d2o's elements.
Returns
-------
out : distributed_data_object
The output object. The new datatype is the one numpy yields when
taking the imaginary part on the local data.
"""
new_data = self.get_local_data(copy=False).imag
new_dtype = new_data.dtype
new_d2o = self.copy_empty(dtype=new_dtype)
new_d2o.set_local_data(data=new_data,
copy=False,
hermitian=self.hermitian)
return new_d2o
@property
def hermitian(self):
return self._hermitian
@hermitian.setter
def hermitian(self, value):
self._hermitian = bool(value)
def _fast_copy_empty(self):
""" Make a very fast low level copy of the d2o without its data.
This function is fast, because it uses EmptyD2o - a derived class from
distributed_data_object and then copies the __dict__ directly. Unlike
copy_empty, _fast_copy_empty will copy all attributes unchanged.
"""
# make an empty d2o
new_copy = EmptyD2o()
# repair its class
new_copy.__class__ = self.__class__
# now copy everthing in the __dict__ except for the data array
for key, value in self.__dict__.items():
if key != 'data':
new_copy.__dict__[key] = value
else:
new_copy.__dict__[key] = np.empty_like(value)
# Register the new d2o at the librarian in order to get a unique index
new_copy.index = d2o_librarian.register(new_copy)
return new_copy
def copy(self, dtype=None, distribution_strategy=None, **kwargs):
""" Returns a full copy of the distributed data object.
If no keyword arguments are given, the returned object will be an
identical copy of the original d2o. By explicit specification one is
able to define the dtype and the distribution_strategy of the returned
d2o.
Parameters
----------
dtype : type
The dtype that the new d2o will have. The data of the primary
d2o will be casted.
distribution_strategy : all supported distribution strategies
The distribution strategy the new d2o should have. If not None and
different from the original one, there will certainly be inter-node
communication.
**kwargs
Additional keyword arguments get passed to the used copy_empty
routine.
Returns
-------
out : distributed_data_object
The output object. It containes the old data, possibly casted to a
new datatype and distributed according to a new distribution
strategy
See Also
--------
copy_empty
"""
temp_d2o = self.copy_empty(dtype=dtype,
distribution_strategy=distribution_strategy,
**kwargs)
if distribution_strategy is None or \
distribution_strategy == self.distribution_strategy:
temp_d2o.set_local_data(self.get_local_data(), copy=True)
else:
temp_d2o.set_full_data(self, hermitian=self.hermitian)
temp_d2o.hermitian = self.hermitian
return temp_d2o
def copy_empty(self, global_shape=None, local_shape=None, dtype=None,
distribution_strategy=None, **kwargs):
""" Returns an empty copy of the distributed data object.
If no keyword arguments are given, the returned object will be an
identical copy of the original d2o containing random data. By explicit
specification one is able to define the new dtype and
distribution_strategy of the returned d2o and to modify the new shape.
Parameters
----------
global_shape : tuple of ints
The global shape that the new d2o shall have. Relevant for
global-type distribution strategies like 'equal' or 'fftw'.
local_shape : tuple of ints
The local shape that the new d2o shall have. Relevant for
local-type distribution strategies like 'freeform'.
dtype : type
The dtype that the new d2o will have.
distribution_strategy : all supported distribution strategies
The distribution strategy the new d2o should have.
**kwargs
Additional keyword arguments get passed to the init-call if the
full initialization of a new distributed_data_object is necessary
Returns
-------
out : distributed_data_object
The output object. It contains random data.
See Also
--------
copy
"""
if self.distribution_strategy == 'not' and \
distribution_strategy in STRATEGIES['local'] and \
local_shape is None:
result = self.copy_empty(global_shape=global_shape,
local_shape=local_shape,
dtype=dtype,
distribution_strategy='equal',
**kwargs)
return result.copy_empty(
distribution_strategy=distribution_strategy)
if global_shape is None:
global_shape = self.shape
if local_shape is None:
local_shape = self.local_shape
if dtype is None:
dtype = self.dtype
else:
dtype = np.dtype(dtype)
if distribution_strategy is None:
distribution_strategy = self.distribution_strategy
# check if all parameters remain the same -> use the _fast_copy_empty
if (global_shape == self.shape and
local_shape == self.local_shape and
dtype == self.dtype and
distribution_strategy == self.distribution_strategy and
kwargs == self.init_kwargs):
return self._fast_copy_empty()
kwargs.update(self.init_kwargs)
temp_d2o = distributed_data_object(
global_shape=global_shape,
local_shape=local_shape,
dtype=dtype,
distribution_strategy=distribution_strategy,
comm=self.comm,
*self.init_args,
**kwargs)
return temp_d2o
def apply_scalar_function(self, function, inplace=False, dtype=None):
""" Maps a scalar function on each entry of an array.
The result of the function evaluation may be stored in the original
array or in a new array (default). Furthermore the dtype of the
returned array can be specified explicitly if inplace is set to False.
Parameters
----------
function : callable
Will be applied to the array's entries. It will be the node's local
data array into function as a whole. If this fails, the numpy
vectorize function will be used.
inplace : boolean
Specifies if the result of the function evaluation should be stored
in the original array or not.
dtype : type
If inplace is set to False, it is possible to specify the return
d2o's dtype explicitly.
Returns
-------
out : distributed_data_object
Resulting d2o. This is either a newly created array or the primary
d2o itself.
"""
remember_hermitianQ = self.hermitian
if inplace is True:
temp = self
if dtype is not None and self.dtype != np.dtype(dtype):
about.warnings.cprint(
"WARNING: Inplace dtype conversion is not possible!")
else:
temp = self.copy_empty(dtype=dtype)
if np.prod(self.local_shape) != 0:
try:
temp.data[:] = function(self.data)
except:
about.warnings.cprint(
"WARNING: Trying to use np.vectorize!")
temp.data[:] = np.vectorize(function)(self.data)
else:
# Noting to do here. The value-empty array
# is also geometrically empty
pass
if function in (np.exp, np.log):
temp.hermitian = remember_hermitianQ
else:
temp.hermitian = False
return temp
def apply_generator(self, generator, copy=False):
""" Evaluates generator(local_shape) and stores the result locally.
Parameters
----------
generator : callable
This function must be able to process the node's local data shape
and return a numpy.ndarray of this very shape. This array is then
stored as the local data array on each node.
copy : boolean
Specifies whether the self.set_local_data method is instructed to
copy the result from generator or not.
Notes
-----
The generator function yields node-local results. Therefore it is
assumed that the resulting overall d2o does not possess hermitian
symmetry anymore. Therefore self.hermitian is set to False.
"""
self.set_local_data(generator(self.distributor.local_shape), copy=copy)
self.hermitian = False
def __array__(self):
""" Returns the d2o's full data. """
return self.get_full_data()
def __str__(self):
""" x.__str__() <==> str(x)"""
return self.data.__str__()
def __repr__(self):
""" x.__repr__() <==> repr(x)"""
return '<distributed_data_object>\n' + self.data.__repr__()
def _compare_helper(self, other, op):
""" _compare_helper is used for <, <=, ==, !=, >= and >.
It checks the class of `other` and then utilizes the appropriate
methods of self. If `other` is not a scalar, numpy.ndarray or
distributed_data_object this method will use numpy casting.
Parameters
----------
other : scalar, numpy.ndarray, distributed_data_object, array_like
This is the object that will be compared to self.
op : string
The name of the comparison function, e.g. '__ne__'.
Returns
-------
result : boolean, distributed_data_object
If `other` was None, False will be returned. This follows the
behaviour of numpy but will changed as soon as numpy changed their
convention. In every other case a distributed_data_object with
element-wise comparison results will be returned.
"""
if other is not None:
result = self.copy_empty(dtype=np.bool_)
# Case 1: 'other' is a scalar
# -> make element-wise comparison
if np.isscalar(other):
result.set_local_data(
getattr(self.get_local_data(copy=False), op)(other))
return result
# Case 2: 'other' is a numpy array or a distributed_data_object
# -> extract the local data and make element-wise comparison
elif isinstance(other, np.ndarray) or\
isinstance(other, distributed_data_object):
temp_data = self.distributor.extract_local_data(other)
result.set_local_data(
getattr(self.get_local_data(copy=False), op)(temp_data))
return result