Commit f1057ce0 authored by theos's avatar theos
Browse files

Moved keepers and d2o into separate modules.

parent d06ba788
......@@ -26,18 +26,20 @@ mpl.use('Agg')
import dummys
from keepers import about,\
global_dependency_injector,\
global_configuration
# it is important to import config before d2o such that NIFTy is able to
# pre-create d2o's configuration object with the corrected path
from config import about,\
dependency_injector,\
nifty_configuration,\
d2o_configuration
from d2o import distributed_data_object, d2o_librarian
from nifty_cmaps import ncmap
from nifty_core import space,\
point_space,\
field
from d2o import distributed_data_object, d2o_librarian
from nifty_random import random
from nifty_simple_math import *
from nifty_utilities import *
......
# -*- coding: utf-8 -*-
from nifty_about import *
from nifty_config import dependency_injector,\
nifty_configuration
from d2o_config import d2o_configuration
# -*- coding: utf-8 -*-
import os
import keepers
# Pre-create the 'D2O' configuration instance through keepers and set its
# path explicitly to a file below the user's home directory
# (~/.nifty/d2o_config), instead of leaving the path choice to a later
# caller.
d2o_configuration = keepers.get_Configuration(
'D2O',
path=os.path.expanduser('~') + "/.nifty/d2o_config")
......@@ -2,92 +2,89 @@
import os
from nifty_dependency_injector import dependency_injector
from nifty_configuration import variable,\
configuration
import keepers
global_dependency_injector = dependency_injector(
['h5py',
('mpi4py.MPI', 'MPI'),
('nifty.dummys.MPI_dummy', 'MPI_dummy'),
# Setup the dependency injector
dependency_injector = keepers.DependencyInjector(
[('mpi4py.MPI', 'MPI'),
'h5py',
'gfft',
('nifty.dummys.gfft_dummy', 'gfft_dummy'),
'healpy',
'libsharp_wrapper_gl'])
global_dependency_injector.register('pyfftw', lambda z: hasattr(z, 'FFTW_MPI'))
dependency_injector.register('pyfftw', lambda z: hasattr(z, 'FFTW_MPI'))
variable_fft_module = variable('fft_module',
['pyfftw', 'gfft', 'gfft_dummy'],
lambda z: z in global_dependency_injector)
# Initialize the variables
variable_fft_module = keepers.Variable(
'fft_module',
['pyfftw', 'gfft', 'gfft_dummy'],
lambda z: z in dependency_injector)
# gl_space needs libsharp
variable_lm2gl = variable('lm2gl',
variable_lm2gl = keepers.Variable(
'lm2gl',
[True, False],
lambda z: (('libsharp_wrapper_gl' in
global_dependency_injector)
dependency_injector)
if z else True) and isinstance(z, bool),
genus='boolean')
variable_use_healpy = variable(
variable_use_healpy = keepers.Variable(
'use_healpy',
[True, False],
lambda z: (('healpy' in global_dependency_injector)
lambda z: (('healpy' in dependency_injector)
if z else True) and isinstance(z, bool),
genus='boolean')
variable_use_libsharp = variable('use_libsharp',
variable_use_libsharp = keepers.Variable(
'use_libsharp',
[True, False],
lambda z: (('libsharp_wrapper_gl' in
global_dependency_injector)
dependency_injector)
if z else True) and
isinstance(z, bool),
genus='boolean')
variable_verbosity = variable('verbosity',
[1],
lambda z: z == abs(int(z)),
genus='int')
variable_mpi_module = variable('mpi_module',
['MPI', 'MPI_dummy'],
lambda z: z in global_dependency_injector)
variable_verbosity = keepers.Variable('verbosity',
[1],
lambda z: z == abs(int(z)),
genus='int')
variable_default_distribution_strategy = variable(
'default_distribution_strategy',
['fftw', 'equal', 'not'],
lambda z: (('pyfftw' in global_dependency_injector)
if (z == 'fftw') else True)
)
variable_d2o_init_checks = variable('d2o_init_checks',
[True, False],
lambda z: isinstance(z, bool),
genus='boolean')
global_configuration = configuration(
nifty_configuration = keepers.get_Configuration(
'NIFTy',
[variable_fft_module,
variable_lm2gl,
variable_use_healpy,
variable_use_libsharp,
variable_verbosity,
variable_mpi_module,
variable_default_distribution_strategy,
variable_d2o_init_checks
],
path=os.path.expanduser('~') + "/.nifty/global_config")
path=os.path.expanduser('~') + "/.nifty/nifty_config")
########
### Compatibility variables
########
variable_mpi_module = keepers.Variable('mpi_module',
['MPI'],
lambda z: z in dependency_injector)
nifty_configuration.register(variable_mpi_module)
variable_default_comm = variable(
# register the default comm variable as the 'mpi_module' variable is now
# available
variable_default_comm = keepers.Variable(
'default_comm',
['COMM_WORLD'],
lambda z: hasattr(global_dependency_injector[
global_configuration['mpi_module']], z))
lambda z: hasattr(dependency_injector[
nifty_configuration['mpi_module']], z))
nifty_configuration.register(variable_default_comm)
global_configuration.register(variable_default_comm)
########
########
# Best-effort call of the configuration's load(); presumably this reads the
# file given via `path` above -- confirm against the keepers package.
# NOTE(review): both the old `global_configuration` and the new
# `nifty_configuration` call are listed here because this view interleaves
# removed and added diff lines; only one of them exists per revision.
try:
global_configuration.load()
nifty_configuration.load()
# NOTE(review): the bare except swallows every exception, including
# KeyboardInterrupt/SystemExit, so a failing load() goes unnoticed.
# Consider narrowing it to the errors load() can actually raise.
except:
pass
# -*- coding: utf-8 -*-
from __future__ import division
from distributed_data_object import distributed_data_object
from d2o_librarian import d2o_librarian
from strategies import STRATEGIES
\ No newline at end of file
# -*- coding: utf-8 -*-
import numpy as np
from nifty import about
def cast_axis_to_tuple(axis):
    """Normalize an axis specification to a tuple of ints.

    Parameters
    ----------
    axis : None, scalar or iterable
        The axis specification to convert.

    Returns
    -------
    out : None or tuple of int
        None if `axis` is None. Otherwise a tuple holding `int(entry)` for
        every entry of an iterable `axis`, or the one-element tuple
        `(int(axis), )` for a scalar `axis`.

    Raises
    ------
    TypeError
        If `axis` is neither iterable nor a scalar in the sense of
        `np.isscalar`.
    """
    if axis is None:
        return None
    try:
        # Iterable input: convert entry by entry.
        return tuple(int(entry) for entry in axis)
    except TypeError:
        # Not iterable (or an entry was not convertible); only scalars are
        # accepted as a fallback.
        if not np.isscalar(axis):
            raise TypeError(about._errors.cstring(
                "ERROR: Could not convert axis-input to tuple of ints"))
        return (int(axis), )
# -*- coding: utf-8 -*-
import numpy as np
class d2o_iter(object):
    """Base class for iterators over the flattened entries of a d2o.

    Subclasses implement `initialize_current_local_data` and
    `update_current_local_data`. Both are expected to maintain the
    attribute `current_local_data`, which `next` indexes with the running
    counter `i`.

    The iterator supports the Python 2 protocol (`next`) as well as the
    Python 3 protocol (`__next__`).

    Parameters
    ----------
    d2o : object
        The object to iterate over. This base class only reads its `shape`
        attribute; subclasses access further attributes.

    Attributes
    ----------
    d2o : object
        The object handed over at construction.
    i : int
        Number of entries that were already returned.
    n : int
        Total number of entries, i.e. the product of `d2o.shape`.
    """

    def __init__(self, d2o):
        self.d2o = d2o
        self.i = 0
        self.n = np.prod(self.d2o.shape)
        self.initialize_current_local_data()

    def __iter__(self):
        return self

    def next(self):
        """Return the next entry or raise StopIteration when exhausted."""
        if self.n == 0:
            raise StopIteration()
        # Give the subclass the chance to refresh `current_local_data`
        # before the entry for the current counter is read.
        self.update_current_local_data()
        if self.i < self.n:
            i = self.i
            self.i += 1
            return self.current_local_data[i]
        else:
            raise StopIteration()

    def __next__(self):
        """Python 3 iterator protocol; delegates to `next`.

        Delegation (instead of an alias) makes sure that a subclass which
        overrides `next` is honoured as well.
        """
        return self.next()

    def initialize_current_local_data(self):
        """Set up `current_local_data`; must be provided by subclasses."""
        raise NotImplementedError

    def update_current_local_data(self):
        """Refresh `current_local_data`; must be provided by subclasses."""
        raise NotImplementedError
class d2o_not_iter(d2o_iter):
    """Iterator that reads the complete `d2o.data` array once up front.

    The flattened copy of `d2o.data` is stored during initialization and
    never refreshed afterwards. Judging by the name this belongs to the
    'not' distribution strategy -- confirm against the caller.
    """

    def initialize_current_local_data(self):
        """Store a flattened copy of `d2o.data` as `current_local_data`."""
        flat_data = self.d2o.data.flatten()
        self.current_local_data = flat_data

    def update_current_local_data(self):
        """Nothing to refresh; the data was fetched completely at init."""
        return None
class d2o_slicing_iter(d2o_iter):
    """Iterator that fetches the data node by node via broadcasts.

    The node whose data is currently held is determined by looking up the
    running counter `i` in `local_dim_offset_list`, which is column 4 of
    `d2o.distributor.all_local_slices` (presumably the per-node offsets
    along the flattened data -- TODO confirm). Whenever the responsible
    node changes, its flattened local data is broadcast over `d2o.comm`.

    NOTE(review): the inherited `next` indexes `current_local_data` with
    the global counter `i`, not with a position relative to the active
    node's offset -- confirm that this is intended.
    """

    def __init__(self, d2o):
        self.d2o = d2o
        self.active_node = None
        self.i = 0
        self.n = np.prod(d2o.shape)
        self.local_dim_offset_list = d2o.distributor.all_local_slices[:, 4]
        self.initialize_current_local_data()

    def initialize_current_local_data(self):
        """Fetch the data for the starting position of the iteration."""
        self.update_current_local_data()

    def update_current_local_data(self):
        """Broadcast the responsible node's data if that node changed."""
        # Index of the last offset entry that is <= i (for sorted offsets).
        responsible_node = np.searchsorted(self.local_dim_offset_list,
                                           self.i,
                                           side='right') - 1
        # new_active_node = min(new_active_node, self.d2o.comm.size-1)
        if self.active_node != responsible_node:
            self.active_node = responsible_node
            flat_local_data = self.d2o.get_local_data().flatten()
            self.current_local_data = self.d2o.comm.bcast(
                flat_local_data,
                root=responsible_node)
# -*- coding: utf-8 -*-
from weakref import WeakValueDictionary as weakdict
class _d2o_librarian(object):
    """Registry handing out consecutive integer indices for objects.

    The registered objects are kept in a WeakValueDictionary, i.e. the
    registry itself does not keep them alive.

    Attributes
    ----------
    library : WeakValueDictionary
        Maps the issued index to the registered object.
    counter : int
        The most recently issued index; 0 if nothing was registered yet.
    """

    def __init__(self):
        self.counter = 0
        self.library = weakdict()

    def register(self, d2o):
        """Store `d2o` under a fresh index and return that index."""
        index = self.counter + 1
        # The counter is advanced before the insertion, so an index is
        # consumed even if the object cannot be weakly referenced.
        self.counter = index
        self.library[index] = d2o
        return index

    def __getitem__(self, key):
        """Return the object registered under `key`."""
        return self.library[key]


# Module-wide registry instance.
d2o_librarian = _d2o_librarian()
# -*- coding: utf-8 -*-
import numpy as np
from nifty.keepers import about,\
global_configuration as gc,\
global_dependency_injector as gdi
from d2o_librarian import d2o_librarian
from cast_axis_to_tuple import cast_axis_to_tuple
from strategies import STRATEGIES
MPI = gdi[gc['mpi_module']]
class distributed_data_object(object):
"""A multidimensional array with modular MPI-based distribution schemes.
The purpose of a distributed_data_object (d2o) is to provide the user
with a numpy.ndarray like interface while storing the data on an arbitrary
number of MPI nodes. The logic of a certain distribution strategy is
implemented by an associated distributor.
Parameters
----------
global_data : array-like, at least 1-dimensional
Used with global-type distribution strategies in order to fill the
d2o with data during initialization.
global_shape : tuple of ints
Used with global-type distribution strategies. If no global_data is
supplied, it will be used.
dtype : {np.dtype, type}
Used as the d2o's datatype. Overwrites the data-type of any init data.
local_data : array-like, at least 1-dimensional
Used with local-type distribution strategies in order to fill the
d2o with data during initialization.
local_shape : tuple of ints
Used with local-type distribution strategies. If no local_data is
supplied, local_shape will be used.
distribution_strategy : optional[{'fftw', 'equal', 'not', 'freeform'}]
Specifies which distributor will be created and used.
'fftw' uses the distribution strategy of pyfftw,
'equal' tries to distribute the data as uniform as possible
'not' does not distribute the data at all
'freeform' distribute the data according to the given local data/shape
hermitian : boolean
Specifies if the given init-data is hermitian or not. The
self.hermitian attribute will be set accordingly.
alias : String
Used in order to initialize the d2o from a hdf5 file.
path : String
Used in order to initialize the d2o from a hdf5 file. If no path is
given, '$working_directory/alias' is used.
comm : mpi4py.MPI.Intracomm
The MPI communicator on which the d2o lives.
copy : boolean
If true it is guaranteed that the input data will be copied. If false
copying is tried to be avoided.
*args
Although not directly used during the init process, further parameters
are stored in the self.init_args attribute.
**kwargs
Additional keyword arguments are passed to the distributor_factory and
furthermore get stored in the self.init_kwargs attribute.
skip_parsing : boolean (optional keyword argument)
If true, the distribution_factory will skip all sanity checks and
completions of the given (keyword-)arguments. It just uses what it
gets. Hence the user is fully responsible for supplying complete and
consistent parameters. This can be used in order to speed up the init
process. Also see notes section.
Attributes
----------
data : numpy.ndarray
The numpy.ndarray in which the individual node's data is stored.
dtype : type
Data type of the data object.
distribution_strategy : string
Name of the used distribution_strategy.
distributor : distributor
The distributor object which takes care of all distribution and
consolidation of the data.
shape : tuple of int
The global shape of the data.
local_shape : tuple of int
The nodes individual local shape of the stored data.
comm : mpi4py.MPI.Intracomm
The MPI communicator on which the d2o lives.
hermitian : boolean
Specifies whether the d2o's data definitely possesses hermitian
symmetry.
index : int
The d2o's registration index it got from the d2o_librarian.
init_args : list
Any additional initialization arguments are stored here.
init_kwargs : dict
Any additional initialization keyword arguments are stored here.
Raises
------
ValueError
Raised if
* the supplied distribution strategy is not known
* comm is None
* different distribution strategies were given on the
individual nodes
* different dtypes were given on the individual nodes
* neither a non-0-dimensional global_data nor global_shape nor
hdf5 file supplied
* global_shape == ()
* different global_shapes were given on the individual nodes
* neither non-0-dimensional local_data nor local_shape nor
global d2o supplied
* local_shape == ()
* the first entry of local_shape is not the same on all nodes
Notes
-----
The index is the d2o's global unique identifier. One may use it in order
to assemble the corresponding local d2o objects on different nodes if
only one local object on a specific node is given.
In order to speed up the init process the distributor_factory checks
if the global_configuration object gc yields gc['d2o_init_checks'] == True.
If yes, all expensive checks are skipped; namely those which need
mpi communication. Use this in order to get a fast init speed without
losing d2o's init parsing logic.
Examples
--------
>>> a = np.arange(16, dtype=np.float).reshape((4,4))
>>> obj = distributed_data_object(a, dtype=np.complex)
>>> obj
<distributed_data_object>
array([[ 0.+0.j, 1.+0.j, 2.+0.j, 3.+0.j],
[ 4.+0.j, 5.+0.j, 6.+0.j, 7.+0.j],
[ 8.+0.j, 9.+0.j, 10.+0.j, 11.+0.j],
[ 12.+0.j, 13.+0.j, 14.+0.j, 15.+0.j]])
See Also
--------
distributor
"""
def __init__(self, global_data=None, global_shape=None, dtype=None,
             local_data=None, local_shape=None,
             distribution_strategy=None, hermitian=False,
             alias=None, path=None, comm=MPI.COMM_WORLD,
             copy=True, *args, **kwargs):
    """Set up the distributor, the local data and the librarian index.

    See the class docstring for the meaning of the individual parameters.
    """
    # TODO: allow init with empty shape

    # Plain Python sequences are turned into arrays up front. np.asarray
    # is used instead of np.array(..., copy=False): the latter raises a
    # ValueError on NumPy >= 2 because a list/tuple can never be
    # converted without copying; on older NumPy both are equivalent.
    if isinstance(global_data, (tuple, list)):
        global_data = np.asarray(global_data)
    if isinstance(local_data, (tuple, list)):
        local_data = np.asarray(local_data)

    # Fall back to the configured default strategy.
    if distribution_strategy is None:
        distribution_strategy = gc['default_distribution_strategy']

    # Imported at call time rather than at module level; presumably to
    # avoid a circular import -- confirm before moving it.
    from distributor_factory import distributor_factory
    self.distributor = distributor_factory.get_distributor(
        distribution_strategy=distribution_strategy,
        comm=comm,
        global_data=global_data,
        global_shape=global_shape,
        local_data=local_data,
        local_shape=local_shape,
        alias=alias,
        path=path,
        dtype=dtype,
        **kwargs)

    # The distributor is the authority for dtype, shapes and communicator.
    self.distribution_strategy = distribution_strategy
    self.dtype = self.distributor.dtype
    self.shape = self.distributor.global_shape
    self.local_shape = self.distributor.local_shape
    self.comm = self.distributor.comm

    # Additional arguments are not used here but kept for later reference.
    self.init_args = args
    self.init_kwargs = kwargs

    (self.data, self.hermitian) = self.distributor.initialize_data(
        global_data=global_data,
        local_data=local_data,
        alias=alias,
        path=path,
        hermitian=hermitian,
        copy=copy)

    # Register at the librarian in order to get the unique index.
    self.index = d2o_librarian.register(self)
@property
def real(self):
    """ Returns a d2o containing the real part of the d2o's elements.

    Returns
    -------
    out : distributed_data_object
        A new d2o created via copy_empty. Its dtype is the dtype of the
        `.real` attribute of the local data; the hermitian flag of the
        original d2o is handed over unchanged.
    """
    local_real = self.get_local_data(copy=False).real
    result = self.copy_empty(dtype=local_real.dtype)
    # The freshly computed array is handed over without another copy.
    result.set_local_data(data=local_real,
                          copy=False,
                          hermitian=self.hermitian)
    return result
@property
def imag(self):
    """ Returns a d2o containing the imaginary part of the d2o's elements.

    Returns
    -------
    out : distributed_data_object
        A new d2o created via copy_empty. Its dtype is the dtype of the
        `.imag` attribute of the local data.
    """
    local_imag = self.get_local_data(copy=False).imag
    result = self.copy_empty(dtype=local_imag.dtype)
    # NOTE(review): the hermitian flag is passed through unchanged --
    # confirm that this is the intended flag for the imaginary part.
    result.set_local_data(data=local_imag,
                          copy=False,
                          hermitian=self.hermitian)
    return result
@property
def hermitian(self):
    """ The hermitian flag, as stored in the `_hermitian` attribute. """
    return self._hermitian

@hermitian.setter
def hermitian(self, value):
    # Any input is coerced, so the stored flag is always a real bool.
    self._hermitian = bool(value)
def _fast_copy_empty(self):
    """ Make a very fast low level copy of the d2o without its data.

    An EmptyD2o instance is created, its class is switched to the class of
    the original and the entries of the original's __dict__ are taken over
    as they are. Only the 'data' entry is replaced by an uninitialized
    array created with np.empty_like. Unlike copy_empty, all other
    attributes are copied unchanged.

    Returns
    -------
    out : distributed_data_object
        The new object, registered at the d2o_librarian under its own
        index.
    """
    # Start from an empty shell and give it the right class.
    clone = EmptyD2o()
    clone.__class__ = self.__class__

    # Take over every attribute; only the data array is replaced by an
    # uninitialized array of the same shape and dtype.
    for name, attribute in self.__dict__.items():
        clone.__dict__[name] = (np.empty_like(attribute)
                                if name == 'data' else attribute)

    # The copied index belongs to the original; request a fresh one.
    clone.index = d2o_librarian.register(clone)
    return clone
def copy(self, dtype=None, distribution_strategy=None, **kwargs):
    """ Returns a full copy of the distributed data object.

    Without keyword arguments the result is created by copy_empty with
    default settings and filled with a copy of the local data. The dtype
    and the distribution_strategy of the result can be chosen explicitly.

    Parameters
    ----------
    dtype : type
        The dtype of the new d2o; handed on to copy_empty.
    distribution_strategy : all supported distribution strategies
        The distribution strategy of the new d2o. If it is given and
        differs from the one of the original, the new d2o is filled via
        set_full_data instead of set_local_data.
    **kwargs
        Additional keyword arguments are passed on to copy_empty.

    Returns
    -------
    out : distributed_data_object
        The new d2o. It contains the old data and carries the hermitian
        flag of the original.

    See Also
    --------
    copy_empty
    """
    duplicate = self.copy_empty(dtype=dtype,
                                distribution_strategy=distribution_strategy,
                                **kwargs)

    keeps_layout = (distribution_strategy is None or
                    distribution_strategy == self.distribution_strategy)
    if keeps_layout:
        # Same distribution: the local pieces can be copied directly.
        duplicate.set_local_data(self.get_local_data(copy=False), copy=True)
    else:
        # Different distribution: let the new d2o take the full data.
        duplicate.set_full_data(self, hermitian=self.hermitian)

    duplicate.hermitian = self.hermitian
    return duplicate
def copy_empty(self, global_shape=None, local_shape=None, dtype=None,
distribution_strategy=None, **kwargs):
""" Returns an empty copy of the distributed data object.
If no keyword arguments are given, the returned object will be an
identical copy of the original d2o containing random data. By explicit
specification one is able to define the new dtype and
distribution_strategy of the returned d2o and to modify the new shape.
Parameters