Commit 0676944f authored by Ultima's avatar Ultima
Browse files

Added nifty_utilities.py.

Started rework of the d2o slicing: introduced 0-dimensional advanced indexing capabilities.
parent 23e0643e
......@@ -35,6 +35,7 @@ from nifty_mpi_data import distributed_data_object
from nifty_power import *
from nifty_random import random
from nifty_simple_math import *
from nifty_utilities import *
from nifty_paradict import space_paradict,\
point_space_paradict,\
......
......@@ -170,7 +170,9 @@ class notification(switch):
"""
_code = "\033[0m" ## "\033[39;49m"
_ccode_red = "\033[31;1m"
_ccode_yellow = "\033[33;1m"
_ccode_green = "\033[32;1m"
def __init__(self,default=True,ccode="\033[0m"):
"""
Initializes the notification and sets `status` and `ccode`
......@@ -387,9 +389,12 @@ class _about(object): ## nifty support class for global settings
self._version = str(__version__)
## switches and notifications
self._errors = notification(default=True,ccode=notification._code)
self.warnings = notification(default=True,ccode=notification._code)
self.infos = notification(default=False,ccode=notification._code)
self._errors = notification(default=True,
ccode=notification._code)
self.warnings = notification(default=True,
ccode=notification._code)
self.infos = notification(default=False,
ccode=notification._code)
self.multiprocessing = switch(default=True)
self.hermitianize = switch(default=True)
self.lm2gl = switch(default=True)
......
## nifty configuration
##
## errors colour code
0
31
## warnings
1
## warnings colour code
0
33;1
## infos
0
## infos colour code
0
32
## multiprocessing
1
## hermitianize
......
......@@ -152,6 +152,7 @@ from nifty_about import about
from nifty_random import random
from nifty.nifty_mpi_data import distributed_data_object
import nifty.nifty_utilities as utilities
pi = 3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679
......@@ -3046,32 +3047,7 @@ class field(object):
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def _map(self, function, *args):
if self.ishape == ():
return function(*args)
else:
if args == ():
result = np.empty(self.ishape, dtype=np.object)
for i in xrange(np.prod(self.ishape)):
ii = np.unravel_index(i, self.ishape)
result[ii] = function()
return result
else:
## define a helper function in order to clip the get-indices
## to be suitable for the foreign arrays in args.
## This allows you to do operations, like adding to fields
## with ishape (3,4,3) and (3,4,1)
def get_clipped(w, ind):
w_shape = np.array(np.shape(w))
get_tuple = tuple(np.clip(ind, 0, w_shape-1))
return w[get_tuple]
result = np.empty_like(args[0])
for i in xrange(np.prod(result.shape)):
ii = np.unravel_index(i, result.shape)
result[ii] = function(*map(
lambda z: get_clipped(z, ii), args)
)
#result[ii] = function(*map(lambda z: z[ii], args))
return result
return utilities.field_map(self.ishape, function, *args)
def cast(self, x = None, ishape = None):
if ishape is None:
......
......@@ -24,31 +24,31 @@
##initialize the 'found-packages'-dictionary
found = {}
##initialize the 'FOUND-packages'-dictionary
FOUND = {}
import numpy as np
from nifty_about import about
try:
from mpi4py import MPI
found['MPI'] = True
FOUND['MPI'] = True
except(ImportError):
import mpi_dummy as MPI
found['MPI'] = False
FOUND['MPI'] = False
try:
import pyfftw
found['pyfftw'] = True
FOUND['pyfftw'] = True
except(ImportError):
found['pyfftw'] = False
FOUND['pyfftw'] = False
try:
import h5py
found['h5py'] = True
found['h5py_parallel'] = h5py.get_config().mpi
FOUND['h5py'] = True
FOUND['h5py_parallel'] = h5py.get_config().mpi
except(ImportError):
found['h5py'] = False
found['h5py_parallel'] = False
FOUND['h5py'] = False
FOUND['h5py_parallel'] = False
class distributed_data_object(object):
......@@ -96,63 +96,87 @@ class distributed_data_object(object):
"""
def __init__(self, global_data = None, global_shape=None, dtype=None,
local_data=None, local_shape=None,
distribution_strategy='fftw', hermitian=False,
alias=None, path=None, comm = MPI.COMM_WORLD,
copy = True, *args, **kwargs):
## a given hdf5 file overwrites the other parameters
if found['h5py'] == True and alias is not None:
## set file path
file_path = path if (path is not None) else alias
## open hdf5 file
if found['h5py_parallel'] == True and found['MPI'] == True:
f = h5py.File(file_path, 'r', driver='mpio', comm=comm)
else:
f= h5py.File(file_path, 'r')
## open alias in file
dset = f[alias]
## set shape
global_shape = dset.shape
## set dtype
dtype = dset.dtype.type
## if no hdf5 path was given, extract global_shape and dtype from
## the remaining arguments
else:
## an explicitly given dtype overwrites the one from global_data
if dtype is None:
if global_data is None:
raise ValueError(about._errors.cstring(
"ERROR: Neither global_data nor dtype supplied!"))
try:
dtype = global_data.dtype.type
except(AttributeError):
try:
dtype = global_data.dtype
except(AttributeError):
dtype = np.array(global_data).dtype.type
else:
dtype = np.dtype(dtype).type
## an explicitly given global_shape argument is only used if
## 1. no global_data was supplied, or
## 2. global_data is a scalar/list of dimension 0.
if global_shape is None:
if global_data is None or np.isscalar(global_data):
raise ValueError(about._errors.cstring(
"ERROR: Neither non-0-dimensional global_data nor global_shape supplied!"))
global_shape = global_data.shape
else:
if global_data is None or np.isscalar(global_data):
global_shape = tuple(global_shape)
else:
global_shape = global_data.shape
#
# ## a given hdf5 file overwrites the other parameters
# if FOUND['h5py'] == True and alias is not None:
# ## set file path
# file_path = path if (path is not None) else alias
# ## open hdf5 file
# if FOUND['h5py_parallel'] == True and FOUND['MPI'] == True:
# f = h5py.File(file_path, 'r', driver='mpio', comm=comm)
# else:
# f= h5py.File(file_path, 'r')
# ## open alias in file
# dset = f[alias]
# ## set shape
# global_shape = dset.shape
# ## set dtype
# dtype = dset.dtype.type
# ## if no hdf5 path was given, extract global_shape and dtype from
# ## the remaining arguments
# else:
# ## an explicitly given dtype overwrites the one from global_data
# if dtype is None:
# if global_data is None and local_data is None:
# raise ValueError(about._errors.cstring(
# "ERROR: Neither global_data nor local_data nor dtype supplied!"))
# elif global_data is not None:
# try:
# dtype = global_data.dtype.type
# except(AttributeError):
# try:
# dtype = global_data.dtype
# except(AttributeError):
# dtype = np.array(global_data).dtype.type
# elif local_data is not None:
# try:
# dtype = local_data.dtype.type
# except(AttributeError):
# try:
# dtype = local_data.dtype
# except(AttributeError):
# dtype = np.array(local_data).dtype.type
# else:
# dtype = np.dtype(dtype).type
#
# ## an explicitly given global_shape argument is only used if
# ## 1. no global_data was supplied, or
# ## 2. global_data is a scalar/list of dimension 0.
#
# if global_data is not None and np.isscalar(global_data) == False:
# global_shape = global_data.shape
# elif global_shape is not None:
# global_shape = tuple(global_shape)
#
# if local_data is not None
#
## if global_shape is None:
## if global_data is None or np.isscalar(global_data):
## raise ValueError(about._errors.cstring(
## "ERROR: Neither non-0-dimensional global_data nor global_shape supplied!"))
## global_shape = global_data.shape
## else:
## if global_data is None or np.isscalar(global_data):
## global_shape = tuple(global_shape)
## else:
## global_shape = global_data.shape
## TODO: allow init with empty shape
self.distributor = distributor_factory.get_distributor(
distribution_strategy = distribution_strategy,
global_data = global_data,
global_shape = global_shape,
local_data = local_data,
local_shape = local_shape,
alias = alias,
path = path,
dtype = dtype,
comm = comm,
**kwargs)
self.distribution_strategy = distribution_strategy
......@@ -162,23 +186,29 @@ class distributed_data_object(object):
self.init_args = args
self.init_kwargs = kwargs
## If a hdf5 path was given, load the data
if found['h5py'] == True and alias is not None:
self.load(alias = alias, path = path)
## close the file handle
f.close()
## If the input data was a scalar, set the whole array to this value
elif global_data != None and np.isscalar(global_data):
temp = np.empty(self.distributor.local_shape, dtype = self.dtype)
temp.fill(global_data)
self.set_local_data(temp)
self.hermitian = True
else:
self.set_full_data(data=global_data, hermitian=hermitian,
copy = copy, **kwargs)
(self.data, self.hermitian) = self.distributor.initialize_data(
global_data = global_data,
local_data = local_data,
alias = alias,
path = alias,
hermitian = hermitian,
copy = copy)
# ## If a hdf5 path was given, load the data
# if FOUND['h5py'] == True and alias is not None:
# self.load(alias = alias, path = path)
# ## close the file handle
# f.close()
#
# ## If the input data was a scalar, set the whole array to this value
# elif global_data is not None and np.isscalar(global_data):
# temp = np.empty(self.distributor.local_shape, dtype = self.dtype)
# temp.fill(global_data)
# self.set_local_data(temp)
# self.hermitian = True
# else:
# self.set_full_data(data=global_data, hermitian=hermitian,
# copy = copy, **kwargs)
#
def copy(self, dtype=None, distribution_strategy=None, **kwargs):
temp_d2o = self.copy_empty(dtype=dtype,
distribution_strategy=distribution_strategy,
......@@ -188,7 +218,7 @@ class distributed_data_object(object):
temp_d2o.set_local_data(self.get_local_data(), copy=True)
else:
#temp_d2o.set_full_data(self.get_full_data())
temp_d2o.inject([slice(None),], self, [slice(None),])
temp_d2o.inject((slice(None),), self, (slice(None),))
temp_d2o.hermitian = self.hermitian
return temp_d2o
......@@ -480,28 +510,30 @@ class distributed_data_object(object):
def __getitem__(self, key):
## Case 1: key is a boolean array.
## -> take the local data portion from key, use this for data
## extraction, and then merge the result in a flat numpy array
if isinstance(key, np.ndarray):
found = 'ndarray'
found_boolean = (key.dtype.type == np.bool_)
elif isinstance(key, distributed_data_object):
found = 'd2o'
found_boolean = (key.dtype == np.bool_)
else:
found = 'other'
## TODO: transfer this into distributor:
if (found == 'ndarray' or found == 'd2o') and found_boolean == True:
## extract the data of local relevance
local_bool_array = self.distributor.extract_local_data(key)
local_results = self.get_local_data(copy=False)[local_bool_array]
global_results = self.distributor._allgather(local_results)
global_results = np.concatenate(global_results)
return global_results
else:
return self.get_data(key)
return self.get_data(key)
# ## Case 1: key is a boolean array.
# ## -> take the local data portion from key, use this for data
# ## extraction, and then merge the result in a flat numpy array
# if isinstance(key, np.ndarray):
# found = 'ndarray'
# found_boolean = (key.dtype.type == np.bool_)
# elif isinstance(key, distributed_data_object):
# found = 'd2o'
# found_boolean = (key.dtype == np.bool_)
# else:
# found = 'other'
# ## TODO: transfer this into distributor:
# if (found == 'ndarray' or found == 'd2o') and found_boolean == True:
# ## extract the data of local relevance
# local_bool_array = self.distributor.extract_local_data(key)
# local_results = self.get_local_data(copy=False)[local_bool_array]
# global_results = self.distributor._allgather(local_results)
# global_results = np.concatenate(global_results)
# return global_results
#
# else:
# return self.get_data(key)
def __setitem__(self, key, data):
self.set_data(data, key)
......@@ -663,7 +695,6 @@ class distributed_data_object(object):
weights = local_weights,
minlength = minlength)
list_of_counts = self.distributor._allgather(local_counts)
print list_of_counts
counts = np.sum(list_of_counts, axis = 0)
return counts
......@@ -687,7 +718,7 @@ class distributed_data_object(object):
self.hermitian = hermitian
self.data = np.array(data, dtype=self.dtype, copy=copy, order='C')
def set_data(self, data, key, hermitian=False, copy=True, *args, **kwargs):
def set_data(self, data, key, hermitian=False, copy=True, **kwargs):
"""
Stores the supplied data in the region which is specified by key.
The data is distributed according to the distribution strategy. If
......@@ -708,12 +739,18 @@ class distributed_data_object(object):
"""
self.hermitian = hermitian
(slices, sliceified) = self.__sliceify__(key)
self.distributor.disperse_data(data=self.data,
to_slices = slices,
data_update = self.__enfold__(data, sliceified),
copy = copy,
*args, **kwargs)
self.distributor.disperse_data(data = self.data,
to_key = key,
data_update = data,
copy = copy,
**kwargs)
#
# (slices, sliceified) = self.__sliceify__(key)
# self.distributor.disperse_data(data=self.data,
# to_slices = slices,
# data_update = self.__enfold__(data, sliceified),
# copy = copy,
# *args, **kwargs)
def set_full_data(self, data, hermitian=False, copy = True, **kwargs):
"""
......@@ -738,7 +775,6 @@ class distributed_data_object(object):
self.hermitian = hermitian
self.data = self.distributor.distribute_data(data=data, copy = copy,
**kwargs)
def get_local_data(self, key=(slice(None),), copy=True):
"""
......@@ -779,9 +815,10 @@ class distributed_data_object(object):
global_data[key] : numpy.ndarray
"""
(slices, sliceified) = self.__sliceify__(key)
result = self.distributor.collect_data(self.data, slices, **kwargs)
return self.__defold__(result, sliceified)
return self.distributor.collect_data(self.data, key, **kwargs)
# (slices, sliceified) = self.__sliceify__(key)
# result = self.distributor.collect_data(self.data, slices, **kwargs)
# return self.__defold__(result, sliceified)
......@@ -864,90 +901,137 @@ class distributed_data_object(object):
"""
self.data = self.distributor.load_data(alias, path)
def __sliceify__(self, inp):
    """
    Normalize an indexing key into a tuple of pure slice objects.

    Scalar (non-slice) entries are wrapped into length-1 slices so the
    distributor's get/set machinery only ever sees slices.

    Parameters
    ----------
    inp : slice, int, tuple or list
        The raw key passed to get_data/set_data.

    Returns
    -------
    (slices, sliceified) : (tuple of slice, list of bool)
        `slices` is the normalized key; `sliceified[i]` is True exactly
        where entry i was a scalar that got wrapped, so __enfold__ /
        __defold__ can add/remove the artificial axes again.
    """
    sliceified = []
    result = []
    ## normalize the key to a tuple
    if isinstance(inp, tuple):
        x = inp
    elif isinstance(inp, list):
        x = tuple(inp)
    else:
        x = (inp, )

    for i in range(len(x)):
        if isinstance(x[i], slice):
            result += [x[i], ]
            sliceified += [False, ]
        else:
            ## wrap the scalar index into a length-1 slice.
            ## For -1 the naive slice(-1, -1+1) == slice(-1, 0) would be
            ## empty, so an open upper bound must be used instead.
            if x[i] == -1:
                result += [slice(-1, None), ]
            else:
                result += [slice(x[i], x[i] + 1), ]
            sliceified += [True, ]

    return (tuple(result), sliceified)
def __enfold__(self, in_data, sliceified):
## Reshape `in_data` so that it carries an explicit length-1 axis at
## every position that __sliceify__ marked as True (i.e. where the
## original key had a scalar index). The inverse is __defold__.
## NOTE(review): assumes `sliceified` comes from __sliceify__ for the
## same key that selects `in_data` -- confirm at the call sites.
data = np.array(in_data, copy=False)
temp_shape = ()
## j walks through data.shape while temp_shape is being rebuilt
j=0
for i in sliceified:
if i == True:
## insert the artificial length-1 axis
temp_shape += (1,)
try:
## if the input already has a length-1 axis here, consume it
## instead of duplicating it
if data.shape[j] == 1:
j +=1
except(IndexError):
pass
else:
try:
temp_shape += (data.shape[j],)
except(IndexError):
## data has fewer axes than the key: pad with a length-1 axis
temp_shape += (1,)
j += 1
## take into account that the sliceified tuple may be too short, because
## of a non-exhaustive list of slices
for i in range(len(data.shape)-j):
temp_shape += (data.shape[j],)
j += 1
return data.reshape(temp_shape)
def __defold__(self, data, sliceified):
    """
    Inverse of __enfold__: collapse the artificial length-1 axes again.

    Every position flagged True in `sliceified` (i.e. originating from a
    scalar index) is indexed with 0, dropping that axis; all remaining
    positions are kept whole via slice(None).
    """
    selector = tuple(0 if flag == True else slice(None)
                     for flag in sliceified)
    return data[selector]
class _distributor_factory(object):
'''
Comments:
- The distributor's get_data and set_data functions MUST be
supplied with a tuple of slice objects. In case that there was
a direct integer involved, the unfolding will be done by the
helper functions __sliceify__, __enfold__ and __defold__.
'''
def __init__(self):
## store of distributor instances, presumably keyed by their defining
## parameters so get_distributor can reuse them -- TODO confirm against
## get_distributor, which is not visible in this chunk
self.distributor_store = {}
def parse_kwargs(self, strategy = None, kwargs = {}):
# def parse_kwargs(self, strategy = None, kwargs = {}):
# return_dict = {}
# if strategy == 'not':
# pass
# ## These strategies use MPI and therefore accept a MPI.comm object
# if strategy == 'fftw' or strategy == 'equal' or strategy == 'freeform':
# if kwargs.has_key('comm'):
# return_dict['comm'] = kwargs['comm']
#
# return return_dict
def parse_kwargs(self, distribution_strategy,
global_data = None, global_shape = None,
local_data = None, local_shape = None,
alias = None, path = None,
dtype = None, comm = None, **kwargs):
return_dict = {}
if strategy == 'not':
pass
if strategy == 'fftw' or strategy == 'equal':
if kwargs.has_key('comm'):
return_dict['comm'] = kwargs['comm']
## Check for an hdf5 file and open it if given
if FOUND['h5py'] == True and alias is not None:
## set file path
file_path = path if (path is not None) else alias
## open hdf5 file
if FOUND['h5py_parallel'] == True and FOUND['MPI'] == True:
f = h5py.File(file_path, 'r', driver='mpio', comm=comm)
else:
f = h5py.File(file_path, 'r')
## open alias in file
dset = f[alias]
else:
dset = None
## Parse the MPI communicator
if distribution_strategy in ['equal', 'fftw', 'freeform']:
if comm is None:
raise ValueError(about._errors.cstring(
"ERROR: The distributor needs a MPI communicator object comm!"))
else:
return_dict['comm'] = comm
## Parse the datatype
if distribution_strategy in ['not', 'equal', 'fftw'] and \
(dset is not None):
dtype = dset.dtype.type
elif distribution_strategy in ['not', 'equal', 'fftw', 'freeform']:
if dtype is None:
if global_data is None and local_data is None:
raise ValueError(about._errors.cstring(
"ERROR: Neither global_data nor local_data nor dtype supplied!"))
elif global_data is not None:
try:
dtype = global_data.dtype.type
except(AttributeError):
try:
dtype = global_data.dtype
except(AttributeError):
dtype = np.array(global_data).dtype.type
elif local_data is not None:
try:
dtype = local_data.dtype.type
except(AttributeError):
try:
dtype = local_data.dtype
except(AttributeError):
dtype = np.array(local_data).dtype.type
else:
dtype = np.dtype(dtype).type
return_dict['dtype'] = dtype
## Parse the shape
## Case 1: global-type slicer
if distribution_strategy in ['not', 'equal', 'fftw']:
if dset is not None:
global_shape = dset.shape
elif global_data is not None and np.isscalar(global_data) == False:
global_shape = global_data.shape
elif global_shape is not None:
global_shape = tuple(global_shape)
else:
raise ValueError(about._errors.cstring(
"ERROR: Neither non-0-dimensional global_data nor " +
"global_shape nor hdf5 file supplied!"))
if global_shape == ():
raise ValueError(about._errors.cstring(
"ERROR: global_shape == () is not valid shape!"))
if np.any(np.array(global_shape) == 0):
raise ValueError(about._errors.cstring(
"ERROR: Dimension of size 0 occurred!"))
return_dict['global_shape'] = global_shape
## Case 2: local-type slicer
elif distribution_strategy in ['freeform']: