Commit dc85e5b6 authored by Lauri Himanen

Made archive.py compatible with python>2.6.

parent 7bbbea2d
+from __future__ import with_statement
+from __future__ import division
+from __future__ import absolute_import
 import os
 import math
 import json
@@ -5,9 +8,11 @@ import string
 import h5py
 import numpy as np
 from abc import ABCMeta, abstractmethod
+from io import open


-class ArchiveSection(metaclass=ABCMeta):
+class ArchiveSection(object):
+    __metaclass__ = ABCMeta
     """Defines a storage independent, dictionary like interface to a section
     inside an archive file with the possibility to do recursive searches and
     indexing.
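For context, the sketch below shows the Python 2 compatible spellings that this commit switches to: an explicit __metaclass__ attribute instead of the Python 3 only metaclass= keyword, explicit arguments to super(), and io.open shadowing the builtin open. The class names are illustrative only, not definitions from archive.py.

# Sketch of the Python 2/3 compatible idioms applied in this commit.
# "Shape" and "Circle" are made-up names, not part of archive.py.
from __future__ import division, absolute_import, with_statement
from abc import ABCMeta, abstractmethod
from io import open  # gives the Python 3 style open() on Python 2


class Shape(object):
    # Python 2 reads the metaclass from this attribute; Python 3 ignores it
    # and would need "class Shape(metaclass=ABCMeta)" instead.
    __metaclass__ = ABCMeta

    def __init__(self, name):
        self.name = name

    @abstractmethod
    def area(self):
        pass


class Circle(Shape):
    def __init__(self, radius):
        # Explicit arguments work on both Python 2 and 3; the zero-argument
        # super().__init__() form used before this commit is Python 3 only.
        super(Circle, self).__init__("circle")
        self.radius = radius

    def area(self):
        return 3.14159 * self.radius ** 2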
@@ -404,7 +409,7 @@ class ArchiveHDF5(Archive):
     datasets.
     """
     def __init__(self, filepath, use_write_cache=False):
-        super().__init__(filepath, use_write_cache)
+        super(ArchiveHDF5, self).__init__(filepath, use_write_cache)
         h5_root = h5py.File(filepath, "r")
         self.index_cache = {}
         self.setup(h5_root)
@@ -434,7 +439,7 @@ class ArchiveSectionHDF5(ArchiveSection):
     BASE64DIGITS = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+" + "/"

     def __init__(self, data, path, archive, index_datas, local_index):
-        super().__init__(data, path, archive)
+        super(ArchiveSectionHDF5, self).__init__(data, path, archive)
         _, names, indices = self.get_path_parts(path)
         # Here we drop out the indices of the repository and calculation
         # section, as they are "None"
@@ -641,37 +646,58 @@ class ArchiveSectionHDF5(ArchiveSection):
                 "Could not find value at path '{}'."
                 .format(child_path)
             )
-        if test_index.size > 1:
-            raise ValueError(
-                "The HDF file contains more than one dataset for the "
-                "path '{}'. "
-                .format(child_path)
-            )
-        index_row = index_data[test_index[0]]
+        # This error is currently disabled, because it seems that the
+        # metainfo system supports repeating scalar values for one section.
+        # if test_index.size > 1:
+            # raise ValueError(
+                # "The HDF file contains more than one dataset for the "
+                # "path '{}'. "
+                # .format(child_path)
+            # )
+        index_rows = index_data[test_index]
         # If the value can have multiple shapes, the values are split into
         # different tables. For each table there is a local index in the
         # second column of the index table that we must use.
-        if index_row.shape != (1,):
-            data_index = index_row[1]
-        else:
-            data_index = test_index[0]
+        data = []
+        for index_row in index_rows:
+            if index_row.shape != (1,):
+                data_index = index_row[1]
+            else:
+                data_index = test_index[0]
-        # The data name may depend on the shape, and if so, the
-        # shape is appended to the name as base64 fields
-        data_path = name + "-v"
-        index_shape = index_data.shape
-        if index_shape[1] > 2:
+            # The data name may depend on the shape, and if so, the
+            # shape is appended to the name as base64 fields
+            data_path = name + "-v"
-            for dim in index_data[data_index][2:]:
-                base64dim = self.base64convert(dim)
-                data_path += ".{}".format(base64dim)
-        data = self._data[data_path][data_index]
+            index_shape = index_data.shape
+            if index_shape[1] > 2:
+                data_path = name + "-v"
+                for dim in index_data[data_index][2:]:
+                    base64dim = self.base64convert(dim)
+                    data_path += ".{}".format(base64dim)
+            i_data = self._data[data_path][data_index]
+            # Convert bytestrings to regular strings
+            if i_data.dtype == np.object:
+                i_data = np.array([x.decode("utf-8") for x in i_data])
+            # Gather scalar values to a 1D list
+            if i_data.shape == (1,):
+                data.append(i_data[0])
+            else:
+                data.append(i_data)
-        # Convert bytestrings to regular strings
-        if data.dtype == np.object:
-            data = np.array(data, dtype=np.str)
+        # If one object returned, remove the outermost list
+        if len(index_rows) == 1:
+            if data[0].shape == ():
+                data = np.array([data[0]])
+            else:
+                data = data[0]
+        else:
+            data = np.array(data)
         return data
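The decode step added above exists because h5py hands back variable-length string datasets as object arrays of bytes objects. The standalone sketch below (throwaway file and dataset names, not from the repository) reproduces that behaviour and the same decode-to-str conversion.

# Standalone sketch: h5py returns variable-length strings as bytes objects
# in an object-dtype array, so they are decoded to str by hand.
# "demo.h5" and "names" are made-up names for this example only.
import h5py
import numpy as np

with h5py.File("demo.h5", "w") as f:
    dt = h5py.special_dtype(vlen=bytes)          # variable-length bytestrings
    ds = f.create_dataset("names", (3,), dtype=dt)
    ds[...] = [b"H", b"He", b"Li"]

with h5py.File("demo.h5", "r") as f:
    raw = f["names"][...]                        # object array holding bytes
    decoded = np.array([x.decode("utf-8") for x in raw])

print(decoded)                                   # -> ['H' 'He' 'Li']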
@@ -687,7 +713,7 @@ class ArchiveSectionHDF5(ArchiveSection):
             digits.append(ArchiveSectionHDF5.BASE64DIGITS[x % base])
             x = math.floor(x/base)
-        return ''.join(digits)
+        return "".join(digits)
@@ -698,7 +724,7 @@ class ArchiveJSON(Archive):
     become a problem with big files and parallel execution on the same machine.
     """
     def __init__(self, filepath, use_write_cache=False):
-        super().__init__(filepath, use_write_cache)
+        super(ArchiveJSON, self).__init__(filepath, use_write_cache)
         with open(filepath, "r") as fin:
             json_root = json.load(fin)
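The io.open import added at the top of the file is what keeps this constructor uniform across interpreters: text-mode reads yield unicode on Python 2 as well, so json.load sees the same input either way, and the whole JSON archive is read into memory in one call. A minimal illustration with a made-up file name:

# Minimal illustration of the ArchiveJSON loading pattern; the file name
# and contents are invented for this example.
from __future__ import with_statement
from io import open   # text mode yields unicode on Python 2 as well
import json

filepath = "demo_archive.json"

with open(filepath, "w", encoding="utf-8") as fout:
    fout.write(u'{"example": [1, 2, 3]}')

with open(filepath, "r", encoding="utf-8") as fin:
    json_root = json.load(fin)

print(json_root["example"])   # -> [1, 2, 3]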
......