dc85e5b6
Commit
dc85e5b6
authored
8 years ago
by
Lauri Himanen
Browse files
Options
Downloads
Patches
Plain Diff
Made archive.py compatible with python>2.6.
parent
7bbbea2d
Showing 1 changed file: common/python/nomadcore/archive.py (+55 −29)
```diff
+from __future__ import with_statement
+from __future__ import division
+from __future__ import absolute_import
 import os
 import math
 import json
...
@@ -5,9 +8,11 @@ import string
 import h5py
 import numpy as np
 from abc import ABCMeta, abstractmethod
+from io import open
 
-class ArchiveSection(metaclass=ABCMeta):
+class ArchiveSection(object):
+    __metaclass__ = ABCMeta
     """
     Defines a storage independent, dictionary like interface to a section
     inside an archive file with the possibility to do recursive searches and
     indexing.
```
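The metaclass change above is the core compatibility fix in this hunk: the `class ArchiveSection(metaclass=ABCMeta)` keyword syntax is Python-3-only, while the `__metaclass__` class attribute is the Python 2 spelling (Python 3 silently ignores it). A minimal sketch of the two spellings plus a fully version-agnostic alternative; the `Shape` class is hypothetical, not part of archive.py:

```python
from abc import ABCMeta, abstractmethod


class Shape(object):
    # Python 2 reads this attribute to pick the metaclass; Python 3
    # ignores it and would need `class Shape(metaclass=ABCMeta):`.
    __metaclass__ = ABCMeta

    @abstractmethod
    def area(self):
        pass


# Calling the metaclass directly produces a base class that is
# abstract on both Python 2 and Python 3:
AbstractBase = ABCMeta("AbstractBase", (object,), {})
```

Inheriting from `object` in the same change makes `ArchiveSection` a new-style class on Python 2, which the two-argument `super(...)` calls in the later hunks require.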
```diff
@@ -404,7 +409,7 @@ class ArchiveHDF5(Archive):
     datasets.
     """
     def __init__(self, filepath, use_write_cache=False):
-        super().__init__(filepath, use_write_cache)
+        super(ArchiveHDF5, self).__init__(filepath, use_write_cache)
         h5_root = h5py.File(filepath, "r")
         self.index_cache = {}
         self.setup(h5_root)
```
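The `super()` rewrite here (repeated in the two later `__init__` hunks) follows the same rule: the zero-argument form is Python-3-only and fails at runtime on Python 2 with `TypeError: super() takes at least 1 argument`, while the explicit two-argument form works on both. A small sketch with hypothetical classes:

```python
class Base(object):
    def __init__(self, filepath, use_write_cache=False):
        self.filepath = filepath
        self.use_write_cache = use_write_cache


class Child(Base):
    def __init__(self, filepath, use_write_cache=False):
        # Portable spelling: name the class and instance explicitly.
        # On Python 3 alone, `super().__init__(...)` would suffice.
        super(Child, self).__init__(filepath, use_write_cache)
```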
```diff
@@ -434,7 +439,7 @@ class ArchiveSectionHDF5(ArchiveSection):
     BASE64DIGITS = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+" + "/"
 
     def __init__(self, data, path, archive, index_datas, local_index):
-        super().__init__(data, path, archive)
+        super(ArchiveSectionHDF5, self).__init__(data, path, archive)
         _, names, indices = self.get_path_parts(path)
         # Here we drop out the indices of the repository and calculation
         # section, as they are "None"
```
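The `BASE64DIGITS` constant visible in the context lines assembles the standard 64-character base64 alphabet from `string` module constants. A quick sanity check of that construction:

```python
import string

BASE64DIGITS = (string.ascii_uppercase + string.ascii_lowercase
                + string.digits + "+" + "/")

assert len(BASE64DIGITS) == 64  # 26 + 26 + 10 + 2 characters
assert BASE64DIGITS[0] == "A"   # digit value 0
assert BASE64DIGITS[63] == "/"  # digit value 63
```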
```diff
@@ -641,37 +646,58 @@ class ArchiveSectionHDF5(ArchiveSection):
                 "Could not find value at path '{}'."
                 .format(child_path)
             )
-        if test_index.size > 1:
-            raise ValueError(
-                "The HDF file contains more than one dataset for the "
-                "path '{}'. "
-                .format(child_path)
-            )
-        index_row = index_data[test_index[0]]
+        # This error is currently disabled, because it seems that the
+        # metainfo system supports repeating scalar values for one section.
+        # if test_index.size > 1:
+        #     raise ValueError(
+        #         "The HDF file contains more than one dataset for the "
+        #         "path '{}'. "
+        #         .format(child_path)
+        #     )
+        index_rows = index_data[test_index]
 
         # If the value can have multiple shapes, the values are split into
         # different tables. For each table there is a local index in the
         # second column of the index table that we must use.
-        if index_row.shape != (1,):
-            data_index = index_row[1]
-        else:
-            data_index = test_index[0]
-
-        # The data name may depend on the shape, and if so, the
-        # shape is appended to the name as base64 fields
-        data_path = name + "-v"
-        index_shape = index_data.shape
-        if index_shape[1] > 2:
-            for dim in index_data[data_index][2:]:
-                base64dim = self.base64convert(dim)
-                data_path += ".{}".format(base64dim)
-        data = self._data[data_path][data_index]
-
-        # Convert bytestrings to regular strings
-        if data.dtype == np.object:
-            data = np.array(data, dtype=np.str)
+        data = []
+        for index_row in index_rows:
+            if index_row.shape != (1,):
+                data_index = index_row[1]
+            else:
+                data_index = test_index[0]
+
+            # The data name may depend on the shape, and if so, the
+            # shape is appended to the name as base64 fields
+            data_path = name + "-v"
+            index_shape = index_data.shape
+            if index_shape[1] > 2:
+                for dim in index_data[data_index][2:]:
+                    base64dim = self.base64convert(dim)
+                    data_path += ".{}".format(base64dim)
+            i_data = self._data[data_path][data_index]
+
+            # Convert bytestrings to regular strings
+            if i_data.dtype == np.object:
+                i_data = np.array([x.decode("utf-8") for x in i_data])
+
+            # Gather scalar values to a 1D list
+            if i_data.shape == (1,):
+                data.append(i_data[0])
+            else:
+                data.append(i_data)
+
+        # If one object returned, remove the outermost list
+        if len(index_rows) == 1:
+            if data[0].shape == ():
+                data = np.array([data[0]])
+            else:
+                data = data[0]
+        else:
+            data = np.array(data)
 
         return data
```
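The rewritten lookup replaces a single-row read (`index_data[test_index[0]]`) with NumPy fancy indexing over all rows matching the path, then gathers one value per row and unwraps the list again when only one row matched. A standalone sketch of that gather-and-unwrap flow; the index table and the stand-in for the HDF5 read are made up for illustration. (As an aside, the `np.object` and `np.str` aliases used in the diff were removed in NumPy 1.24; modern code uses the builtins `object` and `str`.)

```python
import numpy as np

# Hypothetical index table: one row per stored dataset for a path;
# column 0 is a data index, column 1 a local index within a table.
index_data = np.array([[0, 0], [1, 1], [2, 0]])
test_index = np.array([0, 2])        # rows that matched the queried path

index_rows = index_data[test_index]  # fancy indexing -> shape (2, 2)

data = []
for index_row in index_rows:
    # Stand-in for `self._data[data_path][data_index]` in the real code.
    i_data = np.array([float(index_row[0])])
    if i_data.shape == (1,):
        data.append(i_data[0])       # unwrap scalars to keep a flat list
    else:
        data.append(i_data)

# A single matching row keeps the old scalar/array return type.
result = data[0] if len(index_rows) == 1 else np.array(data)
print(result)                        # -> [0. 2.]
```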
```diff
@@ -687,7 +713,7 @@ class ArchiveSectionHDF5(ArchiveSection):
             digits.append(ArchiveSectionHDF5.BASE64DIGITS[x % base])
             x = math.floor(x / base)
-        return ''.join(digits)
+        return "".join(digits)
 
 
 class ArchiveJSON(Archive):
```
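Only two lines of `base64convert` appear as context here, but they show the shape of the algorithm: repeatedly take `x % base` as the next digit and floor-divide `x`. A sketch of such a conversion, consistent with those lines; the loop condition and default argument are assumptions, not taken from archive.py:

```python
import math
import string

BASE64DIGITS = (string.ascii_uppercase + string.ascii_lowercase
                + string.digits + "+" + "/")


def base64convert(x, base=64):
    # Emits the least significant digit first, mirroring the two
    # context lines of the diff; on Python 3, math.floor returns int.
    digits = []
    while True:
        digits.append(BASE64DIGITS[x % base])
        x = math.floor(x / base)
        if x == 0:
            break
    return "".join(digits)


assert base64convert(0) == "A"    # single digit 0 -> 'A'
assert base64convert(64) == "AB"  # digits [0, 1], least significant first
```

The one-line change in this hunk (`''.join` to `"".join`) is purely stylistic; both quote styles are valid on every Python version.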
```diff
@@ -698,7 +724,7 @@ class ArchiveJSON(Archive):
     become a problem with big files and parallel execution on the same machine.
     """
     def __init__(self, filepath, use_write_cache=False):
-        super().__init__(filepath, use_write_cache)
+        super(ArchiveJSON, self).__init__(filepath, use_write_cache)
         with open(filepath, "r") as fin:
             json_root = json.load(fin)
```
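The unchanged `open(filepath, "r")` call here is the reason for the `from io import open` added at the top of the file: on Python 2 it shadows the builtin with the Python-3-style `open`, so the file is read as a text stream and `json.load` sees unicode on both Python 2 and 3. A small round-trip sketch of that pattern; the file name is a throwaway example:

```python
import json
from io import open  # on Python 2, swaps in the py3-style open


# Write a throwaway JSON file; io.open in text mode expects unicode,
# hence the u-prefixed literal (a no-op on Python 3).
with open("example.json", "w", encoding="utf-8") as fout:
    fout.write(u'{"section_run": []}')

with open("example.json", "r", encoding="utf-8") as fin:
    json_root = json.load(fin)

assert json_root == {"section_run": []}
```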