NOTE(review): line breaks and indentation in this patch were reconstructed from
a whitespace-collapsed copy; verify the context lines against the target file
before applying. The blob ids on the "index" lines are those of the original
commit. Both keys() methods now return lists instead of generators, because
ArchiveSectionHDF5.__len__ calls len(self.keys()).

diff --git a/common/python/nomadcore/archive.py b/common/python/nomadcore/archive.py
index 89fd939bbca7359deada4fbd3d2502f984b9986f..227bb6131c78a36ff16b2e61f29bf2b366c5a601 100644
--- a/common/python/nomadcore/archive.py
+++ b/common/python/nomadcore/archive.py
@@ -37,6 +37,21 @@ class ArchiveSection(object):
         self._path = path
         self._archive = archive
 
+    def is_filtered(self, name):
+        """Return ``name`` if it should be visible when recursing through the
+        section, or None if it is one of the bookkeeping keys ("gIndex",
+        "name", "references", "type") that are filtered out.
+        """
+        filtered = set([
+            "gIndex",
+            "name",
+            "references",
+            "type",
+        ])
+        if name not in filtered:
+            return name
+        return None
+
     def get_by_path(self, path):
         """Used to query the ArchiveSection recursively with a simple syntax
         that also allows indexing.
@@ -65,7 +80,7 @@ class ArchiveSection(object):
                 information within the metainfo hierarchy. See the examples.
 
         Returns:
-            ArchiveSection, a list of ArchiveSections, a concrete value
+            One of the following: ArchiveSection, a list of ArchiveSections or a concrete value
             corresponding to the given query,
         """
         parts, _, _ = self.get_path_parts(path)
@@ -82,6 +97,10 @@ class ArchiveSection(object):
                 current_data = current_data.get_child(part)
 
             if i_part == n_parts - 1:
+                # Reject names that is_filtered() hides or maps to another name.
+                filtered = self.is_filtered(part)
+                if filtered is None or filtered != part:
+                    raise KeyError("Value for '{}' could not be found".format(current_path))
                 return current_data
 
     def get(self, key, default=None):
@@ -192,6 +211,7 @@ class ArchiveSection(object):
         # Check that the value has not been deleted
         full_path = "{}/{}".format(self._path, key)
         deleted = self.check_deletions(full_path)
+
         if deleted:
             raise KeyError("Value for '{}' has not been set.".format(full_path))
         try:
@@ -457,6 +477,16 @@ class ArchiveSectionHDF5(ArchiveSection):
     def __len__(self):
         return len(self.keys())
 
+    def is_filtered(self, key):
+        """Return the public name for an HDF5 key, or None to hide the key."""
+        if key.endswith("-index"):
+            return None
+        # Drop whatever follows the last "." before testing for "-v".
+        key_without_size = key.rsplit(".", 1)[0]
+        if key_without_size.endswith("-v"):
+            return key_without_size[:-2]
+        return key
+
     def __contains__(self, key):
         try:
             self[key]
@@ -467,6 +497,9 @@ class ArchiveSectionHDF5(ArchiveSection):
     def items(self):
         local_index = 0
         for key, value in self._data.items():
+            key_filtered = self.is_filtered(key)
+            if key_filtered is None:
+                continue
             if isinstance(value, h5py.Group):
                 index_datas = self._index_datas[:]
                 index_data = self._data.get("{}-index".format(key))
@@ -474,7 +507,7 @@ class ArchiveSectionHDF5(ArchiveSection):
                     index_datas = []
                 else:
                     index_datas.append(index_data)
-                yield (key, ArchiveSectionHDF5(
+                yield (key_filtered, ArchiveSectionHDF5(
                     value,
                     "{}/{}".format(self._path, key),
                     self._archive,
@@ -482,15 +515,21 @@ class ArchiveSectionHDF5(ArchiveSection):
                     local_index)
                 )
             else:
-                yield (key, value)
+                yield (key_filtered, value)
             local_index += 1
 
     def keys(self):
-        return self._data.keys()
+        # Return a list rather than a generator: this class's __len__ calls
+        # len(self.keys()), which raises TypeError for a generator.
+        names = (self.is_filtered(key) for key in self._data.keys())
+        return [name for name in names if name is not None]
 
     def values(self):
         local_index = 0
         for key, value in self._data.items():
+            key_filtered = self.is_filtered(key)
+            if key_filtered is None:
+                continue
             if isinstance(value, h5py.Group):
                 index_datas = self._index_datas[:]
                 index_data = self._data.get("{}-index".format(key))
@@ -763,6 +802,8 @@ class ArchiveJSON(Archive):
 
 
 class ArchiveSectionJSON(ArchiveSection):
+    """Represents a section inside a JSON-file.
+    """
     def __len__(self):
         return len(self._data)
 
@@ -775,6 +816,8 @@ class ArchiveSectionJSON(ArchiveSection):
 
     def items(self):
         for key, value in self._data.items():
+            if self.is_filtered(key) is None:
+                continue
             if isinstance(value, dict):
                 yield (key, ArchiveSectionJSON(value, "{}/{}".format(self._path, key), self._archive))
             else:
@@ -787,10 +830,15 @@ class ArchiveSectionJSON(ArchiveSection):
                 yield (key, value)
 
     def keys(self):
-        return self._data.keys()
+        # A list (not a generator) keeps len(section.keys()) working, as it
+        # did when the underlying dict's keys were returned directly.
+        names = self._data.keys()
+        return [key for key in names if self.is_filtered(key) is not None]
 
     def values(self):
         for key, value in self._data.items():
+            if self.is_filtered(key) is None:
+                continue
             if isinstance(value, dict):
                 yield ArchiveSectionJSON(value, "{}/{}".format(self._path, key), self._archive)
             else:
diff --git a/requirements.txt b/requirements.txt
index ab907f974d4d934540f3e973040bc94675bfdaf9..c0e8bcf4d07c9b7be545ed61bd641c4f43db0f1b 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,4 +14,4 @@ pyyaml
 h5py
 hjson
 enum34
-systax
+systax==0.1.2