From c8f05dd829bc166419d24ce5546d1224626a6b00 Mon Sep 17 00:00:00 2001 From: Henning Glawe <glaweh@debian.org> Date: Wed, 24 Jan 2018 12:03:35 +0100 Subject: [PATCH] synchronize 'archive.py' with the version from 'encyclopedia-pre-processing' 3dbeda49095faf438840110e9b6e3fccd06a1aef -- Allowed the usage of schemes other than nmd:// in the JSON files. This allows testing with files generated on a local machine. f689e878a4775b1c4a06a70e5aabbaf2d42be25c -- return code-specific section (named '^x_.*_section.*') as ArchiveSectionJSON e24727f9a67462d59c5a580888edbc73ac131aeb -- comment on code-specific x_*_section be56a3083cf035ea0d7d53946581ee1ac4c9e0b4 -- fix repeating-values issue in archive.py (instead of N actual values, N times the first value was returned when looking up indices) ae7880e302553768a40fa16cef06b848ca33359d -- moved reading mainfile_uri to archiveiterator 75f571c6c99c5377da46b6f56e276689327dfa6e -- Added pid to mongodb. Fixed tests for db changes. Fixed schema and postprocessor for empty pids and repository download uris. --- common/python/nomadcore/archive.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/common/python/nomadcore/archive.py b/common/python/nomadcore/archive.py index ea652aa..89fd939 100644 --- a/common/python/nomadcore/archive.py +++ b/common/python/nomadcore/archive.py @@ -9,6 +9,12 @@ import h5py import numpy as np from abc import ABCMeta, abstractmethod from io import open +import re + +import logging + + +LOGGER = logging.getLogger(__name__) class ArchiveSection(object): @@ -656,17 +662,16 @@ class ArchiveSectionHDF5(ArchiveSection): # .format(child_path) # ) - index_rows = index_data[test_index] - # If the value can have multiple shapes, the values are split into # different tables. For each table there is a local index in the # second column of the index table that we must use. 
data = [] - for index_row in index_rows: + for row_i in test_index: + index_row = index_data[row_i] if index_row.shape != (1,): data_index = index_row[1] else: - data_index = test_index[0] + data_index = row_i # The data name may depend on the shape, and if so, the # shape is appended to the name as base64 fields @@ -691,7 +696,7 @@ class ArchiveSectionHDF5(ArchiveSection): data.append(i_data) # If one object returned, remove the outermost list - if len(index_rows) == 1: + if len(test_index) == 1: if data[0].shape == (): data = np.array([data[0]]) else: @@ -734,12 +739,7 @@ class ArchiveJSON(Archive): # Get the repository name from mainFileUri mainfile_uri = json_root["mainFileUri"] - if not mainfile_uri.startswith("nmd://"): - raise ValueError( - "The mainFileUri in the JSON Archive file '{}' is invalid." - .format(filepath) - ) - repository_name = mainfile_uri[6:] + repository_name = mainfile_uri.split("://", 1)[1] repository_name = repository_name.split("/", 1)[0] root_section = { @@ -849,6 +849,9 @@ class ArchiveSectionJSON(ArchiveSection): is_section = False if path.startswith("section"): is_section = True + elif re.match(r'^x_\S+_section', path): + # code-specific section + is_section = True # If no index specified, try to get as concrete value or as a list of # sections -- GitLab