Commit 3d5808fc authored by Alvin Noe Ladines, committed by Markus Scheidgen

Alvin read metadata

parent 5ba8f3b6
@@ -285,6 +285,8 @@ max_upload_size = 32 * (1024 ** 3)
raw_file_strip_cutoff = 1000
use_empty_parsers = False
reprocess_unmatched = True
aux_metadata_file = 'nomad_metadata'
aux_metadata_exts = ('json', 'yaml')
def normalize_loglevel(value, default_level=logging.INFO):
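For orientation, the two new config values describe which aux metadata files are looked for next to a mainfile (or in a parent directory). A minimal sketch of the candidate paths they imply, using a hypothetical directory name that is not taken from the commit:

import os

# hypothetical mainfile directory, for illustration only
mainfile_dir = 'uploads/example_upload/raw/calc_1'

# yields .../nomad_metadata.json and .../nomad_metadata.yaml
candidates = [
    os.path.join(mainfile_dir, 'nomad_metadata.%s' % ext)
    for ext in ('json', 'yaml')]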
@@ -34,6 +34,9 @@ from datetime import datetime
from pymongo import UpdateOne
import hashlib
from structlog.processors import StackInfoRenderer, format_exc_info, TimeStamper
import yaml
import json
from cachetools import cached, LRUCache
from nomad import utils, config, infrastructure, search, datamodel
from nomad.files import (
@@ -42,7 +45,7 @@ from nomad.files import (
from nomad.processing.base import Proc, process, task, PENDING, SUCCESS, FAILURE
from nomad.parsing.parsers import parser_dict, match_parser
from nomad.normalizing import normalizers
-from nomad.datamodel import EntryArchive
+from nomad.datamodel import EntryArchive, EditableUserMetadata
from nomad.archive import query_archive
from nomad.datamodel.encyclopedia import EncyclopediaMetadata
@@ -51,6 +54,25 @@ section_metadata = datamodel.EntryArchive.section_metadata.name
section_workflow = datamodel.EntryArchive.section_workflow.name
# user-editable metadata quantities, keyed by quantity name
_editable_metadata = {
    quantity.name: quantity for quantity in EditableUserMetadata.m_def.definitions}
@cached(cache=LRUCache(maxsize=100))
def metadata_cached(path):
    # Read an aux metadata file at the given path (given without extension), trying the
    # configured extensions in order. Parsed results are cached per path.
    for ext in config.aux_metadata_exts:
        full_path = '%s.%s' % (path, ext)
        if os.path.isfile(full_path):
            with open(full_path) as f:
                if full_path.endswith('json'):
                    return json.load(f)
                elif full_path.endswith('yaml'):
                    return yaml.load(f, Loader=yaml.FullLoader)
                else:
                    return {}
    return {}
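# Usage sketch (hypothetical path, not part of this diff): the function is called with
# the aux metadata path *without* extension and returns the parsed dict, or {} if no
# json/yaml file exists, e.g.
#     metadata_cached('/uploads/example_upload/raw/calc_1/nomad_metadata')
# Repeated lookups for the same path are served from the LRU cache.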
def _pack_log_event(logger, method_name, event_dict):
    try:
        log_data = dict(event_dict)
@@ -501,6 +523,33 @@ class Calc(Proc):
except Exception as e:
self.fail('normalizer failed with exception', exc_info=e, error=str(e), **context)
    def _read_metadata_from_file(self, logger):
        # An aux metadata file (name defined in nomad.config: nomad_metadata.yaml/json)
        # can be placed in the directory containing the mainfile or in any directory
        # above it. The directory with the mainfile has the highest priority.
        metadata_file = config.aux_metadata_file
        metadata_dir = os.path.dirname(self.mainfile_file.os_path)

        metadata = {}
        while metadata_dir:
            metadata_part = metadata_cached(os.path.join(metadata_dir, metadata_file))

            # setdefault only fills keys that are still missing, so values found
            # closer to the mainfile take precedence
            for key, val in metadata_part.items():
                metadata.setdefault(key, val)

            parent_dir = os.path.dirname(metadata_dir)
            if parent_dir == metadata_dir:
                # reached the top of the directory tree, stop walking up
                break
            metadata_dir = parent_dir

        for key, val in metadata.items():
            definition = _editable_metadata.get(key, None)
            if not definition:
                logger.warn('Cannot set metadata %s' % key)
                continue
            self._entry_metadata.m_set(definition, val)
    @task
    def archiving(self):
        ''' The *task* that encapsulates all archival related actions. '''
@@ -509,6 +558,9 @@ class Calc(Proc):
        self._entry_metadata.apply_domain_metadata(self._parser_results)
        self._entry_metadata.processed = True

        # read metadata from file
        self._read_metadata_from_file(logger)

        # persist the calc metadata
        with utils.timer(logger, 'saved calc metadata', step='metadata'):
            self.apply_entry_metadata(self._entry_metadata)
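To make the priority rule in _read_metadata_from_file concrete, here is a minimal, self-contained sketch of the merge behaviour (the directory layout and values are hypothetical, not taken from the commit): metadata found closer to the mainfile wins, because setdefault only fills keys that are still missing.

# hypothetical aux metadata found at two levels of an upload
parent_metadata = {'comment': 'upload-wide comment', 'with_embargo': True}
mainfile_dir_metadata = {'comment': 'Calculation 1 of 2'}

# walk from the mainfile directory upwards; setdefault keeps the first
# (deepest, highest-priority) value for every key
merged = {}
for part in (mainfile_dir_metadata, parent_metadata):
    for key, val in part.items():
        merged.setdefault(key, val)

assert merged == {'comment': 'Calculation 1 of 2', 'with_embargo': True}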
@@ -451,3 +451,28 @@ def test_ems_data(proc_infra, test_user):
    with upload.entries_metadata() as entries:
        assert_upload_files(upload.upload_id, entries, StagingUploadFiles, published=False)
        assert_search_upload(entries, additional_keys, published=False)
def test_read_metadata_from_file(proc_infra, test_user, other_test_user):
    upload = run_processing(
        ('test_upload', 'tests/data/proc/examples_with_metadata_file.zip'), test_user)

    calcs = Calc.objects(upload_id=upload.upload_id)

    comment = [None, 'Calculation 1 of 2', 'Calculation 2 of 2']
    with_embargo = [True, True, False]
    references = [None, ['http://test'], ['http://ttest']]
    coauthors = [[], [other_test_user], []]

    for i in range(len(calcs)):
        entry_metadata = calcs[i].entry_metadata(upload.upload_files)
        assert entry_metadata.comment == comment[i]
        assert entry_metadata.with_embargo == with_embargo[i]
        assert entry_metadata.references == references[i]
        entry_coauthors = [a.m_proxy_resolve() for a in entry_metadata.coauthors]
        for j in range(len(entry_coauthors)):
            assert entry_coauthors[j].user_id == coauthors[i][j].user_id
            assert entry_coauthors[j].username == coauthors[i][j].username
            assert entry_coauthors[j].email == coauthors[i][j].email
            assert entry_coauthors[j].first_name == coauthors[i][j].first_name
            assert entry_coauthors[j].last_name == coauthors[i][j].last_name
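The test relies on aux metadata files shipped inside examples_with_metadata_file.zip, whose content is not shown in this commit. A hypothetical nomad_metadata.yaml consistent with the asserted values for one calculation could look like the string below, parsed here with the same PyYAML call used in metadata_cached:

import yaml

# hypothetical aux metadata content, chosen to match the assertions above;
# the real fixture in tests/data/proc is not reproduced here
example = '''
comment: 'Calculation 1 of 2'
references: ['http://test']
with_embargo: true
'''

metadata = yaml.load(example, Loader=yaml.FullLoader)
assert metadata['comment'] == 'Calculation 1 of 2'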