diff --git a/nomad/config.py b/nomad/config.py
index 1fdce93b18dd0c92eb9ccf4cc030d73528badf21..e46db315040ff483b3c8c2f2943fc389d8d2d36e 100644
--- a/nomad/config.py
+++ b/nomad/config.py
@@ -383,6 +383,17 @@ process = NomadConfig(
     metadata_file_extensions=('json', 'yaml', 'yml')
 )
 
+rfc3161_timestamp = NomadConfig(
+    # rfc3161ng timestamping server
+    server='http://time.certum.pl/',
+    # cert PATH used for rfc3161ng timestamping server
+    cert=None,
+    # hash algorithm for rfc3161ng timestamping server, depends on the server
+    hash_algorithm='sha256',
+    username=None,
+    password=None
+)
+
 bundle_import = NomadConfig(
     # Basic settings
     allow_bundles_from_oasis=True,  # If oasis admins can "push" bundles to this NOMAD deployment
diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py
index 78f4b5791f4889b7d6f06ab875ac607892c40bd3..0249e890ba0c9b50fc8003d66cc1ae2c166ff9dd 100644
--- a/nomad/datamodel/datamodel.py
+++ b/nomad/datamodel/datamodel.py
@@ -20,6 +20,8 @@
 
 from typing import List, Any
 from enum import Enum
+
+import rfc3161ng
 from elasticsearch_dsl import analyzer, tokenizer
 import numpy as np
 from pint import Quantity as PintQuantity
@@ -31,7 +33,7 @@ from nomad.metainfo.pydantic_extension import PydanticModel
 from nomad.metainfo.elasticsearch_extension import Elasticsearch, material_entry_type, entry_type as es_entry_type
 from .util import parse_path
 from ..metainfo import (
-    Package, Definition, MProxy, MSection, MCategory, Section, SubSection, Quantity, Reference,
+    Bytes, Package, Definition, MProxy, MSection, MCategory, Section, SubSection, Quantity, Reference,
     SectionProxy, MEnum, Datetime, JSON)
 
 # This is usually defined automatically when the first metainfo definition is evaluated, but
@@ -288,6 +290,28 @@ class SearchableQuantity(MSection):
         a_elasticsearch=Elasticsearch(mapping='date'))
 
 
+class RFC3161Timestamp(MSection):
+    token_seed = Quantity(
+        type=str,
+        description='The entry hash used to get timestamp token.')
+    token = Quantity(
+        type=Bytes,
+        description='The token returned by RFC3161 server.')
+    tsa_server = Quantity(
+        type=str,
+        description='The address of RFC3161 server.')
+    timestamp = Quantity(
+        type=Datetime,
+        description='The RFC3161 timestamp.')
+
+    @property
+    def verify_timestamp(self):
+        '''
+        Verify token by converting it to a timestamp ad-hoc.
+        '''
+        return rfc3161ng.get_timestamp(self.token)
+
+
 class EntryMetadata(MSection):
     '''
     Attributes:
@@ -390,6 +414,10 @@ class EntryMetadata(MSection):
         description='A raw file content based checksum/hash',
         categories=[MongoEntryMetadata])
 
+    entry_timestamp = SubSection(
+        sub_section=RFC3161Timestamp,
+        description='A timestamp based on RFC3161.')
+
     entry_create_time = Quantity(
         type=Datetime, categories=[MongoEntryMetadata, MongoSystemMetadata, EditableUserMetadata],
         description='The date and time when the entry was created in nomad',
diff --git a/nomad/processing/data.py b/nomad/processing/data.py
index 415621d584a87d6bcc0b942fcd09fd833666f72a..05797f53ce1f43c963efe15caee51ab3af54a328 100644
--- a/nomad/processing/data.py
+++ b/nomad/processing/data.py
@@ -27,8 +27,10 @@ entries, and files
 ..
autoclass:: Upload
 
 '''
+import base64
+from typing import Optional, cast, Any, List, Tuple, Set, Iterator, Dict, Iterable, Sequence, Union
 
-from typing import cast, Any, List, Tuple, Set, Iterator, Dict, Iterable, Sequence, Union
+import rfc3161ng
 from mongoengine import (
     StringField, DateTimeField, BooleanField, IntField, ListField, DictField)
 from pymongo import UpdateOne
@@ -45,6 +47,7 @@ from pydantic.error_wrappers import ErrorWrapper
 import validators
 
 from nomad import utils, config, infrastructure, search, datamodel, metainfo, parsing, client
+from nomad.datamodel.datamodel import RFC3161Timestamp
 from nomad.files import (
     RawPathInfo, PathObject, UploadFiles, PublicUploadFiles, StagingUploadFiles, UploadBundle,
     create_tmp_dir, is_safe_relative_path)
@@ -128,6 +131,55 @@ def keys_exist(data: Dict[str, Any], required_keys: Iterable[str], error_message
             current = current[sub_key]
 
 
+def get_rfc3161_token(
+        hash_string: str,
+        server: Optional[str] = None,
+        cert: Optional[str] = None,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        hash_algorithm: Optional[str] = None
+) -> Optional[bytes]:
+    '''
+    Request an RFC3161 compliant timestamp token for `hash_string`.
+
+    Arguments that are None fall back to the values in `config.rfc3161_timestamp`.
+
+    Returns: The token as bytes, or None if no server is set or if loading the
+        certificate or contacting the server fails.
+    '''
+    if server is None:
+        server = config.rfc3161_timestamp.server
+    if cert is None:
+        cert = config.rfc3161_timestamp.cert
+    if username is None:
+        username = config.rfc3161_timestamp.username
+    if password is None:
+        password = config.rfc3161_timestamp.password
+    if hash_algorithm is None:
+        hash_algorithm = config.rfc3161_timestamp.hash_algorithm
+
+    # if no server assigned, does not apply RFC3161
+    if not server:
+        return None
+
+    # when server requires authentication, use the provided credentials
+    params = dict(username=username, password=password, hashname=hash_algorithm if hash_algorithm else 'sha256')
+
+    try:
+        if cert:
+            if os.path.exists(cert):
+                # a local file
+                with open(cert, 'rb') as f:
+                    params['certificate'] = f.read()
+            else:
+                # a network location
+                params['certificate'] = requests.get(cert, timeout=10).content
+        stamper = rfc3161ng.RemoteTimestamper(server, **params)
+        return stamper(data=hash_string.encode('utf-8'))
+    except Exception:
+        return None
+
+
 class MetadataEditRequestHandler:
     '''
     Class for handling a request to edit metadata. The edit request can be defined either by
@@ -758,10 +810,41 @@ class Entry(Proc):
     def _apply_metadata_from_process(self, entry_metadata: EntryMetadata):
         '''
         Applies metadata generated when processing or re-processing an entry to `entry_metadata`.
+
+        Only update timestamp when entry is new or changed.
         '''
         entry_metadata.nomad_version = config.meta.version
         entry_metadata.nomad_commit = ''
         entry_metadata.entry_hash = self.upload_files.entry_hash(self.mainfile, self.mainfile_key)
+
+        try:
+            with self.upload_files.read_archive(self.entry_id) as archive:
+                entry_timestamp = archive[self.entry_id]['metadata']['entry_timestamp']
+                stored_seed = entry_timestamp['token_seed']
+                stored_token = base64.b64decode(entry_timestamp['token'])
+                stored_server = entry_timestamp['tsa_server']
+        except KeyError:
+            stored_seed = None
+            stored_token = None
+            stored_server = None
+        if stored_seed != entry_metadata.entry_hash:
+            # entry is new or has changed
+            token = get_rfc3161_token(entry_metadata.entry_hash)
+            if token:
+                # 1. save to entry metadata
+                entry_metadata.entry_timestamp = RFC3161Timestamp(
+                    token_seed=entry_metadata.entry_hash,
+                    token=token,
+                    tsa_server=config.rfc3161_timestamp.server,
+                    timestamp=rfc3161ng.get_timestamp(token))
+        else:
+            # entry is unchanged
+            entry_metadata.entry_timestamp = RFC3161Timestamp(
+                token_seed=stored_seed,
+                token=stored_token,
+                tsa_server=stored_server,
+                timestamp=rfc3161ng.get_timestamp(stored_token))
+
         entry_metadata.files = self.upload_files.entry_files(self.mainfile)
         entry_metadata.last_processing_time = datetime.utcnow()
         entry_metadata.processing_errors = []
@@ -824,7 +907,7 @@ class Entry(Proc):
         try:
             # instead of loading the whole archive, it should be enough to load the
             # parts that are referenced by section_metadata/EntryMetadata
-            # TODO somehow it should determine which root setions too load from the metainfo
+            # TODO somehow it should determine which root sections to load from the metainfo
             # or configuration
             archive = upload.upload_files.read_archive(self.entry_id)
             entry_archive = archive[self.entry_id]
diff --git a/pyproject.toml b/pyproject.toml
index c82f75636cb6f26cb292a430bbc1ef732e9a6180..0b7982c8ad10bd96f6aad3d35c9925c664e6c25c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,7 @@ dependencies = [
     'h5grove>=1.0.0',
     'httpx==0.22.0',
     'memoization==0.4.0',
+    'rfc3161ng==2.1.3',
     'lxml~=4.6',
     'wrapt~=1.12.1',
 ]
diff --git a/tests/processing/test_rfc3161.py b/tests/processing/test_rfc3161.py
new file mode 100644
index 0000000000000000000000000000000000000000..82dff9a6642078ed70099063945358a6af736fac
--- /dev/null
+++ b/tests/processing/test_rfc3161.py
@@ -0,0 +1,59 @@
+#
+# Copyright The NOMAD Authors.
+#
+# This file is part of NOMAD. See https://nomad-lab.eu for further info.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import datetime
+
+import httpx
+import pytest
+import rfc3161ng
+
+from nomad.datamodel.datamodel import RFC3161Timestamp
+from nomad.processing.data import get_rfc3161_token
+
+
+@pytest.mark.parametrize('server,cert,result', [
+    pytest.param('http://zeitstempel.dfn.de', None, True, id='zeitstempel.dfn.de'),
+    pytest.param('http://timestamp.sectigo.com', None, True, id='timestamp.sectigo.com'),
+    pytest.param('https://freetsa.org/tsr', 'https://freetsa.org/files/tsa.crt', True, id='freetsa.org/tsr'),
+    pytest.param(
+        'http://timestamp.digicert.com/',
+        'https://knowledge.digicert.com/content/dam/digicertknowledgebase/attachments/time-stamp/TSACertificate.cer',
+        True,
+        id='timestamp.digicert.com-correct-cert'),
+    pytest.param(
+        'http://timestamp.digicert.com/',
+        'https://freetsa.org/files/tsa.crt',
+        False,
+        id='timestamp.digicert.com-wrong-cert'),
+])
+def test_rfc3161ng_timestamp(server, cert, result, monkeysession):
+    # this is due to requests being used by rfc3161ng
+    # requests methods are modified in conftest.py which prohibits calling external servers
+    monkeysession.setattr('requests.get', httpx.get)
+    monkeysession.setattr('requests.post', httpx.post)
+
+    token = get_rfc3161_token('test_hash', server=server, cert=cert)
+    assert (token is not None) is result
+    if result:
+        rfc3161ng_time = rfc3161ng.get_timestamp(token)
+        assert rfc3161ng_time < datetime.timedelta(seconds=5) + datetime.datetime.utcnow()
+        metadata = RFC3161Timestamp()
+        metadata.token = token
+        new_metadata = RFC3161Timestamp.m_from_dict(metadata.m_to_dict())
+        assert new_metadata.token == token
+        assert rfc3161ng.get_timestamp(new_metadata.token) == rfc3161ng_time