diff --git a/docs/developers.md b/docs/developers.md index b396a0d8cb2643ab5147d79d1432e6fba97a09f1..b46555bdc1f89bcb3764304d2f1ee0d84273523e 100644 --- a/docs/developers.md +++ b/docs/developers.md @@ -399,7 +399,7 @@ it identifies. If something is called a *hash* then it is a hash generated based entity it identifies. This means either the whole thing or just some properties of said entities. -- The most common hashes is the `calc_hash` based on mainfile and auxfile contents. +- The most common hash is the `entry_hash` based on mainfile and auxfile contents. - The `upload_id` is a UUID assigned to the upload on creation. It never changes. - The `mainfile` is a path within an upload that points to a file identified as parseable. This also uniquely identifies an entry within the upload. diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 9e58ec31201aba33e3fc9d3743fd2fd6f949271a..38907b8804c277479b623f0df9debdc785ec7b90 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -343,7 +343,7 @@ class EntryMetadata(metainfo.MSection): upload_name: The user provided upload name. upload_create_time: The time that the upload was created calc_id: The unique mainfile based entry id. - calc_hash: The raw file content based checksum/hash of this entry. + entry_hash: The raw file content based checksum/hash of this entry. entry_create_time: The time that the entry was created last_edit_time: The date and time the user metadata was last edited. parser_name: The NOMAD parser used for the last processing. 
@@ -408,7 +408,7 @@ class EntryMetadata(metainfo.MSection): aliases=['entry_id'], a_elasticsearch=Elasticsearch(material_entry_type, metrics=dict(n_entries='cardinality'))) - calc_hash = metainfo.Quantity( + entry_hash = metainfo.Quantity( # Note: This attribute is not stored in ES type=str, description='A raw file content based checksum/hash', diff --git a/nomad/files.py b/nomad/files.py index beab90fe2ea19c7a1b5e7aa0cdc3fc0b7f7ee883..b54d5b757d7e05689c88038405f7674ee6115f56 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -1061,7 +1061,7 @@ class StagingUploadFiles(UploadFiles): else: return aux_files - def calc_hash(self, mainfile: str) -> str: + def entry_hash(self, mainfile: str) -> str: ''' Calculates a hash for the given entry based on file contents and aux file contents. Arguments: diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 9d2ac4333f43c71651d6d66e4b3925e4cd868004..d3c7e613c94374b73415c39a94b951f986a21304 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -662,7 +662,7 @@ class Entry(Proc): Attributes: upload_id: the id of the upload to which this entry belongs calc_id: the id of this entry - calc_hash: the hash of the entry files + entry_hash: the hash of the entry files entry_create_time: the date and time of the creation of the entry last_processing_time: the date and time of the last processing last_edit_time: the date and time the user metadata was last edited @@ -681,7 +681,7 @@ class Entry(Proc): ''' upload_id = StringField(required=True) calc_id = StringField(primary_key=True) - calc_hash = StringField() + entry_hash = StringField() entry_create_time = DateTimeField(required=True) last_processing_time = DateTimeField() last_edit_time = DateTimeField() @@ -767,7 +767,7 @@ class Entry(Proc): ''' entry_metadata.nomad_version = config.meta.version entry_metadata.nomad_commit = config.meta.commit - entry_metadata.calc_hash = self.upload_files.calc_hash(self.mainfile) + entry_metadata.entry_hash = 
self.upload_files.entry_hash(self.mainfile) entry_metadata.files = self.upload_files.entry_files(self.mainfile) entry_metadata.last_processing_time = datetime.utcnow() entry_metadata.processing_errors = [] diff --git a/tests/parsing/test_parsing.py b/tests/parsing/test_parsing.py index ecc26b219cb27d530ab6943c59ff6a11f5d47120..cbd058d0e138a24ca8f3a193047c310dbfa777bf 100644 --- a/tests/parsing/test_parsing.py +++ b/tests/parsing/test_parsing.py @@ -161,7 +161,7 @@ def add_calculation_info(entry_archive: EntryArchive, **kwargs) -> EntryArchive: entry_metadata = entry_archive.metadata entry_metadata.upload_id = 'test_upload_id' entry_metadata.calc_id = 'test_calc_id' - entry_metadata.calc_hash = 'test_calc_hash' + entry_metadata.entry_hash = 'test_calc_hash' entry_metadata.mainfile = 'test/mainfile.txt' entry_metadata.m_update(**kwargs) return entry_archive diff --git a/tests/utils.py b/tests/utils.py index 4d2e51b97e715e9cdb90807ac73cd35ba24c29d4..aca0adddb58433984aba460a7e3e4f802bc909cf 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -259,7 +259,7 @@ class ExampleData: calc_id=entry_id, upload_id=upload_id, mainfile=mainfile, - calc_hash='dummy_hash_' + entry_id, + entry_hash='dummy_hash_' + entry_id, domain='dft', entry_create_time=self._next_time_stamp(), processed=True,