From 1a03bfa35ecc17b5040b2f1e5ee20b24d544d17e Mon Sep 17 00:00:00 2001 From: Ahmed Ilyas <ahmed.ilyas@hu-berlin.de> Date: Thu, 27 Feb 2025 10:28:54 +0000 Subject: [PATCH] Add nomad distro commit info Changelog: Added --- nomad/datamodel/datamodel.py | 6 ++++ nomad/processing/data.py | 5 ++++ nomad/utils/__init__.py | 44 +++++++++++++++++++++++++++++ tests/test_utils.py | 54 ++++++++++++++++++++++++++++++++++++ 4 files changed, 109 insertions(+) diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index 4f58304b22..f32012bc14 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -723,6 +723,12 @@ class EntryMetadata(MSection): a_elasticsearch=Elasticsearch(), ) + nomad_distro_commit_url = Quantity( + type=str, + description='The NOMAD distro commit url used for the last processing', + categories=[MongoEntryMetadata], + a_elasticsearch=Elasticsearch(), + ) comment = Quantity( type=str, categories=[MongoEntryMetadata, EditableUserMetadata], diff --git a/nomad/processing/data.py b/nomad/processing/data.py index a4d40a56a2..b75902e2c3 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -909,6 +909,7 @@ class Entry(Proc): external database where the data was imported from nomad_version: the NOMAD version used for the last processing nomad_commit: the NOMAD commit used for the last processing + nomad_distro_commit_url: the NOMAD distro commit url used for the last processing comment: a user provided comment for this entry references: user provided references (URLs) for this entry entry_coauthors: a user provided list of co-authors specific for this entry. 
Note @@ -929,6 +930,7 @@ class Entry(Proc): external_id = StringField() nomad_version = StringField() nomad_commit = StringField() + nomad_distro_commit_url = StringField() comment = StringField() references = ListField(StringField()) entry_coauthors = ListField() @@ -1012,8 +1014,11 @@ class Entry(Proc): In this case, the timestamp stored in the archive is used. If no previous timestamp is available, a new timestamp is generated. """ + distro_commit_url = utils.nomad_distro_metadata() + entry_metadata.nomad_version = config.meta.version entry_metadata.nomad_version = config.meta.version entry_metadata.nomad_commit = '' + entry_metadata.nomad_distro_commit_url = distro_commit_url or '' entry_metadata.entry_hash = self.upload_files.entry_hash( self.mainfile, self.mainfile_key ) diff --git a/nomad/utils/__init__.py b/nomad/utils/__init__.py index 2e0bf62e05..f124328c29 100644 --- a/nomad/utils/__init__.py +++ b/nomad/utils/__init__.py @@ -54,6 +54,7 @@ from datetime import timedelta import collections import logging import inspect +from importlib.metadata import PackageNotFoundError, metadata, version import orjson import os @@ -1147,3 +1148,46 @@ def dict_to_dataframe( filtered_df = filter_df_columns_by_prefix(df, keys_to_filter) filtered_dict = dataframe_to_dict(filtered_df) return pd.json_normalize(filtered_dict, errors='ignore') + + +def nomad_distro_metadata() -> str | None: + """ + Retrieves metadata for the 'nomad-distribution' package, including the + repository URL with latest commit hash. + + Returns: + The repo url with commit hash or None if unavailable. 
+ """ + try: + distro_metadata = metadata('nomad-distribution') + + # Extract repository URL from Project-URL metadata + project_urls: list[str] = distro_metadata.get_all('Project-URL', []) + repo_url = next( + ( + url.split(', ', 1)[1] + for url in project_urls + if url.startswith('repository, ') + ), + None, + ) + + distro_version = version('nomad-distribution') + if '+g' in distro_version: + # Split on '+g' to extract the commit hash from the version string, as 'g' is a Git-specific prefix. + commit = distro_version.split('+g')[ + -1 + ] # Extract commit hash if present (setuptools_scm format) + else: + commit = ( + f'v{distro_version}' # Otherwise, assume it's a tag and prefix with 'v' + ) + + if not repo_url or not commit: + return None + + commit_url = f'{repo_url}/tree/{commit}' + + return commit_url + except (PackageNotFoundError, IndexError, StopIteration, KeyError): + return None diff --git a/tests/test_utils.py b/tests/test_utils.py index b5da826ebd..5d7339d68e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -16,6 +16,7 @@ # limitations under the License. 
@pytest.mark.parametrize(
    'project_urls, version_str, expected_url',
    [
        # Repository entry plus a version carrying a '+g<hash>' local part.
        (
            ['repository, https://github.com/example/repo'],
            '1.2.3+gabcdef',
            'https://github.com/example/repo/tree/abcdef',
        ),
        # A label that merely ends in 'repository' must not match.
        (['notrepository, https://github.com/example/repo'], '1.2.3+gabcdef', None),
        # A plain release version is treated as the tag 'v<version>'.
        (
            ['repository, https://github.com/example/repo'],
            '1.2.3',
            'https://github.com/example/repo/tree/v1.2.3',
        ),
        # No Project-URL entries at all.
        ([], '1.2.3+gabcdef', None),
        # Repository label present but without a URL.
        (['repository, '], '1.2.3+gabcdef', None),
        # The repository entry does not have to be the first Project-URL.
        (
            [
                'homepage, https://example.org',
                'repository, https://github.com/example/repo',
            ],
            '1.2.3+gabcdef',
            'https://github.com/example/repo/tree/abcdef',
        ),
    ],
)
def test_nomad_distro_metadata(monkeypatch, project_urls, version_str, expected_url):
    """Checks the commit URL built from stubbed package metadata and version."""

    class MockMetadata:
        # `None` instead of a shared mutable list as the default value.
        def get_all(self, key, default=None):
            if key == 'Project-URL':
                return project_urls
            return default

    monkeypatch.setattr('nomad.utils.metadata', lambda package_name: MockMetadata())
    monkeypatch.setattr('nomad.utils.version', lambda package_name: version_str)

    assert nomad_distro_metadata() == expected_url


def test_nomad_distro_package_not_found(monkeypatch):
    """Checks that None is returned when the distribution package is missing."""

    def raise_not_found(package_name):
        raise PackageNotFoundError(package_name)

    monkeypatch.setattr('nomad.utils.metadata', raise_not_found)
    monkeypatch.setattr('nomad.utils.version', lambda package_name: '1.2.3')

    assert nomad_distro_metadata() is None