From aaa776ee9c2f5f508ab03c75d1894029ddb5aa03 Mon Sep 17 00:00:00 2001 From: David Sikter <david.sikter@physik.hu-berlin.de> Date: Tue, 5 Oct 2021 17:37:07 +0200 Subject: [PATCH] Flattening fields: datasets (last one) --- nomad/app/v1/routers/datasets.py | 4 ++-- nomad/app/v1/routers/entries.py | 2 +- nomad/datamodel/datamodel.py | 2 +- nomad/processing/data.py | 26 +++++++++++++------------- tests/app/v1/routers/test_datasets.py | 4 ++-- tests/app/v1/routers/test_uploads.py | 4 ++-- tests/processing/test_data.py | 2 +- 7 files changed, 22 insertions(+), 22 deletions(-) diff --git a/nomad/app/v1/routers/datasets.py b/nomad/app/v1/routers/datasets.py index d65e6483cb..7cc031d955 100644 --- a/nomad/app/v1/routers/datasets.py +++ b/nomad/app/v1/routers/datasets.py @@ -250,7 +250,7 @@ async def post_datasets( if not empty: processing.Calc._get_collection().update_many( - mongo_query, {'$push': {'metadata.datasets': dataset.dataset_id}}) + mongo_query, {'$push': {'datasets': dataset.dataset_id}}) update_by_query( ''' if (ctx._source.datasets == null) { @@ -309,7 +309,7 @@ async def delete_dataset( if len(entry_ids) > 0: processing.Calc._get_collection().update_many( - mongo_query, {'$pull': {'metadata.datasets': dataset.dataset_id}}) + mongo_query, {'$pull': {'datasets': dataset.dataset_id}}) update_by_query( ''' int index = -1; diff --git a/nomad/app/v1/routers/entries.py b/nomad/app/v1/routers/entries.py index d846503f23..69834042b4 100644 --- a/nomad/app/v1/routers/entries.py +++ b/nomad/app/v1/routers/entries.py @@ -1327,7 +1327,7 @@ async def post_entry_metadata_edit( # remove potentially empty old datasets if removed_datasets is not None: for dataset in removed_datasets: - if proc.Calc.objects(metadata__datasets=dataset).first() is None: + if proc.Calc.objects(datasets=dataset).first() is None: datamodel.Dataset.m_def.a_mongo.objects(dataset_id=dataset).delete() return data diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index da80868ffb..52858ded2d 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -574,7 +574,7 @@ class EntryMetadata(metainfo.MSection): datasets = metainfo.Quantity( type=dataset_reference, shape=['0..*'], default=[], - categories=[MongoMetadata, EditableUserMetadata], + categories=[MongoEntryMetadata, EditableUserMetadata], description='A list of user curated datasets this entry belongs to.', a_elasticsearch=Elasticsearch(material_entry_type)) diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 89a2e6b577..4fccacbc35 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -173,6 +173,7 @@ class Calc(Proc): comment: a user provided comment for this entry references: user provided references (URLs) for this entry coauthors: a user provided list of co-authors + datasets: a list of user curated datasets this entry belongs to metadata: the metadata record wit calc and user metadata, see :class:`EntryMetadata` ''' @@ -193,6 +194,7 @@ class Calc(Proc): references = ListField(StringField(), default=None) coauthors = ListField(StringField(), default=None) shared_with = ListField(StringField(), default=None) + datasets = ListField(StringField(), default=None) metadata = DictField() # Stores user provided metadata and system metadata (not archive metadata) @@ -207,7 +209,7 @@ class Calc(Proc): ('upload_id', 'nomad_version'), 'process_status', 'last_processing_time', - 'metadata.datasets', + 'datasets', 'pid' ] } @@ -475,11 +477,9 @@ class Calc(Proc): if self.upload is None: 
logger.error('calculation upload does not exist') - has_previous_metadata = bool(self.metadata) - # 1. Determine if we should parse or not self.set_process_step('Determining action') - if not self.upload.published or not has_previous_metadata: + if not self.upload.published or not self.nomad_version: should_parse = True else: # This entry has already been published and has metadata. @@ -1693,13 +1693,12 @@ class Upload(Proc): # Handle datasets dataset_ids: Set[str] = set() for entry_dict in bundle_info['entries']: - entry_metadata = entry_dict['metadata'] - entry_metadata_datasets = entry_metadata.get('datasets') - if entry_metadata_datasets: + entry_datasets = entry_dict.get('datasets') + if entry_datasets: if not include_datasets: - entry_metadata['datasets'] = [] + entry_dict['datasets'] = None else: - dataset_ids.update(entry_metadata_datasets) + dataset_ids.update(entry_datasets) if include_datasets: bundle_info['datasets'] = [ datamodel.Dataset.m_def.a_mongo.get(dataset_id=dataset_id).m_to_dict() @@ -1854,7 +1853,6 @@ class Upload(Proc): keys_exist(entry_dict, required_keys_entry_level, 'Missing key for entry: {key}') assert entry_dict['process_status'] in ProcessStatus.STATUSES_NOT_PROCESSING, ( f'Invalid entry `process_status`') - entry_metadata_dict = entry_dict['metadata'] # Check referential consistency assert entry_dict['upload_id'] == self.upload_id, ( 'Mismatching upload_id in entry definition') @@ -1882,10 +1880,12 @@ class Upload(Proc): # Instantiate an EntryMetadata object to validate the format try: if settings.include_datasets: - entry.metadata['datasets'] = [ - dataset_id_mapping[id] for id in entry_metadata_dict.get('datasets', [])] + entry_datasets = entry_dict.get('datasets') + if entry_datasets: + entry.datasets = [ + dataset_id_mapping[id] for id in entry_datasets] or None else: - entry.metadata['datasets'] = [] + entry.datasets = None entry.mongo_metadata(self) # TODO: if we don't import archive files, should we still index something in ES? 
except Exception as e: diff --git a/tests/app/v1/routers/test_datasets.py b/tests/app/v1/routers/test_datasets.py index 4630805eb9..efcf89f8ed 100644 --- a/tests/app/v1/routers/test_datasets.py +++ b/tests/app/v1/routers/test_datasets.py @@ -152,7 +152,7 @@ def assert_dataset(dataset, query: Query = None, entries: List[str] = None, n_en expected_n_entries = n_entries if dataset['dataset_type'] == 'owned' else 0 assert search_results.pagination.total == expected_n_entries - assert processing.Calc.objects(metadata__datasets=dataset_id).count() == expected_n_entries + assert processing.Calc.objects(datasets=dataset_id).count() == expected_n_entries def assert_dataset_deleted(dataset_id): @@ -162,7 +162,7 @@ def assert_dataset_deleted(dataset_id): search_results = search( owner='admin', query={'datasets.dataset_id': dataset_id}, user_id=admin_user_id) assert search_results.pagination.total == 0 - assert processing.Calc.objects(metadata__datasets=dataset_id).count() == 0 + assert processing.Calc.objects(datasets=dataset_id).count() == 0 @pytest.mark.parametrize('query, size, status_code', [ diff --git a/tests/app/v1/routers/test_uploads.py b/tests/app/v1/routers/test_uploads.py index 84c5900bd0..85f41a7751 100644 --- a/tests/app/v1/routers/test_uploads.py +++ b/tests/app/v1/routers/test_uploads.py @@ -1262,7 +1262,7 @@ def test_post_upload_action_publish_to_central_nomad( datamodel.Dataset( dataset_id='dataset_id', dataset_name='dataset_name', user_id=test_users_dict[user].user_id).a_mongo.save() - calc.metadata['datasets'] = ['dataset_id'] + calc.datasets = ['dataset_id'] calc.save() # Finally, invoke the method to publish to central nomad @@ -1288,7 +1288,7 @@ def test_post_upload_action_publish_to_central_nomad( 'upload_id', 'calc_id', 'upload_create_time', 'entry_create_time', 'last_processing_time', 'publish_time'): assert new_calc_metadata_dict[k] == v, f'Metadata not matching: {k}' - assert new_calc.metadata.get('datasets') == ['dataset_id'] + assert new_calc.datasets == ['dataset_id'] assert old_upload.published_to[0] == config.oasis.central_nomad_deployment_id assert new_upload.from_oasis and new_upload.oasis_deployment_id diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 56f0fa1569..877de1e2ae 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -278,7 +278,7 @@ def test_oasis_upload_processing(proc_infra, oasis_example_uploaded: Tuple[str, assert_processing(upload, published=True) calc = Calc.objects(upload_id='oasis_upload_id').first() assert calc.calc_id == 'test_calc_id' - assert calc.metadata['datasets'] == ['oasis_dataset_1', 'cn_dataset_2'] + assert calc.datasets == ['oasis_dataset_1', 'cn_dataset_2'] @pytest.mark.timeout(config.tests.default_timeout) -- GitLab
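
Reviewer note (not part of the patch): the commit flattens the entry's dataset references out of the generic `metadata` dict and into a dedicated `datasets` field on `Calc`, which is why every `metadata__datasets` / `'metadata.datasets'` reference above becomes plain `datasets`. Below is a minimal, self-contained sketch of the before/after query and update patterns with mongoengine. The class names, collection names, sample ids, and the `connect()` setup are illustrative assumptions for this note only, not NOMAD code; only the field layouts and query syntax mirror the patch.

    from mongoengine import Document, DictField, ListField, StringField, connect

    connect('flattening_demo')  # assumption: a local MongoDB instance for the demo

    class CalcBefore(Document):
        # Old layout: dataset ids live inside the generic `metadata` dict, so every
        # query and raw update must address the nested path 'metadata.datasets'.
        meta = {'collection': 'calc_before'}
        calc_id = StringField(primary_key=True)
        metadata = DictField()

    class CalcAfter(Document):
        # New layout: `datasets` is a first-class field that can be indexed directly,
        # matching the index list change in nomad/processing/data.py above.
        meta = {'collection': 'calc_after', 'indexes': ['datasets']}
        calc_id = StringField(primary_key=True)
        datasets = ListField(StringField(), default=None)

    CalcBefore(calc_id='c1', metadata={'datasets': ['ds-1']}).save()
    CalcAfter(calc_id='c1', datasets=['ds-1']).save()

    # Before: double-underscore query into the dict and '$push' on the dotted path.
    CalcBefore.objects(metadata__datasets='ds-1').count()
    CalcBefore._get_collection().update_many(
        {'_id': 'c1'}, {'$push': {'metadata.datasets': 'ds-2'}})

    # After: plain keyword query and '$push' on the top-level field.
    CalcAfter.objects(datasets='ds-1').count()
    CalcAfter._get_collection().update_many(
        {'_id': 'c1'}, {'$push': {'datasets': 'ds-2'}})

Besides shortening the queries, promoting `datasets` to a top-level field allows it to be listed directly in the document's mongoengine indexes (the patch swaps 'metadata.datasets' for 'datasets' in Calc's index list) and removes one level of indirection from every update that touches dataset membership.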