From 24d4f5dbad43df886bb5705f418fb3829918514f Mon Sep 17 00:00:00 2001 From: David Sikter <david.sikter@physik.hu-berlin.de> Date: Tue, 21 Sep 2021 11:59:35 +0200 Subject: [PATCH] Remove redundancy of the published and with_embargo fields --- nomad/app/flask/api/dataset.py | 3 ++- nomad/cli/admin/admin.py | 4 ++-- nomad/cli/client/update_database.py | 3 +-- nomad/datamodel/datamodel.py | 4 ++-- nomad/files.py | 19 ++++------------ nomad/processing/data.py | 34 +++++----------------------- tests/app/flask/test_api.py | 19 +++------------- tests/app/v1/routers/test_uploads.py | 4 ++-- tests/processing/test_data.py | 14 ++++-------- tests/test_cli.py | 10 ++++---- tests/utils.py | 5 ++-- 11 files changed, 36 insertions(+), 83 deletions(-) diff --git a/nomad/app/flask/api/dataset.py b/nomad/app/flask/api/dataset.py index 7f667c800c..07fdd3d49f 100644 --- a/nomad/app/flask/api/dataset.py +++ b/nomad/app/flask/api/dataset.py @@ -140,7 +140,8 @@ class DatasetResource(Resource): abort(400, 'Dataset with name %s already has a DOI' % name) # check if the DOI can be created - published_values = proc.Calc.objects(metadata__datasets=result.dataset_id).distinct('metadata.published') + upload_ids = proc.Calc.objects(metadata__datasets=result.dataset_id).distinct('upload_id') + published_values = proc.Upload.objects(upload_id__in=upload_ids).distinct('published') if False in published_values: abort(400, 'Dataset must not contain non published entries.') diff --git a/nomad/cli/admin/admin.py b/nomad/cli/admin/admin.py index 690a70d8c7..a2f09cb1c3 100644 --- a/nomad/cli/admin/admin.py +++ b/nomad/cli/admin/admin.py @@ -319,8 +319,8 @@ def index_materials(threads, code, dry, in_place, n, source): calc.calc_id = metadata["calc_id"] calc.upload_id = metadata["upload_id"] mongo_calc = proc.Calc.get(calc.calc_id) - calc.published = mongo_calc["metadata"]["published"] - calc.with_embargo = mongo_calc["metadata"]["with_embargo"] + calc.published = mongo_calc.upload.published + calc.with_embargo = mongo_calc.upload.embargo_length > 0 calc.owners = [mongo_calc.upload.user_id] + mongo_calc["metadata"]["shared_with"] enc_idealized_structure = encyclopedia.material.idealized_structure idealized_structure = IdealizedStructure() diff --git a/nomad/cli/client/update_database.py b/nomad/cli/client/update_database.py index 41b19f6bad..9ca21d1bda 100644 --- a/nomad/cli/client/update_database.py +++ b/nomad/cli/client/update_database.py @@ -356,7 +356,7 @@ class DbUpdater: upload_id = None for entry in entries: - if entry['name'] == uploadname: + if entry['upload_name'] == uploadname: status = 'uploaded' if entry['published']: status = 'published' @@ -370,7 +370,6 @@ class DbUpdater: return dict( operation='publish', metadata=dict( - with_embargo=False, comment='', references=[ 'http://www.sciencedirect.com/science/article/pii/S0927025614003322', diff --git a/nomad/datamodel/datamodel.py b/nomad/datamodel/datamodel.py index cfa629e1d8..22ab1df89a 100644 --- a/nomad/datamodel/datamodel.py +++ b/nomad/datamodel/datamodel.py @@ -478,7 +478,7 @@ class EntryMetadata(metainfo.MSection): published = metainfo.Quantity( type=bool, default=False, description='Indicates if the entry is published', - categories=[MongoMetadata, OasisMetadata], + categories=[OasisMetadata], a_search=Search(), a_elasticsearch=Elasticsearch(material_entry_type)) processed = metainfo.Quantity( @@ -602,7 +602,7 @@ class EntryMetadata(metainfo.MSection): categories=[MongoMetadata, EditableUserMetadata]) with_embargo = metainfo.Quantity( - type=bool, default=False, categories=[MongoMetadata], + type=bool, default=False, description='Indicated if this entry is under an embargo', a_search=Search(), a_elasticsearch=Elasticsearch(material_entry_type)) diff --git a/nomad/files.py b/nomad/files.py index c62638ad06..8161c4a411 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -28,11 +28,11 @@ almost readonly (beside metadata) storage. .. code-block:: sh fs/staging/<upload>/raw/** - /archive/<calc>.json - fs/public/<upload>/raw-public.plain.zip - /raw-restricted.plain.zip - /archive-public.json.zip - /archive-restricted.json.zip + /archive/<calc>.msg + fs/public/<upload>/raw-{access}.plain.zip + /archive-{access}.msg.msg + +Where `access` is either "public" (non-embargoed) or "restricted" (embargoed). There is an implicit relationship between files, based on them being in the same directory. Each directory with at least one *mainfile* is a *calculation directory* @@ -41,15 +41,6 @@ respective files actually contributing data or not. A *calculation directory* mi contain multiple *mainfile*. E.g., user simulated multiple states of the same system, have one calculation based on the other, etc. In this case the other *mainfile* is an *aux* file to the original *mainfile* and vice versa. - -Published files are kept in pairs of public and restricted files. Here the multiple *mainfiles* -per directory provides a dilemma. If on *mainfile* is restricted, all its *aux* files -should be restricted too. But if one of the *aux* files is actually a *mainfile* it -might be published! - -There are multiple ways to solve this. Due to the rarity of the case, we take the -most simple solution: if one file is public, all files are made public, execpt those -being other mainfiles. Therefore, the aux files of a restricted calc might become public! ''' from abc import ABCMeta diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 09d0c0e3c2..b9c3f5ce5b 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -171,7 +171,6 @@ class Calc(Proc): ('upload_id', 'metadata.nomad_version'), 'metadata.processed', 'metadata.last_processing', - 'metadata.published', 'metadata.datasets', 'metadata.pid' ] @@ -297,6 +296,7 @@ class Calc(Proc): entry_metadata.uploader = upload.user_id entry_metadata.upload_time = upload.upload_time entry_metadata.upload_name = upload.name + entry_metadata.published = upload.published entry_metadata.with_embargo = (upload.embargo_length > 0) # Entry metadata if self.parser is not None: @@ -961,28 +961,13 @@ class Upload(Proc): self.embargo_length = embargo_length with utils.lnr(logger, 'publish failed'): - with self.entries_metadata() as calcs: - - with utils.timer(logger, 'upload metadata updated'): - def create_update(calc): - calc.published = True - if embargo_length is not None: - calc.with_embargo = (embargo_length > 0) - else: - assert calc.with_embargo is not None, 'with_embargo flag is None' - return UpdateOne( - {'_id': calc.calc_id}, - {'$set': {'metadata': calc.m_to_dict( - include_defaults=True, categories=[datamodel.MongoMetadata])}}) - - Calc._get_collection().bulk_write([create_update(calc) for calc in calcs]) - + with self.entries_metadata() as entries: if isinstance(self.upload_files, StagingUploadFiles): with utils.timer(logger, 'staged upload files packed'): - self.staging_upload_files.pack(calcs, with_embargo=(self.embargo_length > 0)) + self.staging_upload_files.pack(entries, with_embargo=(self.embargo_length > 0)) with utils.timer(logger, 'index updated'): - search.publish(calcs) + search.publish(entries) if isinstance(self.upload_files, StagingUploadFiles): with utils.timer(logger, 'upload staging files deleted'): @@ -1660,7 +1645,6 @@ class Upload(Proc): if self.published and (self.embargo_length > 0) != (upload_metadata.embargo_length > 0): need_to_repack = True need_to_reindex = True - new_entry_metadata['with_embargo'] = (upload_metadata.embargo_length > 0) self.embargo_length = upload_metadata.embargo_length if upload_metadata.uploader is not None: self.user_id = upload_metadata.uploader.user_id @@ -1828,7 +1812,7 @@ class Upload(Proc): required_keys_entry_level = ( '_id', 'upload_id', 'mainfile', 'parser', 'process_status', 'create_time', 'metadata') required_keys_entry_metadata = ( - 'upload_time', 'published', 'with_embargo', 'calc_hash') + 'upload_time', 'calc_hash') required_keys_datasets = ( 'dataset_id', 'name', 'user_id') @@ -1927,15 +1911,9 @@ class Upload(Proc): 'Mismatching upload_id in entry definition') assert entry_dict['_id'] == generate_entry_id(self.upload_id, entry_dict['mainfile']), ( 'Provided entry id does not match generated value') - for k, v in ( - ('published', self.published), - ('with_embargo', self.embargo_length > 0)): - assert entry_metadata_dict.get(k) == v, f'Inconsistent entry metadata: {k}' check_user_ids(entry_dict.get('coauthors', []), 'Invalid coauthor reference: {id}') check_user_ids(entry_dict.get('shared_with', []), 'Invalid shared_with reference: {id}') - if embargo_length is not None: - # Update the embargo flag on the entry level - entry_metadata_dict['with_embargo'] = (embargo_length > 0) + # Instantiate an entry object from the json, and validate it entry_keys_to_copy = ( 'upload_id', 'mainfile', 'parser', 'metadata', 'errors', 'warnings', diff --git a/tests/app/flask/test_api.py b/tests/app/flask/test_api.py index 5af9358dcb..ef7d098a6b 100644 --- a/tests/app/flask/test_api.py +++ b/tests/app/flask/test_api.py @@ -2172,19 +2172,6 @@ class TestDataset: assert files.UploadFiles.get('1') is not None assert example_entry.metadata.datasets[0].dataset_id == '1' - # entry_archive = EntryArchive() - # entry_metadata = entry_archive.m_create( - # EntryMetadata, - # domain='dft', calc_id='1', upload_id='1', published=True, with_embargo=False, - # datasets=['1']) - # Calc( - # calc_id='1', upload_id='1', create_time=datetime.datetime.now(), - # metadata=entry_metadata.m_to_dict()).save() - # upload_files = files.StagingUploadFiles(upload_id='1', create=True) - # upload_files.write_archive('1', dict(metadata=entry_metadata.m_to_dict())) - # upload_files.close() - # index(entry_archive) - def test_delete_dataset(self, api, test_user_auth, example_dataset_with_entry): rv = api.delete('/datasets/ds1', headers=test_user_auth) assert rv.status_code == 200 @@ -2217,9 +2204,9 @@ class TestDataset: entry_metadata = EntryMetadata( domain='dft', calc_id='1', upload_id='1', published=False, with_embargo=False, datasets=['1']) - Calc( - calc_id='1', upload_id='1', create_time=datetime.datetime.now(), - metadata=entry_metadata.m_to_dict()).save() + entry = Calc(calc_id='1', upload_id='1', create_time=datetime.datetime.now()) + entry._apply_metadata_to_mongo_entry(entry_metadata) + entry.save() rv = api.post('/datasets/ds1', headers=test_user_auth) assert rv.status_code == 400 diff --git a/tests/app/v1/routers/test_uploads.py b/tests/app/v1/routers/test_uploads.py index 3d3c3e1e13..8a62bdff3f 100644 --- a/tests/app/v1/routers/test_uploads.py +++ b/tests/app/v1/routers/test_uploads.py @@ -311,7 +311,7 @@ def get_upload_entries_metadata(entries: List[Dict[str, Any]]) -> Iterable[Entry return [ EntryMetadata( domain='dft', calc_id=entry['entry_id'], mainfile=entry['mainfile'], - with_embargo=Calc.get(entry['entry_id']).metadata.get('with_embargo')) + with_embargo=Upload.get(entry['upload_id']).embargo_length > 0) for entry in entries] @@ -985,7 +985,7 @@ def test_put_upload_metadata( pass if upload_id == 'id_published_w': - assert Calc.get('id_published_w_entry').metadata['with_embargo'] + assert Upload.get(upload_id).embargo_length > 0 es_data = search(owner=None, query=dict(entry_id='id_published_w_entry')).data[0] assert es_data['with_embargo'] diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 9a77ee612b..8b1cb97afc 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -95,7 +95,6 @@ def assert_processing(upload: Upload, published: bool = False, process='process_ assert calc.parser is not None assert calc.mainfile is not None assert calc.process_status == ProcessStatus.SUCCESS - assert calc.metadata['published'] == published with upload_files.read_archive(calc.calc_id) as archive: calc_archive = archive[calc.calc_id] @@ -283,7 +282,6 @@ def test_oasis_upload_processing(proc_infra, oasis_example_uploaded: Tuple[str, assert_processing(upload, published=True) calc = Calc.objects(upload_id='oasis_upload_id').first() assert calc.calc_id == 'test_calc_id' - assert calc.metadata['published'] assert calc.metadata['datasets'] == ['oasis_dataset_1', 'cn_dataset_2'] @@ -527,20 +525,18 @@ def test_re_process_match(non_empty_processed, published, monkeypatch, no_warn): assert upload.total_calcs == 2 if not published: - for calc in upload.calcs: - assert calc.metadata['published'] == published - assert not calc.metadata['with_embargo'] + assert upload.published == published + assert upload.embargo_length == 0 def test_re_pack(published: Upload): upload_id = published.upload_id upload_files: PublicUploadFiles = published.upload_files # type: ignore assert upload_files.access == 'restricted' - # Lift embargo + assert published.embargo_length > 0 calc = Calc.objects(upload_id=upload_id).first() - assert calc.metadata['with_embargo'] - calc.metadata['with_embargo'] = False - calc.save() + + # Lift embargo published.embargo_length = 0 published.save() upload_files.re_pack(with_embargo=False) diff --git a/tests/test_cli.py b/tests/test_cli.py index d0926848a3..2a7a9e2c63 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -105,7 +105,7 @@ class TestAdmin: calc = Calc.objects(upload_id=upload_id).first() assert published.upload_files.exists() - assert calc.metadata['with_embargo'] + assert published.embargo_length > 0 assert search.SearchRequest().owner('public').search_parameter('upload_id', upload_id).execute()['total'] == 0 result = click.testing.CliRunner().invoke( @@ -114,7 +114,7 @@ class TestAdmin: assert result.exit_code == 0 published.block_until_complete() - assert not Calc.objects(upload_id=upload_id).first().metadata['with_embargo'] == lifted + assert not (published.embargo_length > 0) == lifted assert (search.SearchRequest().owner('public').search_parameter('upload_id', upload_id).execute()['total'] > 0) == lifted if lifted: with files.UploadFiles.get(upload_id=upload_id).read_archive(calc_id=calc.calc_id) as archive: @@ -242,9 +242,9 @@ class TestAdminUploads: def test_re_pack(self, published, monkeypatch): upload_id = published.upload_id calc = Calc.objects(upload_id=upload_id).first() - assert calc.metadata['with_embargo'] - calc.metadata['with_embargo'] = False - calc.save() + assert published.embargo_length > 0 + published.embargo_length = 0 + published.save() result = click.testing.CliRunner().invoke( cli, ['admin', 'uploads', 're-pack', '--parallel', '2', upload_id], catch_exceptions=False) diff --git a/tests/utils.py b/tests/utils.py index ae638000ad..8bb1c4cc4e 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -166,6 +166,7 @@ class ExampleData: if upload_id in self.uploads: assert embargo_length is not None, 'No embargo provided on upload' assert (embargo_length > 0) == with_embargo, 'Inconsistent embargo' + assert published == self.uploads[upload_id]['published'] else: # No uploads created. Just generate it embargo_length = 36 if with_embargo else 0 @@ -300,9 +301,9 @@ class ExampleData: calc_hash='dummy_hash_' + entry_id, domain='dft', upload_time=upload_time, - published=True, processed=True, - with_embargo=False, + published=self.uploads.get(upload_id, {}).get('published', True), + with_embargo=self.uploads.get(upload_id, {}).get('embargo_length', 0) > 0, parser_name='parsers/vasp') entry_metadata.m_update(**self.entry_defaults) entry_metadata.m_update(**kwargs) -- GitLab