diff --git a/docs/api.md b/docs/api.md
index 087a774920e687bc563222d7621e7903576bcedf..59b9c502caec3f9e7f7be8590ebb60b4f35ec344 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -315,7 +315,7 @@ was successful and you are satisfied with our processing, you have to publish th
 This also allows you to add additional meta-data to your upload (e.g. comments, references,
 coauthors, etc.). Here you also determine, if you want an *embargo* on your data.
 
-Once the data was published, you cannot delete it anymore. You can skip this step, but
+Once the data is published, you cannot delete it anymore. You can skip this step, but
 the reset of the tutorial, will only work for you, because the data is only visible to you.
 
 To initiate the publish and provide further data:
diff --git a/nomad/app/flask/api/dataset.py b/nomad/app/flask/api/dataset.py
index 07fdd3d49fdc703e1e999c58325bfb106855642f..6bd7c39533cac96237c718f52aa98d5d63a291cb 100644
--- a/nomad/app/flask/api/dataset.py
+++ b/nomad/app/flask/api/dataset.py
@@ -140,13 +140,14 @@ class DatasetResource(Resource):
             abort(400, 'Dataset with name %s already has a DOI' % name)
 
         # check if the DOI can be created
+        # TODO: quick and dirty fix, as this should be removed soon
         upload_ids = proc.Calc.objects(metadata__datasets=result.dataset_id).distinct('upload_id')
-        published_values = proc.Upload.objects(upload_id__in=upload_ids).distinct('published')
+        published_values = proc.Upload.objects(upload_id__in=upload_ids).distinct('publish_time')
 
-        if False in published_values:
+        if None in published_values:
             abort(400, 'Dataset must not contain non published entries.')
 
-        if True not in published_values:
+        if not published_values:
             abort(400, 'Dataset must not be empty.')
 
         # set the DOI
diff --git a/nomad/app/flask/api/mirror.py b/nomad/app/flask/api/mirror.py
index 11b3ad6f3ea51b789940202515efaf081a4f18e2..c545ab97d1fe72142ad7a192e02e4475ec139a07 100644
--- a/nomad/app/flask/api/mirror.py
+++ b/nomad/app/flask/api/mirror.py
@@ -68,6 +68,13 @@ class MirrorUploadsResource(Resource):
             query = {}
         else:
             query = json_data.get('query', {})
+            if 'published' in query:
+                # Need to translate into a query about publish_time
+                published = query.pop('published')
+                if published:
+                    query['publish_time__ne'] = None
+                else:
+                    query['publish_time'] = None
 
         try:
             return [
diff --git a/nomad/app/flask/api/upload.py b/nomad/app/flask/api/upload.py
index 9a5bd54170ca286e28df4ade10418354cf2d0eda..e0bf42a02060d447329496259242939805de0659 100644
--- a/nomad/app/flask/api/upload.py
+++ b/nomad/app/flask/api/upload.py
@@ -199,9 +199,9 @@ class UploadListResource(Resource):
         query_kwargs = {}
 
         if state == 'published':
-            query_kwargs.update(published=True)
+            query_kwargs.update(publish_time__ne=None)
         elif state == 'unpublished':
-            query_kwargs.update(published=False)
+            query_kwargs.update(publish_time=None)
         elif state == 'all':
             pass
         else:
@@ -215,7 +215,7 @@ class UploadListResource(Resource):
 
         results = [
             upload
-            for upload in uploads.order_by('published', '-upload_time')[(page - 1) * per_page: page * per_page]]
+            for upload in uploads.order_by('publish_time', '-upload_time')[(page - 1) * per_page: page * per_page]]
 
         return dict(
             pagination=dict(total=total, page=page, per_page=per_page),
@@ -253,7 +253,7 @@ class UploadListResource(Resource):
 
         # check the upload limit
         if not g.user.is_admin:
-            if Upload.user_uploads(g.user, published=False).count() >= config.services.upload_limit:
+            if Upload.user_uploads(g.user, publish_time=None).count() >= config.services.upload_limit:
                 abort(400, 'Limit of unpublished uploads exceeded for user.')
 
         # check if the upload is to be published directly
diff --git a/nomad/app/v1/routers/uploads.py b/nomad/app/v1/routers/uploads.py
index 1d29ef54906182ec24f1932288bcbbeba1e4527c..d1e2b6b0ebd14d2899dbc664036c7fc73508464d 100644
--- a/nomad/app/v1/routers/uploads.py
+++ b/nomad/app/v1/routers/uploads.py
@@ -112,7 +112,7 @@ class UploadProcDataPagination(Pagination):
     def validate_order_by(cls, order_by):  # pylint: disable=no-self-argument
         if order_by is None:
             return 'create_time'  # Default value
-        assert order_by in ('create_time', 'published'), 'order_by must be a valid attribute'
+        assert order_by in ('create_time', 'publish_time'), 'order_by must be a valid attribute'
         return order_by
 
     @validator('page_after_value')
@@ -369,9 +369,9 @@ async def get_uploads(
         query_kwargs.update(process_status__in=ProcessStatus.STATUSES_NOT_PROCESSING)
 
     if query.is_published is True:
-        query_kwargs.update(published=True)
+        query_kwargs.update(publish_time__ne=None)
     elif query.is_published is False:
-        query_kwargs.update(published=False)
+        query_kwargs.update(publish_time=None)
 
     # Fetch data from DB
     mongodb_query = _query_mongodb(**query_kwargs)
@@ -383,7 +383,7 @@ async def get_uploads(
     order_by_with_sign = order_by if pagination.order == Direction.asc else '-' + order_by
     if order_by == 'create_time':
         order_by_args = [order_by_with_sign, 'upload_id']  # Use upload_id as tie breaker
-    elif order_by == 'published':
+    elif order_by == 'publish_time':
         order_by_args = [order_by_with_sign, 'create_time', 'upload_id']
     mongodb_query = mongodb_query.order_by(*order_by_args)
 
@@ -836,7 +836,7 @@ async def post_upload(
     '''
     if not user.is_admin:
         # Check upload limit
-        if _query_mongodb(user_id=str(user.user_id), published=False).count() >= config.services.upload_limit:  # type: ignore
+        if _query_mongodb(user_id=str(user.user_id), publish_time=None).count() >= config.services.upload_limit:  # type: ignore
             raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=strip('''
                 Limit of unpublished uploads exceeded for user.'''))
 
@@ -1205,7 +1205,7 @@ async def post_upload_bundle(
 
     if is_oasis and not config.bundle_import.allow_unpublished_bundles_from_oasis:
         bundle_info = bundle.bundle_info
-        if not bundle_info.get('upload', {}).get('published'):
+        if not bundle_info.get('upload', {}).get('publish_time'):
             raise HTTPException(
                 status_code=status.HTTP_400_BAD_REQUEST,
                 detail=f'Bundles uploaded from an oasis must be published in the oasis first.')
diff --git a/nomad/files.py b/nomad/files.py
index 8161c4a41135050ef6e5479d9414b9445517e1d1..510ead598c1b0a960ef04b9d2a05584c95870265 100644
--- a/nomad/files.py
+++ b/nomad/files.py
@@ -1458,7 +1458,7 @@ class UploadBundle:
         try:
             upload_files: UploadFiles = None
             upload_id: str = self.bundle_info['upload_id']
-            published: bool = self.bundle_info['upload']['published']
+            published: bool = self.bundle_info.get('upload', {}).get('publish_time') is not None
             cls = PublicUploadFiles if published else StagingUploadFiles
             assert not os.path.exists(cls.base_folder_for(upload_id)), 'Upload folder already exists'
             upload_files = cls(upload_id, create=True)
diff --git a/nomad/processing/data.py b/nomad/processing/data.py
index b9c3f5ce5b3da896978f76d83bb6b5f570a4a2e6..ac6879d85c7c865c88c0b3620256864a4accb253 100644
--- a/nomad/processing/data.py
+++ b/nomad/processing/data.py
@@ -757,7 +757,6 @@ class Upload(Proc):
         upload_time: Datetime of the original upload independent of the NOMAD deployment
             it was first uploaded to.
         user_id: The id of the user that created this upload.
-        published: Boolean that indicates that the upload is published on this NOMAD deployment.
         publish_time: Datetime when the upload was initially published on this NOMAD
             deployment.
         last_update: Datetime of the last modifying process run (publish, processing, upload).
@@ -777,7 +776,6 @@ class Upload(Proc):
     name = StringField(default=None)
     upload_time = DateTimeField()
     user_id = StringField(required=True)
-    published = BooleanField(default=False)
     publish_time = DateTimeField()
     last_update = DateTimeField()
 
@@ -791,7 +789,7 @@ class Upload(Proc):
     meta: Any = {
         'strict': False,
         'indexes': [
-            'user_id', 'process_status', 'published', 'upload_time', 'create_time'
+            'user_id', 'process_status', 'upload_time', 'create_time', 'publish_time'
         ]
     }
 
@@ -831,6 +829,10 @@ class Upload(Proc):
     def uploader(self) -> datamodel.User:
         return datamodel.User.get(self.user_id)
 
+    @property
+    def published(self) -> bool:
+        return self.publish_time is not None
+
     def get_logger(self, **kwargs):
         logger = super().get_logger()
         user = self.uploader
@@ -972,7 +974,6 @@ class Upload(Proc):
             if isinstance(self.upload_files, StagingUploadFiles):
                 with utils.timer(logger, 'upload staging files deleted'):
                     self.upload_files.delete()
-                    self.published = True
                     self.publish_time = datetime.utcnow()
                     self.last_update = datetime.utcnow()
                     self.save()
@@ -1511,7 +1512,6 @@ class Upload(Proc):
                 logger.warn('oasis upload without upload time')
 
             self.publish_time = datetime.utcnow()
-            self.published = True
             self.last_update = datetime.utcnow()
             self.save()
 
@@ -1805,7 +1805,7 @@ class Upload(Proc):
             'export_options.include_raw_files',
             'export_options.include_archive_files',
             'export_options.include_datasets',
-            'upload._id', 'upload.user_id', 'upload.published',
+            'upload._id', 'upload.user_id',
             'upload.create_time', 'upload.upload_time',
             'upload.process_status', 'upload.embargo_length',
             'entries')
@@ -1837,17 +1837,14 @@ class Upload(Proc):
             upload_dict = bundle_info['upload']
             assert self.upload_id == bundle_info['upload_id'] == upload_dict['_id'], (
                 'Inconsisten upload id information')
-            published = upload_dict['published']
+            published = upload_dict.get('publish_time') is not None
             if published:
                 assert bundle_info['entries'], 'Upload published but no entries in bundle_info.json'
-            if published and settings.keep_original_timestamps:
-                assert 'publish_time' in upload_dict, '`publish_time` not provided in bundle.'
             # Define which keys we think okay to copy from the bundle
             upload_keys_to_copy = [
-                'name', 'embargo_length', 'published', 'create_time',
-                'from_oasis', 'oasis_deployment_id']
+                'name', 'embargo_length', 'from_oasis', 'oasis_deployment_id']
             if settings.keep_original_timestamps:
-                upload_keys_to_copy.extend(('upload_time', 'publish_time'))
+                upload_keys_to_copy.extend(('create_time', 'upload_time', 'publish_time',))
             try:
                 # Update the upload with data from the json, and validate it
                 update = {k: upload_dict[k] for k in upload_keys_to_copy if k in upload_dict}
@@ -1857,11 +1854,11 @@ class Upload(Proc):
                 assert False, 'Bad upload json data: ' + str(e)
             current_time = datetime.utcnow()
             current_time_plus_tolerance = current_time + timedelta(minutes=2)
-            if self.published and not settings.keep_original_timestamps:
+            if published and not settings.keep_original_timestamps:
                 self.publish_time = current_time
-            for timestamp in (self.upload_time, self.last_update, self.complete_time, self.publish_time):
-                assert timestamp is None or self.create_time <= timestamp < current_time_plus_tolerance, (
-                    'Bad/inconsistent timestamp')
+            for timestamp in (self.create_time, self.upload_time, self.last_update, self.complete_time, self.publish_time):
+                assert timestamp is None or timestamp < current_time_plus_tolerance, (
+                    'Timestamp is in the future')
             if settings.set_from_oasis:
                 self.from_oasis = True
                 source_deployment_id = bundle_info['source']['deployment_id']
diff --git a/tests/test_files.py b/tests/test_files.py
index b4332d712c97288531373dff1edd346f433d02cd..7cb41d295ec7844df3a231b99d0c263bdf7c4f09 100644
--- a/tests/test_files.py
+++ b/tests/test_files.py
@@ -17,6 +17,7 @@
 #
 
 from typing import Generator, Any, Dict, Tuple, Iterable, List
+from datetime import datetime
 import os
 import os.path
 import shutil
@@ -387,7 +388,7 @@ def create_public_upload(
         upload_files.delete()
     if with_upload:
         upload = Upload.get(upload_id)
-        upload.published = True
+        upload.publish_time = datetime.utcnow()
         assert upload.embargo_length == embargo_length, 'Wrong embargo_length provided'
         upload.save()
     return upload_id, entries, PublicUploadFiles(upload_id)
diff --git a/tests/utils.py b/tests/utils.py
index 8bb1c4cc4e67923a32c9f189bfa8ae29f8b4de85..ba293947a7efa4e1715eb72657ad1c1024462c36 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -166,7 +166,7 @@ class ExampleData:
             if upload_id in self.uploads:
                 assert embargo_length is not None, 'No embargo provided on upload'
                 assert (embargo_length > 0) == with_embargo, 'Inconsistent embargo'
-                assert published == self.uploads[upload_id]['published']
+                assert published == (self.uploads[upload_id]['publish_time'] is not None)
             else:
                 # No uploads created. Just generate it
                 embargo_length = 36 if with_embargo else 0
@@ -229,7 +229,7 @@ class ExampleData:
             if upload_files is not None:
                 upload_files.delete()
 
-    def create_upload(self, upload_id, **kwargs):
+    def create_upload(self, upload_id, published=None, **kwargs):
         '''
         Creates a dictionary holding all the upload information.
         Default values are used/generated, and can be set via kwargs.
@@ -245,11 +245,14 @@ class ExampleData:
             'complete_time': self._next_time_stamp(),
             'last_update': self._next_time_stamp(),
             'embargo_length': 0,
-            'published': False,
+            'publish_time': None,
             'published_to': []}
         upload_dict.update(kwargs)
-        if upload_dict['published'] and 'publish_time' not in upload_dict:
-            upload_dict['publish_time'] = self._next_time_stamp()
+        if published is not None:
+            if published and not upload_dict['publish_time']:
+                upload_dict['publish_time'] = self._next_time_stamp()
+            elif not published:
+                assert not upload_dict['publish_time']
         if 'user_id' not in upload_dict and 'uploader' in self.entry_defaults:
             upload_dict['user_id'] = self.entry_defaults['uploader'].user_id
         self.uploads[upload_id] = upload_dict
@@ -302,7 +305,7 @@ class ExampleData:
            domain='dft',
            upload_time=upload_time,
            processed=True,
-            published=self.uploads.get(upload_id, {}).get('published', True),
+            published=bool(self.uploads.get(upload_id, {}).get('publish_time', True)),
            with_embargo=self.uploads.get(upload_id, {}).get('embargo_length', 0) > 0,
            parser_name='parsers/vasp')
         entry_metadata.m_update(**self.entry_defaults)
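
The change that repeats throughout this patch is dropping the stored `published` boolean and deriving the published state from `publish_time` instead, both on the document itself and in MongoEngine queries. The following is a minimal, self-contained sketch of that pattern, not the actual NOMAD code: the simplified `Upload` document and the helper function below are illustrative only.

# Minimal sketch of the publish_time pattern, assuming a MongoEngine-style document.
from datetime import datetime

from mongoengine import DateTimeField, Document, StringField


class Upload(Document):
    # Simplified stand-in for the real Upload document; only the fields relevant
    # to the published/publish_time refactoring are shown.
    user_id = StringField(required=True)
    publish_time = DateTimeField()  # stays None while the upload is unpublished

    @property
    def published(self) -> bool:
        # Derived instead of stored: published means "has a publish timestamp".
        return self.publish_time is not None

    def publish(self):
        # Publishing only has to set the timestamp; there is no flag to keep in sync.
        self.publish_time = datetime.utcnow()
        self.save()


def unpublished_upload_count(user_id: str) -> int:
    # Boolean filters translate into publish_time queries (published uploads would
    # use publish_time__ne=None), as in the upload-limit and listing changes above.
    return Upload.objects(user_id=user_id, publish_time=None).count()

Keeping `publish_time` as the single source of truth avoids the stored flag and the timestamp ever drifting apart, which is the situation the bundle-import validation and test fixtures in this patch otherwise had to guard against.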