diff --git a/nomad/api/repo.py b/nomad/api/repo.py index dc312901d3e63417c25e4926e8b29da79b15e049..dc94ee84e3de87648683a53e7dce5db568c6c2fe 100644 --- a/nomad/api/repo.py +++ b/nomad/api/repo.py @@ -20,12 +20,12 @@ meta-data. from flask_restplus import Resource, abort, fields from flask import request, g from elasticsearch_dsl import Q +from elasticsearch.exceptions import NotFoundError -from nomad.files import UploadFiles, Restricted from nomad import search from .app import api -from .auth import login_if_available, create_authorization_predicate +from .auth import login_if_available from .common import pagination_model, pagination_request_parser, calc_route ns = api.namespace('repo', description='Access repository metadata.') @@ -42,21 +42,28 @@ class RepoCalcResource(Resource): """ Get calculation metadata in repository form. - Repository metadata only entails the quanties shown in the repository. + Repository metadata only entails the quantities shown in the repository. Calcs are references via *upload_id*, *calc_id* pairs. """ - # TODO use elastic search instead of the files - # TODO add missing user metadata (from elastic or repo db) - upload_files = UploadFiles.get(upload_id, create_authorization_predicate(upload_id, calc_id)) - if upload_files is None: - abort(404, message='There is no upload %s' % upload_id) - try: - return upload_files.metadata.get(calc_id), 200 - except Restricted: - abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id)) - except KeyError: - abort(404, message='There is no calculation for %s/%s' % (upload_id, calc_id)) + calc = search.Entry.get(calc_id) + except NotFoundError: + abort(404, message='There is no calculation %s/%s' % (upload_id, calc_id)) + + if calc.with_embargo or not calc.published: + if g.user is None: + abort(401, message='Not logged in to access %s/%s.' 
% (upload_id, calc_id)) + + is_owner = g.user.user_id == 0 + if not is_owner: + for owner in calc.owners: + if owner.user_id == str(g.user.user_id): + is_owner = True + break + if not is_owner: + abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id)) + + return calc.to_dict(), 200 repo_calcs_model = api.model('RepoCalculations', { diff --git a/nomad/api/upload.py b/nomad/api/upload.py index ea0c93f1a665851d1e979824041aeb80de98db1d..acfa3a8c50ead256e1b9489c422e32b75fc04913 100644 --- a/nomad/api/upload.py +++ b/nomad/api/upload.py @@ -366,6 +366,8 @@ class UploadResource(Resource): abort(400, message='The upload is not processed yet') if upload.tasks_status == FAILURE: abort(400, message='Cannot publish an upload that failed processing') + if upload.processed_calcs == 0: + abort(400, message='Cannot publish an upload without calculations') try: upload.metadata = metadata upload.publish_upload() diff --git a/nomad/coe_repo/calc.py b/nomad/coe_repo/calc.py index f76a5689db0cbf09ad4e41ac3fd8883cc337ab18..fcbf1b832758971c6f77583a2e7a95806eb26e2b 100644 --- a/nomad/coe_repo/calc.py +++ b/nomad/coe_repo/calc.py @@ -237,7 +237,7 @@ class Calc(Base): # user relations def add_users_to_relation(source_users, relation): for source_user in source_users: - coe_user = context.cache(User, user_id=source_user.id) + coe_user = context.cache(User, user_id=int(source_user.id)) if coe_user is None: raise IllegalCalcMetadata( 'User with user_id %s does not exist.' % source_user.id) diff --git a/nomad/config.py b/nomad/config.py index c1262f5eaad78da1aa30744f6c5e96b09cda331d..10927eaaabd0572a816ff2434110eaa9d9c115d2 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -145,6 +145,7 @@ console_log_level = logging.WARNING service = 'unknown nomad service' release = 'devel' auxfile_cutoff = 30 +version = '4.3' # TODO replace with git hash? 
def get_loglevel_from_env(key, default_level=logging.INFO): diff --git a/nomad/datamodel.py b/nomad/datamodel.py index 22d8b8fe381550db517120f44f0ec30f28c046b5..3b51af46e4671b457dc360ee75091b8b1132016e 100644 --- a/nomad/datamodel.py +++ b/nomad/datamodel.py @@ -31,7 +31,7 @@ classes. These are the implemented transformations: .. image:: datamodel_transformations.png """ -from typing import Iterable, List +from typing import Iterable, List, Dict import datetime from nomad import utils @@ -47,11 +47,15 @@ class UploadWithMetadata(): self.uploader: utils.POPO = None self.upload_time: datetime.datetime = None - self.calcs: Iterable[CalcWithMetadata] = list() + self.calcs: Iterable['CalcWithMetadata'] = list() for key, value in kwargs.items(): setattr(self, key, value) + @property + def calcs_dict(self) -> Dict[str, 'CalcWithMetadata']: + return {calc.calc_id: calc for calc in self.calcs} + class CalcWithMetadata(): """ @@ -63,12 +67,19 @@ class CalcWithMetadata(): Attributes: upload_id: The ``upload_id`` of the calculations upload (random UUID). calc_id: The unique mainfile based calculation id. - upload_time: The time when the calc was uploaded. calc_hash: The raw file content based checksum/hash of this calculation. pid: The unique persistent id of this calculation. mainfile: The upload relative mainfile path. + files: A list of all files, relative to upload. + upload_time: The time when the calc was uploaded. uploader: An object describing the uploading user, has at least ``user_id`` + processed: Boolean indicating if this calc was successfully processed and archive + data and calc metadata is available. + last_processing: A datetime with the time of the last successful processing. + nomad_version: A string that describes the version of the nomad software that was + used to do the last successful processing. + with_embargo: Show if user set an embargo on the calculation. coauthors: List of coauther user objects with at ``user_id``. 
shared_with: List of users this calcs ownership is shared with, objects with at ``user_id``. @@ -76,6 +87,7 @@ class CalcWithMetadata(): references: Objects describing user provided references, keys are ``id`` and ``value``. datasets: Objects describing the datasets, keys are ``id``, ``name``, ``doi``. DOI is optional, is an object with key ``id``, ``value``. + formula: The chemical formula atoms: A list of all atoms, as labels. All atoms means the whole composition, with atom labels repeated. basis_set: The basis set type of this calculation. @@ -87,16 +99,22 @@ class CalcWithMetadata(): code_version: The version of the used code. """ def __init__(self, **kwargs): + # id relevant metadata self.upload_id: str = None self.calc_id: str = None - - self.upload_time: datetime.datetime = None self.calc_hash: str = None - self.pid: int = None self.mainfile: str = None + self.pid: int = None + + # basic upload and processing related metadata + self.upload_time: datetime.datetime = None self.files: List[str] = None self.uploader: utils.POPO = None + self.processed: bool = False + self.last_processing: datetime.datetime = None + self.nomad_version: str = None + # user metadata, i.e. 
quantities given and editable by the user self.with_embargo: bool = None self.published: bool = False self.coauthors: List[utils.POPO] = [] @@ -105,6 +123,7 @@ class CalcWithMetadata(): self.references: List[utils.POPO] = [] self.datasets: List[utils.POPO] = [] + # DFT specific calc metadata, derived from raw data through successful processing self.formula: str = None self.atoms: List[str] = [] self.basis_set: str = None @@ -116,6 +135,7 @@ class CalcWithMetadata(): self.code_name: str = None self.code_version: str = None + # temporary reference to the backend after successful processing self.backend = None self.update(**kwargs) @@ -128,6 +148,12 @@ class CalcWithMetadata(): def update(self, **kwargs): for key, value in kwargs.items(): + if isinstance(value, list): + if len(value) > 0 and isinstance(value[0], dict) and not isinstance(value[0], utils.POPO): + value = list(utils.POPO(**item) for item in value) + if isinstance(value, dict) and not isinstance(value, utils.POPO): + value = utils.POPO(**value) + setattr(self, key, value) def apply_user_metadata(self, metadata: dict): @@ -139,13 +165,13 @@ class CalcWithMetadata(): self.upload_time = metadata.get('_upload_time') uploader_id = metadata.get('_uploader') if uploader_id is not None: - self.uploader = utils.POPO(id=uploader_id) + self.uploader = utils.POPO(id=int(uploader_id)) self.references = [utils.POPO(value=ref) for ref in metadata.get('references', [])] self.with_embargo = metadata.get('with_embargo', False) self.coauthors = [ - utils.POPO(id=user) for user in metadata.get('coauthors', [])] + utils.POPO(id=int(user)) for user in metadata.get('coauthors', [])] self.shared_with = [ - utils.POPO(id=user) for user in metadata.get('shared_with', [])] + utils.POPO(id=int(user)) for user in metadata.get('shared_with', [])] self.datasets = [ - utils.POPO(id=ds['id'], doi=utils.POPO(value=ds.get('_doi')), name=ds.get('_name')) + utils.POPO(id=int(ds['id']), doi=utils.POPO(value=ds.get('_doi')), 
name=ds.get('_name')) for ds in metadata.get('datasets', [])] diff --git a/nomad/files.py b/nomad/files.py index 2229e82fe299cca03ba212b5aa4232f5f615cf1e..80aca2f485d638148577490047eec2cb86218cac 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -52,8 +52,7 @@ being other mainfiles. Therefore, the aux files of a restricted calc might becom """ from abc import ABCMeta -from typing import IO, Generator, Dict, Iterator, Iterable, Callable -import json +from typing import IO, Generator, Dict, Iterable, Callable import os.path import os import shutil @@ -61,9 +60,9 @@ from zipfile import ZipFile, BadZipFile import tarfile import hashlib import io -import gzip from nomad import config, utils +from nomad.datamodel import UploadWithMetadata class PathObject: @@ -155,117 +154,6 @@ class ExtractError(Exception): pass -class Metadata(metaclass=ABCMeta): - """ - An ABC for upload metadata classes that encapsulates access to a set of calc metadata. - """ - def get(self, calc_id: str) -> dict: - """ Retrive the calc metadata for a given calc. """ - raise NotImplementedError() - - def __iter__(self) -> Iterator[dict]: - raise NotImplementedError() - - def __len__(self) -> int: - raise NotImplementedError() - - -class StagingMetadata(Metadata): - """ - A Metadata implementation based on individual .json files per calc stored in a given - directory. - Arguments: - directory: The DirectoryObject for the directory to store the metadata in. - """ - def __init__(self, directory: DirectoryObject) -> None: - self._dir = directory - - def remove(self, calc: dict) -> None: - id = calc['calc_id'] - path = self._dir.join_file('%s.json' % id) - assert path.exists() - os.remove(path.os_path) - - def insert(self, calc: dict) -> None: - """ Insert a calc, using calc_id as key. 
""" - id = calc['calc_id'] - path = self._dir.join_file('%s.json' % id) - assert not path.exists() - with open(path.os_path, 'wt') as f: - json.dump(calc, f, sort_keys=True, default=str) - - def update(self, calc_id: str, updates: dict) -> dict: - """ Updating a calc, using calc_id as key and running dict update with the given data. """ - metadata = self.get(calc_id) - metadata.update(updates) - path = self._dir.join_file('%s.json' % calc_id) - with open(path.os_path, 'wt') as f: - json.dump(metadata, f, sort_keys=True, default=str) - return metadata - - def get(self, calc_id: str) -> dict: - try: - with open(self._dir.join_file('%s.json' % calc_id).os_path, 'rt') as f: - return json.load(f) - except FileNotFoundError: - raise KeyError() - - def __iter__(self) -> Iterator[dict]: - for root, _, files in os.walk(self._dir.os_path): - for file in files: - with open(os.path.join(root, file), 'rt') as f: - yield json.load(f) - - def __len__(self) -> int: - return len(os.listdir(self._dir.os_path)) - - -class PublicMetadata(Metadata): - """ - A Metadata implementation based on a single .json file. - - Arguments: - path: The parent directory for the metadata and lock file. - """ - def __init__(self, path: str, lock_timeout=1) -> None: - self._db_file = os.path.join(path, 'metadata.json.gz') - self._modified = False - self._data: Dict[str, dict] = None - - @property - def data(self): - if self._data is None: - with gzip.open(self._db_file, 'rt') as f: - self._data = json.load(f) - return self._data - - def _create(self, calcs: Iterable[dict]) -> None: - assert not os.path.exists(self._db_file) and self._data is None - self._data = {data['calc_id']: data for data in calcs} - with gzip.open(self._db_file, 'wt') as f: - json.dump(self._data, f, sort_keys=True, default=str) - - def insert(self, calc: dict) -> None: - assert self.data is not None, "Metadata is not open." 
- - id = calc['calc_id'] - assert id not in self.data - self.data[id] = calc - self._modified = True - - def update(self, calc_id: str, updates: dict) -> dict: - raise NotImplementedError - - def get(self, calc_id: str) -> dict: - return self.data[calc_id] - - def __iter__(self) -> Iterator[dict]: - return self.data.values().__iter__() - - def __len__(self) -> int: - return len(self.data) - - class Restricted(Exception): pass @@ -297,11 +185,6 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): else: return None - @property - def metadata(self) -> Metadata: - """ The calc metadata for this upload. """ - raise NotImplementedError - def raw_file(self, file_path: str, *args, **kwargs) -> IO: """ Opens a raw file and returns a file-like object. Additional args, kwargs are @@ -357,21 +240,12 @@ class StagingUploadFiles(UploadFiles): self._archive_dir = self.join_dir('archive') self._frozen_file = self.join_file('.frozen') - metadata_dir = self.join_dir('metadata') - self._metadata = StagingMetadata(metadata_dir) - self._size = 0 @property def size(self) -> int: return self._size - @property - def metadata(self) -> StagingMetadata: - if not self._is_authorized(): - raise Restricted - return self._metadata - def _file(self, path_object: PathObject, *args, **kwargs) -> IO: try: return open(path_object.os_path, *args, **kwargs) @@ -456,13 +330,14 @@ class StagingUploadFiles(UploadFiles): """ Returns True if this upload is already *bagged*. """ return self._frozen_file.exists() - def pack(self, bagit_metadata: dict = None) -> None: + def pack(self, upload: UploadWithMetadata) -> None: """ Replaces the staging upload data with a public upload record by packing all data into files. It is only available if upload *is_bag*. This is potentially a long running operation. Arguments: - bagit_metadata: Additional data added to the bagit metadata. + upload: The calculation metadata of the upload used to determine what files to + pack and what the embargo situation is. 
""" self.logger.debug('started to pack upload') @@ -491,16 +366,16 @@ class StagingUploadFiles(UploadFiles): # 1. add all public raw files # 1.1 collect all public mainfiles and aux files public_files: Dict[str, str] = {} - for calc in self.metadata: - if not calc.get('with_embargo', False): - mainfile = calc['mainfile'] + for calc in upload.calcs: + if not calc.with_embargo: + mainfile = calc.mainfile assert mainfile is not None for filepath in self.calc_files(mainfile): public_files[filepath] = None # 1.2 remove the non public mainfiles that have been added as auxfiles of public mainfiles - for calc in self.metadata: - if calc.get('with_embargo', False): - mainfile = calc['mainfile'] + for calc in upload.calcs: + if calc.with_embargo: + mainfile = calc.mainfile assert mainfile is not None if mainfile in public_files: del(public_files[mainfile]) @@ -521,13 +396,13 @@ class StagingUploadFiles(UploadFiles): archive_public_zip = create_zipfile('archive', 'public', self._archive_ext) archive_restricted_zip = create_zipfile('archive', 'restricted', self._archive_ext) - for calc in self.metadata: - archive_zip = archive_restricted_zip if calc.get('with_embargo', False) else archive_public_zip + for calc in upload.calcs: + archive_zip = archive_restricted_zip if calc.with_embargo else archive_public_zip - archive_filename = '%s.%s' % (calc['calc_id'], self._archive_ext) + archive_filename = '%s.%s' % (calc.calc_id, self._archive_ext) archive_zip.write(self._archive_dir.join_file(archive_filename).os_path, archive_filename) - archive_log_filename = '%s.%s' % (calc['calc_id'], 'log') + archive_log_filename = '%s.%s' % (calc.calc_id, 'log') log_file = self._archive_dir.join_file(archive_log_filename) if log_file.exists(): archive_zip.write(log_file.os_path, archive_log_filename) @@ -536,11 +411,6 @@ class StagingUploadFiles(UploadFiles): archive_public_zip.close() self.logger.debug('packed archives') - # pack metadata - packed_metadata = PublicMetadata(target_dir.os_path) - 
packed_metadata._create(self._metadata) - self.logger.debug('packed metadata') - self.logger.debug('packed upload') def raw_file_manifest(self, path_prefix: str = None) -> Generator[str, None, None]: @@ -650,12 +520,6 @@ class PublicUploadFiles(UploadFiles): def __init__(self, *args, **kwargs) -> None: super().__init__(config.fs.public, *args, **kwargs) - self._metadata = PublicMetadata(self.os_path) - - @property - def metadata(self) -> Metadata: - return self._metadata - def _file(self, prefix: str, ext: str, path: str, *args, **kwargs) -> IO: mode = kwargs.get('mode') if len(args) == 0 else args[0] if 'mode' in kwargs: diff --git a/nomad/processing/data.py b/nomad/processing/data.py index 66c86a38aefb0a5309d6ce07b18d803a38bcfcf7..594063a5ce4669c5aff92d0d735e1e4ba5512fef 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -24,17 +24,19 @@ calculations, and files :members: """ -from typing import List, Any, ContextManager, Tuple, Generator, Dict +from typing import List, Any, ContextManager, Tuple, Generator, Dict, cast from mongoengine import StringField, DateTimeField, DictField, BooleanField import logging from structlog import wrap_logger from contextlib import contextmanager import os.path +from datetime import datetime +from pymongo import UpdateOne from nomad import utils, coe_repo, config, infrastructure, search -from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagingUploadFiles +from nomad.files import PathObject, UploadFiles, ExtractError, ArchiveBasedStagingUploadFiles, PublicUploadFiles from nomad.processing.base import Proc, process, task, PENDING, SUCCESS, FAILURE -from nomad.parsing import parser_dict, match_parser +from nomad.parsing import parser_dict, match_parser, LocalBackend from nomad.normalizing import normalizers from nomad.datamodel import UploadWithMetadata, CalcWithMetadata @@ -54,12 +56,16 @@ class Calc(Proc): parser: the name of the parser used to process this calc upload_id: the id of the 
upload used to create this calculation mainfile: the mainfile (including path in upload) that was used to create this calc + + metadata: the metadata record with calc and user metadata, see :class:`CalcWithMetadata` """ calc_id = StringField(primary_key=True) upload_id = StringField() mainfile = StringField() parser = StringField() + metadata = DictField() + queue = 'calcs' meta: Any = { @@ -70,7 +76,7 @@ class Calc(Proc): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self._parser_backend = None + self._parser_backend: LocalBackend = None self._upload: Upload = None self._upload_files: ArchiveBasedStagingUploadFiles = None self._calc_proc_logwriter = None @@ -261,14 +267,19 @@ class Calc(Proc): logger = self.get_logger() calc_with_metadata = self._parser_backend.to_calc_with_metadata() + calc_with_metadata.published = False + calc_with_metadata.uploader = self.upload.uploader.to_popo() + calc_with_metadata.processed = True + calc_with_metadata.last_processing = datetime.now() + calc_with_metadata.nomad_version = config.version # persist the repository metadata with utils.timer(logger, 'saved repo metadata', step='metadata'): - self.upload_files.metadata.insert(calc_with_metadata.to_dict()) + self.metadata = calc_with_metadata.to_dict() + self.save() # index in search with utils.timer(logger, 'indexed', step='index'): - calc_with_metadata.update(published=False, uploader=self.upload.uploader.to_popo()) search.Entry.from_calc_with_metadata(calc_with_metadata).save() # persist the archive @@ -318,6 +329,8 @@ class Upload(Proc): metadata = DictField(default=None) upload_time = DateTimeField() user_id = StringField(required=True) + published = BooleanField(default=False) + publish_time = DateTimeField() queue = 'uploads' @@ -332,13 +345,18 @@ class Upload(Proc): self._upload_files: ArchiveBasedStagingUploadFiles = None @classmethod - def get(cls, id): - return cls.get_by_id(id, 'upload_id') + def get(cls, id: str, include_published: bool = 
False) -> 'Upload': + upload = cls.get_by_id(id, 'upload_id') + # TODO published uploads should not be hidden by this and API + if upload is not None and (not upload.published or include_published): + return upload + + raise KeyError() @classmethod def user_uploads(cls, user: coe_repo.User) -> List['Upload']: """ Returns all uploads for the given user. Currently returns all uploads. """ - return cls.objects(user_id=str(user.user_id)) + return cls.objects(user_id=str(user.user_id), published=False) @property def uploader(self): @@ -411,6 +429,8 @@ class Upload(Proc): coe repository db and remove this instance and its calculation from the processing state db. """ + assert self.processed_calcs > 0 + logger = self.get_logger() logger.info('started to publish') @@ -435,15 +455,19 @@ class Upload(Proc): with utils.timer( logger, 'upload metadata updated', step='metadata', upload_size=self.upload_files.size): - for calc in calcs: + + def create_update(calc): calc.published = True - self.upload_files.metadata.update( - calc_id=calc.calc_id, updates=calc.to_dict()) + return UpdateOne( + {'_id': calc.calc_id}, + {'$set': {'metadata': calc.to_dict()}}) + + Calc._get_collection().bulk_write([create_update(calc) for calc in calcs]) with utils.timer( logger, 'staged upload files packed', step='pack', upload_size=self.upload_files.size): - self.upload_files.pack() + self.upload_files.pack(upload_with_metadata) with utils.timer( logger, 'index updated', step='index', @@ -454,9 +478,9 @@ class Upload(Proc): logger, 'staged upload deleted', step='delete staged', upload_size=self.upload_files.size): self.upload_files.delete() - self.delete() - - return True # do not save the process status on the delete upload + self.published = True + self.publish_time = datetime.now() + self.save() @process def process_upload(self): @@ -468,12 +492,20 @@ class Upload(Proc): pass @property - def upload_files(self) -> ArchiveBasedStagingUploadFiles: - if not self._upload_files: - self._upload_files = 
ArchiveBasedStagingUploadFiles( + def upload_files(self) -> UploadFiles: + upload_files_class = ArchiveBasedStagingUploadFiles if not self.published else PublicUploadFiles + + if not self._upload_files or not isinstance(self._upload_files, upload_files_class): + self._upload_files = upload_files_class( self.upload_id, is_authorized=lambda: True, upload_path=self.upload_path) + return self._upload_files + @property + def staging_upload_files(self) -> ArchiveBasedStagingUploadFiles: + assert not self.published + return cast(ArchiveBasedStagingUploadFiles, self.upload_files) + @task def extracting(self): """ @@ -512,9 +544,10 @@ class Upload(Proc): Tuples of mainfile, filename, and parsers """ directories_with_match: Dict[str, str] = dict() - for filename in self.upload_files.raw_file_manifest(): + upload_files = self.staging_upload_files + for filename in upload_files.raw_file_manifest(): try: - parser = match_parser(filename, self.upload_files) + parser = match_parser(filename, upload_files) if parser is not None: directory = os.path.dirname(filename) if directory in directories_with_match: @@ -588,6 +621,9 @@ class Upload(Proc): # don't fail or present this error to clients self.logger.error('could not send after processing email', exc_info=e) + def get_calc(self, calc_id) -> Calc: + return Calc.objects(upload_id=self.upload_id, calc_id=calc_id).first() + @property def processed_calcs(self): return Calc.objects(upload_id=self.upload_id, tasks_status__in=[SUCCESS, FAILURE]).count() @@ -612,12 +648,7 @@ class Upload(Proc): return Calc.objects(upload_id=self.upload_id, tasks_status=SUCCESS) def to_upload_with_metadata(self) -> UploadWithMetadata: - # TODO remove the very verbose metadata after debugging and optimizing - - logger = self.get_logger(step='publish') - # prepare user metadata per upload and per calc - logger.info('prepare user metadata per upload and per calc') calc_metadatas: Dict[str, Any] = dict() upload_metadata: Dict[str, Any] = dict() @@ -635,26 
+666,20 @@ class Upload(Proc): uploader=utils.POPO(id=int(self.user_id)), upload_time=self.upload_time if user_upload_time is None else user_upload_time) - logger.info('read calc data from files and apply user metadata') - upload_files = UploadFiles.get(self.upload_id, is_authorized=lambda: True) - if self.upload_files is None: - raise KeyError - def apply_metadata(calc): - calc_data = upload_files.metadata.get(calc.calc_id) + calc_data = calc.metadata calc_with_metadata = CalcWithMetadata(**calc_data) calc_metadata = dict(upload_metadata) calc_metadata.update(calc_metadatas.get(calc.mainfile, {})) calc_with_metadata.apply_user_metadata(calc_metadata) - logger.debug('prepared calc with metadata', calc_id=calc_with_metadata.calc_id) return calc_with_metadata + # TODO publish failed calcs + # result.calcs = [apply_metadata(calc) for calc in Calc.objects(upload_id=self.upload_id)] result.calcs = [apply_metadata(calc) for calc in self.calcs] - logger.info('prepared user metadata') - return result def __str__(self): diff --git a/tests/conftest.py b/tests/conftest.py index 498ff2c987b98abbe9b17946a9d8dfdc58a66d59..a610a772f1baed7b7f4f9efef271b9f815cfc043 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -474,6 +474,16 @@ def example_upload(request) -> str: return request.param +@pytest.fixture(scope='session') +def non_empty_example_upload(): + return example_file + + +@pytest.fixture(scope='session') +def empty_upload(): + return empty_file + + @pytest.fixture(scope='module') def example_user_metadata(other_test_user, test_user) -> dict: return { @@ -507,10 +517,15 @@ def uploaded(example_upload: str, raw_files) -> Tuple[str, str]: Clears files after test. 
""" example_upload_id = os.path.basename(example_upload).replace('.zip', '') - return example_upload_id, example_upload +@pytest.fixture(scope='function') +def non_empty_uploaded(non_empty_example_upload: str, raw_files) -> Tuple[str, str]: + example_upload_id = os.path.basename(non_empty_example_upload).replace('.zip', '') + return example_upload_id, non_empty_example_upload + + @pytest.mark.timeout(10) @pytest.fixture(scope='function') def processed(uploaded: Tuple[str, str], test_user: coe_repo.User, proc_infra) -> processing.Upload: @@ -520,6 +535,15 @@ def processed(uploaded: Tuple[str, str], test_user: coe_repo.User, proc_infra) - return test_processing.run_processing(uploaded, test_user) +@pytest.mark.timeout(10) +@pytest.fixture(scope='function') +def non_empty_processed(non_empty_uploaded: Tuple[str, str], test_user: coe_repo.User, proc_infra) -> processing.Upload: + """ + Provides a processed upload. Upload was uploaded with test_user. + """ + return test_processing.run_processing(non_empty_uploaded, test_user) + + @pytest.fixture(scope='function', params=[False, True]) def with_publish_to_coe_repo(monkeypatch, request): monkeypatch.setattr('nomad.config.repository_db.publish_enabled', request.param) diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 040baf3f989ae9f2f48c9a1ec7ca384a89c3060c..11c18081e6ff6e4431719b5d17af13d086a0797f 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -92,7 +92,7 @@ def assert_processing(upload: Upload): with upload_files.raw_file(calc.mainfile) as f: f.read() - assert upload_files.metadata.get(calc.calc_id) is not None + assert upload.get_calc(calc.calc_id).metadata is not None def test_processing(processed, no_warn, mails, monkeypatch): @@ -102,10 +102,10 @@ def test_processing(processed, no_warn, mails, monkeypatch): assert re.search(r'Processing completed', mails.messages[0].data.decode('utf-8')) is not None -def test_publish(processed: Upload, no_warn, 
example_user_metadata, monkeypatch, with_publish_to_coe_repo): +def test_publish(non_empty_processed: Upload, no_warn, example_user_metadata, monkeypatch, with_publish_to_coe_repo): + processed = non_empty_processed processed.metadata = example_user_metadata - n_calcs = processed.total_calcs additional_keys = ['with_embargo'] if with_publish_to_coe_repo: additional_keys.append('pid') @@ -116,12 +116,12 @@ def test_publish(processed: Upload, no_warn, example_user_metadata, monkeypatch, except Exception: pass - assert_coe_upload(processed.upload_id, user_metadata=example_user_metadata) - - assert_upload_files( - processed.upload_id, PublicUploadFiles, n_calcs, additional_keys, published=True) + upload = processed.to_upload_with_metadata() + if with_publish_to_coe_repo: + assert_coe_upload(upload.upload_id, user_metadata=example_user_metadata) - assert_search_upload(processed.upload_id, n_calcs, additional_keys, published=True) + assert_upload_files(upload, PublicUploadFiles, additional_keys, published=True) + assert_search_upload(upload, additional_keys, published=True) @pytest.mark.timeout(10) diff --git a/tests/test_api.py b/tests/test_api.py index cf85b84860e1af5d998e9b20825025104ab80312..c888d192d657ef36e1f6f24846b27730c4108f96 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -25,6 +25,7 @@ from datetime import datetime from nomad import coe_repo, search, parsing, files from nomad.files import UploadFiles, PublicUploadFiles from nomad.processing import Upload, Calc, SUCCESS +from nomad.datamodel import UploadWithMetadata, CalcWithMetadata from tests.conftest import create_auth_headers, clear_elastic from tests.test_files import example_file, example_file_mainfile, example_file_contents @@ -45,6 +46,14 @@ def test_user_signature_token(client, test_user_auth): return json.loads(rv.data)['token'] +def get_upload_with_metadata(upload: dict) -> UploadWithMetadata: + """ Create a :class:`UploadWithMetadata` from a API upload json record. 
""" + return UploadWithMetadata( + upload_id=upload['upload_id'], calcs=[ + CalcWithMetadata(calc_id=calc['calc_id'], mainfile=calc['mainfile']) + for calc in upload['calcs']['results']]) + + class TestAdmin: @pytest.mark.timeout(10) @@ -194,14 +203,7 @@ class TestUploads: upload_endpoint = '/uploads/%s' % upload_id # poll until completed - while True: - time.sleep(0.1) - rv = client.get(upload_endpoint, headers=test_user_auth) - assert rv.status_code == 200 - upload = self.assert_upload(rv.data) - assert 'upload_time' in upload - if not upload['tasks_running']: - break + upload = self.block_until_completed(client, upload_id, test_user_auth) assert len(upload['tasks']) == 4 assert upload['tasks_status'] == SUCCESS @@ -209,7 +211,6 @@ class TestUploads: assert not upload['process_running'] calcs = upload['calcs']['results'] - n_calcs = upload['calcs']['pagination']['total'] for calc in calcs: assert calc['tasks_status'] == SUCCESS assert calc['current_task'] == 'archiving' @@ -222,13 +223,15 @@ class TestUploads: upload = self.assert_upload(rv.data) assert len(upload['calcs']['results']) == 1 - assert_upload_files(upload_id, files.StagingUploadFiles, n_calcs) - assert_search_upload(upload_id, n_calcs) + upload_with_metadata = get_upload_with_metadata(upload) + assert_upload_files(upload_with_metadata, files.StagingUploadFiles) + assert_search_upload(upload_with_metadata) - def assert_published(self, client, test_user_auth, upload_id, proc_infra, with_pid=True, metadata={}): + def assert_published(self, client, test_user_auth, upload_id, proc_infra, with_coe_repo=True, metadata={}): rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth) upload = self.assert_upload(rv.data) - n_calcs = upload['calcs']['pagination']['total'] + + upload_with_metadata = get_upload_with_metadata(upload) rv = client.post( '/uploads/%s' % upload_id, @@ -241,25 +244,32 @@ class TestUploads: assert upload['process_running'] additional_keys = ['with_embargo'] - if with_pid: + if 
with_coe_repo: additional_keys.append('pid') - self.assert_upload_does_not_exist(client, upload_id, test_user_auth) - assert_coe_upload(upload_id, user_metadata=metadata) - assert_upload_files(upload_id, files.PublicUploadFiles, n_calcs, additional_keys=additional_keys, published=True) - assert_search_upload(upload_id, n_calcs, additional_keys=additional_keys, published=True) - def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth): - # poll until publish/delete completed + self.block_until_completed(client, upload_id, test_user_auth) + upload_proc = Upload.objects(upload_id=upload_id).first() + assert upload_proc is not None + assert upload_proc.published is True + + if with_coe_repo: + assert_coe_upload(upload_with_metadata.upload_id, user_metadata=metadata) + assert_upload_files(upload_with_metadata, files.PublicUploadFiles, additional_keys=additional_keys, published=True) + assert_search_upload(upload_with_metadata, additional_keys=additional_keys, published=True) + + def block_until_completed(self, client, upload_id: str, test_user_auth): while True: time.sleep(0.1) rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth) if rv.status_code == 200: upload = self.assert_upload(rv.data) - assert upload['process_running'] + if not upload['process_running'] and not upload['tasks_running']: + return upload elif rv.status_code == 404: - break - else: - assert False + return None + + def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth): + self.block_until_completed(client, upload_id, test_user_auth) rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth) assert rv.status_code == 404 @@ -336,7 +346,7 @@ class TestUploads: rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth) upload = self.assert_upload(rv.data) self.assert_processing(client, test_user_auth, upload['upload_id']) - self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, 
with_pid=with_publish_to_coe_repo) + self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_coe_repo=with_publish_to_coe_repo) rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth) assert rv.status_code == 404 @@ -348,11 +358,23 @@ class TestUploads: assert rv.status_code == 200 self.assert_upload_does_not_exist(client, upload['upload_id'], test_user_auth) - def test_post(self, client, test_user_auth, example_upload, proc_infra, no_warn, with_publish_to_coe_repo): - rv = client.put('/uploads/?local_path=%s' % example_upload, headers=test_user_auth) + def test_post_empty(self, client, test_user_auth, empty_upload, proc_infra, no_warn): + rv = client.put('/uploads/?local_path=%s' % empty_upload, headers=test_user_auth) + assert rv.status_code == 200 + upload = self.assert_upload(rv.data) + self.assert_processing(client, test_user_auth, upload['upload_id']) + rv = client.post( + '/uploads/%s' % upload['upload_id'], headers=test_user_auth, + data=json.dumps(dict(operation='publish')), + content_type='application/json') + assert rv.status_code == 400 + + def test_post(self, client, test_user_auth, non_empty_example_upload, proc_infra, no_warn, with_publish_to_coe_repo): + rv = client.put('/uploads/?local_path=%s' % non_empty_example_upload, headers=test_user_auth) + assert rv.status_code == 200 upload = self.assert_upload(rv.data) self.assert_processing(client, test_user_auth, upload['upload_id']) - self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_pid=with_publish_to_coe_repo) + self.assert_published(client, test_user_auth, upload['upload_id'], proc_infra, with_coe_repo=with_publish_to_coe_repo) def test_post_metadata( self, client, proc_infra, admin_user_auth, test_user_auth, test_user, @@ -468,9 +490,9 @@ class UploadFilesBasedTests: calc_specs = 'r' if restricted else 'p' if in_staging: Upload.create(user=test_user, upload_id='test_upload') - upload_files = 
create_staging_upload('test_upload', calc_specs=calc_specs) + _, upload_files = create_staging_upload('test_upload', calc_specs=calc_specs) else: - upload_files = create_public_upload('test_upload', calc_specs=calc_specs) + _, upload_files = create_public_upload('test_upload', calc_specs=calc_specs) postgres.begin() coe_upload = coe_repo.Upload( upload_name='test_upload', @@ -518,12 +540,11 @@ class TestArchive(UploadFilesBasedTests): assert rv.status_code == 200 -class TestRepo(UploadFilesBasedTests): +class TestRepo(): @pytest.fixture(scope='class') def example_elastic_calcs( self, elastic_infra, normalized: parsing.LocalBackend, test_user: coe_repo.User, other_test_user: coe_repo.User): - clear_elastic(elastic_infra) calc_with_metadata = normalized.to_calc_with_metadata() @@ -541,14 +562,24 @@ class TestRepo(UploadFilesBasedTests): calc_with_metadata.update(calc_id='4', uploader=other_test_user.to_popo(), published=True, with_embargo=True) search.Entry.from_calc_with_metadata(calc_with_metadata).save(refresh=True) - @UploadFilesBasedTests.ignore_authorization - def test_calc(self, client, upload, auth_headers): - rv = client.get('/repo/%s/0' % upload, headers=auth_headers) + def test_own_calc(self, client, example_elastic_calcs, no_warn, test_user_auth): + rv = client.get('/repo/0/1', headers=test_user_auth) assert rv.status_code == 200 - @UploadFilesBasedTests.ignore_authorization - def test_non_existing_calcs(self, client, upload, auth_headers): - rv = client.get('/repo/doesnt/exist', headers=auth_headers) + def test_public_calc(self, client, example_elastic_calcs, no_warn, other_test_user_auth): + rv = client.get('/repo/0/1', headers=other_test_user_auth) + assert rv.status_code == 200 + + def test_embargo_calc(self, client, example_elastic_calcs, no_warn, test_user_auth): + rv = client.get('/repo/0/4', headers=test_user_auth) + assert rv.status_code == 401 + + def test_staging_calc(self, client, example_elastic_calcs, no_warn, test_user_auth): + rv = 
client.get('/repo/0/3', headers=test_user_auth) + assert rv.status_code == 401 + + def test_non_existing_calcs(self, client, example_elastic_calcs, test_user_auth): + rv = client.get('/repo/0/10', headers=test_user_auth) assert rv.status_code == 404 @pytest.mark.parametrize('calcs, owner, auth', [ diff --git a/tests/test_client.py b/tests/test_client.py index d435be08ab87c0ac2e43a7939217aacb4bfa9e4f..ed1c976309107fc0c6cfd3c9d55268cb8868cf76 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -15,8 +15,10 @@ import time from nomad.processing import SUCCESS +from nomad.datamodel import CalcWithMetadata -from tests.test_files import example_file, create_public_upload +from tests.test_files import example_file +from tests.test_search import create_entry def test_get_upload_command(bravado, no_warn): @@ -34,8 +36,8 @@ def test_upload(bravado, proc_infra, no_warn): assert upload.tasks_status == SUCCESS -def test_get_repo_calc(bravado, raw_files, no_warn): - create_public_upload('test_upload', 'pp') +def test_get_repo_calc(bravado, proc_infra, raw_files): + create_entry(CalcWithMetadata(calc_id=0, upload_id='test_upload', published=True, with_embargo=False)) repo = bravado.repo.get_repo_calc(upload_id='test_upload', calc_id='0').response().result assert repo is not None assert repo['calc_id'] is not None diff --git a/tests/test_coe_repo.py b/tests/test_coe_repo.py index 34bc0cde84ff6660ecc65f1ec9a11e0917a69268..e2491c357855b2d7aef4f3e80c4a7ece694c6f34 100644 --- a/tests/test_coe_repo.py +++ b/tests/test_coe_repo.py @@ -45,7 +45,7 @@ def test_password_authorize(test_user): assert_user(user, test_user) -def assert_coe_upload(upload_id, upload: datamodel.UploadWithMetadata = None, user_metadata: dict = None): +def assert_coe_upload(upload_id: str, upload: datamodel.UploadWithMetadata = None, user_metadata: dict = None): coe_upload = Upload.from_upload_id(upload_id) if upload is not None: @@ -123,14 +123,14 @@ def test_add_normalized_calc_with_metadata( def 
test_add_upload(processed: processing.Upload): upload_with_metadata = processed.to_upload_with_metadata() Upload.publish(upload_with_metadata) - assert_coe_upload(processed.upload_id, upload_with_metadata) + assert_coe_upload(upload_with_metadata.upload_id, upload_with_metadata) def test_delete_upload(processed: processing.Upload, example_user_metadata_with_dataset, no_warn): processed.metadata = example_user_metadata_with_dataset upload_with_metadata = processed.to_upload_with_metadata() Upload.publish(upload_with_metadata) - assert_coe_upload(processed.upload_id, upload_with_metadata) + assert_coe_upload(upload_with_metadata.upload_id, upload_with_metadata) for calc in upload_with_metadata.calcs: assert Calc.from_calc_id(calc.calc_id) is not None @@ -172,8 +172,7 @@ def test_add_upload_with_metadata(processed, example_user_metadata_with_dataset) processed.metadata = example_user_metadata_with_dataset upload_with_metadata = processed.to_upload_with_metadata() Upload.publish(upload_with_metadata) - assert_coe_upload( - processed.upload_id, upload_with_metadata) + assert_coe_upload(upload_with_metadata.upload_id, upload_with_metadata) @pytest.mark.parametrize('crypted', [True, False]) diff --git a/tests/test_datamodel.py b/tests/test_datamodel.py index 9eb04cac62edc729d80ba372f4e20606f5024fcc..40c65399d231f2769ce7bfe54b4b7664e46a245b 100644 --- a/tests/test_datamodel.py +++ b/tests/test_datamodel.py @@ -120,16 +120,17 @@ if __name__ == '__main__': for calcs_per_upload in utils.chunks(range(0, n_calcs), int(n_calcs / n_uploads)): upload_id = utils.create_uuid() + upload = datamodel.UploadWithMetadata(upload_id=upload_id) upload_files = files.StagingUploadFiles( upload_id=upload_id, create=True, is_authorized=lambda: True) search_entries = [] + calcs = [] for _ in calcs_per_upload: calc = generate_calc(pid, upload_id=upload_id) assert calc.upload_id == upload_id calc.published = True - upload_files.metadata.insert(calc.to_dict()) for filepath in calc.files: if 
len(filepath) > 0: with upload_files.raw_file(filepath, 'wt') as f: @@ -147,9 +148,13 @@ if __name__ == '__main__': search_entries.append(search_entry) pid += 1 + calcs.append(calc) + + upload.calcs = calcs bulk( infrastructure.elastic_client, [entry.to_dict(include_meta=True) for entry in search_entries]) - upload_files.pack() + + upload_files.pack(upload) upload_files.delete() diff --git a/tests/test_files.py b/tests/test_files.py index c60bc7cf7cfb63c814fb10f57c0eb62337476297..ec12604a16223762d05b1cbb360eafe7ae57a724 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Generator, Any, Dict, List +from typing import Generator, Any, Dict, List, Tuple import os import os.path import shutil @@ -23,12 +23,17 @@ import zipfile import re from nomad import config +from nomad.datamodel import UploadWithMetadata, CalcWithMetadata from nomad.files import DirectoryObject, PathObject -from nomad.files import Metadata, PublicMetadata, StagingMetadata from nomad.files import StagingUploadFiles, PublicUploadFiles, UploadFiles, Restricted, \ ArchiveBasedStagingUploadFiles +CalcWithFiles = Tuple[CalcWithMetadata, str] +UploadWithFiles = Tuple[UploadWithMetadata, UploadFiles] +StagingUploadWithFiles = Tuple[UploadWithMetadata, StagingUploadFiles] +PublicUploadWithFiles = Tuple[UploadWithMetadata, PublicUploadFiles] + # example_file uses an artificial parser for faster test execution, can also be # changed to examples_vasp.zip for using vasp parser example_file = 'tests/data/proc/examples_template.zip' @@ -46,6 +51,12 @@ example_bucket = 'test_bucket' example_data = dict(test_key='test_value') +@pytest.fixture(scope='function', autouse=True) +def raw_files_on_all_tests(raw_files): + """ Autouse fixture to apply raw_files to all tests. 
""" + pass + + class TestObjects: @pytest.fixture(scope='function') @@ -108,8 +119,13 @@ example_calc: Dict[str, Any] = { example_calc_id = example_calc['calc_id'] -def generate_example_calc(calc_id: int, with_mainfile_prefix: bool, subdirectory: str = None, **kwargs): - example_calc = dict(calc_id=str(calc_id), data='value') +def generate_example_calc( + calc_id: int, with_mainfile_prefix: bool, subdirectory: str = None, + **kwargs) -> CalcWithFiles: + """ Generate an example calc with :class:`CalcWithMetadata` and rawfile. """ + + example_calc = CalcWithMetadata(calc_id=str(calc_id)) + if with_mainfile_prefix: mainfile = '%d.template.json' % calc_id else: @@ -118,7 +134,7 @@ def generate_example_calc(calc_id: int, with_mainfile_prefix: bool, subdirectory if subdirectory is not None: mainfile = os.path.join(subdirectory, mainfile) - example_calc['mainfile'] = mainfile + example_calc.mainfile = mainfile example_calc.update(**kwargs) example_file = os.path.join(config.fs.tmp, 'example.zip') @@ -159,86 +175,6 @@ def assert_example_calc(calc): assert calc['data'] == example_calc['data'] -class MetadataContract: - @pytest.fixture(scope='function') - def test_dir(self): - path = os.path.join(config.fs.tmp, 'test_dir') - os.makedirs(path) - yield path - shutil.rmtree(path) - - @pytest.fixture(scope='function') - def md(self, test_dir): - raise NotImplementedError() - - def test_get(self, md: Metadata): - assert_example_calc(md.get(example_calc_id)) - - def test_get_fail(self, md: Metadata): - failed = False - try: - md.get('unknown') - except KeyError: - failed = True - assert failed - - -class TestStagingMetadata(MetadataContract): - @pytest.fixture(scope='function') - def md(self, test_dir): - md = StagingMetadata(DirectoryObject(None, None, os_path=test_dir)) - md.insert(example_calc) - return md - - def test_remove(self, md: StagingMetadata): - md.remove(example_calc) - failed = False - try: - assert md.get(example_calc['calc_id']) - except KeyError: - failed = True 
- assert failed - - def test_insert(self, md: StagingMetadata): - md.remove(example_calc) - md.insert(example_calc) - assert len(md) == 1 - assert_example_calc(md.get(example_calc_id)) - - def test_insert_fail(self, md: StagingMetadata): - failed = False - try: - md.insert(example_calc) - except Exception: - failed = True - - assert failed - assert len(md) == 1 - - def test_update(self, md: StagingMetadata): - md.update(example_calc_id, dict(data='updated')) - assert len(md) == 1 - assert md.get(example_calc_id)['data'] == 'updated' - - def test_update_fail(self, md: StagingMetadata): - failed = False - try: - md.update('unknown', dict(data='updated')) - except KeyError: - failed = True - assert failed - assert len(md) == 1 - - -class TestPublicMetadata(MetadataContract): - - @pytest.fixture(scope='function') - def md(self, test_dir): - md = PublicMetadata(test_dir) - md._create([example_calc]) - return md - - class UploadFilesFixtures: @pytest.fixture(scope='function') @@ -257,54 +193,54 @@ class UploadFilesFixtures: class UploadFilesContract(UploadFilesFixtures): @pytest.fixture(scope='function', params=['r']) - def test_upload(self, request, test_upload_id) -> UploadFiles: + def test_upload(self, request, test_upload_id) -> UploadWithFiles: raise NotImplementedError() @pytest.fixture(scope='function') - def empty_test_upload(self, test_upload_id) -> Generator[UploadFiles, None, None]: + def empty_test_upload(self, test_upload_id) -> UploadFiles: raise NotImplementedError() def test_create(self, empty_test_upload): assert UploadFiles.get(empty_test_upload.upload_id).__class__ == empty_test_upload.__class__ - def test_rawfile(self, test_upload): - assert len(test_upload.metadata) > 0 - for calc in test_upload.metadata: + def test_rawfile(self, test_upload: UploadWithFiles): + upload, upload_files = test_upload + for calc in upload.calcs: try: - with test_upload.raw_file(calc['mainfile']) as f: + with upload_files.raw_file(calc.mainfile) as f: assert len(f.read()) 
> 0 - if not test_upload._is_authorized(): - assert not test_upload.metadata.get(calc['calc_id']).get('with_embargo', False) + if not upload_files._is_authorized(): + assert not calc.with_embargo except Restricted: - assert not test_upload._is_authorized() - assert test_upload.metadata.get(calc['calc_id']).get('with_embargo', False) + assert not upload_files._is_authorized() + assert calc.with_embargo @pytest.mark.parametrize('prefix', [None, 'examples']) - def test_raw_file_manifest(self, test_upload: StagingUploadFiles, prefix: str): - raw_files = list(test_upload.raw_file_manifest(path_prefix=prefix)) + def test_raw_file_manifest(self, test_upload: UploadWithFiles, prefix: str): + _, upload_files = test_upload + raw_files = list(upload_files.raw_file_manifest(path_prefix=prefix)) assert_example_files(raw_files) @pytest.mark.parametrize('test_logs', [True, False]) - def test_archive(self, test_upload, test_logs: bool): + def test_archive(self, test_upload: UploadWithFiles, test_logs: bool): + upload, upload_files = test_upload + calcs = upload.calcs_dict try: if test_logs: - with test_upload.archive_log_file(example_calc_id, 'rt') as f: + with upload_files.archive_log_file(example_calc_id, 'rt') as f: assert f.read() == 'archive' else: - f = test_upload.archive_file(example_calc_id, 'rt') + f = upload_files.archive_file(example_calc_id, 'rt') assert json.load(f) == 'archive' - if not test_upload._is_authorized(): - assert not test_upload.metadata.get(example_calc_id).get('with_embargo', False) + if not upload_files._is_authorized(): + assert not calcs.get(example_calc_id).with_embargo except Restricted: - assert not test_upload._is_authorized() - assert test_upload.metadata.get(example_calc_id).get('with_embargo', False) - - def test_metadata(self, test_upload): - assert_example_calc(test_upload.metadata.get(example_calc_id)) + assert not upload_files._is_authorized() + assert calcs.get(example_calc_id).with_embargo -def create_staging_upload(upload_id: str, 
calc_specs: str) -> StagingUploadFiles: +def create_staging_upload(upload_id: str, calc_specs: str) -> StagingUploadWithFiles: """ Create an upload according to given spec. Additional arguments are given to the StagingUploadFiles contstructor. @@ -317,7 +253,9 @@ def create_staging_upload(upload_id: str, calc_specs: str) -> StagingUploadFiles First calc is at top level, following calcs will be put under 1/, 2/, etc. All calcs with capital `P`/`R` will be put in the same directory under multi/. """ - upload = StagingUploadFiles(upload_id, create=True, is_authorized=lambda: True) + upload_files = StagingUploadFiles(upload_id, create=True, is_authorized=lambda: True) + upload = UploadWithMetadata(upload_id=upload_id) + calcs = [] prefix = 0 for calc_spec in calc_specs: @@ -329,34 +267,34 @@ def create_staging_upload(upload_id: str, calc_specs: str) -> StagingUploadFiles else: directory = os.path.join(str(prefix), 'examples_template') - calc, upload_file = generate_example_calc( + calc, calc_file = generate_example_calc( prefix, with_mainfile_prefix=is_multi, subdirectory=directory, with_embargo=calc_spec == 'r') - calc_id = calc['calc_id'] - upload.add_rawfiles(upload_file) + upload_files.add_rawfiles(calc_file) - with upload.archive_file(calc_id, 'wt') as f: + with upload_files.archive_file(calc.calc_id, 'wt') as f: f.write('"archive"') - with upload.archive_log_file(calc_id, 'wt') as f: + with upload_files.archive_log_file(calc.calc_id, 'wt') as f: f.write('archive') - upload.metadata.insert(calc) + calcs.append(calc) prefix += 1 - assert len(upload.metadata) == len(calc_specs) - return upload + assert len(calcs) == len(calc_specs) + upload.calcs = calcs + return upload, upload_files class TestStagingUploadFiles(UploadFilesContract): @pytest.fixture(scope='function', params=['r', 'rr', 'pr', 'rp', 'p', 'pp', 'RP', 'RR', 'PP']) - def test_upload(self, request, test_upload_id: str) -> StagingUploadFiles: + def test_upload(self, request, test_upload_id: str) -> 
StagingUploadWithFiles: return create_staging_upload(test_upload_id, calc_specs=request.param) @pytest.fixture(scope='function') - def empty_test_upload(self, test_upload_id) -> Generator[UploadFiles, None, None]: - yield StagingUploadFiles(test_upload_id, create=True, is_authorized=lambda: True) + def empty_test_upload(self, test_upload_id) -> UploadFiles: + return StagingUploadFiles(test_upload_id, create=True, is_authorized=lambda: True) @pytest.mark.parametrize('prefix', [None, 'prefix']) def test_add_rawfiles_zip(self, test_upload_id, prefix): @@ -369,29 +307,30 @@ class TestStagingUploadFiles(UploadFilesContract): if filepath == example_file_mainfile: assert len(content) > 0 - def test_write_archive(self, test_upload): - assert json.load(test_upload.archive_file(example_calc_id, 'rt')) == 'archive' + def test_write_archive(self, test_upload: StagingUploadWithFiles): + _, upload_files = test_upload + assert json.load(upload_files.archive_file(example_calc_id, 'rt')) == 'archive' - def test_calc_id(self, test_upload): - assert test_upload.calc_id(example_file_mainfile) is not None + def test_calc_id(self, test_upload: StagingUploadWithFiles): + _, upload_files = test_upload + assert upload_files.calc_id(example_file_mainfile) is not None - def test_pack(self, test_upload): - test_upload.pack() + def test_pack(self, test_upload: StagingUploadWithFiles): + upload, upload_files = test_upload + upload_files.pack(upload) @pytest.mark.parametrize('with_mainfile', [True, False]) - def test_calc_files(self, test_upload: StagingUploadFiles, with_mainfile): - for calc in test_upload.metadata: - mainfile = calc['mainfile'] - calc_files = test_upload.calc_files(mainfile, with_mainfile=with_mainfile) + def test_calc_files(self, test_upload: StagingUploadWithFiles, with_mainfile): + upload, upload_files = test_upload + for calc in upload.calcs: + mainfile = calc.mainfile + calc_files = upload_files.calc_files(mainfile, with_mainfile=with_mainfile) 
assert_example_files(calc_files, with_mainfile=with_mainfile) - def test_delete(self, test_upload: StagingUploadFiles): - test_upload.delete() - assert not test_upload.exists() - - def test_update_metadata(self, test_upload): - test_upload.metadata.update(example_calc_id, dict(data='updated')) - test_upload.metadata.get(example_calc_id)['data'] == 'updated' + def test_delete(self, test_upload: StagingUploadWithFiles): + _, upload_files = test_upload + upload_files.delete() + assert not upload_files.exists() class TestArchiveBasedStagingUploadFiles(UploadFilesFixtures): @@ -409,32 +348,37 @@ class TestArchiveBasedStagingUploadFiles(UploadFilesFixtures): test_upload_id, create=True, upload_path='does not exist').is_valid -def create_public_upload(upload_id: str, calc_specs: str, **kwargs): - staging_upload = create_staging_upload(upload_id, calc_specs) - staging_upload.pack() - staging_upload.delete() - return PublicUploadFiles(upload_id, **kwargs) +def create_public_upload( + upload_id: str, calc_specs: str, **kwargs) -> PublicUploadWithFiles: + + upload, upload_files = create_staging_upload(upload_id, calc_specs) + upload_files.pack(upload) + upload_files.delete() + return upload, PublicUploadFiles(upload_id, **kwargs) class TestPublicUploadFiles(UploadFilesContract): @pytest.fixture(scope='function') - def empty_test_upload(self, test_upload_id: str) -> Generator[UploadFiles, None, None]: - yield create_public_upload(test_upload_id, calc_specs='', is_authorized=lambda: True) + def empty_test_upload(self, test_upload_id: str) -> UploadFiles: + _, upload_files = create_public_upload( + test_upload_id, calc_specs='', is_authorized=lambda: True) + + return upload_files @pytest.fixture(scope='function', params=itertools.product( ['r', 'rr', 'pr', 'rp', 'p', 'pp', 'RP', 'RR', 'PP'], [True, False])) - def test_upload(self, request, test_upload_id: str) -> PublicUploadFiles: + def test_upload(self, request, test_upload_id: str) -> PublicUploadWithFiles: calc_specs, 
protected = request.param - staging_upload = create_staging_upload(test_upload_id, calc_specs=calc_specs) - staging_upload.pack() - return PublicUploadFiles(test_upload_id, is_authorized=lambda: not protected) + upload, upload_files = create_staging_upload(test_upload_id, calc_specs=calc_specs) + upload_files.pack(upload) + return upload, PublicUploadFiles(test_upload_id, is_authorized=lambda: not protected) -def assert_upload_files(upload_id: str, cls, n_calcs: int, additional_keys: List[str] = [], **kwargs): +def assert_upload_files( + upload: UploadWithMetadata, cls, additional_keys: List[str] = [], **kwargs): """ - Asserts the files and search index aspect of uploaded data after processing - or publishing + Asserts the files aspect of uploaded data after processing or publishing Arguments: upload_id: The id of the upload to assert @@ -443,31 +387,19 @@ def assert_upload_files(upload_id: str, cls, n_calcs: int, additional_keys: List additional_keys: Keys that each calc metadata should have **kwargs: Key, value pairs that each calc metadata should have """ - keys = ['calc_id', 'upload_id', 'mainfile', 'calc_hash'] - upload_files = UploadFiles.get(upload_id, is_authorized=lambda: True) + upload_files = UploadFiles.get(upload.upload_id, is_authorized=lambda: True) assert upload_files is not None assert isinstance(upload_files, cls) - upload_metadata = upload_files.metadata - assert len(upload_metadata) == n_calcs - for calc_metadata in upload_metadata: - assert 'calc_hash' in calc_metadata - for key in keys: - assert key in calc_metadata - for additional_key in additional_keys: - assert additional_key in calc_metadata - for key, value in kwargs.items(): - assert calc_metadata[key] == value - - upload_files = UploadFiles.get(upload_id) - for calc_metadata in upload_metadata: + upload_files = UploadFiles.get(upload.upload_id) + for calc in upload.calcs: try: - with upload_files.raw_file(calc_metadata['mainfile']) as f: + with upload_files.raw_file(calc.mainfile) as 
f: f.read() - with upload_files.archive_file(calc_metadata['calc_id']) as f: + with upload_files.archive_file(calc.calc_id) as f: f.read() - with upload_files.archive_log_file(calc_metadata['calc_id']) as f: + with upload_files.archive_log_file(calc.calc_id) as f: f.read() - assert not calc_metadata.get('with_embargo', False) and isinstance(upload_files, PublicUploadFiles) + assert not calc.with_embargo and isinstance(upload_files, PublicUploadFiles) except Restricted: - assert calc_metadata.get('with_embargo', False) or isinstance(upload_files, StagingUploadFiles) + assert calc.with_embargo or isinstance(upload_files, StagingUploadFiles) diff --git a/tests/test_migration.py b/tests/test_migration.py index c9e8a3c010a25cf9d8f2db1810b09857afab622c..7fd8d2bb631969fc649a578dfb99213201a2e409 100644 --- a/tests/test_migration.py +++ b/tests/test_migration.py @@ -21,7 +21,7 @@ import glob from io import StringIO import bravado.exception -from nomad import infrastructure, coe_repo, utils, files +from nomad import infrastructure, coe_repo, utils, files, processing from nomad.migration import NomadCOEMigration, SourceCalc, Package from nomad.infrastructure import repository_db_connection @@ -308,10 +308,12 @@ def perform_migration_test(migrate_infra, name, test_directory, assertions, kwar migrate_infra.two_client.raw.get( upload_id=calc_1['upload_id'], path=calc_1['mainfile']).response().result + upload_proc = processing.Upload.get(calc_1['upload_id'], include_published=True) + upload_with_metadata = upload_proc.to_upload_with_metadata() assert_search_upload( - calc_1['upload_id'], 2, additional_keys=['with_embargo', 'pid'], published=True) + upload_with_metadata, additional_keys=['with_embargo', 'pid'], published=True) assert_upload_files( - calc_1['upload_id'], files.PublicUploadFiles, 2, additional_keys=['with_embargo', 'pid'], published=True) + upload_with_metadata, files.PublicUploadFiles, additional_keys=['with_embargo', 'pid'], published=True) def 
test_skip_on_same_version(migrate_infra, monkeypatch, caplog): diff --git a/tests/test_search.py b/tests/test_search.py index 6e017b76f458f7e27768393607c66f6dc8828daa..4e875446e19deef57b8190504adf2a58b0503aa6 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -123,11 +123,11 @@ def assert_entry(calc_id): assert results[0]['calc_id'] == calc_id -def assert_search_upload(upload_id, n_calcs: int, additional_keys: List[str] = [], **kwargs): +def assert_search_upload(upload: datamodel.UploadWithMetadata, additional_keys: List[str] = [], **kwargs): keys = ['calc_id', 'upload_id', 'mainfile', 'calc_hash'] refresh_index() search = Entry.search().query('match_all')[0:10] - assert search.count() == n_calcs + assert search.count() == len(list(upload.calcs)) if search.count() > 0: for hit in search: hit = hit.to_dict()