diff --git a/nomad/api/admin.py b/nomad/api/admin.py
index 02b48ca587c671b6d6e3868fdef66add23a6142b..ea6a8d006b9639fd54683145fdcde4d3e127532c 100644
--- a/nomad/api/admin.py
+++ b/nomad/api/admin.py
@@ -27,7 +27,6 @@ ns = api.namespace('admin', description='Administrative operations')
 @ns.route('/<string:operation>')
 @api.doc(params={'operation': 'The operation to perform.'})
 class AdminOperationsResource(Resource):
-    # TODO in production this requires authorization
     @api.doc('exec_admin_command')
     @api.response(200, 'Operation performed')
     @api.response(404, 'Operation does not exist')
diff --git a/nomad/api/app.py b/nomad/api/app.py
index b621b6b397afa0c4239a8a84aa515163d45bfa1a..cea33721d85a20bb364068630bda5e745c84dcd8 100644
--- a/nomad/api/app.py
+++ b/nomad/api/app.py
@@ -22,8 +22,9 @@ from flask_cors import CORS
 from werkzeug.exceptions import HTTPException
 from werkzeug.wsgi import DispatcherMiddleware
 import os.path
+import inspect
 
-from nomad import config
+from nomad import config, utils
 
 base_path = config.services.api_base_path
 """ Provides the root path of the nomad APIs. """
@@ -59,11 +60,12 @@ api = Api(
 """ Provides the flask restplust api instance """
 
 
-@app.errorhandler(HTTPException)
-def handle(error):
+@app.errorhandler(Exception)
+@api.errorhandler(Exception)
+def handle(error: Exception):
     status_code = getattr(error, 'code', 500)
     name = getattr(error, 'name', 'Internal Server Error')
-    description = getattr(error, 'description', None)
+    description = getattr(error, 'description', 'No description available')
     data = dict(
         code=status_code,
         name=name,
@@ -71,4 +73,40 @@ def handle(error):
     data.update(getattr(error, 'data', []))
     response = jsonify(data)
     response.status_code = status_code
+    if status_code == 500:
+        utils.get_logger(__name__).error('internal server error', exc_info=error)
     return response
+
+
+def with_logger(func):
+    """
+    Decorator for endpoint implementations that provides a pre-configured logger and
+    automatically logs errors on all 500 responses.
+    """
+    signature = inspect.signature(func)
+    has_logger = 'logger' in signature.parameters
+    wrapper_signature = signature.replace(parameters=tuple(
+        param for param in signature.parameters.values()
+        if param.name != 'logger'
+    ))
+
+    def wrapper(*args, **kwargs):
+        if has_logger:
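+            # note: getcallargs binds the call against wrapper.__signature__ (set
+            # below), i.e. the decorated signature without 'logger', so the
+            # endpoint's arguments are resolved under their real parameter names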
""" -# TODO implement restrictions based on user, permissions, and upload/calc metadata - import os.path from zipfile import ZIP_DEFLATED, ZIP_STORED @@ -33,12 +31,6 @@ from .auth import login_if_available, create_authorization_predicate ns = api.namespace('raw', description='Downloading raw data files.') -def fix_file_paths(path): - """ Removed the leading data from file paths that where given in mainfile uris. """ - # TODO, mainfile URI's should change or this implementation should change - return path[5:] - - raw_file_compress_argument = dict( name='compress', type=bool, help='Use compression on .zip files, default is not.', location='args') @@ -68,7 +60,7 @@ class RawFileFromPathResource(Resource): Zip files are streamed; instead of 401 errors, the zip file will just not contain any files that the user is not authorized to access. """ - upload_filepath = fix_file_paths(path) + upload_filepath = path upload_files = UploadFiles.get( upload_hash, create_authorization_predicate(upload_hash)) @@ -132,7 +124,7 @@ class RawFilesResource(Resource): """ json_data = request.get_json() compress = json_data.get('compress', False) - files = [fix_file_paths(file.strip()) for file in json_data['files']] + files = [file.strip() for file in json_data['files']] return respond_to_get_raw_files(upload_hash, files, compress) @@ -153,7 +145,7 @@ class RawFilesResource(Resource): if files_str is None: abort(400, message="No files argument given.") - files = [fix_file_paths(file.strip()) for file in files_str.split(',')] + files = [file.strip() for file in files_str.split(',')] return respond_to_get_raw_files(upload_hash, files, compress) diff --git a/nomad/api/upload.py b/nomad/api/upload.py index 2c889086b40beccf3b2b6393d7d84125d84e74a9..915ba2c271c56ffff24f0a61cf5e15a8db0666ae 100644 --- a/nomad/api/upload.py +++ b/nomad/api/upload.py @@ -23,13 +23,12 @@ from datetime import datetime from werkzeug.datastructures import FileStorage import os.path -from nomad import config +from nomad import config, utils from nomad.processing import Upload from nomad.processing import NotAllowedDuringProcessing -from nomad.utils import get_logger -from nomad.uploads import ArchiveBasedStagingUploadFiles +from nomad.uploads import ArchiveBasedStagingUploadFiles, StagingUploadFiles, UploadFiles -from .app import api +from .app import api, with_logger from .auth import login_really_required from .common import pagination_request_parser, pagination_model @@ -133,7 +132,8 @@ class UploadListResource(Resource): @api.marshal_with(upload_model, skip_none=True, code=200, description='Upload received') @api.expect(upload_metadata_parser) @login_really_required - def put(self): + @with_logger + def put(self, logger): """ Upload a file and automatically create a new upload in the process. Can be used to upload files via browser or other http clients like curl. 
+            logger.info('upload created', upload_id=upload.upload_id)
 
             upload_files = ArchiveBasedStagingUploadFiles(
                 upload.upload_id, create=True, local_path=local_path)
@@ -187,12 +186,13 @@ class UploadListResource(Resource):
                         f.write(request.stream.read(1024))
 
             except Exception as e:
-                logger.error('Error on streaming upload', exc_info=e)
+                logger.warning('Error on streaming upload', exc_info=e)
                 abort(400, message='Some IO went wrong, download probably aborted/disrupted.')
 
             if not upload_files.is_valid:
-                # TODO upload_files.delete()
+                upload_files.delete()
                 upload.delete()
+                logger.info('Invalid upload')
                 abort(400, message='Bad file format, excpected %s.' % ", ".join(upload_files.formats))
 
             logger.info('received uploaded file')
@@ -267,15 +267,17 @@ class UploadResource(Resource):
 
     @api.doc('delete_upload')
     @api.response(404, 'Upload does not exist')
-    @api.response(400, 'Not allowed during processing or when not in staging')
+    @api.response(401, 'Upload does not belong to authenticated user.')
+    @api.response(400, 'Not allowed during processing')
     @api.marshal_with(upload_model, skip_none=True, code=200, description='Upload deleted')
     @login_really_required
-    def delete(self, upload_id: str):
+    @with_logger
+    def delete(self, upload_id: str, logger):
         """
         Delete an existing upload.
 
-        Only ``is_ready`` uploads
-        can be deleted. Deleting an upload in processing is not allowed.
+        Only uploads that are still in staging, are not already deleted, are not still
+        being uploaded, and are not currently being processed can be deleted.
         """
         try:
             upload = Upload.get(upload_id)
@@ -283,19 +285,25 @@
             abort(404, message='Upload with id %s does not exist.' % upload_id)
 
         if upload.user_id != str(g.user.user_id) and not g.user.is_admin:
-            abort(404, message='Upload with id %s does not exist.' % upload_id)
+            abort(401, message='Upload with id %s does not belong to you.' % upload_id)
 
-        if not upload.in_staging:
-            abort(400, message='Operation not allowed, upload is not in staging.')
+        with utils.lnr(logger, 'delete processing upload'):
+            try:
+                upload.delete()
+            except NotAllowedDuringProcessing:
+                abort(400, message='You must not delete an upload during processing.')
 
-        try:
-            upload.delete()
-            return upload, 200
-        except NotAllowedDuringProcessing:
-            abort(400, message='You must not delete an upload during processing.')
+        with utils.lnr(logger, 'delete upload files'):
+            upload_files = UploadFiles.get(upload_id)
+            assert upload_files is not None, 'Uploads existing in staging must have files.'
+            if upload_files is not None:
+                assert isinstance(upload_files, StagingUploadFiles), 'Uploads in staging must have staging files.'
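+                # uploads that were already published 404 above (their Upload
+                # document is gone after unstage), so only staging file areas are
+                # ever deleted here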
+                upload_files.delete()
+
+        return upload, 200
 
     @api.doc('exec_upload_command')
-    @api.response(404, 'Upload does not exist or is not allowed')
+    @api.response(404, 'Upload does not exist or is not in staging')
     @api.response(400, 'Operation is not supported')
     @api.response(401, 'If the operation is not allowed for the current user')
     @api.marshal_with(upload_model, skip_none=True, code=200, description='Upload unstaged successfully')
@@ -334,9 +342,6 @@ class UploadResource(Resource):
                 break
 
         if operation == 'unstage':
-            if not upload.in_staging:
-                abort(400, message='Operation not allowed, upload is not in staging.')
-
             try:
                 upload.unstage(meta_data)
             except NotAllowedDuringProcessing:
diff --git a/nomad/client.py b/nomad/client.py
index d56a8c087427a15b31312080ab4d5d763daee7e1..358326e9322af4d38e661343892442d6abbad76e 100644
--- a/nomad/client.py
+++ b/nomad/client.py
@@ -28,7 +28,7 @@ from bravado.requests_client import RequestsClient
 from bravado.client import SwaggerClient
 
 from nomad import config, utils
-from nomad.files import UploadFile
+from nomad.uploads import ArchiveBasedStagingUploadFiles
 from nomad.parsing import parsers, parser_dict, LocalBackend
 from nomad.normalizing import normalizers
 
@@ -163,17 +163,17 @@ class CalcProcReproduction:
         else:
             self.logger.info('Calc already downloaded.')
 
-        self.upload_file = UploadFile(upload_id='tmp_%s' % archive_id, local_path=local_path)
+        self.upload_files = ArchiveBasedStagingUploadFiles(upload_id='tmp_%s' % archive_id, local_path=local_path)
 
     def __enter__(self):
         # open/extract upload file
         self.logger.info('Extracting calc data.')
-        self.upload_file.__enter__()
+        self.upload_files.extract()
 
         # find mainfile matching calc_hash
         self.mainfile = next(
-            filename for filename in self.upload_file.filelist
-            if utils.hash(filename) == self.calc_hash)
+            filename for filename in self.upload_files.raw_file_manifest()
+            if self.upload_files.calc_hash(filename) == self.calc_hash)
 
         assert self.mainfile is not None, 'The mainfile could not be found.'
         self.logger = self.logger.bind(mainfile=self.mainfile)
@@ -182,19 +182,18 @@ class CalcProcReproduction:
         return self
 
     def __exit__(self, *args):
-        self.upload_file.__exit__(*args)
+        self.upload_files.delete()
 
     def parse(self, parser_name: str = None) -> LocalBackend:
         """
         Run the given parser on the downloaded calculation. If no parser is given,
         do parser matching and use the respective parser.
         """
-        mainfile = self.upload_file.get_file(self.mainfile)
         if parser_name is not None:
             parser = parser_dict.get(parser_name)
         else:
             for potential_parser in parsers:
-                with mainfile.open() as mainfile_f:
+                with self.upload_files.raw_file(self.mainfile) as mainfile_f:
                     if potential_parser.is_mainfile(self.mainfile, lambda fn: mainfile_f):
                         parser = potential_parser
                         break
@@ -203,7 +202,7 @@ class CalcProcReproduction:
         self.logger = self.logger.bind(parser=parser.name)  # type: ignore
         self.logger.info('identified parser')
 
-        parser_backend = parser.run(mainfile.os_path, logger=self.logger)
+        parser_backend = parser.run(self.upload_files.raw_file_object(self.mainfile).os_path, logger=self.logger)
         self.logger.info('ran parser')
 
         return parser_backend
diff --git a/nomad/files.py b/nomad/files.py
deleted file mode 100644
index 1187bb30204b83320768f937734ede75193caebb..0000000000000000000000000000000000000000
--- a/nomad/files.py
+++ /dev/null
@@ -1,700 +0,0 @@
-# Copyright 2018 Markus Scheidgen
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""
-This file storage abstraction uses an *object storage*-like metaphor to store
-objects on the file system. Objects are organized in *buckets* and object ids
-are basically paths. All major file system operations for dealing with
-uploaded files, archive, files, raw files, etc. should be part of this module to
-allow later introduction of real object storage systems.
-
-.. note:: This module still uses ``os.path``. As long as the whole nomad runs on a
-    POSIX (or Windows) os everything should be fine. This means respective paths in the
-    dbs, and indices. In the future, this should be replaced with abstract path representations
-    ala ``PathLib``.
-
-.. autoclass:: File
-    :members:
-.. autoclass:: ZippedFile
-    :members:
-.. autoclass:: ObjectFile
-    :members:
-.. autoclass:: UploadFile
-    :members:
-.. autoclass:: ArchiveFile
-    :members:
-.. autoclass:: DataContainer
-    :members:
-.. autoclass:: BaggedDataContainer
-    :members:
-.. autoclass:: ZippedDataContainer
-    :members:
-"""
-# from abc import ABC
-# from typing import List, Generator, IO, TextIO, cast, Dict, Any
-# import os
-# import os.path
-# from zipfile import ZipFile, BadZipFile, is_zipfile, ZIP_DEFLATED
-# import shutil
-# from contextlib import contextmanager
-# import gzip
-# import io
-# import bagit
-# import json
-
-# from nomad import config, utils
-
-
-# class File:
-#     """
-#     Base class for handling a file. Allows to open (read, write) and delete files.
-
-#     Arguments:
-#         os_path: The path to the file in the os filesystem.
-
-#     Attributes:
-#         logger: A structured logger with bucket and object information.
-#         path: The abstract path of the file.
-#     """
-#     def __init__(self, os_path: str = None) -> None:
-#         self.os_path = os_path
-
-#         self.logger = self.bind_logger(utils.get_logger(__name__))
-
-#     def bind_logger(self, logger):
-#         """ Adds context information to the given logger and returns it. """
-#         return logger.bind(path=self.os_path)
-
-#     @contextmanager
-#     def open(self, mode: str = 'r', *args, **kwargs) -> Generator[IO, None, None]:
-#         """ Opens the object with he given mode, etc. """
-#         self.logger.debug('open file')
-#         try:
-#             if mode.startswith('w'):
-#                 self.create_dirs()
-#             with open(self.os_path, mode, *args, **kwargs) as f:
-#                 yield f
-#         except FileNotFoundError:
-#             raise KeyError()
-
-#     def delete(self) -> None:
-#         """ Deletes the file. """
-#         try:
-#             os.remove(self.os_path)
-#             self.logger.debug('file deleted')
-#         except FileNotFoundError:
-#             raise KeyError()
-
-#     def exists(self) -> bool:
-#         """ Returns true if object exists. """
-#         return os.path.exists(self.os_path)
-
-#     @property
-#     def size(self) -> int:
-#         """ Returns the os determined file size. """
-#         return os.stat(self.os_path).st_size
-
-#     @property
-#     def path(self) -> str:
-#         return self.os_path
-
-#     def create_dirs(self) -> None:
-#         directory = os.path.dirname(self.os_path)
-#         if not os.path.isdir(directory):
-#             os.makedirs(directory)
-
-
-# class ZippedFile(File):
-#     """ A file contained in a .zip archive. """
-#     def __init__(self, zip_os_path: str, filename: str) -> None:
-#         self.filename = filename
-#         super().__init__(zip_os_path)
-
-#     def bind_logger(self, logger):
-#         return super().bind_logger(logger).bind(filename=self.filename)
-
-#     @contextmanager
-#     def open(self, *args, **kwargs) -> Generator[IO, None, None]:
-#         self.logger.debug('open file')
-#         try:
-#             with ZipFile(self.os_path) as zip_file:
-#                 yield zip_file.open(self.filename, *args, **kwargs)
-#         except FileNotFoundError:
-#             raise KeyError()
-#         except KeyError as e:
-#             raise e
-#         except Exception as e:
-#             msg = 'Could not read upload.'
-#             self.logger.error(msg, exc_info=e)
-#             raise FileError(msg, e)
-
-#     def delete(self) -> None:
-#         assert False, "A file in a zip archive cannot be deleted."
-
-#     @property
-#     def size(self) -> int:
-#         with ZipFile(self.os_path) as zip_file:
-#             return zip_file.getinfo(self.filename).file_size
-
-#     @property
-#     def path(self) -> str:
-#         return os.path.join(
-#             os.path.dirname(self.os_path),
-#             os.path.basename(self.os_path),
-#             self.filename)
-
-
-# class Objects:
-#     @classmethod
-#     def _os_path(cls, bucket: str, name: str, ext: str = None) -> str:
-#         if ext is not None and ext != '':
-#             file_name = '%s.%s' % (name, ext)
-#         elif name is None or name == '':
-#             file_name = ''
-#         else:
-#             file_name = name
-
-#         # add an extra directory to limit the files per directory (gpfs)
-#         file_name = '%s/%s' % (file_name[0:3], file_name)
-
-#         path_segments = file_name.split('/')
-#         path = os.path.join(*([config.fs.objects, bucket] + path_segments))
-
-#         return os.path.abspath(path)
-
-#     @classmethod
-#     def delete_all(cls, bucket: str, prefix: str = ''):
-#         """ Delete all files with given prefix, prefix must denote a directory. """
-#         try:
-#             shutil.rmtree(cls._os_path(bucket, prefix, ext=None))
-#         except FileNotFoundError:
-#             pass
-
-
-# class ObjectFile(File):
-#     """
-#     Base class for file objects. Allows to open (read, write) and delete objects.
-#     File objects filesystem location is govern by its bucket, object_id, and ext.
-#     This object store location can be overridden with a local_path.
-
-#     Arguments:
-#         bucket (str): The 'bucket' for this object.
-#         object_id (str): The object_id for this object. Might contain `/` to structure
-#             the bucket further. Will be mapped to directories in the filesystem.
-#         ext (str): Optional extension for the object file in the filesystem.
-
-#     Attributes:
-#         logger: A structured logger with bucket and object information.
-#         has_local_path: True, if this object is stored somewhere else in the fs.
-#     """
-#     def __init__(self, bucket: str, object_id: str, ext: str = None, local_path: str = None) -> None:
-#         self.bucket = bucket
-#         self.object_id = object_id
-#         self.ext = ext
-
-#         self.has_local_path = local_path is not None
-#         path = Objects._os_path(self.bucket, self.object_id, self.ext)
-#         path = local_path if self.has_local_path else path
-
-#         super().__init__(path)
-
-#     def bind_logger(self, logger):
-#         """ Adds context information to the given logger and returns it. """
-#         return super().bind_logger(logger).bind(bucket=self.bucket, object=self.object_id)
-
-#     def delete(self) -> None:
-#         """ Deletes the file, if it has not a localpath. Localpath files are never deleted. """
-#         # Do not delete local files, no matter what
-#         if not self.has_local_path:
-#             super().delete()
-
-
-# class FileError(Exception):
-#     def __init__(self, msg, cause):
-#         super().__init__(msg, cause)
-
-
-# class UploadFile(ObjectFile):
-#     """
-#     Instances of ``UploadFile`` represent an uploaded file in the *'object storage'*.
-
-#     Currently only user ``.zip`` files are supported.
-
-#     Uploads can be extracted to tmp storage (open/close), the list of files in
-#     the upload is provided, and files can be opened for read. Extracting uploads
-#     is optional, all functions in this module are also available without extracting.
-#     Extracts are automatically bagged with *bagit*.
-
-#     This class is a context manager, that extracts the file when using a ``with``
-#     statement with instances of this class.
-
-#     UploadFiles are stored in their own *bucket*. But, storage can be overridden
-#     by providing a ``local_path``. This is useful when the file is already stored
-#     in nomad's distributed file system, e.g. for bulk processing of already uploaded
-#     files.
-
-#     Uploads can be persistet as :class:`ZippedDataContainers` for permanent repository
-#     raw data storage.
-
-#     Arguments:
-#         upload_id: The upload of this uploaded file.
-#         local_path: Optional override for the path used to store/access the uploaded file.
-
-#     Attributes:
-#         is_extracted: True if the upload is extracted.
-#         upload_extract_dir: The path of the tmp directory with the extracted contents.
-#         filelist: A list of filenames relative to the .zipped upload root.
-#     """
-
-#     formats = ['zip']
-#     """ A human readable list of supported file formats. """
-
-#     def __init__(self, upload_id: str, local_path: str = None) -> None:
-#         super().__init__(
-#             bucket=config.files.uploads_bucket,
-#             object_id=upload_id,
-#             ext='zip',
-#             local_path=local_path)
-
-#         self._extract_dir: str = os.path.join(config.fs.tmp, 'uploads_extracted', upload_id)
-#         self._bagged_container: DataContainer = None
-#         if os.path.isdir(self._extract_dir):
-#             self._bagged_container = BaggedDataContainer(self._extract_dir)
-
-#     def bind_logger(self, logger):
-#         return super().bind_logger(logger).bind(upload_id=self.object_id)
-
-#     # There is not good way to capsule decorators in a class:
-#     # https://medium.com/@vadimpushtaev/decorator-inside-python-class-1e74d23107f6
-#     class Decorators:
-#         @classmethod
-#         def handle_errors(cls, decorated):
-#             def wrapper(self, *args, **kwargs):
-#                 try:
-#                     return decorated(self, *args, **kwargs)
-#                 except Exception as e:
-#                     msg = 'Could not %s upload.' % decorated.__name__
-#                     self.logger.error(msg, upload_id=self.object_id, exc_info=e)
-#                     raise FileError(msg, e)
-#             return wrapper
-
-#     @contextmanager
-#     def _zip(self):
-#         assert self.exists(), "Can only access uploaded file if it exists."
-#         zip_file = None
-#         try:
-#             zip_file = ZipFile(self.os_path)
-#             yield zip_file
-#         except BadZipFile as e:
-#             raise FileError('Upload is not a zip file', e)
-#         finally:
-#             if zip_file is not None:
-#                 zip_file.close()
-
-#     @property
-#     def filelist(self) -> List[str]:
-#         if self.is_extracted:
-#             return self._bagged_container.manifest
-#         else:
-#             with self._zip() as zip_file:
-#                 return [
-#                     zip_info.filename for zip_info in zip_file.filelist
-#                     if not zip_info.filename.endswith('/')]
-
-#     @property
-#     def is_extracted(self) -> bool:
-#         return self._bagged_container is not None
-
-#     @Decorators.handle_errors
-#     def upload_hash(self) -> str:
-#         assert self.is_extracted
-#         return self._bagged_container.hash
-
-#     @Decorators.handle_errors
-#     def extract(self) -> None:
-#         """
-#         'Opens' the upload. This means the upload files get extracted and bagged to tmp.
-
-#         Raises:
-#             UploadFileError: If some IO went wrong.
-#             KeyError: If the upload does not exist.
-#         """
-#         os.makedirs(os.path.join(config.fs.tmp, 'uploads_extracted'), exist_ok=True)
-
-#         with self._zip() as zip_file:
-#             zip_file.extractall(self._extract_dir)
-
-#         self.logger.debug('extracted uploaded file')
-
-#         self._bagged_container = BaggedDataContainer.create(self._extract_dir)
-#         self.logger.debug('bagged uploaded file')
-
-#     def persist(self, object_id: str = None):
-#         """
-#         Persists the extracted and bagged upload to the repository raw data bucket.
-#         """
-#         assert self.is_extracted
-#         if object_id is None:
-#             object_id = self.upload_hash()
-
-#         target = Objects._os_path(config.files.raw_bucket, object_id, 'zip')
-#         directory = os.path.dirname(target)
-#         if not os.path.isdir(directory):
-#             os.makedirs(directory)
-
-#         return ZippedDataContainer.create(self._extract_dir, target=target)
-
-#     @Decorators.handle_errors
-#     def remove_extract(self) -> None:
-#         """
-#         Closes the upload. This means the tmp. files are deleted.
-
-#         Raises:
-#             UploadFileError: If some IO went wrong.
-#             KeyError: If the upload does not exist.
-#         """
-#         try:
-#             shutil.rmtree(self._extract_dir)
-#         except FileNotFoundError:
-#             raise KeyError()
-
-#         self.logger.debug('removed uploaded file extract')
-
-#     def __enter__(self):
-#         self.extract()
-#         return self
-
-#     def __exit__(self, exc_type, exc, exc_tb):
-#         self.remove_extract()
-
-#     def get_file(self, filename: str) -> File:
-#         """
-#         Returns a :class:`File` instance as a handle to the file with the given name.
-#         Only works on extracted uploads. The given filename must be one of the
-#         name in ``self.filelist``.
-#         """
-#         assert self.is_extracted
-#         return self._bagged_container.get_file(filename)
-
-#     @property
-#     def is_valid(self):
-#         return is_zipfile(self.os_path)
-
-#     def get_siblings(self, filename: str) -> Generator[str, None, None]:
-#         """
-#         Returns the names of all files that share the same prefix (object id),
-#         respectively are part of the same directory (incl. files in sub directories).
-#         In nomad terms, the aux files the this file. Returned siblings are relative
-#         to the upload root directory.
-#         """
-#         dirname = os.path.dirname(filename)
-#         for other in self.filelist:
-#             if other.startswith(dirname) and other != filename:
-#                 yield other
-
-
-# class RepositoryFile(ObjectFile):
-#     """
-#     Represents a repository file. A repository file is a persistet bagged upload, incl.
-#     the upload metadata. It is used to serve raw data.
-#     """
-#     def __init__(self, upload_hash: str) -> None:
-#         super().__init__(
-#             bucket=config.files.raw_bucket,
-#             object_id=upload_hash,
-#             ext='zip')
-
-#         self.zipped_container = ZippedDataContainer(self.os_path)
-
-#     def get_file(self, path: str) -> ZippedFile:
-#         return self.zipped_container.get_file(path)
-
-#     @property
-#     def manifest(self) -> List[str]:
-#         return self.zipped_container.manifest
-
-
-# class ArchiveFile(ObjectFile):
-#     """
-#     Represents the archive file for an individual calculation. Allows to write the
-#     archive, read the archive, delete the archive.
-
-#     Archive files are stored in their own *bucket*.
-#     """
-#     def __init__(self, archive_id: str) -> None:
-#         super().__init__(
-#             bucket=config.files.archive_bucket,
-#             object_id=archive_id,
-#             ext='json.gz' if config.files.compress_archive else 'json')
-
-#     def bind_logger(self, logger):
-#         upload_hash, calc_hash = self.object_id.split('/')
-#         return super().bind_logger(logger).bind(
-#             archive_id=self.object_id, upload_hash=upload_hash, calc_hash=calc_hash)
-
-#     @contextmanager
-#     def write_archive_json(self) -> Generator[TextIO, None, None]:
-#         """ Context manager that yields a file-like to write the archive json. """
-#         with self.open('wb') as binary_out:
-#             if config.files.compress_archive:
-#                 gzip_wrapper = cast(TextIO, gzip.open(binary_out, 'wt'))
-#                 out = gzip_wrapper
-#             else:
-#                 text_wrapper = io.TextIOWrapper(binary_out, encoding='utf-8')
-#                 out = text_wrapper
-
-#             try:
-#                 yield out
-#             finally:
-#                 out.flush()
-#                 out.close()
-
-#         self.logger.debug('archive file written')
-
-#     @contextmanager
-#     def read_archive_json(self) -> Generator[TextIO, None, None]:
-#         """ Context manager that yields a file-like to read the archive json. """
-#         with self.open(mode='rb') as binary_in:
-#             try:
-#                 if config.files.compress_archive:
-#                     gzip_wrapper = cast(TextIO, gzip.open(binary_in, 'rt'))
-#                     in_file = gzip_wrapper
-#                 else:
-#                     text_wrapper = io.TextIOWrapper(binary_in, encoding='utf-8')
-#                     in_file = text_wrapper
-#             except FileNotFoundError:
-#                 raise KeyError()
-
-#             try:
-#                 yield in_file
-#             finally:
-#                 in_file.close()
-
-#         self.logger.debug('archive file read')
-
-#     @staticmethod
-#     def delete_archives(upload_hash: str):
-#         """ Delete all archives of one upload with the given hash. """
-#         bucket = config.files.archive_bucket
-#         Objects.delete_all(bucket, upload_hash)
-
-#         utils.get_logger(__name__, bucket=bucket, upload_hash=upload_hash) \
-#             .debug('archive files deleted')
-
-
-# class ArchiveLogFile(ObjectFile):
-#     """
-#     Represents a log file that was created for processing a single calculation to create
-#     an archive.
-#     Logfiles are stored within the *archive_bucket* alongside the archive files.
-#     """
-#     def __init__(self, archive_id: str) -> None:
-#         super().__init__(
-#             bucket=config.files.archive_bucket,
-#             object_id=archive_id,
-#             ext='log')
-
-
-# class DataContainer(ABC):
-#     """
-#     An abstract baseclass for a *data container*. A data container is a persistent
-#     bundle of related files, like the calculation raw data of a user upload.
-
-#     A container has a *manifest* and arbitrary *metadata*.
-#     """
-#     @property
-#     def manifest(self) -> List[str]:
-#         """
-#         A readonly list of paths to files within the container relative to the containers
-#         payload directory.
-#         """
-#         pass
-
-#     @property
-#     def metadata(self) -> Dict[str, Any]:
-#         """
-#         The modifiable metadata of this manifest. On the top-level its a string keyed
-#         dictionary. The values can be arbitrary, but have to be JSON-serializable.
-#         Modifications have to be saved (:func:`save_metadata`).
-#         """
-#         pass
-
-#     def save_metadata(self) -> None:
-#         """ Persists metadata changes. """
-#         pass
-
-#     def get_file(self, manifest_path: str) -> File:
-#         """
-#         Returns a file-like for the given manifest path.
-#         """
-#         pass
-
-#     @property
-#     def hash(self) -> str:
-#         return self.metadata['Nomad-Hash']
-
-
-# class BaggedDataContainer(DataContainer):
-#     """
-#     A *data container* based on *bagit*. Once created no more files can be added.
-#     """
-#     def __init__(self, path: str) -> None:
-#         self.path = path
-#         self.bag = bagit.Bag(path)
-#         self._metadata = None
-#         self.payload_directory = os.path.join(path, 'data')
-
-#     @staticmethod
-#     def create(path: str) -> 'BaggedDataContainer':
-#         """
-#         Makes a bag from the given directory and returns the respective BaggedDataContainer
-#         instance.
-#         """
-#         bag = bagit.make_bag(path, checksums=['sha512'])
-
-#         # TODO implement NOMAD-coe's way of doing the hashing
-#         hashes = [
-#             value['sha512'] for key, value in bag.entries.items()
-#             if key.startswith('data/')
-#         ]
-#         bag.info['Nomad-Hash'] = utils.hash(''.join(hashes))
-
-#         bag.save()
-#         return BaggedDataContainer(path)
-
-#     @property
-#     def metadata(self):
-#         if self._metadata is None:
-#             self._metadata = BaggedDataContainer._load_bagit_metadata(self.bag.info)
-#         return self._metadata
-
-#     @staticmethod
-#     def _load_bagit_metadata(info):
-#         metadata = info
-#         for key, value in metadata.items():
-#             if key not in bagit.STANDARD_BAG_INFO_HEADERS:
-#                 try:
-#                     metadata[key] = json.loads(value)
-#                 except Exception:
-#                     pass
-#         return metadata
-
-#     def save_metadata(self):
-#         metadata = self.bag.info
-#         for key, value in metadata.items():
-#             if key not in bagit.STANDARD_BAG_INFO_HEADERS and not isinstance(value, str):
-#                 metadata[key] = json.dumps(value)
-#         self.bag.save()
-
-#     @property
-#     def manifest(self):
-#         return [path[5:] for path in self.bag.entries.keys() if path.startswith('data/')]
-
-#     def get_file(self, path):
-#         return File(os.path.join(self.payload_directory, path))
-
-
-# class ZippedDataContainer(File, DataContainer):
-#     """
-#     A *bagit*-based data container that has been zipped. Its metadata cannot be changed
-#     anymore.
-#     """
-#     def __init__(self, os_path: str) -> None:
-#         super(ZippedDataContainer, self).__init__(os_path)
-#         self._metadata = None
-#         self._base_directory = os.path.splitext(os.path.basename(os_path))[0]
-#         self._payload_directory = '%s/data/' % self._base_directory
-#         self._payload_deirectory_len = len(self._payload_directory)
-
-#     @staticmethod
-#     def create(path: str, target: str = None) -> 'ZippedDataContainer':
-#         """
-#         Creates a zipped bag from a bag.
-
-#         Arguments:
-#             path: The path to the bag
-#             target:
-#                 The path to the zip (excl. .zip extension). Base dir in zip will be
-#                 based on the target path.
-#         """
-#         if not target:
-#             target = path + '.zip'
-
-#         target = os.path.abspath(target)
-
-#         assert os.path.isdir(path)
-#         assert os.path.exists(os.path.dirname(target))
-
-#         # manually created zipfile instead of shutils.make_zip to use base_dir from
-#         # target while zipping path
-#         base_dir = os.path.splitext(os.path.basename(target))[0]
-#         path_prefix_len = len(path) + 1
-#         with ZipFile(target, "w", compression=ZIP_DEFLATED, allowZip64=True) as zip_file:
-#             for root, _, filenames in os.walk(path):
-#                 for name in filenames:
-#                     file_path = os.path.join(root, name)
-#                     zipped_path = os.path.join(base_dir, file_path[path_prefix_len:])
-#                     zip_file.write(file_path, zipped_path)
-
-#         return ZippedDataContainer(target)
-
-#     @contextmanager
-#     def zip_file(self):
-#         assert self.exists(), "Can only access uploaded file if it exists."
-#         zip_file = None
-#         try:
-#             zip_file = ZipFile(self.os_path)
-#             yield zip_file
-#         except BadZipFile as e:
-#             raise FileError('Upload is not a zip file', e)
-#         finally:
-#             if zip_file is not None:
-#                 zip_file.close()
-
-#     @property
-#     def manifest(self):
-#         with self.zip_file() as zip_file:
-#             return [
-#                 zip_info.filename[self._payload_deirectory_len:] for zip_info in zip_file.filelist
-#                 if not zip_info.filename.endswith('/') and zip_info.filename.startswith(self._payload_directory)]
-
-#     @property
-#     def metadata(self):
-#         if self._metadata is None:
-#             self._metadata = self._load_metadata()
-#         return self._metadata
-
-#     def _load_metadata(self):
-#         with ZippedFile(self.os_path, '%s/bag-info.txt' % self._base_directory).open('r') as metadata_file:
-#             metadata_contents = metadata_file.read()
-
-#         metadata_file = io.StringIO(metadata_contents.decode("utf-8"))
-#         tags = {}
-#         for name, value in bagit._parse_tags(metadata_file):
-#             if name not in tags:
-#                 tags[name] = value
-#                 continue
-
-#             if not isinstance(tags[name], list):
-#                 tags[name] = [tags[name], value]
-#             else:
-#                 tags[name].append(value)
-
-#         return BaggedDataContainer._load_bagit_metadata(tags)
-
-#     def get_file(self, path):
-#         return ZippedFile(self.path, self._payload_directory + path)
-
-#     def get_zip_path(self, path):
-#         return self._payload_directory + path
diff --git a/nomad/processing/data.py b/nomad/processing/data.py
index 0506180e8049b9f754ca2ae04493c1718c4f67f3..15df76ea7e7ac0032a2ce00131277494e09e1b30 100644
--- a/nomad/processing/data.py
+++ b/nomad/processing/data.py
@@ -25,7 +25,6 @@ calculations, and files
 """
 
 from typing import List, Any, ContextManager, Tuple, Generator
-from elasticsearch.exceptions import NotFoundError
 from mongoengine import StringField, BooleanField, DateTimeField, DictField, IntField
 import logging
 from structlog import wrap_logger
@@ -37,7 +36,6 @@ from nomad.repo import RepoCalc, RepoUpload
 from nomad.processing.base import Proc, Chord, process, task, PENDING, SUCCESS, FAILURE
 from nomad.parsing import parsers, parser_dict
 from nomad.normalizing import normalizers
-from nomad.utils import lnr
 
 
 class NotAllowedDuringProcessing(Exception): pass
@@ -106,26 +104,6 @@ class Calc(Proc, datamodel.Calc):
     def upload_hash(self):
         return utils.archive.upload_hash(self.archive_id)
 
-    def delete(self):
-        """
-        Delete this calculation and all associated data. This includes all files,
-        the archive, and this search index entry.
-        TODO is this needed? Or do we always delete hole uploads in bulk.
- """ - # delete all files - self.upload_files.delete() - - # delete the search index entry - try: - elastic_entry = RepoCalc.get(self.archive_id) - if elastic_entry is not None: - elastic_entry.delete() - except NotFoundError: - pass - - # delete this mongo document - super().delete() - def get_logger(self, **kwargs): logger = super().get_logger() logger = logger.bind( @@ -312,7 +290,6 @@ class Upload(Chord, datamodel.Upload): local_path: optional local path, e.g. for files that are already somewhere on the server additional_metadata: optional user provided additional meta data upload_id: the upload id generated by the database - in_staging: true if the upload is still in staging and can be edited by the uploader is_private: true if the upload and its derivitaves are only visible to the uploader upload_time: the timestamp when the system realised the upload upload_hash: the hash of the uploaded file @@ -326,7 +303,6 @@ class Upload(Chord, datamodel.Upload): local_path = StringField(default=None) additional_metadata = DictField(default=None) - in_staging = BooleanField(default=True) is_private = BooleanField(default=False) upload_time = DateTimeField() @@ -355,7 +331,7 @@ class Upload(Chord, datamodel.Upload): @classmethod def user_uploads(cls, user: coe_repo.User) -> List['Upload']: """ Returns all uploads for the given user. Currently returns all uploads. """ - return cls.objects(user_id=str(user.user_id), in_staging=True) + return cls.objects(user_id=str(user.user_id)) @property def uploader(self): @@ -367,23 +343,10 @@ class Upload(Chord, datamodel.Upload): return logger def delete(self): - logger = self.get_logger(task='delete') - if not (self.completed or self.current_task == 'uploading'): raise NotAllowedDuringProcessing() - - with lnr(logger, 'delete all files of upload'): - self.upload_files.delete() - - with lnr(logger, 'deleting calcs db entries'): - # delete repo entries - self.to(RepoUpload).delete() - - # delete calc processings - Calc.objects(upload_id=self.upload_id).delete() - - with lnr(logger, 'deleting upload db entry'): - super().delete() + Calc.objects(upload_id=self.upload_id).delete() + super().delete() @classmethod def create(cls, **kwargs) -> 'Upload': @@ -412,11 +375,15 @@ class Upload(Chord, datamodel.Upload): if not (self.completed or self.current_task == 'uploading'): raise NotAllowedDuringProcessing() - self.in_staging = False + self.delete() + self.to(RepoUpload).unstage() coe_repo.Upload.add(self, meta_data) self.save() + self.upload_files.pack() + self.upload_files.delete() + @process def process(self): self.extracting() @@ -516,8 +483,6 @@ class Upload(Chord, datamodel.Upload): self.get_logger(), 'pack staging upload', step='cleaning', upload_size=self.upload_files.size): pass - # self.upload_files.pack() - # self.upload_files.delete() @property def processed_calcs(self): diff --git a/nomad/utils.py b/nomad/utils.py index 056ad6d7f4475c1f21c79c5028b45792045e8cc6..7db26a6ed0af4e17b2ec1595b8477e63f7830b5c 100644 --- a/nomad/utils.py +++ b/nomad/utils.py @@ -46,6 +46,7 @@ import json import uuid import time import re +from werkzeug.exceptions import HTTPException from nomad import config @@ -218,6 +219,9 @@ def lnr(logger, event, **kwargs): """ try: yield + except HTTPException as e: + # ignore HTTPException as they are part of the normal flask error handling + raise e except Exception as e: logger.error(event, exc_info=e, **kwargs) raise e diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index 
+        self.upload_files.pack()
+        self.upload_files.delete()
+
     @process
     def process(self):
         self.extracting()
@@ -516,8 +483,6 @@ class Upload(Chord, datamodel.Upload):
                 self.get_logger(), 'pack staging upload', step='cleaning',
                 upload_size=self.upload_files.size):
             pass
-            # self.upload_files.pack()
-            # self.upload_files.delete()
 
     @property
     def processed_calcs(self):
diff --git a/nomad/utils.py b/nomad/utils.py
index 056ad6d7f4475c1f21c79c5028b45792045e8cc6..7db26a6ed0af4e17b2ec1595b8477e63f7830b5c 100644
--- a/nomad/utils.py
+++ b/nomad/utils.py
@@ -46,6 +46,7 @@ import json
 import uuid
 import time
 import re
+from werkzeug.exceptions import HTTPException
 
 from nomad import config
 
@@ -218,6 +219,9 @@ def lnr(logger, event, **kwargs):
     """
     try:
         yield
+    except HTTPException as e:
+        # ignore HTTPExceptions as they are part of the normal flask error handling
+        raise e
     except Exception as e:
         logger.error(event, exc_info=e, **kwargs)
         raise e
diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py
index 3defaaee257c105325332bf852df64814f253a7d..41e3d21cc5db0b6a1f8c68c4076b05e49033872c 100644
--- a/tests/processing/test_data.py
+++ b/tests/processing/test_data.py
@@ -31,10 +31,10 @@ from nomad.processing import Upload, Calc
 from nomad.processing.base import task as task_decorator
 from nomad.repo import RepoUpload
 
-from tests.test_files import example_file, empty_file
+from tests.test_uploads import example_file, empty_file
 
 # import fixtures
-from tests.test_files import clear_files  # pylint: disable=unused-import
+from tests.test_uploads import clear_files  # pylint: disable=unused-import
 
 example_files = [empty_file, example_file]
@@ -130,20 +130,6 @@ def test_processing_with_warning(uploaded_id_with_warning, worker, test_user, mocksearch):
     assert_processing(upload, mocksearch)
 
 
-# TODO
-# @pytest.mark.parametrize('uploaded_id', [example_files[1]], indirect=True)
-# def test_processing_doublets(uploaded_id, worker, test_user, with_error):
-
-#     upload = run_processing(uploaded_id, test_user)
-#     assert upload.status == 'SUCCESS'
-#     assert upload.to(RepoUpload).exists()
-
-#     upload = run_processing(uploaded_id, test_user)
-#     assert upload.status == 'FAILURE'
-#     assert len(upload.errors) > 0
-#     assert 'already' in upload.errors[0]
-
-
 @pytest.mark.timeout(30)
 def test_process_non_existing(worker, test_user, with_error):
     upload = run_processing('__does_not_exist', test_user)
diff --git a/tests/test_api.py b/tests/test_api.py
index ee3be373ad8b9feb9213b4e2edb4ee690b10566f..a5bc8993a2c46e0e58c9bdc8f450c20cff7f3531 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -29,12 +29,12 @@ services_config.update(api_base_path='')
 config.services = config.NomadServicesConfig(**services_config)
 
 from nomad import api, coe_repo  # noqa
-from nomad.uploads import UploadFiles  # noqa
-from nomad.processing import Upload  # noqa
+from nomad.uploads import UploadFiles, PublicUploadFiles  # noqa
+from nomad.processing import Upload, Calc  # noqa
 from nomad.coe_repo import User  # noqa
 
 from tests.processing.test_data import example_files  # noqa
-from tests.test_files import example_file, example_file_mainfile, example_file_contents  # noqa
+from tests.test_uploads import example_file, example_file_mainfile, example_file_contents  # noqa
 from tests.test_uploads import create_staging_upload, create_public_upload  # noqa
 
 # import fixtures
@@ -210,18 +210,27 @@ class TestUploads:
 
         assert len(upload['calcs']['results']) == 1
 
     def assert_unstage(self, client, test_user_auth, upload_id, proc_infra, meta_data={}):
+        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
+        upload = self.assert_upload(rv.data)
+        empty_upload = upload['calcs']['pagination']['total'] == 0
+
         rv = client.post(
             '/uploads/%s' % upload_id,
             headers=test_user_auth,
             data=json.dumps(dict(operation='unstage', meta_data=meta_data)),
             content_type='application/json')
         assert rv.status_code == 200
-        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
-        assert rv.status_code == 200
-        upload = self.assert_upload(rv.data)
-        empty_upload = upload['calcs']['pagination']['total'] == 0
-        assert_coe_upload(upload['upload_hash'], empty=empty_upload, meta_data=meta_data)
+        self.assert_upload_does_not_exist(client, upload_id, test_user_auth)
+        assert_coe_upload(upload_id, empty=empty_upload, meta_data=meta_data)
+
+    def assert_upload_does_not_exist(self, client, upload_id: str, test_user_auth):
+        rv = client.get('/uploads/%s' % upload_id, headers=test_user_auth)
+        assert rv.status_code == 404
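+        # the upload must be gone from mongo entirely; on disk, nothing or only
+        # the packed public files may remain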
+        assert Upload.objects(upload_id=upload_id).first() is None
+        assert Calc.objects(upload_id=upload_id).count() == 0
+        upload_files = UploadFiles.get(upload_id)
+        assert upload_files is None or isinstance(upload_files, PublicUploadFiles)
 
     def test_get_command(self, client, test_user_auth, no_warn):
         rv = client.get('/uploads/command', headers=test_user_auth)
@@ -288,7 +297,7 @@ class TestUploads:
         self.assert_processing(client, test_user_auth, upload['upload_id'])
         self.assert_unstage(client, test_user_auth, upload['upload_id'], proc_infra)
         rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
-        assert rv.status_code == 400
+        assert rv.status_code == 404
 
     def test_delete(self, client, test_user_auth, proc_infra):
         rv = client.put('/uploads/?local_path=%s' % example_file, headers=test_user_auth)
@@ -296,6 +305,7 @@ class TestUploads:
         self.assert_processing(client, test_user_auth, upload['upload_id'])
         rv = client.delete('/uploads/%s' % upload['upload_id'], headers=test_user_auth)
         assert rv.status_code == 200
+        self.assert_upload_does_not_exist(client, upload['upload_id'], test_user_auth)
 
     @pytest.mark.parametrize('example_file', example_files)
     def test_post(self, client, test_user_auth, example_file, proc_infra, clean_repository_db):
@@ -509,14 +519,14 @@ class TestRaw(UploadFilesBasedTests):
 
     @UploadFilesBasedTests.check_authorizaton
     def test_raw_file(self, client, upload, auth_headers):
-        url = '/raw/%s/data/%s' % (upload, example_file_mainfile)
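+        # raw file paths lost their leading 'data/' prefix when fix_file_paths was removed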
+        url = '/raw/%s/%s' % (upload, example_file_mainfile)
         rv = client.get(url, headers=auth_headers)
         assert rv.status_code == 200
         assert len(rv.data) > 0
 
     @UploadFilesBasedTests.ignore_authorization
     def test_raw_file_missing_file(self, client, upload, auth_headers):
-        url = '/raw/%s/data/does/not/exist' % upload
+        url = '/raw/%s/does/not/exist' % upload
         rv = client.get(url, headers=auth_headers)
         assert rv.status_code == 404
         data = json.loads(rv.data)
@@ -524,7 +534,7 @@ class TestRaw(UploadFilesBasedTests):
 
     @UploadFilesBasedTests.ignore_authorization
     def test_raw_file_listing(self, client, upload, auth_headers):
-        url = '/raw/%s/data/examples' % upload
+        url = '/raw/%s/examples' % upload
         rv = client.get(url, headers=auth_headers)
         assert rv.status_code == 404
         data = json.loads(rv.data)
@@ -533,7 +543,7 @@ class TestRaw(UploadFilesBasedTests):
     @pytest.mark.parametrize('compress', [True, False])
     @UploadFilesBasedTests.ignore_authorization
     def test_raw_file_wildcard(self, client, upload, auth_headers, compress):
-        url = '/raw/%s/data/examples*' % upload
+        url = '/raw/%s/examples*' % upload
         if compress:
             url = '%s?compress=1' % url
         rv = client.get(url, headers=auth_headers)
@@ -560,7 +570,7 @@ class TestRaw(UploadFilesBasedTests):
     @UploadFilesBasedTests.check_authorizaton
     def test_raw_files(self, client, upload, auth_headers, compress):
         url = '/raw/%s?files=%s' % (
-            upload, ','.join(['data/%s' % file for file in example_file_contents]))
+            upload, ','.join(example_file_contents))
         if compress:
             url = '%s&compress=1' % url
         rv = client.get(url, headers=auth_headers)
@@ -575,7 +585,7 @@ class TestRaw(UploadFilesBasedTests):
     @UploadFilesBasedTests.check_authorizaton
     def test_raw_files_post(self, client, upload, auth_headers, compress):
         url = '/raw/%s' % upload
-        data = dict(files=['data/%s' % file for file in example_file_contents])
+        data = dict(files=example_file_contents)
         if compress is not None:
             data.update(compress=compress)
         rv = client.post(url, data=json.dumps(data), content_type='application/json', headers=auth_headers)
@@ -589,7 +599,7 @@
     @pytest.mark.parametrize('compress', [True, False])
     @UploadFilesBasedTests.ignore_authorization
     def test_raw_files_missing_file(self, client, upload, auth_headers, compress):
-        url = '/raw/%s?files=data/%s,missing/file.txt' % (upload, example_file_mainfile)
+        url = '/raw/%s?files=%s,missing/file.txt' % (upload, example_file_mainfile)
         if compress:
             url = '%s&compress=1' % url
         rv = client.get(url, headers=auth_headers)
diff --git a/tests/test_coe_repo.py b/tests/test_coe_repo.py
index 3b50188a8d1b7ce5b7488ffa4215bb4a6b4c011e..5f6f415daf8efb61fefe901f13760853ec2ab7fb 100644
--- a/tests/test_coe_repo.py
+++ b/tests/test_coe_repo.py
@@ -20,7 +20,7 @@ from nomad.coe_repo import User, Calc, Upload
 from tests.processing.test_data import processed_upload  # pylint: disable=unused-import
 from tests.processing.test_data import uploaded_id  # pylint: disable=unused-import
 from tests.processing.test_data import mocks_forall  # pylint: disable=unused-import
-from tests.test_files import clear_files  # pylint: disable=unused-import
+from tests.test_uploads import clear_files  # pylint: disable=unused-import
 
 
 def assert_user(user, reference):
diff --git a/tests/test_files.py b/tests/test_files.py
deleted file mode 100644
index dcd54db75916efd0e4300132ba4292beb13610da..0000000000000000000000000000000000000000
--- a/tests/test_files.py
+++ /dev/null
@@ -1,292 +0,0 @@
-# Copyright 2018 Markus Scheidgen
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-import json
-import shutil
-import os
-import os.path
-from zipfile import ZipFile
-
-# from nomad.files import Objects, ObjectFile, ArchiveFile, UploadFile, ArchiveLogFile, \
-#     BaggedDataContainer, ZippedDataContainer
-from nomad import config
-
-# example_file uses an artificial parser for faster test execution, can also be
-# changed to examples_vasp.zip for using vasp parser
-example_file = 'tests/data/proc/examples_template.zip'
-example_file_contents = [
-    'examples_template/template.json',
-    'examples_template/1.aux',
-    'examples_template/2.aux',
-    'examples_template/3.aux',
-    'examples_template/4.aux']
-example_file_mainfile = 'examples_template/template.json'
-empty_file = 'tests/data/proc/empty.zip'
-
-example_bucket = 'test_bucket'
-example_data = dict(test_key='test_value')
-
-
-@pytest.fixture(scope='function')
-def clear_files():
-    """ Utility fixture that removes all files from files and tmp after test. """
-    try:
-        yield
-    finally:
-        try:
-            shutil.rmtree(config.fs.objects)
-        except FileNotFoundError:
-            pass
-        try:
-            shutil.rmtree(config.fs.tmp)
-        except FileNotFoundError:
-            pass
-
-
-# class TestObjects:
-#     @pytest.fixture()
-#     def existing_example_file(self, clear_files):
-#         with ObjectFile(example_bucket, 'example_file', ext='json').open(mode='wt') as out:
-#             json.dump(example_data, out)
-
-#         yield 'example_file', 'json'
-
-#     def test_size(self, existing_example_file):
-#         name, ext = existing_example_file
-#         assert ObjectFile(example_bucket, name, ext).size > 0
-
-#     def test_exists(self, existing_example_file):
-#         name, ext = existing_example_file
-#         assert ObjectFile(example_bucket, name, ext).exists()
-
-#     def test_not_exists(self):
-#         assert not ObjectFile(example_bucket, 'does_not_exist').exists()
-
-#     def test_open(self, existing_example_file):
-#         name, ext = existing_example_file
-
-#         assert ObjectFile(example_bucket, name, ext).exists()
-#         with ObjectFile(example_bucket, name, ext=ext).open() as f:
-#             json.load(f)
-
-#     def test_delete(self, existing_example_file):
-#         name, ext = existing_example_file
-#         ObjectFile(example_bucket, name, ext).delete()
-#         assert not ObjectFile(example_bucket, name, ext).exists()
-
-#     def test_delete_all(self, existing_example_file):
-#         name, ext = existing_example_file
-#         Objects.delete_all(example_bucket)
-#         assert not ObjectFile(example_bucket, name, ext).exists()
-
-
-# class TestBaggedDataContainer:
-
-#     @pytest.fixture(scope='function')
-#     def example_directory(self, clear_files):
-#         directory = os.path.join(config.fs.tmp, 'test_container')
-#         os.makedirs(directory, exist_ok=True)
-
-#         with ZipFile(example_file) as zip_file:
-#             zip_file.extractall(directory)
-
-#         yield directory
-
-#     @pytest.fixture(scope='function')
-#     def example_container(self, example_directory):
-#         yield BaggedDataContainer.create(example_directory)
-
-#     def assert_container(self, container):
-#         assert container.manifest is not None
-#         assert len(container.manifest) == 5
-#         assert container.hash is not None
-#         assert container.metadata is not None
-#         for file_path in container.manifest:
-#             assert file_path.startswith('examples_template')
-
-#     def test_make(self, example_container):
-#         self.assert_container(example_container)
-
-#     def test_metadata(self, example_directory, example_container):
-#         example_container.metadata['test'] = dict(k1='v1', k2=True, k3=0)
-#         example_container.save_metadata()
-
-#         example_container = BaggedDataContainer(example_directory)
-#         self.assert_container(example_container)
-#         assert example_container.metadata['test']['k1'] == 'v1'
-#         assert example_container.metadata['test']['k2']
-#         assert example_container.metadata['test']['k3'] == 0
-
-#     def test_file(self, example_container):
-#         file = example_container.get_file('examples_template/template.json')
-#         assert file is not None
-#         with file.open('r') as f:
-#             assert json.load(f)
-
-
-# class TestZippedDataContainer(TestBaggedDataContainer):
-#     @pytest.fixture(scope='function')
-#     def example_container(self, example_directory):
-#         BaggedDataContainer.create(example_directory)
-#         return ZippedDataContainer.create(example_directory)
-
-#     def test_metadata(self, example_directory, example_container):
-#         pass
-
-#     def test_target(self, example_directory):
-#         BaggedDataContainer.create(example_directory)
-#         target = os.path.join(os.path.dirname(example_directory), 'different.zip')
-#         container = ZippedDataContainer.create(example_directory, target=target)
-#         self.assert_container(container)
-#         with ZipFile(target, 'r') as zip_file:
-#             for info in zip_file.filelist:
-#                 assert info.filename.startswith('different')
-
-
-# @pytest.fixture(scope='function', params=[False, True])
-# def archive_config(monkeypatch, request):
-#     new_config = config.FilesConfig(
-#         config.files.uploads_bucket,
-#         config.files.raw_bucket,
-#         config.files.archive_bucket,
-#         config.files.staging_bucket,
-#         config.files.public_bucket,
-#         request.param)
-#     monkeypatch.setattr(config, 'files', new_config)
-#     yield
-
-
-# @pytest.fixture(scope='function')
-# def archive(clear_files, archive_config):
-#     archive = ArchiveFile('__test_upload_hash/__test_calc_hash')
-#     with archive.write_archive_json() as out:
-#         json.dump(example_data, out)
-#     yield archive
-
-
-# class TestArchiveFile:
-
-#     def test_archive(self, archive: ArchiveFile, no_warn):
-#         assert archive.exists()
-
-#         with archive.read_archive_json() as file:
-#             result = json.load(file)
-
-#         assert 'test_key' in result
-#         assert result['test_key'] == 'test_value'
-
-#     def test_delete_archive(self, archive: ArchiveFile, no_warn):
-#         archive.delete()
-#         assert not archive.exists()
-
-#     def test_delete_archives(self, archive: ArchiveFile, no_warn):
-#         ArchiveFile.delete_archives(archive.object_id.split('/')[0])
-#         assert not archive.exists()
-
-
-# class TestUploadFile:
-
-#     @pytest.fixture()
-#     def upload_same_file(self, clear_files):
-#         upload = UploadFile('__test_upload_id2')
-#         shutil.copyfile(example_file, upload.os_path)
-#         yield upload
-
-#     @pytest.fixture()
-#     def upload(self, clear_files):
-#         upload = UploadFile('__test_upload_id')
-#         upload.create_dirs()
-#         shutil.copyfile(example_file, upload.os_path)
-#         yield upload
-
-#     def assert_upload(self, upload: UploadFile):
-#         assert upload.exists()
-
-#         assert len(upload.filelist) == 5
-#         has_json = False
-#         for filename in upload.filelist:
-#             the_file = upload.get_file(filename)
-#             assert the_file.exists()
-#             assert the_file.size >= 0
-#             if the_file.path.endswith('.json'):
-#                 has_json = True
-#                 assert the_file.size > 0
-#                 with the_file.open() as f:
-#                     f.read()
-#                 break
-#         assert has_json
-
-#     def test_upload_extracted(self, upload: UploadFile):
-#         with upload:
-#             self.assert_upload(upload)
-
-#     def test_persist(self, upload: UploadFile):
-#         with upload:
-#             zipped_container = upload.persist()
-
-#         assert zipped_container.exists()
-#         assert zipped_container.os_path.endswith('%s.zip' % upload.upload_hash())
-
-#     def test_delete_upload(self, upload: UploadFile):
-#         upload.delete()
-#         assert not upload.exists()
-
-#     def test_hash(self, upload: UploadFile, upload_same_file: UploadFile, no_warn):
-#         with upload:
-#             hash = upload.upload_hash()
-#             assert hash is not None
-#             assert isinstance(hash, str)
-
-#         with upload_same_file:
-#             assert hash == upload_same_file.upload_hash()
-
-#     def test_siblings(self, upload: UploadFile, no_warn):
-#         with upload:
-#             siblings = list(upload.get_siblings('examples_template/template.json'))
-#             assert len(siblings) == 4
-#             assert all(sibling.endswith('.aux') for sibling in siblings)
-
-
-# class TestLocalUploadFile(TestUploadFile):
-#     @pytest.fixture()
-#     def upload_same_file(self, clear_files):
-#         upload = UploadFile('__test_upload_id2', local_path=example_file)
-#         yield upload
-
-#     @pytest.fixture()
-#     def upload(self, clear_files):
-#         upload = UploadFile('__test_upload_id', local_path=example_file)
-#         yield upload
-
-#     def test_delete_upload(self, upload: UploadFile):
-#         upload.delete()
-#         assert upload.exists()
-
-
-# @pytest.fixture(scope='function')
-# def archive_log(clear_files, archive_config):
-#     archive_log = ArchiveLogFile('__test_upload_hash/__test_calc_hash')
-#     with archive_log.open('wt') as f:
-#         f.write('This is a test')
-
-#     yield archive_log
-
-
-# class TestArchiveLogFile:
-
-#     def test_archive_log_file(self, archive_log):
-#         assert archive_log.exists()
-#         with archive_log.open('rt') as f:
-#             assert 'test' in f.read()
diff --git a/tests/test_repo.py b/tests/test_repo.py
index d13ad56f86edb200748c7426b942b5f35b6c7958..2a47a163e1367d92404e634460eb99b134263f1a 100644
--- a/tests/test_repo.py
+++ b/tests/test_repo.py
@@ -20,7 +20,7 @@
 # from nomad.parsing import LocalBackend
 # from nomad.repo import AlreadyExists, RepoCalc
 
-# from tests.test_files import example_file  # noqa
+# from tests.test_uploads import example_file  # noqa
 # from tests.test_normalizing import normalized_template_example  # pylint: disable=unused-import
 # from tests.test_parsing import parsed_template_example  # pylint: disable=unused-import
diff --git a/tests/test_search.py b/tests/test_search.py
index ee9dc78100f2b4e059cd361c6f7db152def8305c..7ad2d8301b933f14306229d7d2790ab42fc79b0f 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import nomad.search
+import nomad.search  # pylint: disable=W0611
 
 
 def test_helloworld(elastic):
diff --git a/tests/test_uploads.py b/tests/test_uploads.py
index beaa0a6c22ced12b1fd23ca7e3b6c870c923162a..33d797427962cf3ba972fb0454d47e22e4265cc9 100644
--- a/tests/test_uploads.py
+++ b/tests/test_uploads.py
@@ -25,7 +25,37 @@ from nomad.uploads import Metadata, MetadataTimeout, PublicMetadata, StagingMeta
 from nomad.uploads import StagingUploadFiles, PublicUploadFiles, UploadFiles, Restricted, \
     ArchiveBasedStagingUploadFiles
 
-from tests.test_files import example_file, example_file_contents, example_file_mainfile
+
+# example_file uses an artificial parser for faster test execution, can also be
+# changed to examples_vasp.zip for using vasp parser
+example_file = 'tests/data/proc/examples_template.zip'
+example_file_contents = [
+    'examples_template/template.json',
+    'examples_template/1.aux',
+    'examples_template/2.aux',
+    'examples_template/3.aux',
+    'examples_template/4.aux']
+example_file_mainfile = 'examples_template/template.json'
+empty_file = 'tests/data/proc/empty.zip'
+
+example_bucket = 'test_bucket'
+example_data = dict(test_key='test_value')
+
+
+@pytest.fixture(scope='function')
+def clear_files():
+    """ Utility fixture that removes all files from files and tmp after test. """
+    try:
+        yield
+    finally:
+        try:
+            shutil.rmtree(config.fs.objects)
+        except FileNotFoundError:
+            pass
+        try:
+            shutil.rmtree(config.fs.tmp)
+        except FileNotFoundError:
+            pass
 
 
 class TestObjects:
diff --git a/tests_integration/test_client.py b/tests_integration/test_client.py
index da37982081adb5411b860f1c6d770c3f17be1e64..ae78b90135761049f3b76abca7d26c923f2c4b8f 100644
--- a/tests_integration/test_client.py
+++ b/tests_integration/test_client.py
@@ -16,7 +16,7 @@
 import pytest
 
 from nomad.client import create_client, upload_file
-from tests.test_files import example_file
+from tests.test_uploads import example_file
 
 
 @pytest.fixture(scope='session')