diff --git a/.vscode/launch.json b/.vscode/launch.json
index 354de6228219713c80a56a8fbd608538b0223155..f741b7602570324722eb65596904c3e2459b263d 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -44,7 +44,7 @@
       "cwd": "${workspaceFolder}",
       "program": "${workspaceFolder}/.pyenv/bin/pytest",
       "args": [
-        "-sv", "tests/processing/test_data.py::test_processing[tests/data/proc/examples_template.zip]"
+        "-sv", "tests/test_api.py::TestRaw::test_raw_files[test_data0]"
       ]
     },
     {
diff --git a/nomad/api/archive.py b/nomad/api/archive.py
index 6bcf1848896a8ef6ddf6e9e420f987675fd1ff08..8f4776f0d93865aeadb80c8c625f6b26fc5eeee1 100644
--- a/nomad/api/archive.py
+++ b/nomad/api/archive.py
@@ -24,12 +24,10 @@ from flask_restplus import abort, Resource

 import nomad_meta_info

-from nomad import config
-from nomad.uploads import UploadFiles
-from nomad.utils import get_logger
+from nomad.uploads import UploadFiles, Restricted

 from .app import api
-from .auth import login_if_available
+from .auth import login_if_available, create_authorization_predicate
 from .common import calc_route

 ns = api.namespace(
@@ -41,6 +39,7 @@
 class ArchiveCalcLogResource(Resource):
     @api.doc('get_archive_logs')
     @api.response(404, 'The upload or calculation does not exist')
+    @api.response(401, 'Not authorized to access the data.')
     @api.response(200, 'Archive data sent', headers={'Content-Type': 'application/plain'})
     @login_if_available
     def get(self, upload_hash, calc_hash):
@@ -51,30 +50,29 @@
         """
         archive_id = '%s/%s' % (upload_hash, calc_hash)

-        try:
-            upload_files = UploadFiles.get(upload_hash)
-            with upload_files.archive_log_file(calc_hash, 'rt') as f:
-                rv = send_file(
-                    f,
-                    mimetype='text/plain',
-                    as_attachment=True,
-                    attachment_filename='%s.log' % archive_id)
+        upload_files = UploadFiles.get(
+            upload_hash, is_authorized=create_authorization_predicate(upload_hash, calc_hash))

-            return rv
-        except FileNotFoundError:
-            abort(404, message='Archive/calculation %s does not exist.' % archive_id)
-        except Exception as e:
-            logger = get_logger(
-                __name__, endpoint='logs', action='get',
-                upload_hash=upload_hash, calc_hash=calc_hash)
-            logger.error('Exception on accessing calc proc log', exc_info=e)
-            abort(500, message='Could not accessing the logs.')
+        if upload_files is None:
+            abort(404, message='Archive %s does not exist.' % upload_hash)
+
+        try:
+            return send_file(
+                upload_files.archive_log_file(calc_hash, 'rt'),
+                mimetype='text/plain',
+                as_attachment=True,
+                attachment_filename='%s.log' % archive_id)
+        except Restricted:
+            abort(401, message='Not authorized to access %s/%s.' % (upload_hash, calc_hash))
+        except KeyError:
+            abort(404, message='Calculation %s does not exist.' % archive_id)


@calc_route(ns)
class ArchiveCalcResource(Resource):
    @api.doc('get_archive_calc')
    @api.response(404, 'The upload or calculation does not exist')
+    @api.response(401, 'Not authorized to access the data.')
     @api.response(200, 'Archive data sent')
     @login_if_available
     def get(self, upload_hash, calc_hash):
@@ -85,28 +83,22 @@
         """
         archive_id = '%s/%s' % (upload_hash, calc_hash)

-        try:
-            upload_file = UploadFiles.get(upload_hash)
-            mode = 'rb' if config.files.compress_archive else 'rt'
-            with upload_file.archive_file(calc_hash, mode) as f:
-                rv = send_file(
-                    f,
-                    mimetype='application/json',
-                    as_attachment=True,
-                    attachment_filename='%s.json' % archive_id)
-
-            if config.files.compress_archive:
-                rv.headers['Content-Encoding'] = 'gzip'
+        upload_file = UploadFiles.get(
+            upload_hash, is_authorized=create_authorization_predicate(upload_hash, calc_hash))
-
-            return rv
+        if upload_file is None:
+            abort(404, message='Archive %s does not exist.' % upload_hash)
+
+        try:
+            return send_file(
+                upload_file.archive_file(calc_hash, 'rt'),
+                mimetype='application/json',
+                as_attachment=True,
+                attachment_filename='%s.json' % archive_id)
+        except Restricted:
+            abort(401, message='Not authorized to access %s/%s.' % (upload_hash, calc_hash))
         except KeyError:
-            abort(404, message='Archive %s does not exist.' % archive_id)
-        except Exception as e:
-            logger = get_logger(
-                __name__, endpoint='archive', action='get',
-                upload_hash=upload_hash, calc_hash=calc_hash)
-            logger.error('Exception on accessing archive', exc_info=e)
-            abort(500, message='Could not accessing the archive.')
+            abort(404, message='Calculation %s does not exist.' % archive_id)


@ns.route('/metainfo/<string:metainfo_path>')
@@ -132,8 +124,3 @@ class MetainfoResource(Resource):
             return rv
         except FileNotFoundError:
             abort(404, message='The metainfo %s does not exist.' % metainfo_path)
-        except Exception as e:
-            logger = get_logger(
-                __name__, endpoint='metainfo', action='get', metainfo_path=metainfo_path)
-            logger.error('Exception on accessing metainfo', exc_info=e)
-            abort(500, message='Could not accessing the metainfo.')
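Both archive endpoints now delegate authorization to the files layer and translate its exceptions into HTTP status codes. A hypothetical client-side sketch of the resulting contract; the base URL and the exact route are placeholders, not taken from this patch:

import requests

base = 'http://localhost:8000/api'  # placeholder deployment URL, not from the patch

r = requests.get(base + '/archive/logs/some_upload_hash/some_calc_hash')
if r.status_code == 401:
    print(r.json()['message'])   # authenticated, but not authorized for this calc
elif r.status_code == 404:
    print(r.json()['message'])   # upload or calculation does not exist
else:
    print(r.text)                # the plain-text processing log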
+ """ + def func(): + if g.user is None: + # guest users don't have authorized access to anything + return False + + # look in repository + upload = coe_repo.Upload.from_upload_hash(upload_hash) + if upload is not None: + return upload.user_id == g.user.user_id + + # look in staging + staging_upload = processing.Upload.get(upload_hash) + if staging_upload is not None: + return str(g.user.user_id) == str(staging_upload.user_id) + + # There are no db entries for the given resource + if uploads.UploadFiles.get(upload_hash) is not None: + logger = utils.get_logger(__name__, upload_hash=upload_hash, calc_hash=calc_hash) + logger.error('Upload files without respective db entry') + + raise KeyError + return func diff --git a/nomad/api/raw.py b/nomad/api/raw.py index 4136bbbc5fee4e3cae640b2846d730117aa3515d..f2057024c43c652766dc04ddc964bf38d87fe36f 100644 --- a/nomad/api/raw.py +++ b/nomad/api/raw.py @@ -22,15 +22,13 @@ import os.path from zipfile import ZIP_DEFLATED, ZIP_STORED import zipstream -from flask import Response, request, send_file +from flask import Response, request, send_file, stream_with_context from flask_restplus import abort, Resource, fields -from werkzeug.exceptions import HTTPException -from nomad.utils import get_logger -from nomad.uploads import UploadFiles +from nomad.uploads import UploadFiles, Restricted from .app import api -from .auth import login_if_available +from .auth import login_if_available, create_authorization_predicate ns = api.namespace('raw', description='Downloading raw data files.') @@ -57,6 +55,7 @@ raw_file_from_path_parser.add_argument(**raw_file_compress_argument) class RawFileFromPathResource(Resource): @api.doc('get') @api.response(404, 'The upload or path does not exist') + @api.response(401, 'Not authorized to access the data.') @api.response(200, 'File(s) send', headers={'Content-Type': 'application/gz'}) @api.expect(raw_file_from_path_parser, validate=True) @login_if_available @@ -66,12 +65,14 @@ class RawFileFromPathResource(Resource): If the given path points to a file, the file is provided. If the given path points to an directory, the directory and all contents is provided as .zip file. + Zip files are streamed; instead of 401 errors, the zip file will just not contain + any files that the user is not authorized to access. """ upload_filepath = fix_file_paths(path) - try: - upload_files = UploadFiles.get(upload_hash) - except KeyError: + upload_files = UploadFiles.get( + upload_hash, create_authorization_predicate(upload_hash)) + if upload_files is None: abort(404, message='The upload with hash %s does not exist.' % upload_hash) if upload_filepath[-1:] == '*': @@ -84,27 +85,19 @@ class RawFileFromPathResource(Resource): return respond_to_get_raw_files(upload_hash, files, compress) try: - with upload_files.raw_file(upload_filepath) as f: - rv = send_file( - f, - mimetype='application/octet-stream', - as_attachment=True, - attachment_filename=os.path.basename(upload_filepath)) - return rv + return send_file( + upload_files.raw_file(upload_filepath), + mimetype='application/octet-stream', + as_attachment=True, + attachment_filename=os.path.basename(upload_filepath)) + except Restricted: + abort(401, message='Not authorized to access upload %s.' % upload_hash) except KeyError: files = list(file for file in upload_files.raw_file_manifest(upload_filepath)) if len(files) == 0: abort(404, message='The file %s does not exist.' 
diff --git a/nomad/api/raw.py b/nomad/api/raw.py
index 4136bbbc5fee4e3cae640b2846d730117aa3515d..f2057024c43c652766dc04ddc964bf38d87fe36f 100644
--- a/nomad/api/raw.py
+++ b/nomad/api/raw.py
@@ -22,15 +22,13 @@ import os.path
 from zipfile import ZIP_DEFLATED, ZIP_STORED
 import zipstream
-from flask import Response, request, send_file
+from flask import Response, request, send_file, stream_with_context
 from flask_restplus import abort, Resource, fields
-from werkzeug.exceptions import HTTPException

-from nomad.utils import get_logger
-from nomad.uploads import UploadFiles
+from nomad.uploads import UploadFiles, Restricted

 from .app import api
-from .auth import login_if_available
+from .auth import login_if_available, create_authorization_predicate

 ns = api.namespace('raw', description='Downloading raw data files.')
@@ -57,6 +55,7 @@ raw_file_from_path_parser.add_argument(**raw_file_compress_argument)
 class RawFileFromPathResource(Resource):
     @api.doc('get')
     @api.response(404, 'The upload or path does not exist')
+    @api.response(401, 'Not authorized to access the data.')
     @api.response(200, 'File(s) send', headers={'Content-Type': 'application/gz'})
     @api.expect(raw_file_from_path_parser, validate=True)
     @login_if_available
@@ -66,12 +65,14 @@
     def get(self, upload_hash, path):
         """
         Get a single raw calculation file or whole directory from a given upload.

         If the given path points to a file, the file is provided. If the given path
         points to a directory, the directory and all its contents are provided as a .zip file.
+        Zip files are streamed; instead of 401 errors, the zip file will just not contain
+        any files that the user is not authorized to access.
         """
         upload_filepath = fix_file_paths(path)

-        try:
-            upload_files = UploadFiles.get(upload_hash)
-        except KeyError:
+        upload_files = UploadFiles.get(
+            upload_hash, create_authorization_predicate(upload_hash))
+        if upload_files is None:
             abort(404, message='The upload with hash %s does not exist.' % upload_hash)

         if upload_filepath[-1:] == '*':
@@ -84,27 +85,19 @@
             return respond_to_get_raw_files(upload_hash, files, compress)

         try:
-            with upload_files.raw_file(upload_filepath) as f:
-                rv = send_file(
-                    f,
-                    mimetype='application/octet-stream',
-                    as_attachment=True,
-                    attachment_filename=os.path.basename(upload_filepath))
-                return rv
+            return send_file(
+                upload_files.raw_file(upload_filepath),
+                mimetype='application/octet-stream',
+                as_attachment=True,
+                attachment_filename=os.path.basename(upload_filepath))
+        except Restricted:
+            abort(401, message='Not authorized to access upload %s.' % upload_hash)
         except KeyError:
             files = list(file for file in upload_files.raw_file_manifest(upload_filepath))
             if len(files) == 0:
                 abort(404, message='The file %s does not exist.' % upload_filepath)
             else:
                 abort(404, message='The file %s does not exist, but there are files with matching paths' % upload_filepath, files=files)
-        except HTTPException as e:
-            raise e
-        except Exception as e:
-            logger = get_logger(
-                __name__, endpoint='raw', action='get',
-                upload_hash=upload_hash, upload_filepath=upload_filepath)
-            logger.error('Exception on accessing raw data', exc_info=e)
-            abort(500, message='Could not accessing the raw data.')


raw_files_request_model = api.model('RawFilesRequest', {
@@ -132,7 +125,11 @@ class RawFilesResource(Resource):
     @api.expect(raw_files_request_model, validate=True)
     @login_if_available
     def post(self, upload_hash):
-        """ Download multiple raw calculation files. """
+        """
+        Download multiple raw calculation files in a .zip file.
+        Zip files are streamed; instead of 401 errors, the zip file will just not contain
+        any files that the user is not authorized to access.
+        """
         json_data = request.get_json()
         compress = json_data.get('compress', False)
         files = [fix_file_paths(file.strip()) for file in json_data['files']]
@@ -145,7 +142,11 @@ class RawFilesResource(Resource):
     @api.expect(raw_files_request_parser, validate=True)
     @login_if_available
     def get(self, upload_hash):
-        """ Download multiple raw calculation files. """
+        """
+        Download multiple raw calculation files in a .zip file.
+        Zip files are streamed; instead of 401 errors, the zip file will just not contain
+        any files that the user is not authorized to access.
+        """
         files_str = request.args.get('files', None)
         compress = request.args.get('compress', 'false') == 'true'
@@ -157,36 +158,34 @@
 def respond_to_get_raw_files(upload_hash, files, compress=False):
-    logger = get_logger(__name__, endpoint='raw', action='get files', upload_hash=upload_hash)
-
-    try:
-        upload_file = UploadFiles.get(upload_hash)
-    except KeyError:
+    upload_files = UploadFiles.get(
+        upload_hash, create_authorization_predicate(upload_hash))
+    if upload_files is None:
         abort(404, message='The upload with hash %s does not exist.' % upload_hash)

     def generator():
         """ Stream a zip file with all files using zipstream. """
         def iterator():
             """ Replace the directory based iter of zipstream with an iter over all given files. """
-            try:
-                for filename in files:
-                    # Write a file to the zipstream.
-                    try:
-                        with upload_file.raw_file(filename) as f:
-                            def iter_content():
-                                while True:
-                                    data = f.read(100000)
-                                    if not data:
-                                        break
-                                    yield data
-
-                            yield dict(arcname=filename, iterable=iter_content())
-                    except KeyError as e:
-                        # files that are not found, will not be returned
-                        pass
-
-            except Exception as e:
-                logger.error('Exception while accessing files.', exc_info=e)
+            for filename in files:
+                # Write a file to the zipstream.
+ try: + with upload_files.raw_file(filename, 'rb') as f: + def iter_content(): + while True: + data = f.read(100000) + if not data: + break + yield data + + yield dict(arcname=filename, iterable=iter_content()) + except KeyError: + # files that are not found, will not be returned + pass + except Restricted: + # due to the streaming nature, we cannot raise 401 here + # we just leave it out in the download + pass compression = ZIP_DEFLATED if compress else ZIP_STORED zip_stream = zipstream.ZipFile(mode='w', compression=compression, allowZip64=True) @@ -195,6 +195,6 @@ def respond_to_get_raw_files(upload_hash, files, compress=False): for chunk in zip_stream: yield chunk - response = Response(generator(), mimetype='application/zip') + response = Response(stream_with_context(generator()), mimetype='application/zip') response.headers['Content-Disposition'] = 'attachment; filename={}'.format('%s.zip' % upload_hash) return response diff --git a/nomad/api/upload.py b/nomad/api/upload.py index 3cdfdf57268a0dcdb52f52de2b185fcb7363becf..2c889086b40beccf3b2b6393d7d84125d84e74a9 100644 --- a/nomad/api/upload.py +++ b/nomad/api/upload.py @@ -191,7 +191,7 @@ class UploadListResource(Resource): abort(400, message='Some IO went wrong, download probably aborted/disrupted.') if not upload_files.is_valid: - upload_files.delete() + # TODO upload_files.delete() upload.delete() abort(400, message='Bad file format, excpected %s.' % ", ".join(upload_files.formats)) diff --git a/nomad/config.py b/nomad/config.py index 898a2cb02f0f15cb3a1af9c4fe2526809c0b2ee1..c67bb4425550508c53b902529b412be44ed2a06a 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -22,7 +22,7 @@ import logging from collections import namedtuple FilesConfig = namedtuple( - 'FilesConfig', ['uploads_bucket', 'raw_bucket', 'archive_bucket', 'compress_archive', 'staging_bucket', 'public_bucket']) + 'FilesConfig', ['uploads_bucket', 'raw_bucket', 'archive_bucket', 'staging_bucket', 'public_bucket']) """ API independent configuration for the object storage. """ CeleryConfig = namedtuple('Celery', ['broker_url']) @@ -50,7 +50,6 @@ files = FilesConfig( uploads_bucket='uploads', raw_bucket=os.environ.get('NOMAD_FILES_RAW_BUCKET', 'raw'), archive_bucket='archive', - compress_archive=True, staging_bucket='staging', public_bucket='public' ) diff --git a/nomad/files.py b/nomad/files.py index dee11e19b0fa5112dee7f0e55f719b93084643b5..1187bb30204b83320768f937734ede75193caebb 100644 --- a/nomad/files.py +++ b/nomad/files.py @@ -41,660 +41,660 @@ allow later introduction of real object storage systems. .. autoclass:: ZippedDataContainer :members: """ -from abc import ABC -from typing import List, Generator, IO, TextIO, cast, Dict, Any -import os -import os.path -from zipfile import ZipFile, BadZipFile, is_zipfile, ZIP_DEFLATED -import shutil -from contextlib import contextmanager -import gzip -import io -import bagit -import json - -from nomad import config, utils - - -class File: - """ - Base class for handling a file. Allows to open (read, write) and delete files. - - Arguments: - os_path: The path to the file in the os filesystem. - - Attributes: - logger: A structured logger with bucket and object information. - path: The abstract path of the file. - """ - def __init__(self, os_path: str = None) -> None: - self.os_path = os_path - - self.logger = self.bind_logger(utils.get_logger(__name__)) - - def bind_logger(self, logger): - """ Adds context information to the given logger and returns it. 
""" - return logger.bind(path=self.os_path) - - @contextmanager - def open(self, mode: str = 'r', *args, **kwargs) -> Generator[IO, None, None]: - """ Opens the object with he given mode, etc. """ - self.logger.debug('open file') - try: - if mode.startswith('w'): - self.create_dirs() - with open(self.os_path, mode, *args, **kwargs) as f: - yield f - except FileNotFoundError: - raise KeyError() - - def delete(self) -> None: - """ Deletes the file. """ - try: - os.remove(self.os_path) - self.logger.debug('file deleted') - except FileNotFoundError: - raise KeyError() - - def exists(self) -> bool: - """ Returns true if object exists. """ - return os.path.exists(self.os_path) - - @property - def size(self) -> int: - """ Returns the os determined file size. """ - return os.stat(self.os_path).st_size - - @property - def path(self) -> str: - return self.os_path - - def create_dirs(self) -> None: - directory = os.path.dirname(self.os_path) - if not os.path.isdir(directory): - os.makedirs(directory) - - -class ZippedFile(File): - """ A file contained in a .zip archive. """ - def __init__(self, zip_os_path: str, filename: str) -> None: - self.filename = filename - super().__init__(zip_os_path) - - def bind_logger(self, logger): - return super().bind_logger(logger).bind(filename=self.filename) - - @contextmanager - def open(self, *args, **kwargs) -> Generator[IO, None, None]: - self.logger.debug('open file') - try: - with ZipFile(self.os_path) as zip_file: - yield zip_file.open(self.filename, *args, **kwargs) - except FileNotFoundError: - raise KeyError() - except KeyError as e: - raise e - except Exception as e: - msg = 'Could not read upload.' - self.logger.error(msg, exc_info=e) - raise FileError(msg, e) - - def delete(self) -> None: - assert False, "A file in a zip archive cannot be deleted." - - @property - def size(self) -> int: - with ZipFile(self.os_path) as zip_file: - return zip_file.getinfo(self.filename).file_size - - @property - def path(self) -> str: - return os.path.join( - os.path.dirname(self.os_path), - os.path.basename(self.os_path), - self.filename) - - -class Objects: - @classmethod - def _os_path(cls, bucket: str, name: str, ext: str = None) -> str: - if ext is not None and ext != '': - file_name = '%s.%s' % (name, ext) - elif name is None or name == '': - file_name = '' - else: - file_name = name - - # add an extra directory to limit the files per directory (gpfs) - file_name = '%s/%s' % (file_name[0:3], file_name) - - path_segments = file_name.split('/') - path = os.path.join(*([config.fs.objects, bucket] + path_segments)) - - return os.path.abspath(path) - - @classmethod - def delete_all(cls, bucket: str, prefix: str = ''): - """ Delete all files with given prefix, prefix must denote a directory. """ - try: - shutil.rmtree(cls._os_path(bucket, prefix, ext=None)) - except FileNotFoundError: - pass - - -class ObjectFile(File): - """ - Base class for file objects. Allows to open (read, write) and delete objects. - File objects filesystem location is govern by its bucket, object_id, and ext. - This object store location can be overridden with a local_path. - - Arguments: - bucket (str): The 'bucket' for this object. - object_id (str): The object_id for this object. Might contain `/` to structure - the bucket further. Will be mapped to directories in the filesystem. - ext (str): Optional extension for the object file in the filesystem. - - Attributes: - logger: A structured logger with bucket and object information. 
- has_local_path: True, if this object is stored somewhere else in the fs. - """ - def __init__(self, bucket: str, object_id: str, ext: str = None, local_path: str = None) -> None: - self.bucket = bucket - self.object_id = object_id - self.ext = ext - - self.has_local_path = local_path is not None - path = Objects._os_path(self.bucket, self.object_id, self.ext) - path = local_path if self.has_local_path else path - - super().__init__(path) - - def bind_logger(self, logger): - """ Adds context information to the given logger and returns it. """ - return super().bind_logger(logger).bind(bucket=self.bucket, object=self.object_id) - - def delete(self) -> None: - """ Deletes the file, if it has not a localpath. Localpath files are never deleted. """ - # Do not delete local files, no matter what - if not self.has_local_path: - super().delete() - - -class FileError(Exception): - def __init__(self, msg, cause): - super().__init__(msg, cause) - - -class UploadFile(ObjectFile): - """ - Instances of ``UploadFile`` represent an uploaded file in the *'object storage'*. - - Currently only user ``.zip`` files are supported. - - Uploads can be extracted to tmp storage (open/close), the list of files in - the upload is provided, and files can be opened for read. Extracting uploads - is optional, all functions in this module are also available without extracting. - Extracts are automatically bagged with *bagit*. - - This class is a context manager, that extracts the file when using a ``with`` - statement with instances of this class. - - UploadFiles are stored in their own *bucket*. But, storage can be overridden - by providing a ``local_path``. This is useful when the file is already stored - in nomad's distributed file system, e.g. for bulk processing of already uploaded - files. - - Uploads can be persistet as :class:`ZippedDataContainers` for permanent repository - raw data storage. - - Arguments: - upload_id: The upload of this uploaded file. - local_path: Optional override for the path used to store/access the uploaded file. - - Attributes: - is_extracted: True if the upload is extracted. - upload_extract_dir: The path of the tmp directory with the extracted contents. - filelist: A list of filenames relative to the .zipped upload root. - """ - - formats = ['zip'] - """ A human readable list of supported file formats. """ - - def __init__(self, upload_id: str, local_path: str = None) -> None: - super().__init__( - bucket=config.files.uploads_bucket, - object_id=upload_id, - ext='zip', - local_path=local_path) - - self._extract_dir: str = os.path.join(config.fs.tmp, 'uploads_extracted', upload_id) - self._bagged_container: DataContainer = None - if os.path.isdir(self._extract_dir): - self._bagged_container = BaggedDataContainer(self._extract_dir) - - def bind_logger(self, logger): - return super().bind_logger(logger).bind(upload_id=self.object_id) - - # There is not good way to capsule decorators in a class: - # https://medium.com/@vadimpushtaev/decorator-inside-python-class-1e74d23107f6 - class Decorators: - @classmethod - def handle_errors(cls, decorated): - def wrapper(self, *args, **kwargs): - try: - return decorated(self, *args, **kwargs) - except Exception as e: - msg = 'Could not %s upload.' % decorated.__name__ - self.logger.error(msg, upload_id=self.object_id, exc_info=e) - raise FileError(msg, e) - return wrapper - - @contextmanager - def _zip(self): - assert self.exists(), "Can only access uploaded file if it exists." 
- zip_file = None - try: - zip_file = ZipFile(self.os_path) - yield zip_file - except BadZipFile as e: - raise FileError('Upload is not a zip file', e) - finally: - if zip_file is not None: - zip_file.close() - - @property - def filelist(self) -> List[str]: - if self.is_extracted: - return self._bagged_container.manifest - else: - with self._zip() as zip_file: - return [ - zip_info.filename for zip_info in zip_file.filelist - if not zip_info.filename.endswith('/')] - - @property - def is_extracted(self) -> bool: - return self._bagged_container is not None - - @Decorators.handle_errors - def upload_hash(self) -> str: - assert self.is_extracted - return self._bagged_container.hash - - @Decorators.handle_errors - def extract(self) -> None: - """ - 'Opens' the upload. This means the upload files get extracted and bagged to tmp. - - Raises: - UploadFileError: If some IO went wrong. - KeyError: If the upload does not exist. - """ - os.makedirs(os.path.join(config.fs.tmp, 'uploads_extracted'), exist_ok=True) - - with self._zip() as zip_file: - zip_file.extractall(self._extract_dir) - - self.logger.debug('extracted uploaded file') - - self._bagged_container = BaggedDataContainer.create(self._extract_dir) - self.logger.debug('bagged uploaded file') - - def persist(self, object_id: str = None): - """ - Persists the extracted and bagged upload to the repository raw data bucket. - """ - assert self.is_extracted - if object_id is None: - object_id = self.upload_hash() - - target = Objects._os_path(config.files.raw_bucket, object_id, 'zip') - directory = os.path.dirname(target) - if not os.path.isdir(directory): - os.makedirs(directory) - - return ZippedDataContainer.create(self._extract_dir, target=target) - - @Decorators.handle_errors - def remove_extract(self) -> None: - """ - Closes the upload. This means the tmp. files are deleted. - - Raises: - UploadFileError: If some IO went wrong. - KeyError: If the upload does not exist. - """ - try: - shutil.rmtree(self._extract_dir) - except FileNotFoundError: - raise KeyError() - - self.logger.debug('removed uploaded file extract') - - def __enter__(self): - self.extract() - return self - - def __exit__(self, exc_type, exc, exc_tb): - self.remove_extract() - - def get_file(self, filename: str) -> File: - """ - Returns a :class:`File` instance as a handle to the file with the given name. - Only works on extracted uploads. The given filename must be one of the - name in ``self.filelist``. - """ - assert self.is_extracted - return self._bagged_container.get_file(filename) - - @property - def is_valid(self): - return is_zipfile(self.os_path) - - def get_siblings(self, filename: str) -> Generator[str, None, None]: - """ - Returns the names of all files that share the same prefix (object id), - respectively are part of the same directory (incl. files in sub directories). - In nomad terms, the aux files the this file. Returned siblings are relative - to the upload root directory. - """ - dirname = os.path.dirname(filename) - for other in self.filelist: - if other.startswith(dirname) and other != filename: - yield other - - -class RepositoryFile(ObjectFile): - """ - Represents a repository file. A repository file is a persistet bagged upload, incl. - the upload metadata. It is used to serve raw data. 
- """ - def __init__(self, upload_hash: str) -> None: - super().__init__( - bucket=config.files.raw_bucket, - object_id=upload_hash, - ext='zip') - - self.zipped_container = ZippedDataContainer(self.os_path) - - def get_file(self, path: str) -> ZippedFile: - return self.zipped_container.get_file(path) - - @property - def manifest(self) -> List[str]: - return self.zipped_container.manifest - - -class ArchiveFile(ObjectFile): - """ - Represents the archive file for an individual calculation. Allows to write the - archive, read the archive, delete the archive. - - Archive files are stored in their own *bucket*. - """ - def __init__(self, archive_id: str) -> None: - super().__init__( - bucket=config.files.archive_bucket, - object_id=archive_id, - ext='json.gz' if config.files.compress_archive else 'json') - - def bind_logger(self, logger): - upload_hash, calc_hash = self.object_id.split('/') - return super().bind_logger(logger).bind( - archive_id=self.object_id, upload_hash=upload_hash, calc_hash=calc_hash) - - @contextmanager - def write_archive_json(self) -> Generator[TextIO, None, None]: - """ Context manager that yields a file-like to write the archive json. """ - with self.open('wb') as binary_out: - if config.files.compress_archive: - gzip_wrapper = cast(TextIO, gzip.open(binary_out, 'wt')) - out = gzip_wrapper - else: - text_wrapper = io.TextIOWrapper(binary_out, encoding='utf-8') - out = text_wrapper - - try: - yield out - finally: - out.flush() - out.close() - - self.logger.debug('archive file written') - - @contextmanager - def read_archive_json(self) -> Generator[TextIO, None, None]: - """ Context manager that yields a file-like to read the archive json. """ - with self.open(mode='rb') as binary_in: - try: - if config.files.compress_archive: - gzip_wrapper = cast(TextIO, gzip.open(binary_in, 'rt')) - in_file = gzip_wrapper - else: - text_wrapper = io.TextIOWrapper(binary_in, encoding='utf-8') - in_file = text_wrapper - except FileNotFoundError: - raise KeyError() - - try: - yield in_file - finally: - in_file.close() - - self.logger.debug('archive file read') - - @staticmethod - def delete_archives(upload_hash: str): - """ Delete all archives of one upload with the given hash. """ - bucket = config.files.archive_bucket - Objects.delete_all(bucket, upload_hash) - - utils.get_logger(__name__, bucket=bucket, upload_hash=upload_hash) \ - .debug('archive files deleted') - - -class ArchiveLogFile(ObjectFile): - """ - Represents a log file that was created for processing a single calculation to create - an archive. - Logfiles are stored within the *archive_bucket* alongside the archive files. - """ - def __init__(self, archive_id: str) -> None: - super().__init__( - bucket=config.files.archive_bucket, - object_id=archive_id, - ext='log') - - -class DataContainer(ABC): - """ - An abstract baseclass for a *data container*. A data container is a persistent - bundle of related files, like the calculation raw data of a user upload. - - A container has a *manifest* and arbitrary *metadata*. - """ - @property - def manifest(self) -> List[str]: - """ - A readonly list of paths to files within the container relative to the containers - payload directory. - """ - pass - - @property - def metadata(self) -> Dict[str, Any]: - """ - The modifiable metadata of this manifest. On the top-level its a string keyed - dictionary. The values can be arbitrary, but have to be JSON-serializable. - Modifications have to be saved (:func:`save_metadata`). 
- """ - pass - - def save_metadata(self) -> None: - """ Persists metadata changes. """ - pass - - def get_file(self, manifest_path: str) -> File: - """ - Returns a file-like for the given manifest path. - """ - pass - - @property - def hash(self) -> str: - return self.metadata['Nomad-Hash'] - - -class BaggedDataContainer(DataContainer): - """ - A *data container* based on *bagit*. Once created no more files can be added. - """ - def __init__(self, path: str) -> None: - self.path = path - self.bag = bagit.Bag(path) - self._metadata = None - self.payload_directory = os.path.join(path, 'data') - - @staticmethod - def create(path: str) -> 'BaggedDataContainer': - """ - Makes a bag from the given directory and returns the respective BaggedDataContainer - instance. - """ - bag = bagit.make_bag(path, checksums=['sha512']) - - # TODO implement NOMAD-coe's way of doing the hashing - hashes = [ - value['sha512'] for key, value in bag.entries.items() - if key.startswith('data/') - ] - bag.info['Nomad-Hash'] = utils.hash(''.join(hashes)) - - bag.save() - return BaggedDataContainer(path) - - @property - def metadata(self): - if self._metadata is None: - self._metadata = BaggedDataContainer._load_bagit_metadata(self.bag.info) - return self._metadata - - @staticmethod - def _load_bagit_metadata(info): - metadata = info - for key, value in metadata.items(): - if key not in bagit.STANDARD_BAG_INFO_HEADERS: - try: - metadata[key] = json.loads(value) - except Exception: - pass - return metadata - - def save_metadata(self): - metadata = self.bag.info - for key, value in metadata.items(): - if key not in bagit.STANDARD_BAG_INFO_HEADERS and not isinstance(value, str): - metadata[key] = json.dumps(value) - self.bag.save() - - @property - def manifest(self): - return [path[5:] for path in self.bag.entries.keys() if path.startswith('data/')] - - def get_file(self, path): - return File(os.path.join(self.payload_directory, path)) - - -class ZippedDataContainer(File, DataContainer): - """ - A *bagit*-based data container that has been zipped. Its metadata cannot be changed - anymore. - """ - def __init__(self, os_path: str) -> None: - super(ZippedDataContainer, self).__init__(os_path) - self._metadata = None - self._base_directory = os.path.splitext(os.path.basename(os_path))[0] - self._payload_directory = '%s/data/' % self._base_directory - self._payload_deirectory_len = len(self._payload_directory) - - @staticmethod - def create(path: str, target: str = None) -> 'ZippedDataContainer': - """ - Creates a zipped bag from a bag. - - Arguments: - path: The path to the bag - target: - The path to the zip (excl. .zip extension). Base dir in zip will be - based on the target path. - """ - if not target: - target = path + '.zip' - - target = os.path.abspath(target) - - assert os.path.isdir(path) - assert os.path.exists(os.path.dirname(target)) - - # manually created zipfile instead of shutils.make_zip to use base_dir from - # target while zipping path - base_dir = os.path.splitext(os.path.basename(target))[0] - path_prefix_len = len(path) + 1 - with ZipFile(target, "w", compression=ZIP_DEFLATED, allowZip64=True) as zip_file: - for root, _, filenames in os.walk(path): - for name in filenames: - file_path = os.path.join(root, name) - zipped_path = os.path.join(base_dir, file_path[path_prefix_len:]) - zip_file.write(file_path, zipped_path) - - return ZippedDataContainer(target) - - @contextmanager - def zip_file(self): - assert self.exists(), "Can only access uploaded file if it exists." 
- zip_file = None - try: - zip_file = ZipFile(self.os_path) - yield zip_file - except BadZipFile as e: - raise FileError('Upload is not a zip file', e) - finally: - if zip_file is not None: - zip_file.close() - - @property - def manifest(self): - with self.zip_file() as zip_file: - return [ - zip_info.filename[self._payload_deirectory_len:] for zip_info in zip_file.filelist - if not zip_info.filename.endswith('/') and zip_info.filename.startswith(self._payload_directory)] - - @property - def metadata(self): - if self._metadata is None: - self._metadata = self._load_metadata() - return self._metadata - - def _load_metadata(self): - with ZippedFile(self.os_path, '%s/bag-info.txt' % self._base_directory).open('r') as metadata_file: - metadata_contents = metadata_file.read() - - metadata_file = io.StringIO(metadata_contents.decode("utf-8")) - tags = {} - for name, value in bagit._parse_tags(metadata_file): - if name not in tags: - tags[name] = value - continue - - if not isinstance(tags[name], list): - tags[name] = [tags[name], value] - else: - tags[name].append(value) - - return BaggedDataContainer._load_bagit_metadata(tags) - - def get_file(self, path): - return ZippedFile(self.path, self._payload_directory + path) - - def get_zip_path(self, path): - return self._payload_directory + path +# from abc import ABC +# from typing import List, Generator, IO, TextIO, cast, Dict, Any +# import os +# import os.path +# from zipfile import ZipFile, BadZipFile, is_zipfile, ZIP_DEFLATED +# import shutil +# from contextlib import contextmanager +# import gzip +# import io +# import bagit +# import json + +# from nomad import config, utils + + +# class File: +# """ +# Base class for handling a file. Allows to open (read, write) and delete files. + +# Arguments: +# os_path: The path to the file in the os filesystem. + +# Attributes: +# logger: A structured logger with bucket and object information. +# path: The abstract path of the file. +# """ +# def __init__(self, os_path: str = None) -> None: +# self.os_path = os_path + +# self.logger = self.bind_logger(utils.get_logger(__name__)) + +# def bind_logger(self, logger): +# """ Adds context information to the given logger and returns it. """ +# return logger.bind(path=self.os_path) + +# @contextmanager +# def open(self, mode: str = 'r', *args, **kwargs) -> Generator[IO, None, None]: +# """ Opens the object with he given mode, etc. """ +# self.logger.debug('open file') +# try: +# if mode.startswith('w'): +# self.create_dirs() +# with open(self.os_path, mode, *args, **kwargs) as f: +# yield f +# except FileNotFoundError: +# raise KeyError() + +# def delete(self) -> None: +# """ Deletes the file. """ +# try: +# os.remove(self.os_path) +# self.logger.debug('file deleted') +# except FileNotFoundError: +# raise KeyError() + +# def exists(self) -> bool: +# """ Returns true if object exists. """ +# return os.path.exists(self.os_path) + +# @property +# def size(self) -> int: +# """ Returns the os determined file size. """ +# return os.stat(self.os_path).st_size + +# @property +# def path(self) -> str: +# return self.os_path + +# def create_dirs(self) -> None: +# directory = os.path.dirname(self.os_path) +# if not os.path.isdir(directory): +# os.makedirs(directory) + + +# class ZippedFile(File): +# """ A file contained in a .zip archive. 
""" +# def __init__(self, zip_os_path: str, filename: str) -> None: +# self.filename = filename +# super().__init__(zip_os_path) + +# def bind_logger(self, logger): +# return super().bind_logger(logger).bind(filename=self.filename) + +# @contextmanager +# def open(self, *args, **kwargs) -> Generator[IO, None, None]: +# self.logger.debug('open file') +# try: +# with ZipFile(self.os_path) as zip_file: +# yield zip_file.open(self.filename, *args, **kwargs) +# except FileNotFoundError: +# raise KeyError() +# except KeyError as e: +# raise e +# except Exception as e: +# msg = 'Could not read upload.' +# self.logger.error(msg, exc_info=e) +# raise FileError(msg, e) + +# def delete(self) -> None: +# assert False, "A file in a zip archive cannot be deleted." + +# @property +# def size(self) -> int: +# with ZipFile(self.os_path) as zip_file: +# return zip_file.getinfo(self.filename).file_size + +# @property +# def path(self) -> str: +# return os.path.join( +# os.path.dirname(self.os_path), +# os.path.basename(self.os_path), +# self.filename) + + +# class Objects: +# @classmethod +# def _os_path(cls, bucket: str, name: str, ext: str = None) -> str: +# if ext is not None and ext != '': +# file_name = '%s.%s' % (name, ext) +# elif name is None or name == '': +# file_name = '' +# else: +# file_name = name + +# # add an extra directory to limit the files per directory (gpfs) +# file_name = '%s/%s' % (file_name[0:3], file_name) + +# path_segments = file_name.split('/') +# path = os.path.join(*([config.fs.objects, bucket] + path_segments)) + +# return os.path.abspath(path) + +# @classmethod +# def delete_all(cls, bucket: str, prefix: str = ''): +# """ Delete all files with given prefix, prefix must denote a directory. """ +# try: +# shutil.rmtree(cls._os_path(bucket, prefix, ext=None)) +# except FileNotFoundError: +# pass + + +# class ObjectFile(File): +# """ +# Base class for file objects. Allows to open (read, write) and delete objects. +# File objects filesystem location is govern by its bucket, object_id, and ext. +# This object store location can be overridden with a local_path. + +# Arguments: +# bucket (str): The 'bucket' for this object. +# object_id (str): The object_id for this object. Might contain `/` to structure +# the bucket further. Will be mapped to directories in the filesystem. +# ext (str): Optional extension for the object file in the filesystem. + +# Attributes: +# logger: A structured logger with bucket and object information. +# has_local_path: True, if this object is stored somewhere else in the fs. +# """ +# def __init__(self, bucket: str, object_id: str, ext: str = None, local_path: str = None) -> None: +# self.bucket = bucket +# self.object_id = object_id +# self.ext = ext + +# self.has_local_path = local_path is not None +# path = Objects._os_path(self.bucket, self.object_id, self.ext) +# path = local_path if self.has_local_path else path + +# super().__init__(path) + +# def bind_logger(self, logger): +# """ Adds context information to the given logger and returns it. """ +# return super().bind_logger(logger).bind(bucket=self.bucket, object=self.object_id) + +# def delete(self) -> None: +# """ Deletes the file, if it has not a localpath. Localpath files are never deleted. 
""" +# # Do not delete local files, no matter what +# if not self.has_local_path: +# super().delete() + + +# class FileError(Exception): +# def __init__(self, msg, cause): +# super().__init__(msg, cause) + + +# class UploadFile(ObjectFile): +# """ +# Instances of ``UploadFile`` represent an uploaded file in the *'object storage'*. + +# Currently only user ``.zip`` files are supported. + +# Uploads can be extracted to tmp storage (open/close), the list of files in +# the upload is provided, and files can be opened for read. Extracting uploads +# is optional, all functions in this module are also available without extracting. +# Extracts are automatically bagged with *bagit*. + +# This class is a context manager, that extracts the file when using a ``with`` +# statement with instances of this class. + +# UploadFiles are stored in their own *bucket*. But, storage can be overridden +# by providing a ``local_path``. This is useful when the file is already stored +# in nomad's distributed file system, e.g. for bulk processing of already uploaded +# files. + +# Uploads can be persistet as :class:`ZippedDataContainers` for permanent repository +# raw data storage. + +# Arguments: +# upload_id: The upload of this uploaded file. +# local_path: Optional override for the path used to store/access the uploaded file. + +# Attributes: +# is_extracted: True if the upload is extracted. +# upload_extract_dir: The path of the tmp directory with the extracted contents. +# filelist: A list of filenames relative to the .zipped upload root. +# """ + +# formats = ['zip'] +# """ A human readable list of supported file formats. """ + +# def __init__(self, upload_id: str, local_path: str = None) -> None: +# super().__init__( +# bucket=config.files.uploads_bucket, +# object_id=upload_id, +# ext='zip', +# local_path=local_path) + +# self._extract_dir: str = os.path.join(config.fs.tmp, 'uploads_extracted', upload_id) +# self._bagged_container: DataContainer = None +# if os.path.isdir(self._extract_dir): +# self._bagged_container = BaggedDataContainer(self._extract_dir) + +# def bind_logger(self, logger): +# return super().bind_logger(logger).bind(upload_id=self.object_id) + +# # There is not good way to capsule decorators in a class: +# # https://medium.com/@vadimpushtaev/decorator-inside-python-class-1e74d23107f6 +# class Decorators: +# @classmethod +# def handle_errors(cls, decorated): +# def wrapper(self, *args, **kwargs): +# try: +# return decorated(self, *args, **kwargs) +# except Exception as e: +# msg = 'Could not %s upload.' % decorated.__name__ +# self.logger.error(msg, upload_id=self.object_id, exc_info=e) +# raise FileError(msg, e) +# return wrapper + +# @contextmanager +# def _zip(self): +# assert self.exists(), "Can only access uploaded file if it exists." 
+# zip_file = None +# try: +# zip_file = ZipFile(self.os_path) +# yield zip_file +# except BadZipFile as e: +# raise FileError('Upload is not a zip file', e) +# finally: +# if zip_file is not None: +# zip_file.close() + +# @property +# def filelist(self) -> List[str]: +# if self.is_extracted: +# return self._bagged_container.manifest +# else: +# with self._zip() as zip_file: +# return [ +# zip_info.filename for zip_info in zip_file.filelist +# if not zip_info.filename.endswith('/')] + +# @property +# def is_extracted(self) -> bool: +# return self._bagged_container is not None + +# @Decorators.handle_errors +# def upload_hash(self) -> str: +# assert self.is_extracted +# return self._bagged_container.hash + +# @Decorators.handle_errors +# def extract(self) -> None: +# """ +# 'Opens' the upload. This means the upload files get extracted and bagged to tmp. + +# Raises: +# UploadFileError: If some IO went wrong. +# KeyError: If the upload does not exist. +# """ +# os.makedirs(os.path.join(config.fs.tmp, 'uploads_extracted'), exist_ok=True) + +# with self._zip() as zip_file: +# zip_file.extractall(self._extract_dir) + +# self.logger.debug('extracted uploaded file') + +# self._bagged_container = BaggedDataContainer.create(self._extract_dir) +# self.logger.debug('bagged uploaded file') + +# def persist(self, object_id: str = None): +# """ +# Persists the extracted and bagged upload to the repository raw data bucket. +# """ +# assert self.is_extracted +# if object_id is None: +# object_id = self.upload_hash() + +# target = Objects._os_path(config.files.raw_bucket, object_id, 'zip') +# directory = os.path.dirname(target) +# if not os.path.isdir(directory): +# os.makedirs(directory) + +# return ZippedDataContainer.create(self._extract_dir, target=target) + +# @Decorators.handle_errors +# def remove_extract(self) -> None: +# """ +# Closes the upload. This means the tmp. files are deleted. + +# Raises: +# UploadFileError: If some IO went wrong. +# KeyError: If the upload does not exist. +# """ +# try: +# shutil.rmtree(self._extract_dir) +# except FileNotFoundError: +# raise KeyError() + +# self.logger.debug('removed uploaded file extract') + +# def __enter__(self): +# self.extract() +# return self + +# def __exit__(self, exc_type, exc, exc_tb): +# self.remove_extract() + +# def get_file(self, filename: str) -> File: +# """ +# Returns a :class:`File` instance as a handle to the file with the given name. +# Only works on extracted uploads. The given filename must be one of the +# name in ``self.filelist``. +# """ +# assert self.is_extracted +# return self._bagged_container.get_file(filename) + +# @property +# def is_valid(self): +# return is_zipfile(self.os_path) + +# def get_siblings(self, filename: str) -> Generator[str, None, None]: +# """ +# Returns the names of all files that share the same prefix (object id), +# respectively are part of the same directory (incl. files in sub directories). +# In nomad terms, the aux files the this file. Returned siblings are relative +# to the upload root directory. +# """ +# dirname = os.path.dirname(filename) +# for other in self.filelist: +# if other.startswith(dirname) and other != filename: +# yield other + + +# class RepositoryFile(ObjectFile): +# """ +# Represents a repository file. A repository file is a persistet bagged upload, incl. +# the upload metadata. It is used to serve raw data. 
+# """ +# def __init__(self, upload_hash: str) -> None: +# super().__init__( +# bucket=config.files.raw_bucket, +# object_id=upload_hash, +# ext='zip') + +# self.zipped_container = ZippedDataContainer(self.os_path) + +# def get_file(self, path: str) -> ZippedFile: +# return self.zipped_container.get_file(path) + +# @property +# def manifest(self) -> List[str]: +# return self.zipped_container.manifest + + +# class ArchiveFile(ObjectFile): +# """ +# Represents the archive file for an individual calculation. Allows to write the +# archive, read the archive, delete the archive. + +# Archive files are stored in their own *bucket*. +# """ +# def __init__(self, archive_id: str) -> None: +# super().__init__( +# bucket=config.files.archive_bucket, +# object_id=archive_id, +# ext='json.gz' if config.files.compress_archive else 'json') + +# def bind_logger(self, logger): +# upload_hash, calc_hash = self.object_id.split('/') +# return super().bind_logger(logger).bind( +# archive_id=self.object_id, upload_hash=upload_hash, calc_hash=calc_hash) + +# @contextmanager +# def write_archive_json(self) -> Generator[TextIO, None, None]: +# """ Context manager that yields a file-like to write the archive json. """ +# with self.open('wb') as binary_out: +# if config.files.compress_archive: +# gzip_wrapper = cast(TextIO, gzip.open(binary_out, 'wt')) +# out = gzip_wrapper +# else: +# text_wrapper = io.TextIOWrapper(binary_out, encoding='utf-8') +# out = text_wrapper + +# try: +# yield out +# finally: +# out.flush() +# out.close() + +# self.logger.debug('archive file written') + +# @contextmanager +# def read_archive_json(self) -> Generator[TextIO, None, None]: +# """ Context manager that yields a file-like to read the archive json. """ +# with self.open(mode='rb') as binary_in: +# try: +# if config.files.compress_archive: +# gzip_wrapper = cast(TextIO, gzip.open(binary_in, 'rt')) +# in_file = gzip_wrapper +# else: +# text_wrapper = io.TextIOWrapper(binary_in, encoding='utf-8') +# in_file = text_wrapper +# except FileNotFoundError: +# raise KeyError() + +# try: +# yield in_file +# finally: +# in_file.close() + +# self.logger.debug('archive file read') + +# @staticmethod +# def delete_archives(upload_hash: str): +# """ Delete all archives of one upload with the given hash. """ +# bucket = config.files.archive_bucket +# Objects.delete_all(bucket, upload_hash) + +# utils.get_logger(__name__, bucket=bucket, upload_hash=upload_hash) \ +# .debug('archive files deleted') + + +# class ArchiveLogFile(ObjectFile): +# """ +# Represents a log file that was created for processing a single calculation to create +# an archive. +# Logfiles are stored within the *archive_bucket* alongside the archive files. +# """ +# def __init__(self, archive_id: str) -> None: +# super().__init__( +# bucket=config.files.archive_bucket, +# object_id=archive_id, +# ext='log') + + +# class DataContainer(ABC): +# """ +# An abstract baseclass for a *data container*. A data container is a persistent +# bundle of related files, like the calculation raw data of a user upload. + +# A container has a *manifest* and arbitrary *metadata*. +# """ +# @property +# def manifest(self) -> List[str]: +# """ +# A readonly list of paths to files within the container relative to the containers +# payload directory. +# """ +# pass + +# @property +# def metadata(self) -> Dict[str, Any]: +# """ +# The modifiable metadata of this manifest. On the top-level its a string keyed +# dictionary. The values can be arbitrary, but have to be JSON-serializable. 
+# Modifications have to be saved (:func:`save_metadata`). +# """ +# pass + +# def save_metadata(self) -> None: +# """ Persists metadata changes. """ +# pass + +# def get_file(self, manifest_path: str) -> File: +# """ +# Returns a file-like for the given manifest path. +# """ +# pass + +# @property +# def hash(self) -> str: +# return self.metadata['Nomad-Hash'] + + +# class BaggedDataContainer(DataContainer): +# """ +# A *data container* based on *bagit*. Once created no more files can be added. +# """ +# def __init__(self, path: str) -> None: +# self.path = path +# self.bag = bagit.Bag(path) +# self._metadata = None +# self.payload_directory = os.path.join(path, 'data') + +# @staticmethod +# def create(path: str) -> 'BaggedDataContainer': +# """ +# Makes a bag from the given directory and returns the respective BaggedDataContainer +# instance. +# """ +# bag = bagit.make_bag(path, checksums=['sha512']) + +# # TODO implement NOMAD-coe's way of doing the hashing +# hashes = [ +# value['sha512'] for key, value in bag.entries.items() +# if key.startswith('data/') +# ] +# bag.info['Nomad-Hash'] = utils.hash(''.join(hashes)) + +# bag.save() +# return BaggedDataContainer(path) + +# @property +# def metadata(self): +# if self._metadata is None: +# self._metadata = BaggedDataContainer._load_bagit_metadata(self.bag.info) +# return self._metadata + +# @staticmethod +# def _load_bagit_metadata(info): +# metadata = info +# for key, value in metadata.items(): +# if key not in bagit.STANDARD_BAG_INFO_HEADERS: +# try: +# metadata[key] = json.loads(value) +# except Exception: +# pass +# return metadata + +# def save_metadata(self): +# metadata = self.bag.info +# for key, value in metadata.items(): +# if key not in bagit.STANDARD_BAG_INFO_HEADERS and not isinstance(value, str): +# metadata[key] = json.dumps(value) +# self.bag.save() + +# @property +# def manifest(self): +# return [path[5:] for path in self.bag.entries.keys() if path.startswith('data/')] + +# def get_file(self, path): +# return File(os.path.join(self.payload_directory, path)) + + +# class ZippedDataContainer(File, DataContainer): +# """ +# A *bagit*-based data container that has been zipped. Its metadata cannot be changed +# anymore. +# """ +# def __init__(self, os_path: str) -> None: +# super(ZippedDataContainer, self).__init__(os_path) +# self._metadata = None +# self._base_directory = os.path.splitext(os.path.basename(os_path))[0] +# self._payload_directory = '%s/data/' % self._base_directory +# self._payload_deirectory_len = len(self._payload_directory) + +# @staticmethod +# def create(path: str, target: str = None) -> 'ZippedDataContainer': +# """ +# Creates a zipped bag from a bag. + +# Arguments: +# path: The path to the bag +# target: +# The path to the zip (excl. .zip extension). Base dir in zip will be +# based on the target path. 
+# """ +# if not target: +# target = path + '.zip' + +# target = os.path.abspath(target) + +# assert os.path.isdir(path) +# assert os.path.exists(os.path.dirname(target)) + +# # manually created zipfile instead of shutils.make_zip to use base_dir from +# # target while zipping path +# base_dir = os.path.splitext(os.path.basename(target))[0] +# path_prefix_len = len(path) + 1 +# with ZipFile(target, "w", compression=ZIP_DEFLATED, allowZip64=True) as zip_file: +# for root, _, filenames in os.walk(path): +# for name in filenames: +# file_path = os.path.join(root, name) +# zipped_path = os.path.join(base_dir, file_path[path_prefix_len:]) +# zip_file.write(file_path, zipped_path) + +# return ZippedDataContainer(target) + +# @contextmanager +# def zip_file(self): +# assert self.exists(), "Can only access uploaded file if it exists." +# zip_file = None +# try: +# zip_file = ZipFile(self.os_path) +# yield zip_file +# except BadZipFile as e: +# raise FileError('Upload is not a zip file', e) +# finally: +# if zip_file is not None: +# zip_file.close() + +# @property +# def manifest(self): +# with self.zip_file() as zip_file: +# return [ +# zip_info.filename[self._payload_deirectory_len:] for zip_info in zip_file.filelist +# if not zip_info.filename.endswith('/') and zip_info.filename.startswith(self._payload_directory)] + +# @property +# def metadata(self): +# if self._metadata is None: +# self._metadata = self._load_metadata() +# return self._metadata + +# def _load_metadata(self): +# with ZippedFile(self.os_path, '%s/bag-info.txt' % self._base_directory).open('r') as metadata_file: +# metadata_contents = metadata_file.read() + +# metadata_file = io.StringIO(metadata_contents.decode("utf-8")) +# tags = {} +# for name, value in bagit._parse_tags(metadata_file): +# if name not in tags: +# tags[name] = value +# continue + +# if not isinstance(tags[name], list): +# tags[name] = [tags[name], value] +# else: +# tags[name].append(value) + +# return BaggedDataContainer._load_bagit_metadata(tags) + +# def get_file(self, path): +# return ZippedFile(self.path, self._payload_directory + path) + +# def get_zip_path(self, path): +# return self._payload_directory + path diff --git a/nomad/processing/data.py b/nomad/processing/data.py index e0c83da95e205c1836ef469d388b60c85ebce218..0506180e8049b9f754ca2ae04493c1718c4f67f3 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -99,7 +99,7 @@ class Calc(Proc, datamodel.Calc): @property def upload_files(self) -> ArchiveBasedStagingUploadFiles: if not self._upload_files: - self._upload_files = ArchiveBasedStagingUploadFiles(self.upload_id, public_only=False) + self._upload_files = ArchiveBasedStagingUploadFiles(self.upload_id, is_authorized=lambda: True, local_path=self.upload.local_path) return self._upload_files @property @@ -429,7 +429,7 @@ class Upload(Chord, datamodel.Upload): @property def upload_files(self) -> ArchiveBasedStagingUploadFiles: if not self._upload_files: - self._upload_files = ArchiveBasedStagingUploadFiles(self.upload_id, public_only=False) + self._upload_files = ArchiveBasedStagingUploadFiles(self.upload_id, is_authorized=lambda: True, local_path=self.local_path) return self._upload_files @task diff --git a/nomad/uploads.py b/nomad/uploads.py index e1b7a33dfc5fd06219778e5191970f9d42366d19..f0d04f4f35b32c3bd2e8d67ef294593ddac08564 100644 --- a/nomad/uploads.py +++ b/nomad/uploads.py @@ -37,7 +37,7 @@ almost readonly (beside metadata) storage. 
""" from abc import ABCMeta -from typing import IO, Generator, Dict, Iterator, Iterable +from typing import IO, Generator, Dict, Iterator, Iterable, Callable from filelock import Timeout, FileLock import ujson import os.path @@ -45,7 +45,6 @@ import os import shutil from zipfile import ZipFile, BadZipFile, is_zipfile from bagit import make_bag -import contextlib import hashlib import io @@ -297,10 +296,13 @@ class Restricted(Exception): class UploadFiles(DirectoryObject, metaclass=ABCMeta): + + _archive_ext = 'json' + def __init__( - self, bucket: str, upload_id: str, public_only: bool = True, - create: bool = False, - archive_ext: str = 'json.gz' if config.files.compress_archive else 'json') -> None: + self, bucket: str, upload_id: str, + is_authorized: Callable[[], bool] = lambda: False, + create: bool = False) -> None: self.logger = utils.get_logger(__name__, upload_id=upload_id) super().__init__(bucket, upload_id, create=create, prefix=True) @@ -309,8 +311,7 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): raise KeyError() self.upload_id = upload_id - self.public_only = public_only - self._archive_ext = archive_ext + self._is_authorized = is_authorized @staticmethod def get(upload_id: str, *args, **kwargs) -> 'UploadFiles': @@ -326,8 +327,7 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): """ The calc metadata for this upload. """ raise NotImplementedError - @contextlib.contextmanager - def raw_file(self, file_path: str, *args, **kwargs) -> Generator[IO, None, None]: + def raw_file(self, file_path: str, *args, **kwargs) -> IO: """ Opens a raw file and returns a file-like objects. Additional args, kwargs are delegated to the respective `open` call. @@ -349,8 +349,7 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): """ raise NotImplementedError() - @contextlib.contextmanager - def archive_file(self, calc_hash: str, *args, **kwargs) -> Generator[IO, None, None]: + def archive_file(self, calc_hash: str, *args, **kwargs) -> IO: """ Opens a archive file and returns a file-like objects. Additional args, kwargs are delegated to the respective `open` call. @@ -362,8 +361,7 @@ class UploadFiles(DirectoryObject, metaclass=ABCMeta): """ raise NotImplementedError() - @contextlib.contextmanager - def archive_log_file(self, calc_hash: str, *args, **kwargs) -> Generator[IO, None, None]: + def archive_log_file(self, calc_hash: str, *args, **kwargs) -> IO: """ Opens a archive log file and returns a file-like objects. Additional args, kwargs are delegated to the respective `open` call. 
@@ -397,40 +395,29 @@ class StagingUploadFiles(UploadFiles):
     def metadata(self) -> Metadata:
         return self._metadata
 
-    @contextlib.contextmanager
-    def _file(self, path_object: PathObject, *args, **kwargs) -> Generator[IO, None, None]:
+    def _file(self, path_object: PathObject, *args, **kwargs) -> IO:
         try:
-            with open(path_object.os_path, *args, **kwargs) as f:
-                yield f
+            return open(path_object.os_path, *args, **kwargs)
         except FileNotFoundError:
             raise KeyError()
 
-    @contextlib.contextmanager
-    def raw_file(self, file_path: str, *args, **kwargs) -> Generator[IO, None, None]:
-        if self.public_only:
+    def raw_file(self, file_path: str, *args, **kwargs) -> IO:
+        if not self._is_authorized():
             raise Restricted
-
-        with self._file(self.raw_file_object(file_path), *args, **kwargs) as f:
-            yield f
+        return self._file(self.raw_file_object(file_path), *args, **kwargs)
 
     def raw_file_object(self, file_path: str) -> PathObject:
         return self._raw_dir.join_file(file_path)
 
-    @contextlib.contextmanager
-    def archive_file(self, calc_hash: str, *args, **kwargs) -> Generator[IO, None, None]:
-        if self.public_only:
+    def archive_file(self, calc_hash: str, *args, **kwargs) -> IO:
+        if not self._is_authorized():
             raise Restricted
+        return self._file(self.archive_file_object(calc_hash), *args, **kwargs)
 
-        with self._file(self.archive_file_object(calc_hash), *args, **kwargs) as f:
-            yield f
-
-    @contextlib.contextmanager
-    def archive_log_file(self, calc_hash: str, *args, **kwargs) -> Generator[IO, None, None]:
-        if self.public_only:
+    def archive_log_file(self, calc_hash: str, *args, **kwargs) -> IO:
+        if not self._is_authorized():
             raise Restricted
-
-        with self._file(self.archive_log_file_object(calc_hash), *args, **kwargs) as f:
-            yield f
+        return self._file(self.archive_log_file_object(calc_hash), *args, **kwargs)
 
     def archive_file_object(self, calc_hash: str) -> PathObject:
         return self._archive_dir.join_file('%s.%s' % (calc_hash, self._archive_ext))
@@ -621,7 +608,7 @@ class ArchiveBasedStagingUploadFiles(StagingUploadFiles):
 
     @property
     def upload_file_os_path(self):
-        if self._local_path:
+        if self._local_path is not None:
             return self._local_path
         else:
             return self._upload_file.os_path
@@ -653,8 +640,7 @@ class PublicUploadFiles(UploadFiles):
     def metadata(self) -> Metadata:
         return self._metadata
 
-    @contextlib.contextmanager
-    def _file(self, prefix: str, ext: str, path: str, *args, **kwargs) -> Generator[IO, None, None]:
+    def _file(self, prefix: str, ext: str, path: str, *args, **kwargs) -> IO:
         mode = kwargs.get('mode') if len(args) == 0 else args[0]
         if 'mode' in kwargs:
             del(kwargs['mode'])
@@ -664,26 +650,22 @@
            try:
                zip_file = self.join_file('%s-%s.%s.zip' % (prefix, access, ext))
                with ZipFile(zip_file.os_path) as zf:
-                    with zf.open(path, 'r', **kwargs) as f:
-                        if 't' in mode:
-                            yield io.TextIOWrapper(f)
-                        else:
-                            yield f
-                        return
+                    f = zf.open(path, 'r', **kwargs)
+                    if access == 'restricted' and not self._is_authorized():
+                        raise Restricted
+                    if 't' in mode:
+                        return io.TextIOWrapper(f)
+                    else:
+                        return f
            except FileNotFoundError:
                pass
            except KeyError:
                pass
 
-        if self.public_only:
-            raise Restricted
-
        raise KeyError()
 
-    @contextlib.contextmanager
-    def raw_file(self, file_path: str, *args, **kwargs) -> Generator[IO, None, None]:
-        with self._file('raw', 'bagit', 'data/' + file_path, *args, *kwargs) as f:
-            yield f
+    def raw_file(self, file_path: str, *args, **kwargs) -> IO:
+        return self._file('raw', 'bagit', 'data/' + file_path, *args, **kwargs)
 
     def raw_file_manifest(self, 
path_prefix: str = None) -> Generator[str, None, None]: for access in ['public', 'restricted']: @@ -697,15 +679,11 @@ class PublicUploadFiles(UploadFiles): except FileNotFoundError: pass - @contextlib.contextmanager - def archive_file(self, calc_hash: str, *args, **kwargs) -> Generator[IO, None, None]: - with self._file('archive', self._archive_ext, '%s.%s' % (calc_hash, self._archive_ext), *args, **kwargs) as f: - yield f + def archive_file(self, calc_hash: str, *args, **kwargs) -> IO: + return self._file('archive', self._archive_ext, '%s.%s' % (calc_hash, self._archive_ext), *args, **kwargs) - @contextlib.contextmanager - def archive_log_file(self, calc_hash: str, *args, **kwargs) -> Generator[IO, None, None]: - with self._file('archive', self._archive_ext, '%s.log' % calc_hash, *args, **kwargs) as f: - yield f + def archive_log_file(self, calc_hash: str, *args, **kwargs) -> IO: + return self._file('archive', self._archive_ext, '%s.log' % calc_hash, *args, **kwargs) def repack(self) -> None: """ @@ -714,6 +692,3 @@ class PublicUploadFiles(UploadFiles): the restrictions on calculations. This is potentially a long running operation. """ pass - - def delete(self): - assert False, 'cannot delete public upload' diff --git a/tests/processing/test_data.py b/tests/processing/test_data.py index c6939d8dd0b1c87ac5eca64bb77aa2d6f3837a2c..3defaaee257c105325332bf852df64814f253a7d 100644 --- a/tests/processing/test_data.py +++ b/tests/processing/test_data.py @@ -89,7 +89,7 @@ def assert_processing(upload: Upload, mocksearch=None): assert len(upload.errors) == 0 assert upload.status == 'SUCCESS' - upload_files = UploadFiles.get(upload.upload_id, public_only=False) + upload_files = UploadFiles.get(upload.upload_id, is_authorized=lambda: True) assert isinstance(upload_files, StagingUploadFiles) for calc in Calc.objects(upload_id=upload.upload_id): diff --git a/tests/test_api.py b/tests/test_api.py index 6f65e9b545027d8975509bab8a260c6befc7aaa8..ee3be373ad8b9feb9213b4e2edb4ee690b10566f 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -15,14 +15,12 @@ import pytest import time import json -import zlib -import os.path from mongoengine import connect from mongoengine.connection import disconnect import base64 import zipfile import io -import datetime +import inspect from nomad import config # for convinience we test the api without path prefix @@ -30,19 +28,19 @@ services_config = config.services._asdict() services_config.update(api_base_path='') config.services = config.NomadServicesConfig(**services_config) -from nomad import api # noqa -from nomad.files import UploadFile # noqa +from nomad import api, coe_repo # noqa +from nomad.uploads import UploadFiles # noqa from nomad.processing import Upload # noqa from nomad.coe_repo import User # noqa from tests.processing.test_data import example_files # noqa from tests.test_files import example_file, example_file_mainfile, example_file_contents # noqa +from tests.test_uploads import create_staging_upload, create_public_upload # noqa # import fixtures -from tests.test_files import clear_files, archive, archive_log, archive_config # noqa pylint: disable=unused-import from tests.test_normalizing import normalized_template_example # noqa pylint: disable=unused-import from tests.test_parsing import parsed_template_example # noqa pylint: disable=unused-import -from tests.test_repo import example_elastic_calc # noqa pylint: disable=unused-import +# from tests.test_repo import example_elastic_calc # noqa pylint: disable=unused-import from tests.test_coe_repo 
import assert_coe_upload # noqa @@ -196,16 +194,17 @@ class TestUploads: assert len(upload['tasks']) == 4 assert upload['status'] == 'SUCCESS' assert upload['current_task'] == 'cleanup' - assert UploadFile(upload['upload_id'], upload.get('local_path')).exists() + upload_files = UploadFiles.get(upload['upload_id']) + assert upload_files is not None calcs = upload['calcs']['results'] for calc in calcs: assert calc['status'] == 'SUCCESS' assert calc['current_task'] == 'archiving' assert len(calc['tasks']) == 3 - assert client.get('/archive/logs/%s' % calc['archive_id']).status_code == 200 + assert client.get('/archive/logs/%s' % calc['archive_id'], headers=test_user_auth).status_code == 200 if upload['calcs']['pagination']['total'] > 1: - rv = client.get('%s?page=2&per_page=1&order_by=status' % upload_endpoint) + rv = client.get('%s?page=2&per_page=1&order_by=status' % upload_endpoint, headers=test_user_auth) assert rv.status_code == 200 upload = self.assert_upload(rv.data) assert len(upload['calcs']['results']) == 1 @@ -339,74 +338,166 @@ class TestUploads: # assert rv.status_code == 400 -class TestRepo: - def test_calc(self, client, example_elastic_calc, no_warn): - rv = client.get( - '/repo/%s/%s' % (example_elastic_calc.upload_hash, example_elastic_calc.calc_hash)) - assert rv.status_code == 200 - - def test_non_existing_calcs(self, client): - rv = client.get('/repo/doesnt/exist') - assert rv.status_code == 404 +# class TestRepo: +# def test_calc(self, client, example_elastic_calc, no_warn): +# rv = client.get( +# '/repo/%s/%s' % (example_elastic_calc.upload_hash, example_elastic_calc.calc_hash)) +# assert rv.status_code == 200 + +# def test_non_existing_calcs(self, client): +# rv = client.get('/repo/doesnt/exist') +# assert rv.status_code == 404 + +# def test_calcs(self, client, example_elastic_calc, no_warn): +# rv = client.get('/repo/') +# assert rv.status_code == 200 +# data = json.loads(rv.data) +# results = data.get('results', None) +# assert results is not None +# assert isinstance(results, list) +# assert len(results) >= 1 + +# def test_calcs_pagination(self, client, example_elastic_calc, no_warn): +# rv = client.get('/repo/?page=1&per_page=1') +# assert rv.status_code == 200 +# data = json.loads(rv.data) +# results = data.get('results', None) +# assert results is not None +# assert isinstance(results, list) +# assert len(results) == 1 + +# def test_calcs_user(self, client, example_elastic_calc, test_user_auth, no_warn): +# rv = client.get('/repo/?owner=user', headers=test_user_auth) +# assert rv.status_code == 200 +# data = json.loads(rv.data) +# results = data.get('results', None) +# assert results is not None +# assert len(results) >= 1 + +# def test_calcs_user_authrequired(self, client, example_elastic_calc, no_warn): +# rv = client.get('/repo/?owner=user') +# assert rv.status_code == 401 + +# def test_calcs_user_invisible(self, client, example_elastic_calc, test_other_user_auth, no_warn): +# rv = client.get('/repo/?owner=user', headers=test_other_user_auth) +# assert rv.status_code == 200 +# data = json.loads(rv.data) +# results = data.get('results', None) +# assert results is not None +# assert len(results) == 0 + + +class UploadFilesBasedTests: + + @staticmethod + def fix_signature(func, wrapper): + additional_args = list(inspect.signature(func).parameters.values())[4:] + wrapper_sig = inspect.signature(wrapper) + wrapper_args = list(wrapper_sig.parameters.values())[:3] + additional_args + wrapper_sig = wrapper_sig.replace(parameters=tuple(wrapper_args)) + 
wrapper.__signature__ = wrapper_sig
+
+    @staticmethod
+    def check_authorization(func):
+        @pytest.mark.parametrize('test_data', [
+            [True, None, True],      # in staging for uploader
+            [True, None, False],     # in staging for different user
+            [True, None, None],      # in staging for guest
+            [False, True, True],     # in public, restricted for uploader
+            [False, True, False],    # in public, restricted for different user
+            [False, True, None],     # in public, restricted for guest
+            [False, False, True],    # in public, public, for uploader
+            [False, False, False],   # in public, public, for different user
+            [False, False, None]     # in public, public, for guest
+        ], indirect=True)
+        def wrapper(self, client, test_data, *args, **kwargs):
+            upload, authorized, auth_headers = test_data
+            try:
+                func(self, client, upload, auth_headers, *args, **kwargs)
+            except AssertionError as assertion:
+                assertion_str = str(assertion)
+                if not authorized:
+                    if '0 == 5' in assertion_str and 'ZipFile' in assertion_str:
+                        # the user is not authorized and gets an empty zip as expected
+                        return
+                    if '401' in assertion_str:
+                        # the user is not authorized and gets a 401 as expected
+                        return
+                raise assertion
+
+            if not authorized:
+                assert False
+        UploadFilesBasedTests.fix_signature(func, wrapper)
+        return wrapper
+
+    @staticmethod
+    def ignore_authorization(func):
+        @pytest.mark.parametrize('test_data', [
+            [True, None, True],      # in staging
+            [False, False, None],    # in public
+        ], indirect=True)
+        def wrapper(self, client, test_data, *args, **kwargs):
+            upload, _, auth_headers = test_data
+            func(self, client, upload, auth_headers, *args, **kwargs)
+        UploadFilesBasedTests.fix_signature(func, wrapper)
+        return wrapper
 
-    def test_calcs(self, client, example_elastic_calc, no_warn):
-        rv = client.get('/repo/')
-        assert rv.status_code == 200
-        data = json.loads(rv.data)
-        results = data.get('results', None)
-        assert results is not None
-        assert isinstance(results, list)
-        assert len(results) >= 1
+    @pytest.fixture(scope='function')
+    def test_data(self, request, clean_repository_db, no_warn, test_user, other_test_user):
+        # delete potential old test files
+        for _ in [0, 1]:
+            upload_files = UploadFiles.get('test_upload')
+            if upload_files:
+                upload_files.delete()
 
-    def test_calcs_pagination(self, client, example_elastic_calc, no_warn):
-        rv = client.get('/repo/?page=1&per_page=1')
-        assert rv.status_code == 200
-        data = json.loads(rv.data)
-        results = data.get('results', None)
-        assert results is not None
-        assert isinstance(results, list)
-        assert len(results) == 1
+        in_staging, restricted, for_uploader = request.param
 
-    def test_calcs_user(self, client, example_elastic_calc, test_user_auth, no_warn):
-        rv = client.get('/repo/?owner=user', headers=test_user_auth)
-        assert rv.status_code == 200
-        data = json.loads(rv.data)
-        results = data.get('results', None)
-        assert results is not None
-        assert len(results) >= 1
+        if in_staging:
+            authorized = for_uploader
+        else:
+            authorized = not restricted or for_uploader
 
-    def test_calcs_user_authrequired(self, client, example_elastic_calc, no_warn):
-        rv = client.get('/repo/?owner=user')
-        assert rv.status_code == 401
+        if for_uploader:
+            auth_headers = create_auth_headers(test_user)
+        elif for_uploader is False:
+            auth_headers = create_auth_headers(other_test_user)
+        else:
+            auth_headers = None
 
-    def test_calcs_user_invisible(self, client, example_elastic_calc, test_other_user_auth, no_warn):
-        rv = client.get('/repo/?owner=user', headers=test_other_user_auth)
-        assert rv.status_code == 200
-        data = json.loads(rv.data)
-        results = data.get('results', None)
-        assert results is not None
-        assert len(results) == 0
+        calc_specs = 'r' if restricted else 'p'
+        if in_staging:
+            Upload.create(user=test_user, upload_id='test_upload', upload_hash='test_upload')
+            upload_files = create_staging_upload('test_upload', calc_specs=calc_specs)
+        else:
+            upload_files = create_public_upload('test_upload', calc_specs=calc_specs)
+        clean_repository_db.begin()
+        coe_upload = coe_repo.Upload(
+            upload_name='test_upload',
+            user_id=test_user.user_id, is_processed=True)
+        clean_repository_db.add(coe_upload)
+        clean_repository_db.commit()
+
+        yield 'test_upload', authorized, auth_headers
 
-class TestArchive:
-    def test_get(self, client, archive, repository_db, no_warn):
-        rv = client.get('/archive/%s' % archive.object_id)
+        upload_files.delete()
 
-        if rv.headers.get('Content-Encoding') == 'gzip':
-            json.loads(zlib.decompress(rv.data, 16 + zlib.MAX_WBITS))
-        else:
-            json.loads(rv.data)
+class TestArchive(UploadFilesBasedTests):
+    @UploadFilesBasedTests.check_authorization
+    def test_get(self, client, upload, auth_headers):
+        rv = client.get('/archive/%s/0' % upload, headers=auth_headers)
         assert rv.status_code == 200
+        assert json.loads(rv.data) is not None
 
-    def test_get_calc_proc_log(self, client, archive_log, repository_db, no_warn):
-        rv = client.get('/archive/logs/%s' % archive_log.object_id)
-
-        assert len(rv.data) > 0
+    @UploadFilesBasedTests.check_authorization
+    def test_get_calc_proc_log(self, client, upload, auth_headers):
+        rv = client.get('/archive/logs/%s/0' % upload, headers=auth_headers)
         assert rv.status_code == 200
+        assert len(rv.data) > 0
 
-    def test_get_non_existing_archive(self, client, repository_db, no_warn):
-        rv = client.get('/archive/%s' % 'doesnt/exist')
+    @UploadFilesBasedTests.ignore_authorization
+    def test_get_non_existing_archive(self, client, upload, auth_headers):
+        rv = client.get('/archive/%s' % 'doesnt/exist', headers=auth_headers)
         assert rv.status_code == 404
 
     def test_get_metainfo(self, client):
@@ -414,53 +505,38 @@ class TestArchive:
         assert rv.status_code == 200
 
 
-def test_docs(client):
-    rv = client.get('/docs/index.html')
-    rv = client.get('/docs/introduction.html')
-    assert rv.status_code == 200
-
-
-class TestRaw:
-
-    @pytest.fixture
-    def example_upload_hash(self, mockmongo, repository_db, no_warn):
-        upload = Upload(id='test_upload_id', local_path=os.path.abspath(example_file))
-        upload.create_time = datetime.datetime.now()
-        upload.user_id = 'does@not.exist'
-        upload.save()
+class TestRaw(UploadFilesBasedTests):
 
-        with UploadFile(upload.upload_id, local_path=upload.local_path) as upload_file:
-            upload_file.persist()
-            upload_hash = upload_file.upload_hash()
-
-        return upload_hash
-
-    def test_raw_file(self, client, example_upload_hash):
-        url = '/raw/%s/data/%s' % (example_upload_hash, example_file_mainfile)
-        rv = client.get(url)
+    @UploadFilesBasedTests.check_authorization
+    def test_raw_file(self, client, upload, auth_headers):
+        url = '/raw/%s/data/%s' % (upload, example_file_mainfile)
+        rv = client.get(url, headers=auth_headers)
         assert rv.status_code == 200
         assert len(rv.data) > 0
 
-    def test_raw_file_missing_file(self, client, example_upload_hash):
-        url = '/raw/%s/does/not/exist' % example_upload_hash
-        rv = client.get(url)
+    @UploadFilesBasedTests.ignore_authorization
+    def test_raw_file_missing_file(self, client, upload, auth_headers):
+        url = '/raw/%s/data/does/not/exist' % upload
+        rv = client.get(url, headers=auth_headers)
        assert rv.status_code == 404
        data = json.loads(rv.data)
        assert 'files' not in data
 
-    def test_raw_file_listing(self, client, example_upload_hash):
-        url = '/raw/%s/data/examples' % example_upload_hash
-        rv = client.get(url)
+    @UploadFilesBasedTests.ignore_authorization
+    def test_raw_file_listing(self, client, upload, auth_headers):
+        url = '/raw/%s/data/examples' % upload
+        rv = client.get(url, headers=auth_headers)
        assert rv.status_code == 404
        data = json.loads(rv.data)
        assert len(data['files']) == 5
 
     @pytest.mark.parametrize('compress', [True, False])
-    def test_raw_file_wildcard(self, client, example_upload_hash, compress):
-        url = '/raw/%s/data/examples*' % example_upload_hash
+    @UploadFilesBasedTests.ignore_authorization
+    def test_raw_file_wildcard(self, client, upload, auth_headers, compress):
+        url = '/raw/%s/data/examples*' % upload
         if compress:
             url = '%s?compress=1' % url
-        rv = client.get(url)
+        rv = client.get(url, headers=auth_headers)
         assert rv.status_code == 200
         assert len(rv.data) > 0
 
@@ -468,23 +544,26 @@ class TestRaw:
         assert zip_file.testzip() is None
         assert len(zip_file.namelist()) == len(example_file_contents)
 
-    def test_raw_file_wildcard_missing(self, client, example_upload_hash):
-        url = '/raw/%s/does/not/exist*' % example_upload_hash
-        rv = client.get(url)
+    @UploadFilesBasedTests.ignore_authorization
+    def test_raw_file_wildcard_missing(self, client, upload, auth_headers):
+        url = '/raw/%s/does/not/exist*' % upload
+        rv = client.get(url, headers=auth_headers)
         assert rv.status_code == 404
 
-    def test_raw_file_missing_upload(self, client, example_upload_hash):
+    @UploadFilesBasedTests.ignore_authorization
+    def test_raw_file_missing_upload(self, client, upload, auth_headers):
         url = '/raw/doesnotexist/%s' % example_file_mainfile
-        rv = client.get(url)
+        rv = client.get(url, headers=auth_headers)
         assert rv.status_code == 404
 
     @pytest.mark.parametrize('compress', [True, False])
-    def test_raw_files(self, client, example_upload_hash, compress):
+    @UploadFilesBasedTests.check_authorization
+    def test_raw_files(self, client, upload, auth_headers, compress):
         url = '/raw/%s?files=%s' % (
-            example_upload_hash, ','.join(['data/%s' % file for file in example_file_contents]))
+            upload, ','.join(['data/%s' % file for file in example_file_contents]))
         if compress:
             url = '%s&compress=1' % url
-        rv = client.get(url)
+        rv = client.get(url, headers=auth_headers)
         assert rv.status_code == 200
         assert len(rv.data) > 0
 
@@ -493,12 +572,13 @@ class TestRaw:
         assert len(zip_file.namelist()) == len(example_file_contents)
 
     @pytest.mark.parametrize('compress', [True, False, None])
-    def test_raw_files_post(self, client, example_upload_hash, compress):
-        url = '/raw/%s' % example_upload_hash
+    @UploadFilesBasedTests.check_authorization
+    def test_raw_files_post(self, client, upload, auth_headers, compress):
+        url = '/raw/%s' % upload
         data = dict(files=['data/%s' % file for file in example_file_contents])
         if compress is not None:
             data.update(compress=compress)
-        rv = client.post(url, data=json.dumps(data), content_type='application/json')
+        rv = client.post(url, data=json.dumps(data), content_type='application/json', headers=auth_headers)
         assert rv.status_code == 200
         assert len(rv.data) > 0
 
@@ -507,11 +587,12 @@ class TestRaw:
         assert len(zip_file.namelist()) == len(example_file_contents)
 
     @pytest.mark.parametrize('compress', [True, False])
-    def test_raw_files_missing_file(self, client, example_upload_hash, compress):
-        url = '/raw/%s?files=data/%s,missing/file.txt' % (example_upload_hash, example_file_mainfile)
+    
@UploadFilesBasedTests.ignore_authorization + def test_raw_files_missing_file(self, client, upload, auth_headers, compress): + url = '/raw/%s?files=data/%s,missing/file.txt' % (upload, example_file_mainfile) if compress: url = '%s&compress=1' % url - rv = client.get(url) + rv = client.get(url, headers=auth_headers) assert rv.status_code == 200 assert len(rv.data) > 0 @@ -519,8 +600,15 @@ class TestRaw: assert zip_file.testzip() is None assert len(zip_file.namelist()) == 1 - def test_raw_files_missing_upload(self, client, example_upload_hash): + @UploadFilesBasedTests.ignore_authorization + def test_raw_files_missing_upload(self, client, upload, auth_headers): url = '/raw/doesnotexist?files=shoud/not/matter.txt' - rv = client.get(url) + rv = client.get(url, headers=auth_headers) assert rv.status_code == 404 + + +def test_docs(client): + rv = client.get('/docs/index.html') + rv = client.get('/docs/introduction.html') + assert rv.status_code == 200 diff --git a/tests/test_files.py b/tests/test_files.py index 374adab8b15219311ada3ece782941182fbcc666..dcd54db75916efd0e4300132ba4292beb13610da 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -19,8 +19,8 @@ import os import os.path from zipfile import ZipFile -from nomad.files import Objects, ObjectFile, ArchiveFile, UploadFile, ArchiveLogFile, \ - BaggedDataContainer, ZippedDataContainer +# from nomad.files import Objects, ObjectFile, ArchiveFile, UploadFile, ArchiveLogFile, \ +# BaggedDataContainer, ZippedDataContainer from nomad import config # example_file uses an artificial parser for faster test execution, can also be @@ -55,238 +55,238 @@ def clear_files(): pass -class TestObjects: - @pytest.fixture() - def existing_example_file(self, clear_files): - with ObjectFile(example_bucket, 'example_file', ext='json').open(mode='wt') as out: - json.dump(example_data, out) +# class TestObjects: +# @pytest.fixture() +# def existing_example_file(self, clear_files): +# with ObjectFile(example_bucket, 'example_file', ext='json').open(mode='wt') as out: +# json.dump(example_data, out) - yield 'example_file', 'json' +# yield 'example_file', 'json' - def test_size(self, existing_example_file): - name, ext = existing_example_file - assert ObjectFile(example_bucket, name, ext).size > 0 +# def test_size(self, existing_example_file): +# name, ext = existing_example_file +# assert ObjectFile(example_bucket, name, ext).size > 0 - def test_exists(self, existing_example_file): - name, ext = existing_example_file - assert ObjectFile(example_bucket, name, ext).exists() +# def test_exists(self, existing_example_file): +# name, ext = existing_example_file +# assert ObjectFile(example_bucket, name, ext).exists() - def test_not_exists(self): - assert not ObjectFile(example_bucket, 'does_not_exist').exists() +# def test_not_exists(self): +# assert not ObjectFile(example_bucket, 'does_not_exist').exists() + +# def test_open(self, existing_example_file): +# name, ext = existing_example_file - def test_open(self, existing_example_file): - name, ext = existing_example_file +# assert ObjectFile(example_bucket, name, ext).exists() +# with ObjectFile(example_bucket, name, ext=ext).open() as f: +# json.load(f) - assert ObjectFile(example_bucket, name, ext).exists() - with ObjectFile(example_bucket, name, ext=ext).open() as f: - json.load(f) +# def test_delete(self, existing_example_file): +# name, ext = existing_example_file +# ObjectFile(example_bucket, name, ext).delete() +# assert not ObjectFile(example_bucket, name, ext).exists() - def test_delete(self, 
existing_example_file): - name, ext = existing_example_file - ObjectFile(example_bucket, name, ext).delete() - assert not ObjectFile(example_bucket, name, ext).exists() +# def test_delete_all(self, existing_example_file): +# name, ext = existing_example_file +# Objects.delete_all(example_bucket) +# assert not ObjectFile(example_bucket, name, ext).exists() - def test_delete_all(self, existing_example_file): - name, ext = existing_example_file - Objects.delete_all(example_bucket) - assert not ObjectFile(example_bucket, name, ext).exists() +# class TestBaggedDataContainer: -class TestBaggedDataContainer: +# @pytest.fixture(scope='function') +# def example_directory(self, clear_files): +# directory = os.path.join(config.fs.tmp, 'test_container') +# os.makedirs(directory, exist_ok=True) - @pytest.fixture(scope='function') - def example_directory(self, clear_files): - directory = os.path.join(config.fs.tmp, 'test_container') - os.makedirs(directory, exist_ok=True) +# with ZipFile(example_file) as zip_file: +# zip_file.extractall(directory) - with ZipFile(example_file) as zip_file: - zip_file.extractall(directory) +# yield directory - yield directory +# @pytest.fixture(scope='function') +# def example_container(self, example_directory): +# yield BaggedDataContainer.create(example_directory) - @pytest.fixture(scope='function') - def example_container(self, example_directory): - yield BaggedDataContainer.create(example_directory) +# def assert_container(self, container): +# assert container.manifest is not None +# assert len(container.manifest) == 5 +# assert container.hash is not None +# assert container.metadata is not None +# for file_path in container.manifest: +# assert file_path.startswith('examples_template') - def assert_container(self, container): - assert container.manifest is not None - assert len(container.manifest) == 5 - assert container.hash is not None - assert container.metadata is not None - for file_path in container.manifest: - assert file_path.startswith('examples_template') +# def test_make(self, example_container): +# self.assert_container(example_container) - def test_make(self, example_container): - self.assert_container(example_container) +# def test_metadata(self, example_directory, example_container): +# example_container.metadata['test'] = dict(k1='v1', k2=True, k3=0) +# example_container.save_metadata() + +# example_container = BaggedDataContainer(example_directory) +# self.assert_container(example_container) +# assert example_container.metadata['test']['k1'] == 'v1' +# assert example_container.metadata['test']['k2'] +# assert example_container.metadata['test']['k3'] == 0 + +# def test_file(self, example_container): +# file = example_container.get_file('examples_template/template.json') +# assert file is not None +# with file.open('r') as f: +# assert json.load(f) - def test_metadata(self, example_directory, example_container): - example_container.metadata['test'] = dict(k1='v1', k2=True, k3=0) - example_container.save_metadata() - example_container = BaggedDataContainer(example_directory) - self.assert_container(example_container) - assert example_container.metadata['test']['k1'] == 'v1' - assert example_container.metadata['test']['k2'] - assert example_container.metadata['test']['k3'] == 0 +# class TestZippedDataContainer(TestBaggedDataContainer): +# @pytest.fixture(scope='function') +# def example_container(self, example_directory): +# BaggedDataContainer.create(example_directory) +# return ZippedDataContainer.create(example_directory) + +# def test_metadata(self, 
example_directory, example_container): +# pass + +# def test_target(self, example_directory): +# BaggedDataContainer.create(example_directory) +# target = os.path.join(os.path.dirname(example_directory), 'different.zip') +# container = ZippedDataContainer.create(example_directory, target=target) +# self.assert_container(container) +# with ZipFile(target, 'r') as zip_file: +# for info in zip_file.filelist: +# assert info.filename.startswith('different') + + +# @pytest.fixture(scope='function', params=[False, True]) +# def archive_config(monkeypatch, request): +# new_config = config.FilesConfig( +# config.files.uploads_bucket, +# config.files.raw_bucket, +# config.files.archive_bucket, +# config.files.staging_bucket, +# config.files.public_bucket, +# request.param) +# monkeypatch.setattr(config, 'files', new_config) +# yield + + +# @pytest.fixture(scope='function') +# def archive(clear_files, archive_config): +# archive = ArchiveFile('__test_upload_hash/__test_calc_hash') +# with archive.write_archive_json() as out: +# json.dump(example_data, out) +# yield archive + + +# class TestArchiveFile: + +# def test_archive(self, archive: ArchiveFile, no_warn): +# assert archive.exists() + +# with archive.read_archive_json() as file: +# result = json.load(file) + +# assert 'test_key' in result +# assert result['test_key'] == 'test_value' + +# def test_delete_archive(self, archive: ArchiveFile, no_warn): +# archive.delete() +# assert not archive.exists() + +# def test_delete_archives(self, archive: ArchiveFile, no_warn): +# ArchiveFile.delete_archives(archive.object_id.split('/')[0]) +# assert not archive.exists() + + +# class TestUploadFile: + +# @pytest.fixture() +# def upload_same_file(self, clear_files): +# upload = UploadFile('__test_upload_id2') +# shutil.copyfile(example_file, upload.os_path) +# yield upload + +# @pytest.fixture() +# def upload(self, clear_files): +# upload = UploadFile('__test_upload_id') +# upload.create_dirs() +# shutil.copyfile(example_file, upload.os_path) +# yield upload + +# def assert_upload(self, upload: UploadFile): +# assert upload.exists() + +# assert len(upload.filelist) == 5 +# has_json = False +# for filename in upload.filelist: +# the_file = upload.get_file(filename) +# assert the_file.exists() +# assert the_file.size >= 0 +# if the_file.path.endswith('.json'): +# has_json = True +# assert the_file.size > 0 +# with the_file.open() as f: +# f.read() +# break +# assert has_json + +# def test_upload_extracted(self, upload: UploadFile): +# with upload: +# self.assert_upload(upload) + +# def test_persist(self, upload: UploadFile): +# with upload: +# zipped_container = upload.persist() + +# assert zipped_container.exists() +# assert zipped_container.os_path.endswith('%s.zip' % upload.upload_hash()) + +# def test_delete_upload(self, upload: UploadFile): +# upload.delete() +# assert not upload.exists() + +# def test_hash(self, upload: UploadFile, upload_same_file: UploadFile, no_warn): +# with upload: +# hash = upload.upload_hash() +# assert hash is not None +# assert isinstance(hash, str) + +# with upload_same_file: +# assert hash == upload_same_file.upload_hash() - def test_file(self, example_container): - file = example_container.get_file('examples_template/template.json') - assert file is not None - with file.open('r') as f: - assert json.load(f) +# def test_siblings(self, upload: UploadFile, no_warn): +# with upload: +# siblings = list(upload.get_siblings('examples_template/template.json')) +# assert len(siblings) == 4 +# assert all(sibling.endswith('.aux') for 
sibling in siblings) + + +# class TestLocalUploadFile(TestUploadFile): +# @pytest.fixture() +# def upload_same_file(self, clear_files): +# upload = UploadFile('__test_upload_id2', local_path=example_file) +# yield upload +# @pytest.fixture() +# def upload(self, clear_files): +# upload = UploadFile('__test_upload_id', local_path=example_file) +# yield upload -class TestZippedDataContainer(TestBaggedDataContainer): - @pytest.fixture(scope='function') - def example_container(self, example_directory): - BaggedDataContainer.create(example_directory) - return ZippedDataContainer.create(example_directory) +# def test_delete_upload(self, upload: UploadFile): +# upload.delete() +# assert upload.exists() - def test_metadata(self, example_directory, example_container): - pass - def test_target(self, example_directory): - BaggedDataContainer.create(example_directory) - target = os.path.join(os.path.dirname(example_directory), 'different.zip') - container = ZippedDataContainer.create(example_directory, target=target) - self.assert_container(container) - with ZipFile(target, 'r') as zip_file: - for info in zip_file.filelist: - assert info.filename.startswith('different') +# @pytest.fixture(scope='function') +# def archive_log(clear_files, archive_config): +# archive_log = ArchiveLogFile('__test_upload_hash/__test_calc_hash') +# with archive_log.open('wt') as f: +# f.write('This is a test') + +# yield archive_log -@pytest.fixture(scope='function', params=[False, True]) -def archive_config(monkeypatch, request): - new_config = config.FilesConfig( - config.files.uploads_bucket, - config.files.raw_bucket, - config.files.archive_bucket, - config.files.staging_bucket, - config.files.public_bucket, - request.param) - monkeypatch.setattr(config, 'files', new_config) - yield - - -@pytest.fixture(scope='function') -def archive(clear_files, archive_config): - archive = ArchiveFile('__test_upload_hash/__test_calc_hash') - with archive.write_archive_json() as out: - json.dump(example_data, out) - yield archive - - -class TestArchiveFile: - - def test_archive(self, archive: ArchiveFile, no_warn): - assert archive.exists() - - with archive.read_archive_json() as file: - result = json.load(file) - - assert 'test_key' in result - assert result['test_key'] == 'test_value' - - def test_delete_archive(self, archive: ArchiveFile, no_warn): - archive.delete() - assert not archive.exists() - - def test_delete_archives(self, archive: ArchiveFile, no_warn): - ArchiveFile.delete_archives(archive.object_id.split('/')[0]) - assert not archive.exists() - - -class TestUploadFile: - - @pytest.fixture() - def upload_same_file(self, clear_files): - upload = UploadFile('__test_upload_id2') - shutil.copyfile(example_file, upload.os_path) - yield upload - - @pytest.fixture() - def upload(self, clear_files): - upload = UploadFile('__test_upload_id') - upload.create_dirs() - shutil.copyfile(example_file, upload.os_path) - yield upload - - def assert_upload(self, upload: UploadFile): - assert upload.exists() - - assert len(upload.filelist) == 5 - has_json = False - for filename in upload.filelist: - the_file = upload.get_file(filename) - assert the_file.exists() - assert the_file.size >= 0 - if the_file.path.endswith('.json'): - has_json = True - assert the_file.size > 0 - with the_file.open() as f: - f.read() - break - assert has_json - - def test_upload_extracted(self, upload: UploadFile): - with upload: - self.assert_upload(upload) - - def test_persist(self, upload: UploadFile): - with upload: - zipped_container = upload.persist() - - 
assert zipped_container.exists() - assert zipped_container.os_path.endswith('%s.zip' % upload.upload_hash()) - - def test_delete_upload(self, upload: UploadFile): - upload.delete() - assert not upload.exists() - - def test_hash(self, upload: UploadFile, upload_same_file: UploadFile, no_warn): - with upload: - hash = upload.upload_hash() - assert hash is not None - assert isinstance(hash, str) - - with upload_same_file: - assert hash == upload_same_file.upload_hash() - - def test_siblings(self, upload: UploadFile, no_warn): - with upload: - siblings = list(upload.get_siblings('examples_template/template.json')) - assert len(siblings) == 4 - assert all(sibling.endswith('.aux') for sibling in siblings) - - -class TestLocalUploadFile(TestUploadFile): - @pytest.fixture() - def upload_same_file(self, clear_files): - upload = UploadFile('__test_upload_id2', local_path=example_file) - yield upload - - @pytest.fixture() - def upload(self, clear_files): - upload = UploadFile('__test_upload_id', local_path=example_file) - yield upload - - def test_delete_upload(self, upload: UploadFile): - upload.delete() - assert upload.exists() - - -@pytest.fixture(scope='function') -def archive_log(clear_files, archive_config): - archive_log = ArchiveLogFile('__test_upload_hash/__test_calc_hash') - with archive_log.open('wt') as f: - f.write('This is a test') - - yield archive_log - - -class TestArchiveLogFile: - - def test_archive_log_file(self, archive_log): - assert archive_log.exists() - with archive_log.open('rt') as f: - assert 'test' in f.read() +# class TestArchiveLogFile: + +# def test_archive_log_file(self, archive_log): +# assert archive_log.exists() +# with archive_log.open('rt') as f: +# assert 'test' in f.read() diff --git a/tests/test_repo.py b/tests/test_repo.py index e97f4b23165a9706bac093244a7caba170193d78..d13ad56f86edb200748c7426b942b5f35b6c7958 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -12,115 +12,114 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest -from typing import Generator -from datetime import datetime -from elasticsearch import NotFoundError - -from nomad.files import ArchiveFile, UploadFile -from nomad.parsing import LocalBackend -from nomad.repo import AlreadyExists, RepoCalc +# import pytest +# from typing import Generator +# from datetime import datetime +# from elasticsearch import NotFoundError + +# from nomad.parsing import LocalBackend +# from nomad.repo import AlreadyExists, RepoCalc -from tests.test_files import example_file # noqa -from tests.test_normalizing import normalized_template_example # pylint: disable=unused-import -from tests.test_parsing import parsed_template_example # pylint: disable=unused-import +# from tests.test_files import example_file # noqa +# from tests.test_normalizing import normalized_template_example # pylint: disable=unused-import +# from tests.test_parsing import parsed_template_example # pylint: disable=unused-import -@pytest.fixture(scope='function') -def example_elastic_calc(normalized_template_example: LocalBackend, elastic, test_user) \ - -> Generator[RepoCalc, None, None]: +# @pytest.fixture(scope='function') +# def example_elastic_calc(normalized_template_example: LocalBackend, elastic, test_user) \ +# -> Generator[RepoCalc, None, None]: - upload_file = UploadFile('test_upload_id', local_path=example_file) - mainfile = next(filename for filename in upload_file.filelist if 'template.json' in filename) - auxfiles = list(upload_file.get_siblings(mainfile)) +# upload_file = UploadFile('test_upload_id', local_path=example_file) +# mainfile = next(filename for filename in upload_file.filelist if 'template.json' in filename) +# auxfiles = list(upload_file.get_siblings(mainfile)) - try: - calc = RepoCalc.get(id='test_upload_hash/test_calc_hash') - except NotFoundError: - pass - else: - calc.delete() +# try: +# calc = RepoCalc.get(id='test_upload_hash/test_calc_hash') +# except NotFoundError: +# pass +# else: +# calc.delete() - entry = RepoCalc.create_from_backend( - normalized_template_example, - upload_hash='test_upload_hash', - calc_hash='test_calc_hash', - upload_id='test_upload_id', - additional=dict( - mainfile=mainfile, - upload_time=datetime.now(), - staging=True, restricted=False, user_id=str(test_user.user_id), - aux_files=auxfiles)) +# entry = RepoCalc.create_from_backend( +# normalized_template_example, +# upload_hash='test_upload_hash', +# calc_hash='test_calc_hash', +# upload_id='test_upload_id', +# additional=dict( +# mainfile=mainfile, +# upload_time=datetime.now(), +# staging=True, restricted=False, user_id=str(test_user.user_id), +# aux_files=auxfiles)) - entry.persist(refresh='true') +# entry.persist(refresh='true') - yield entry +# yield entry - try: - calc = RepoCalc.get(id='test_upload_hash/test_calc_hash') - except NotFoundError: - pass - else: - calc.delete() +# try: +# calc = RepoCalc.get(id='test_upload_hash/test_calc_hash') +# except NotFoundError: +# pass +# else: +# calc.delete() -def assert_elastic_calc(calc: RepoCalc): - assert calc is not None - for property in RepoCalc._doc_type.mapping: - assert getattr(calc, property) is not None +# def assert_elastic_calc(calc: RepoCalc): +# assert calc is not None +# for property in RepoCalc._doc_type.mapping: +# assert getattr(calc, property) is not None - assert len(getattr(calc, 'aux_files')) > 0 +# assert len(getattr(calc, 'aux_files')) > 0 -def test_create_elastic_calc(example_elastic_calc: RepoCalc, no_warn): - assert_elastic_calc(example_elastic_calc) - assert example_elastic_calc.upload.exists() 
+# def test_create_elastic_calc(example_elastic_calc: RepoCalc, no_warn): +# assert_elastic_calc(example_elastic_calc) +# assert example_elastic_calc.upload.exists() - get_result: RepoCalc = RepoCalc.get( - id='%s/%s' % (example_elastic_calc.upload_hash, example_elastic_calc.calc_hash)) - assert_elastic_calc(get_result) +# get_result: RepoCalc = RepoCalc.get( +# id='%s/%s' % (example_elastic_calc.upload_hash, example_elastic_calc.calc_hash)) +# assert_elastic_calc(get_result) -def test_create_existing_elastic_calc( - example_elastic_calc: RepoCalc, normalized_template_example, test_user): +# def test_create_existing_elastic_calc( +# example_elastic_calc: RepoCalc, normalized_template_example, test_user): - calc = RepoCalc.create_from_backend( - normalized_template_example, - upload_hash='test_upload_hash', - calc_hash='test_calc_hash', - upload_id='test_upload_id', - additional=dict( - mainfile='/test/mainfile', - upload_time=datetime.now(), - staging=True, restricted=False, user_id=str(test_user.user_id))) +# calc = RepoCalc.create_from_backend( +# normalized_template_example, +# upload_hash='test_upload_hash', +# calc_hash='test_calc_hash', +# upload_id='test_upload_id', +# additional=dict( +# mainfile='/test/mainfile', +# upload_time=datetime.now(), +# staging=True, restricted=False, user_id=str(test_user.user_id))) - try: - calc.persist(refresh='true') - assert False - except AlreadyExists: - pass - else: - assert False +# try: +# calc.persist(refresh='true') +# assert False +# except AlreadyExists: +# pass +# else: +# assert False -def test_delete_elastic_calc(example_elastic_calc: RepoCalc): - example_elastic_calc.delete() +# def test_delete_elastic_calc(example_elastic_calc: RepoCalc): +# example_elastic_calc.delete() - assert not ArchiveFile('test_upload_hash/test_calc_hash').exists() - try: - RepoCalc.get(id='test_upload_hash/test_calc_hash') - assert False - except NotFoundError: - pass - else: - assert False +# assert not ArchiveFile('test_upload_hash/test_calc_hash').exists() +# try: +# RepoCalc.get(id='test_upload_hash/test_calc_hash') +# assert False +# except NotFoundError: +# pass +# else: +# assert False -def test_staging_elastic_calc(example_elastic_calc: RepoCalc, no_warn): - assert RepoCalc.get(id='test_upload_hash/test_calc_hash').staging +# def test_staging_elastic_calc(example_elastic_calc: RepoCalc, no_warn): +# assert RepoCalc.get(id='test_upload_hash/test_calc_hash').staging -def test_unstage_elastic_calc(example_elastic_calc: RepoCalc, no_warn): - example_elastic_calc.upload.unstage(staging=False) +# def test_unstage_elastic_calc(example_elastic_calc: RepoCalc, no_warn): +# example_elastic_calc.upload.unstage(staging=False) - assert not RepoCalc.get(id='test_upload_hash/test_calc_hash').staging +# assert not RepoCalc.get(id='test_upload_hash/test_calc_hash').staging diff --git a/tests/test_uploads.py b/tests/test_uploads.py index 86342d6837a493256745ba2e8f1f9aa524403ce5..beaa0a6c22ced12b1fd23ca7e3b6c870c923162a 100644 --- a/tests/test_uploads.py +++ b/tests/test_uploads.py @@ -17,6 +17,7 @@ import os import os.path import shutil import pytest +import json from nomad import config from nomad.uploads import DirectoryObject, PathObject @@ -200,11 +201,11 @@ class UploadFilesContract(UploadFilesFixtures): try: with test_upload.raw_file(example_file_mainfile) as f: assert len(f.read()) > 0 - if test_upload.public_only: + if not test_upload._is_authorized(): with test_upload.metadata as md: assert not md.get(example_calc_hash).get('restricted', False) except 
Restricted:
-            assert test_upload.public_only
+            assert not test_upload._is_authorized()
             with test_upload.metadata as md:
                 assert md.get(example_calc_hash).get('restricted', False)
 
@@ -215,18 +216,19 @@
     @pytest.mark.parametrize('test_logs', [True, False])
     def test_archive(self, test_upload, test_logs: bool):
-        if test_logs:
-            open = test_upload.archive_log_file(example_calc_hash, 'rt')
-        else:
-            open = test_upload.archive_file(example_calc_hash, 'rt')
         try:
-            with open as f:
-                assert f.read() == 'archive'
-            if test_upload.public_only:
+            if test_logs:
+                with test_upload.archive_log_file(example_calc_hash, 'rt') as f:
+                    assert f.read() == 'archive'
+            else:
+                f = test_upload.archive_file(example_calc_hash, 'rt')
+                assert json.load(f) == 'archive'
+
+            if not test_upload._is_authorized():
                 with test_upload.metadata as md:
                     assert not md.get(example_calc_hash).get('restricted', False)
         except Restricted:
-            assert test_upload.public_only
+            assert not test_upload._is_authorized()
             with test_upload.metadata as md:
                 assert md.get(example_calc_hash).get('restricted', False)
 
@@ -242,59 +244,64 @@
         assert md.get(example_calc_hash)['data'] == 'updated'
 
 
-class TestStagingUploadFiles(UploadFilesContract):
-
-    @staticmethod
-    def create_upload(upload_id: str, calc_specs: str) -> StagingUploadFiles:
-        """
-        Create an upload according to given calc_specs. Where calc specs is a string
-        with letters determining example calcs being public or restricted.
-        The calcs will be copies of example_calc. First calc is at top level, following
-        calcs will be put under 1/, 2/, etc.
-        """
-        upload = StagingUploadFiles(upload_id, create=True, archive_ext='txt', public_only=False)
-
-        prefix = 0
-        for calc_spec in calc_specs:
-            upload.add_rawfiles(example_file, prefix=None if prefix == 0 else str(prefix))
-            hash = str(int(example_calc_hash) + prefix)
-            with upload.archive_file(hash, 'wt') as f:
-                f.write('archive')
-            with upload.archive_log_file(hash, 'wt') as f:
-                f.write('archive')
-            calc = dict(**example_calc)
-            calc['hash'] = hash
-            if prefix > 0:
-                calc['mainfile'] = os.path.join(str(prefix), calc['mainfile'])
-            if calc_spec == 'r':
-                calc['restricted'] = True
-            elif calc_spec == 'p':
-                calc['restricted'] = False
-            upload.metadata.insert(calc)
-            prefix += 1
+def create_staging_upload(upload_id: str, calc_specs: str) -> StagingUploadFiles:
+    """
+    Create an upload according to the given spec. Additional arguments are given to
+    the StagingUploadFiles constructor.
+
+    Arguments:
+        upload_id: The id that should be given to this test upload.
+        calc_specs: A string that determines the properties of the given upload.
+            With letters determining example calcs being public `p` or restricted `r`.
+            The calcs will be copies of calcs in `example_file`.
+            First calc is at top level, following calcs will be put under 1/, 2/, etc.
+ """ + upload = StagingUploadFiles(upload_id, create=True, is_authorized=lambda: True) + + prefix = 0 + for calc_spec in calc_specs: + upload.add_rawfiles(example_file, prefix=None if prefix == 0 else str(prefix)) + hash = str(int(example_calc_hash) + prefix) + with upload.archive_file(hash, 'wt') as f: + f.write('"archive"') + with upload.archive_log_file(hash, 'wt') as f: + f.write('archive') + calc = dict(**example_calc) + calc['hash'] = hash + if prefix > 0: + calc['mainfile'] = os.path.join(str(prefix), calc['mainfile']) + if calc_spec == 'r': + calc['restricted'] = True + elif calc_spec == 'p': + calc['restricted'] = False + upload.metadata.insert(calc) + prefix += 1 + + if calc_specs.startswith('P'): + public_only = True + calc_specs = calc_specs[1:] + else: + public_only = False + upload._is_authorized = lambda: not public_only + + with upload.metadata as md: + assert len(md) == len(calc_specs) + return upload - if calc_specs.startswith('P'): - public_only = True - calc_specs = calc_specs[1:] - else: - public_only = False - upload.public_only = public_only - with upload.metadata as md: - assert len(md) == len(calc_specs) - return upload +class TestStagingUploadFiles(UploadFilesContract): @pytest.fixture(scope='function', params=['r', 'rr', 'pr', 'rp', 'p', 'pp']) def test_upload(self, request, test_upload_id: str) -> StagingUploadFiles: - return TestStagingUploadFiles.create_upload(test_upload_id, calc_specs=request.param) + return create_staging_upload(test_upload_id, calc_specs=request.param) @pytest.fixture(scope='function') def empty_test_upload(self, test_upload_id) -> Generator[UploadFiles, None, None]: - yield StagingUploadFiles(test_upload_id, create=True, public_only=False) + yield StagingUploadFiles(test_upload_id, create=True, is_authorized=lambda: True) @pytest.mark.parametrize('prefix', [None, 'prefix']) def test_add_rawfiles_zip(self, test_upload_id, prefix): - test_upload = StagingUploadFiles(test_upload_id, create=True, archive_ext='txt', public_only=False) + test_upload = StagingUploadFiles(test_upload_id, create=True, is_authorized=lambda: True) test_upload.add_rawfiles(example_file, prefix=prefix) for filepath in example_file_contents: filepath = os.path.join(prefix, filepath) if prefix else filepath @@ -304,8 +311,7 @@ class TestStagingUploadFiles(UploadFilesContract): assert len(content) > 0 def test_write_archive(self, test_upload): - with test_upload.archive_file(example_calc_hash, 'rt') as f: - assert f.read() == 'archive' + assert json.load(test_upload.archive_file(example_calc_hash, 'rt')) == 'archive' def test_calc_hash(self, test_upload): assert test_upload.calc_hash(example_file_mainfile) is not None @@ -348,14 +354,18 @@ class TestArchiveBasedStagingUploadFiles(UploadFilesFixtures): assert not ArchiveBasedStagingUploadFiles(test_upload_id, create=True).is_valid +def create_public_upload(upload_id: str, calc_specs: str, **kwargs): + staging_upload = create_staging_upload(upload_id, calc_specs) + staging_upload.pack() + staging_upload.delete() + return PublicUploadFiles(upload_id, **kwargs) + + class TestPublicUploadFiles(UploadFilesContract): @pytest.fixture(scope='function') def empty_test_upload(self, test_upload_id: str) -> Generator[UploadFiles, None, None]: - staging_upload = TestStagingUploadFiles.create_upload(test_upload_id, calc_specs='') - staging_upload.pack() - staging_upload.delete() - yield PublicUploadFiles(test_upload_id, archive_ext='txt') + yield create_public_upload(test_upload_id, calc_specs='', is_authorized=lambda: True) 
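# Annotation (not part of the patch): the calc_specs convention used by
# create_staging_upload and create_public_upload above, made explicit. Each
# letter creates one example calc, 'r' restricted and 'p' public; a leading
# 'P' additionally marks the upload as accessible for public data only. The
# helper below is hypothetical, written only to document the convention:
from typing import List, Tuple


def parse_calc_specs(calc_specs: str) -> Tuple[bool, List[bool]]:
    public_only = calc_specs.startswith('P')
    if public_only:
        calc_specs = calc_specs[1:]
    # One entry per calc: True if that calc is restricted.
    return public_only, [spec == 'r' for spec in calc_specs]


assert parse_calc_specs('Ppr') == (True, [False, True])
assert parse_calc_specs('rp') == (False, [True, False])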
@pytest.fixture(scope='function', params=['r', 'rr', 'pr', 'rp', 'p', 'pp', 'Ppr', 'Prp']) def test_upload(self, request, test_upload_id: str) -> PublicUploadFiles: @@ -366,6 +376,6 @@ class TestPublicUploadFiles(UploadFilesContract): else: public_only = False - staging_upload = TestStagingUploadFiles.create_upload(test_upload_id, calc_specs=calc_specs) + staging_upload = create_staging_upload(test_upload_id, calc_specs=calc_specs) staging_upload.pack() - return PublicUploadFiles(test_upload_id, archive_ext='txt', public_only=public_only) + return PublicUploadFiles(test_upload_id, is_authorized=lambda: not public_only)
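# Annotation (not part of the patch): taken together, the fixtures above
# exercise the staging-to-public life cycle this change introduces. A hedged
# end-to-end sketch, assuming the nomad.uploads API shown in this diff; the
# upload id and file path are illustrative only:
from nomad.uploads import PublicUploadFiles, Restricted, StagingUploadFiles

staging = StagingUploadFiles('demo_upload', create=True, is_authorized=lambda: True)
# ... add raw files, write archives, and insert calc metadata here ...
staging.pack()    # repackage the staging data into public/restricted zips
staging.delete()  # the staging copy is no longer needed afterwards

public = PublicUploadFiles('demo_upload', is_authorized=lambda: False)
try:
    public.raw_file('some/restricted/file.json', 'rt')
except Restricted:
    pass  # guests get Restricted for restricted files, as the tests assert
except KeyError:
    pass  # or KeyError if the file does not exist at all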