From f28859809f874b92d509a1c0912f25f3779ef4a1 Mon Sep 17 00:00:00 2001
From: Markus Scheidgen <markus.scheidgen@gmail.com>
Date: Mon, 8 Oct 2018 11:35:55 +0200
Subject: [PATCH] Added files to api.

---
 nomad/api.py       | 74 ++++++++++++++++++++++++++++++++++++++++++----
 nomad/files.py     |  5 ++--
 tests/test_api.py  | 34 +++++++++++++++++++++
 tests/test_repo.py | 12 ++++++--
 4 files changed, 114 insertions(+), 11 deletions(-)

diff --git a/nomad/api.py b/nomad/api.py
index 22e3b2db73..3652010f16 100644
--- a/nomad/api.py
+++ b/nomad/api.py
@@ -461,6 +461,7 @@ class UploadFileRes(Resource):
                 with uploadFile.open('wb') as f:
                     while not request.stream.is_exhausted:
                         f.write(request.stream.read(1024))
+
             except Exception as e:
                 logger.error('Error on streaming upload', exc_info=e)
                 abort(400, message='Some IO went wrong, download probably aborted/disrupted.')
@@ -665,13 +666,13 @@ def get_calc_proc_log(upload_hash, calc_hash):
 
     try:
         archive = ArchiveLogFile(archive_id)
-        arhchive_path = archive.os_path
+        archive_path = archive.os_path
 
         rv = send_file(
-            arhchive_path,
+            archive_path,
             mimetype='application/text',
             as_attachment=True,
-            attachment_filename=os.path.basename(arhchive_path))
+            attachment_filename=os.path.basename(archive_path))
 
         return rv
     except KeyError:
@@ -712,13 +713,13 @@ def get_calc(upload_hash, calc_hash):
 
     try:
         archive = ArchiveFile(archive_id)
-        arhchive_path = archive.os_path
+        archive_path = archive.os_path
 
         rv = send_file(
-            arhchive_path,
+            archive_path,
             mimetype='application/json',
             as_attachment=True,
-            attachment_filename=os.path.basename(arhchive_path))
+            attachment_filename=os.path.basename(archive_path))
 
         if config.files.compress_archive:
             rv.headers['Content-Encoding'] = 'gzip'
@@ -736,6 +737,67 @@ def get_calc(upload_hash, calc_hash):
         abort(500, message='Could not accessing the archive.')
 
 
+@app.route('%s/raw/<string:upload_hash>/<string:calc_hash>' % base_path, methods=['GET'])
+def get_raw(upload_hash, calc_hash):
+    """
+    Get calculation mainfile raw data. Calcs are referenced via *upload_hash*, *calc_hash*
+    pairs. Returns the mainfile, unless an aux_file is specified. Aux files are stored
+    in repository entries.
+
+    .. :quickref: repo; Get calculation raw data.
+
+    **Example request**:
+
+    .. sourcecode:: http
+
+        GET /nomad/api/raw/W36aqCzAKxOCfIiMFsBJh3nHPb4a/7ddvtfRfZAvc3Crr7jOJ8UH0T34I HTTP/1.1
+        Accept: application/octet-stream
+
+    :param string upload_hash: the hash of the upload (from uploaded file contents)
+    :param string calc_hash: the hash of the calculation (from mainfile)
+    :qparam str auxfile: an optional aux_file to download the respective aux file, default is mainfile
+    :resheader Content-Type: application/octet-stream
+    :status 200: calc raw data successfully retrieved
+    :status 404: calc with given hashes does not exist or the given aux file does not exist
+    :status 500: an unexpected error occurred while reading the calc or its raw file
+    :returns: the raw data in body
+    """
+    archive_id = '%s/%s' % (upload_hash, calc_hash)
+    try:
+        repo = RepoCalc.get(id=archive_id)
+    except NotFoundError:
+        abort(404, message='There is no calculation for %s/%s' % (upload_hash, calc_hash))
+    except Exception as e:
+        abort(500, message=str(e))
+
+    auxfile = request.args.get('auxfile', None)
+    if auxfile:
+        filename = os.path.join(os.path.dirname(repo.mainfile), auxfile)
+    else:
+        filename = repo.mainfile
+
+    try:
+        upload = Upload.get(repo.upload_id)
+        upload_file = UploadFile(upload.upload_id, local_path=upload.local_path)
+        the_file = upload_file.get_file(filename)
+        with the_file.open() as f:
+            # NOTE(review): the response leaves this with block before it is sent;
+            # confirm that f is still readable when the body is actually streamed
+            return send_file(
+                f,
+                mimetype='application/octet-stream',
+                as_attachment=True,
+                attachment_filename=os.path.basename(filename))
+    except (KeyError, FileNotFoundError):
+        abort(404, message='The file %s does not exist.' % filename)
+    except Exception as e:
+        logger = get_logger(
+            __name__, endpoint='raw', action='get',
+            upload_hash=upload_hash, calc_hash=calc_hash)
+        logger.error('Exception on accessing raw data', exc_info=e)
+        abort(500, message='Could not access the raw data.')
+
+
 @app.route('%s/admin/<string:operation>' % base_path, methods=['POST'])
 def call_admin_operation(operation):
     if operation == 'repair_uploads':
diff --git a/nomad/files.py b/nomad/files.py
index 79952ba227..fb4383ac8a 100644
--- a/nomad/files.py
+++ b/nomad/files.py
@@ -111,10 +111,11 @@ class ZippedFile(File):
         self.logger.debug('open file')
         try:
             with ZipFile(self.os_path) as zip_file:
-                with zip_file.open(self.filename, *args, **kwargs) as f:
-                    yield f
+                yield zip_file.open(self.filename, *args, **kwargs)
         except FileNotFoundError:
             raise KeyError()
+        except KeyError as e:
+            raise e
         except Exception as e:
             msg = 'Could not read upload.'
             self.logger.error(msg, exc_info=e)
diff --git a/tests/test_api.py b/tests/test_api.py
index ad6f798f0c..07fba40ba1 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -3,6 +3,7 @@ import time
 import json
 import zlib
 import re
+import os.path
 from mongoengine import connect
 from mongoengine.connection import disconnect
 from datetime import datetime, timedelta
@@ -19,6 +20,7 @@ from nomad.files import UploadFile  # noqa
 from nomad.processing import Upload  # noqa
 
 from tests.processing.test_data import example_files  # noqa
+from tests.test_files import example_file  # noqa
 
 # import fixtures
 from tests.test_files import clear_files, archive, archive_log, archive_config  # noqa pylint: disable=unused-import
@@ -321,3 +323,35 @@ def test_get_calc_proc_log(client, archive_log, no_warn):
 def test_get_non_existing_archive(client, no_warn):
     rv = client.get('/archive/%s' % 'doesnt/exist')
     assert rv.status_code == 404
+
+
+@pytest.fixture
+def example_repo_with_files(mockmongo, example_elastic_calc):
+    """ Saves an upload for the example calc that points to the local example file. """
+    entry = Upload(id=example_elastic_calc.upload_id, local_path=os.path.abspath(example_file))
+    entry.user_id = 'does@not.exist'
+    entry.create_time = datetime.now()
+    entry.save()
+    return example_elastic_calc
+
+
+def test_raw_mainfile(client, example_repo_with_files, no_warn):
+    """ The mainfile of an existing calc can be downloaded and is not empty. """
+    response = client.get('/raw/%s' % example_repo_with_files.archive_id)
+    assert response.status_code == 200 and len(response.data) > 0
+
+
+def test_raw_auxfile(client, example_repo_with_files, no_warn):
+    """ The aux file 1.aux of the example calc is served; its body is expected to be empty. """
+    response = client.get('/raw/%s?auxfile=1.aux' % example_repo_with_files.archive_id)
+    assert response.status_code == 200 and len(response.data) == 0
+
+
+def test_raw_missing_auxfile(client, example_repo_with_files, no_warn):
+    raw_url = '/raw/%s?auxfile=doesnotexist' % example_repo_with_files.archive_id
+    assert client.get(raw_url).status_code == 404
+
+
+def test_raw_missing_mainfile(client, no_warn):
+    # no example calc fixture is requested here, the url refers to an unknown calc
+    assert client.get('/raw/doesnot/exist').status_code == 404
diff --git a/tests/test_repo.py b/tests/test_repo.py
index 46697b92df..d789890029 100644
--- a/tests/test_repo.py
+++ b/tests/test_repo.py
@@ -17,10 +17,11 @@ from typing import Generator
 from datetime import datetime
 from elasticsearch import NotFoundError
 
-from nomad.files import ArchiveFile
+from nomad.files import ArchiveFile, UploadFile
 from nomad.parsing import LocalBackend
 from nomad.repo import AlreadyExists, RepoCalc, key_mappings
 
+from tests.test_files import example_file  # noqa
 from tests.test_normalizing import normalized_template_example  # pylint: disable=unused-import
 from tests.test_parsing import parsed_template_example  # pylint: disable=unused-import
 
@@ -28,6 +29,11 @@ from tests.test_parsing import parsed_template_example  # pylint: disable=unused
 @pytest.fixture(scope='function')
 def example_elastic_calc(normalized_template_example: LocalBackend, elastic) \
         -> Generator[RepoCalc, None, None]:
+
+    upload_file = UploadFile('test_upload_id', local_path=example_file)
+    mainfile = next(filename for filename in upload_file.filelist if 'template.json' in filename)
+    auxfiles = list(upload_file.get_siblings(mainfile))
+
     try:
         calc = RepoCalc.get(id='test_upload_hash/test_calc_hash')
     except NotFoundError:
@@ -41,10 +47,10 @@ def example_elastic_calc(normalized_template_example: LocalBackend, elastic) \
         calc_hash='test_calc_hash',
         upload_id='test_upload_id',
         additional=dict(
-            mainfile='/test/mainfile',
+            mainfile=mainfile,
             upload_time=datetime.now(),
             staging=True, restricted=False, user_id='me@gmail.com',
-            aux_files=['/test/aux1', '/test/aux2']),
+            aux_files=auxfiles),
         refresh='true')
 
     yield entry
-- 
GitLab