Commit a09e4ca8 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Refactored raw API endpoint to use upload relative paths.

parent f2ee8b19
......@@ -44,7 +44,7 @@
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/.pyenv/bin/pytest",
"args": [
"-sv", "tests/test_coe_repo.py::test_add_upload"
"-sv", "tests/test_api.py::test_raw_file"
]
},
{
......
......@@ -28,4 +28,4 @@ There is a separate documentation for the API endpoints from a client perspectiv
"""
from .app import app
from . import upload, repository, archive
from . import upload, repository, archive, raw
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The raw API of the nomad@FAIRDI APIs. Can be used to retrieve raw calculation files.
"""
# TODO implement restrictions based on user, permissions, and upload/calc metadata
import os.path
from zipfile import ZIP_DEFLATED
import zipstream
from flask import Response, request, send_file
from flask_restful import abort
from werkzeug.exceptions import HTTPException
from nomad.files import RepositoryFile
from nomad.utils import get_logger
from .app import app, base_path
@app.route('%s/raw/<string:upload_hash>/<path:upload_filepath>' % base_path, methods=['GET'])
def get_raw_file(upload_hash, upload_filepath):
    """
    Get a single raw calculation file from a given upload.

    .. :quickref: raw; Get single raw calculation file.

    **Example request**:

    .. sourcecode:: http

        GET /nomad/api/raw/W36aqCzAKxOCfIiMFsBJh3nHPb4a/Si/si.out HTTP/1.1
        Accept: application/octet-stream

    :param string upload_hash: the hash based identifier of the upload
    :param path upload_filepath: the path to the desired file within the upload
    :resheader Content-Type: application/octet-stream
    :status 200: calc raw data successfully retrieved
    :status 404: upload with given hash does not exist or the given file does not exist
    :status 500: the file could not be accessed for any other reason
    :returns: the raw file contents as an attachment in the body
    """
    repository_file = RepositoryFile(upload_hash)
    if not repository_file.exists():
        abort(404, message='The upload with hash %s does not exist.' % upload_hash)

    try:
        the_file = repository_file.get_file(upload_filepath)
        # NOTE(review): the response object is created while ``f`` is open, but
        # send_file may only read from ``f`` when the response is iterated, i.e.
        # after this ``with`` block has closed it -- TODO confirm the body is
        # still readable for every file type returned by ``the_file.open()``.
        with the_file.open() as f:
            return send_file(
                f,
                mimetype='application/octet-stream',
                as_attachment=True,
                # only the last path segment is offered as download name
                attachment_filename=os.path.basename(upload_filepath))
    except (KeyError, FileNotFoundError):
        # both exception types are treated as "no such file in this upload"
        abort(404, message='The file %s does not exist.' % upload_filepath)
    except HTTPException:
        # HTTP errors must not be converted into a generic 500 below
        raise
    except Exception as e:
        logger = get_logger(
            __name__, endpoint='raw', action='get',
            upload_hash=upload_hash, upload_filepath=upload_filepath)
        logger.error('Exception on accessing raw data', exc_info=e)
        abort(500, message='Could not access the raw data.')
@app.route('%s/raw/<string:upload_hash>' % base_path, methods=['GET'])
def get_raw_files(upload_hash):
    """
    Get multiple raw calculation files.

    .. :quickref: raw; Get multiple raw calculation files.

    **Example request**:

    .. sourcecode:: http

        GET /nomad/api/raw/W36aqCzAKxOCfIiMFsBJh3nHPb4a?files=Si/si.out,Si/aux.txt HTTP/1.1
        Accept: application/zip

    :param string upload_hash: the hash based identifier of the upload
    :qparam str files: a comma separated list of file paths
    :resheader Content-Type: application/zip
    :status 200: calc raw data successfully retrieved
    :status 400: the ``files`` query parameter is missing
    :status 404: upload with given hash does not exist
    :returns: a streamed .zip archive with the raw data; requested files that do not
        exist in the upload are left out of the archive
    """
    logger = get_logger(__name__, endpoint='raw', action='get', upload_hash=upload_hash)

    files_str = request.args.get('files', None)
    if files_str is None:
        abort(400, message="No files argument given.")

    # Strip whitespace around each entry and drop empty entries (e.g. caused by a
    # trailing comma); an empty path cannot name a file.
    stripped_names = (part.strip() for part in files_str.split(','))
    files = [name for name in stripped_names if name]

    repository_file = RepositoryFile(upload_hash)
    if not repository_file.exists():
        abort(404, message='The upload with hash %s does not exist.' % upload_hash)

    def generator():
        """ Stream a zip file with all files using zipstream. """
        def iterator():
            """ Replace the directory based iter of zipstream with an iter over all given files. """
            try:
                for filename in files:
                    # Write a file to the zipstream.
                    try:
                        the_file = repository_file.get_file(filename)
                        with the_file.open() as f:
                            def iter_content():
                                """ Yield the file contents in chunks of 1024. """
                                while True:
                                    data = f.read(1024)
                                    if not data:
                                        break
                                    yield data

                            # Yielded while ``f`` is still open; presumably zipstream
                            # drains ``iterable`` before asking for the next entry
                            # -- TODO confirm.
                            yield dict(arcname=filename, iterable=iter_content())
                    except (KeyError, FileNotFoundError):
                        # Files that are not found will not be returned. Both
                        # exception types mean "missing", as in get_raw_file, so a
                        # single missing file no longer ends the whole iteration.
                        continue
            except Exception as e:
                # Any other failure stops adding files; the archive is cut short.
                logger.error('Exception while accessing auxfiles.', exc_info=e)

        zip_stream = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED)
        zip_stream.paths_to_write = iterator()

        for chunk in zip_stream:
            yield chunk

    response = Response(generator(), mimetype='application/zip')
    response.headers['Content-Disposition'] = 'attachment; filename={}'.format('%s.zip' % upload_hash)
    return response
......@@ -14,24 +14,16 @@
"""
The repository API of the nomad@FAIRDI APIs. Currently allows to resolve repository
meta-data as well as raw-calculation files.
meta-data.
"""
import os.path
from contextlib import contextmanager
from zipfile import ZIP_DEFLATED
import zipstream
from elasticsearch.exceptions import NotFoundError
from flask import Response, g, request, send_file
from flask import g, request
from flask_restful import Resource, abort
from werkzeug.exceptions import HTTPException
from nomad.files import RepositoryFile
from nomad.repo import RepoCalc
from nomad.utils import get_logger
from .app import api, app, auth, base_path
from .app import api, auth, base_path
class RepoCalcRes(Resource):
......@@ -196,115 +188,5 @@ class RepoCalcsRes(Resource):
}
@app.route('%s/raw/<string:upload_hash>/<string:calc_hash>' % base_path, methods=['GET'])
def get_raw(upload_hash, calc_hash):
    """
    Get calculation mainfile raw data. Calcs are referenced via *upload_hash*, *calc_hash*
    pairs. Returns the mainfile, unless an aux_file is specified. Aux files are stored
    in repository entries. See ``/repo`` endpoint.

    .. :quickref: repo; Get calculation raw data.

    **Example request**:

    .. sourcecode:: http

        GET /nomad/api/raw/W36aqCzAKxOCfIiMFsBJh3nHPb4a/7ddvtfRfZAvc3Crr7jOJ8UH0T34I HTTP/1.1
        Accept: application/gz

    :param string upload_hash: the hash of the upload (from uploaded file contents)
    :param string calc_hash: the hash of the calculation (from mainfile)
    :qparam str auxfile: an optional aux_file to download the respective aux file, default is mainfile
    :qparam all: set any value to get a .zip with main and aux files instead of an individual file
    :resheader Content-Type: application/octet-stream for an individual file,
        application/zip if ``all`` is given
    :status 200: calc raw data successfully retrieved
    :status 404: calc with given hashes does not exist or the given aux file does not exist
    :status 500: the repository entry or the raw file could not be accessed
    :returns: the raw data in body
    """
    archive_id = '%s/%s' % (upload_hash, calc_hash)
    logger = get_logger(__name__, endpoint='raw', action='get', archive_id=archive_id)

    # Resolve the repository entry; it provides the mainfile and aux file names.
    try:
        repo = RepoCalc.get(id=archive_id)
    except NotFoundError:
        abort(404, message='There is no calculation for %s/%s' % (upload_hash, calc_hash))
    except Exception as e:
        # NOTE(review): the exception text is sent to the client as the message.
        abort(500, message=str(e))

    repository_file = RepositoryFile(upload_hash)

    @contextmanager
    def raw_file(filename):
        """ Open the given file of the upload; abort with 404 if it does not exist. """
        try:
            the_file = repository_file.get_file(filename)
            with the_file.open() as f:
                yield f
        except KeyError:
            abort(404, message='The file %s does not exist.' % filename)
        except FileNotFoundError:
            abort(404, message='The file %s does not exist.' % filename)

    # The mere presence of the parameter counts, its value is ignored.
    get_all = request.args.get('all', None) is not None
    if get_all:
        # retrieve the 'whole' calculation, meaning the mainfile and all aux files as
        # a .zip archive
        def generator():
            """ Stream a zip file with all files using zipstream. """
            def iterator():
                """ Replace the directory based iter of zipstream with an iter over all raw files. """
                def write(filename):
                    """ Write a raw file to the zipstream. """
                    def iter_content():
                        """ Iterate the raw file contents. """
                        # The file is only opened once this generator is consumed.
                        # NOTE(review): a missing file therefore triggers abort()
                        # during streaming -- confirm this yields a proper 404.
                        with raw_file(filename) as file_object:
                            while True:
                                data = file_object.read(1024)
                                if not data:
                                    break
                                yield data

                    return dict(arcname=filename, iterable=iter_content())

                yield write(repo.mainfile)
                try:
                    # aux file names are joined onto the directory of the mainfile
                    for auxfile in repo.aux_files:
                        yield write(os.path.join(os.path.dirname(repo.mainfile), auxfile))
                except Exception as e:
                    logger.error('Exception while accessing auxfiles.', exc_info=e)

            zip_stream = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED)
            # feed zipstream from the iterator above
            zip_stream.paths_to_write = iterator()

            for chunk in zip_stream:
                yield chunk

        response = Response(generator(), mimetype='application/zip')
        response.headers['Content-Disposition'] = 'attachment; filename={}'.format('%s.zip' % archive_id)
        return response
    else:
        # retrieve an individual raw file
        auxfile = request.args.get('auxfile', None)
        if auxfile:
            filename = os.path.join(os.path.dirname(repo.mainfile), auxfile)
        else:
            filename = repo.mainfile

        try:
            with raw_file(filename) as f:
                rv = send_file(
                    f,
                    mimetype='application/octet-stream',
                    as_attachment=True,
                    attachment_filename=os.path.basename(filename))
                return rv
        except HTTPException as e:
            # keep HTTP errors (e.g. the 404 from raw_file) from becoming a 500
            raise e
        except Exception as e:
            # NOTE(review): endpoint label and messages say 'archive' although this
            # is the raw endpoint -- looks like a copy-paste leftover, confirm.
            logger = get_logger(
                __name__, endpoint='archive', action='get',
                upload_hash=upload_hash, calc_hash=calc_hash)
            logger.error('Exception on accessing archive', exc_info=e)
            abort(500, message='Could not accessing the archive.')
api.add_resource(RepoCalcsRes, '%s/repo' % base_path)
api.add_resource(RepoCalcRes, '%s/repo/<string:upload_hash>/<string:calc_hash>' % base_path)
......@@ -386,17 +386,13 @@ class UploadFile(ObjectFile):
Returns the names of all files that share the same prefix (object id),
respectively are part of the same directory (incl. files in sub directories).
In nomad terms, the aux files of this file. Returned siblings are relative
to this files directory.
to the upload root directory.
"""
dirname = os.path.dirname(filename)
dirname_len = len(dirname) + 1
for other in self.filelist:
if other.startswith(dirname) and other != filename:
yield other[dirname_len:]
def get_sibling_file(self, filename: str, sibling: str) -> File:
sibling_name = os.path.join(os.path.dirname(filename), sibling)
return self.get_file(sibling_name)
yield other
class RepositoryFile(ObjectFile):
......
DELETE http://localhost:9200/index2018-11-15 HTTP/1.1;
DELETE http://localhost:9200/repo_index HTTP/1.1;
###
DELETE http://localhost:9200/topics2018-11-15 HTTP/1.1;
DELETE http://localhost:9200/repo_topics HTTP/1.1;
###
GET http://localhost:9200/_cat/indices HTTP/1.1;
###
GET http://localhost:9200/index2018-11-15/_search HTTP/1.1;
GET http://localhost:9200/repo_index/_search HTTP/1.1;
###
......@@ -15,5 +15,15 @@ POST http://localhost:8111/repo/repo-update/index/calculation HTTP/1.1;
content-type: application/json
{
"calc_ids": [10]
"calc_ids": [13]
}
###
POST http://localhost:8000/nomad/api/uploads/-kp9ZIYTSJa0EWUAgtKU4A HTTP/1.1;
content-type: application/json
Authorization: Basic bGVvbmFyZC5ob2ZzdGFkdGVyQG5vbWFkLWZhaXJkaS50ZXN0cy5kZTpub21hZA==
{
"operation": "unstage"
}
\ No newline at end of file
......@@ -336,45 +336,65 @@ def test_get_non_existing_archive(client, no_warn):
assert rv.status_code == 404
@pytest.fixture
def example_repo_with_files(mockmongo, example_elastic_calc):
upload = Upload(id=example_elastic_calc.upload_id, local_path=os.path.abspath(example_file))
upload.create_time = datetime.now()
upload.user_id = 'does@not.exist'
upload.save()
with UploadFile(upload.upload_id, local_path=upload.local_path) as upload_file:
upload_file.persist(example_elastic_calc.upload_hash)
class TestRaw:
return example_elastic_calc
@pytest.fixture
def example_repo_with_files(self, mockmongo, example_elastic_calc, no_warn):
upload = Upload(id=example_elastic_calc.upload_id, local_path=os.path.abspath(example_file))
upload.create_time = datetime.now()
upload.user_id = 'does@not.exist'
upload.save()
with UploadFile(upload.upload_id, local_path=upload.local_path) as upload_file:
upload_file.persist(example_elastic_calc.upload_hash)
def test_raw_mainfile(client, example_repo_with_files, no_warn):
rv = client.get('/raw/%s' % example_repo_with_files.archive_id)
assert rv.status_code == 200
assert len(rv.data) > 0
def test_raw_auxfile(client, example_repo_with_files, no_warn):
rv = client.get('/raw/%s?auxfile=1.aux' % example_repo_with_files.archive_id)
assert rv.status_code == 200
assert len(rv.data) == 0
return example_elastic_calc
def test_raw_file(self, client, example_repo_with_files):
repo_entry = example_repo_with_files
url = '/raw/%s/%s' % (repo_entry.upload_hash, repo_entry.mainfile)
rv = client.get(url)
assert rv.status_code == 200
assert len(rv.data) > 0
def test_raw_file_missing_file(self, client, example_repo_with_files):
repo_entry = example_repo_with_files
url = '/raw/%s/does/not/exist' % repo_entry.upload_hash
rv = client.get(url)
assert rv.status_code == 404
def test_raw_file_missing_upload(self, client, example_repo_with_files):
repo_entry = example_repo_with_files
url = '/raw/doesnotexist/%s' % repo_entry.mainfile
rv = client.get(url)
assert rv.status_code == 404
def test_raw_files(self, client, example_repo_with_files):
repo_entry = example_repo_with_files
url = '/raw/%s?files=%s,%s' % (
repo_entry.upload_hash, repo_entry.mainfile, ','.join(repo_entry.aux_files))
rv = client.get(url)
def test_raw_missing_auxfile(client, example_repo_with_files, no_warn):
rv = client.get('/raw/%s?auxfile=doesnotexist' % example_repo_with_files.archive_id)
assert rv.status_code == 404
assert rv.status_code == 200
assert len(rv.data) > 0
with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file:
assert zip_file.testzip() is None
assert len(zip_file.namelist()) == 5
def test_raw_files_missing_file(self, client, example_repo_with_files):
repo_entry = example_repo_with_files
url = '/raw/%s?files=%s,missing/file.txt' % (
repo_entry.upload_hash, repo_entry.mainfile)
rv = client.get(url)
def test_raw_all_files(client, example_repo_with_files, no_warn):
rv = client.get('/raw/%s?all=1' % example_repo_with_files.archive_id)
assert rv.status_code == 200
assert len(rv.data) > 0
with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file:
assert zip_file.testzip() is None
assert len(zip_file.namelist()) == 5
assert rv.status_code == 200
assert len(rv.data) > 0
with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file:
assert zip_file.testzip() is None
assert len(zip_file.namelist()) == 1
def test_raw_files_missing_upload(self, client, example_repo_with_files):
url = '/raw/doesnotexist?files=shoud/not/matter.txt'
rv = client.get(url)
def test_raw_missing_mainfile(client, no_warn):
rv = client.get('/raw/doesnot/exist')
assert rv.status_code == 404
assert rv.status_code == 404
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment