Commit d028b9ed authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Added all calc raw file zipball download.

parent f2885980
......@@ -12,13 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from flask import Flask, request, g, jsonify, send_file
from werkzeug.exceptions import HTTPException
from flask import Flask, request, g, jsonify, send_file, Response
from flask_restful import Resource, Api, abort
from flask_cors import CORS
from flask_httpauth import HTTPBasicAuth
from elasticsearch.exceptions import NotFoundError
from datetime import datetime
import os.path
import zipstream
from zipfile import ZIP_DEFLATED
from contextlib import contextmanager
import types
from nomad import config, infrastructure
from nomad.files import UploadFile, ArchiveFile, ArchiveLogFile
......@@ -742,7 +747,7 @@ def get_raw(upload_hash, calc_hash):
"""
Get calculation mainfile raw data. Calcs are referenced via *upload_hash*, *calc_hash*
pairs. Returns the mainfile, unless an aux_file is specified. Aux files are stored
in repository entries.
in repository entries. See ``/repo`` endpoint.
.. :quickref: repo; Get calculation raw data.
......@@ -756,6 +761,7 @@ def get_raw(upload_hash, calc_hash):
:param string upload_hash: the hash of the upload (from uploaded file contents)
:param string calc_hash: the hash of the calculation (from mainfile)
:qparam str auxfile: an optional aux_file to download the respective aux file, default is mainfile
:qparam all: set any value to get a .zip with main and aux files instead of an individual file
:resheader Content-Type: application/json
:status 200: calc raw data successfully retrieved
:status 404: calc with given hashes does not exist or the given aux file does not exist
......@@ -769,33 +775,76 @@ def get_raw(upload_hash, calc_hash):
except Exception as e:
abort(500, message=str(e))
auxfile = request.args.get('auxfile', None)
if auxfile:
filename = os.path.join(os.path.dirname(repo.mainfile), auxfile)
@contextmanager
def raw_file(filename):
try:
upload = Upload.get(repo.upload_id)
upload_file = UploadFile(upload.upload_id, local_path=upload.local_path)
the_file = upload_file.get_file(filename)
with the_file.open() as f:
yield f
except KeyError:
abort(404, message='The file %s does not exist.' % filename)
except FileNotFoundError:
abort(404, message='The file %s does not exist.' % filename)
get_all = request.args.get('all', None) is not None
if get_all:
# retrieve the 'whole' calculation, meaning the mainfile and all aux files as
# a .zip archive
def generator():
""" Stream a zip file with all files using zipstream. """
def iterator():
""" Replace the directory based iter of zipstream with an iter over all raw files. """
def write(filename):
""" Write a raw file to the zipstream. """
def iter_content():
""" Iterate the raw file contents. """
with raw_file(filename) as file_object:
while True:
data = file_object.read(1024)
if not data:
break
yield data
return dict(arcname=filename, iterable=iter_content())
yield write(repo.mainfile)
for auxfile in repo.aux_files:
yield write(os.path.join(os.path.dirname(repo.mainfile), auxfile))
zip_stream = zipstream.ZipFile(mode='w', compression=ZIP_DEFLATED)
zip_stream.paths_to_write = iterator()
for chunk in zip_stream:
yield chunk
response = Response(generator(), mimetype='application/zip')
response.headers['Content-Disposition'] = 'attachment; filename={}'.format('%s.zip' % archive_id)
return response
else:
filename = repo.mainfile
# retrieve an individual raw file
auxfile = request.args.get('auxfile', None)
if auxfile:
filename = os.path.join(os.path.dirname(repo.mainfile), auxfile)
else:
filename = repo.mainfile
try:
upload = Upload.get(repo.upload_id)
upload_file = UploadFile(upload.upload_id, local_path=upload.local_path)
the_file = upload_file.get_file(filename)
with the_file.open() as f:
rv = send_file(
f,
mimetype='application/octet-stream',
as_attachment=True,
attachment_filename=os.path.basename(filename))
return rv
except KeyError:
abort(404, message='The file %s does not exist.' % filename)
except FileNotFoundError:
abort(404, message='The file %s does not exist.' % filename)
except Exception as e:
logger = get_logger(
__name__, endpoint='archive', action='get',
upload_hash=upload_hash, calc_hash=calc_hash)
logger.error('Exception on accessing archive', exc_info=e)
abort(500, message='Could not accessing the archive.')
try:
with raw_file(filename) as f:
rv = send_file(
f,
mimetype='application/octet-stream',
as_attachment=True,
attachment_filename=os.path.basename(filename))
return rv
except HTTPException as e:
raise e
except Exception as e:
logger = get_logger(
__name__, endpoint='archive', action='get',
upload_hash=upload_hash, calc_hash=calc_hash)
logger.error('Exception on accessing archive', exc_info=e)
abort(500, message='Could not accessing the archive.')
@app.route('%s/admin/<string:operation>' % base_path, methods=['POST'])
......
......@@ -26,6 +26,10 @@ import requests
from requests.auth import HTTPBasicAuth
import click
from nomad import config
from nomad.files import UploadFile
api_base = 'http://localhost/nomad/api'
user = 'other@gmail.com'
pw = 'nomad'
......@@ -44,13 +48,15 @@ def handle_common_errors(func):
@handle_common_errors
def upload_file(file_path, name=None, offline=False):
def upload_file(file_path: str, name: str = None, offline: bool = False):
"""
Upload a file to nomad.
Arguments:
file_path: Path to the file, absolute or relative to call directory.
name: Optional name, default is the file_path's basename
file_path: path to the file, absolute or relative to call directory
name: optional name, default is the file_path's basename
offline: allows processing data without an upload; requires the client to be run on the server
"""
auth = HTTPBasicAuth(user, pw)
......@@ -115,6 +121,51 @@ def walk_through_files(path, extension='.zip'):
yield os.path.abspath(os.path.join(dirpath, filename))
class CalcProcReproduction(UploadFile):
    """
    Instances represent a local reproduction of the processing for a single calculation.
    It allows to download raw data from a nomad server and reproduce its processing
    (parsing, normalizing) with the locally installed parsers and normalizers.

    The use-case is error/warning reproduction. Use ELK to identify errors, use
    the upload, archive ids/hashes given by ELK, and reproduce and fix the error
    in your development environment.

    This is a subclass of :class:`UploadFile`; the downloaded raw data will be treated as
    a fake 'upload' that only contains the respective calculation data. This allows us
    to locally run processing code that is very similar to the one used on the server.

    Arguments:
        archive_id: the id of the calculation whose raw files should be reproduced
    """

    def __init__(self, archive_id: str) -> None:
        """
        Download the raw files of the given calculation as a .zip (unless a file for
        this *archive_id* already exists in ``config.fs.tmp``) and initialize the
        underlying :class:`UploadFile` on it.

        Raises:
            requests.HTTPError: if the server answers the download with an error status
        """
        local_path = os.path.join(config.fs.tmp, '%s.zip' % archive_id)
        if not os.path.exists(local_path):
            # download raw if not already downloaded

            # the archive_id may contain path separators, make sure the target
            # directory exists before writing to it
            target_dir = os.path.dirname(local_path)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            # Write to a temporary name first and rename on success. Otherwise an
            # interrupted download would leave a truncated file that all later runs
            # would mistake for a complete, cached download.
            partial_path = local_path + '.part'
            req = requests.get('%s/raw/%s?all=1' % (api_base, archive_id), stream=True)
            try:
                # do not store an error response body as if it were the zip file
                req.raise_for_status()
                with open(partial_path, 'wb') as f:
                    # iter_content defaults to a chunk size of a single byte
                    for chunk in req.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
            finally:
                req.close()
            os.replace(partial_path, local_path)

        super().__init__(upload_id='tmp_%s' % archive_id, local_path=local_path)

    def parse(self, parser_name: str = None):
        """
        Run the given parser on the downloaded calculation. If no parser is given,
        do parser matching and use the respective parser.

        Not implemented yet.
        """
        pass

    def normalize(self, normalizer_name: str):
        """
        Parse the downloaded calculation and run the given normalizer.

        Not implemented yet.
        """
        pass

    def normalize_all(self):
        """
        Parse the downloaded calculation and run the whole normalizer chain.

        Not implemented yet.
        """
        pass
@click.group()
@click.option('--host', default='localhost', help='The host nomad runs on, default is "localhost".')
@click.option('--port', default=80, help='the port nomad runs with, default is 80.')
......
......@@ -21,4 +21,5 @@ requests
click
sphinx
sphinxcontrib.httpdomain
sphinx_rtd_theme
\ No newline at end of file
sphinx_rtd_theme
zipstream
\ No newline at end of file
......@@ -352,6 +352,14 @@ def test_raw_missing_auxfile(client, example_repo_with_files, no_warn):
assert rv.status_code == 404
def test_raw_all_files(client, example_repo_with_files, no_warn):
    """
    Request all raw files of a calculation (``?all=1``) and check that the response
    is a non-empty, readable zip archive.
    """
    import io
    import zipfile

    rv = client.get('/raw/%s?all=1' % example_repo_with_files.archive_id)
    assert rv.status_code == 200
    assert len(rv.data) > 0

    # Validate the archive in memory instead of dumping a 'test.zip' into the
    # current working directory, which polluted the checkout and verified nothing.
    with zipfile.ZipFile(io.BytesIO(rv.data)) as zip_file:
        # testzip returns the name of the first corrupt member, None if all are fine
        assert zip_file.testzip() is None
        assert len(zip_file.namelist()) > 0
def test_raw_missing_mainfile(client, no_warn):
    """ Requesting raw data for hashes that do not exist has to result in a 404. """
    response = client.get('/raw/doesnot/exist')
    status = response.status_code
    assert status == 404
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment