Commit cc33b1d5 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merge branch 'apisnippet' into v0.7.2

parents c49bd43f ba2ccbd2
......@@ -21,7 +21,7 @@ from typing import Dict, Any
from io import BytesIO
import os.path
from flask import send_file
from flask_restplus import abort, Resource
from flask_restplus import abort, Resource, fields
import json
import importlib
......@@ -32,8 +32,9 @@ from nomad import utils, search
from .auth import authenticate, create_authorization_predicate
from .api import api
from .repo import search_request_parser, add_query
from .common import calc_route, streamed_zipfile, build_snippet, to_json
from .common import calc_route, streamed_zipfile, search_model, add_pagination_parameters,\
add_scroll_parameters, add_search_parameters, apply_search_parameters,\
query_api_python, query_api_curl
ns = api.namespace(
'archive',
......@@ -108,22 +109,20 @@ class ArchiveCalcResource(Resource):
abort(404, message='Calculation %s does not exist.' % archive_id)
# Parser for the /archive/download endpoint: all regular search parameters
# plus an optional zip-compression flag.
# (Stale pre-merge `archives_from_query_parser` lines, including the removed
# `res_type` argument, dropped — the merged code uses this parser only.)
_archive_download_parser = api.parser()
add_search_parameters(_archive_download_parser)
_archive_download_parser.add_argument(
    name='compress', type=bool, help='Use compression on .zip files, default is not.',
    location='args')
@ns.route('/query')
class ArchiveQueryResource(Resource):
@ns.route('/download')
class ArchiveDownloadResource(Resource):
manifest_quantities = ['upload_id', 'calc_id', 'external_id', 'raw_id', 'pid', 'calc_hash']
@api.doc('archives_from_query')
@api.doc('archive_download')
@api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
@api.expect(archives_from_query_parser, validate=True)
@api.expect(_archive_download_parser, validate=True)
@api.response(200, 'File(s) send', headers={'Content-Type': 'application/zip'})
@authenticate(signature_token=True)
def get(self):
......@@ -139,14 +138,13 @@ class ArchiveQueryResource(Resource):
The zip file will contain a ``manifest.json`` with the repository meta data.
"""
try:
args = archives_from_query_parser.parse_args()
args = _archive_download_parser.parse_args()
compress = args.get('compress', False)
res_type = args.get('res_type')
except Exception:
abort(400, message='bad parameter types')
search_request = search.SearchRequest()
add_query(search_request, search_request_parser.parse_args())
apply_search_parameters(search_request, args)
calcs = search_request.execute_scan(order_by='upload_id')
......@@ -176,7 +174,7 @@ class ArchiveQueryResource(Resource):
manifest[calc_id] = {
key: entry[key]
for key in ArchiveQueryResource.manifest_quantities
for key in ArchiveDownloadResource.manifest_quantities
if entry.get(key) is not None
}
......@@ -196,16 +194,126 @@ class ArchiveQueryResource(Resource):
lambda *args: BytesIO(manifest_contents),
lambda *args: len(manifest_contents))
if res_type == 'zip':
return streamed_zipfile(
generator(), zipfile_name='nomad_archive.zip', compress=compress)
elif res_type == 'json':
archive_data = to_json(generator())
code_snippet = build_snippet(args, os.path.join(api.base_url, ns.name, 'query'))
data = {'archive_data': archive_data, 'code_snippet': code_snippet}
return data, 200
else:
raise Exception('Unknown res_type %s' % res_type)
return streamed_zipfile(
generator(), zipfile_name='nomad_archive.zip', compress=compress)
# Parser for the /archive/query endpoint: pagination plus scrolling arguments.
# NOTE(review): search parameters are not added to this parser here, yet the
# endpoint feeds its parse_args() into apply_search_parameters — confirm that
# add_search_parameters is not needed for search filters to take effect.
_archive_query_parser = api.parser()
add_pagination_parameters(_archive_query_parser)
add_scroll_parameters(_archive_query_parser)

# Extra response fields on top of the shared search_model: the raw archive
# results and ready-to-run python/curl snippets that reproduce the query.
_archive_query_model_fields = {
    'results': fields.List(fields.Raw, description=(
        'A list of search results. Each result is a dict with quantities names as key and '
        'values as values')),
    'python': fields.String(description=(
        'A string of python code snippet which can be executed to reproduce the api result.')),
    'curl': fields.String(description=(
        'A string of curl command which can be executed to reproduce the api result.')),
}
_archive_query_model = api.inherit('ArchiveCalculations', search_model, _archive_query_model_fields)
@ns.route('/query')
class ArchiveQueryResource(Resource):
    @api.doc('archive_query')
    @api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
    @api.response(401, 'Not authorized to access the data.')
    @api.response(404, 'The upload or calculation does not exist')
    @api.response(200, 'Archive data send')
    @api.expect(_archive_query_parser, validate=True)
    @api.marshal_with(_archive_query_model, skip_none=True, code=200, description='Search results sent')
    @authenticate(signature_token=True)
    def get(self):
        """
        Get archive data in json format from all query results.

        See ``/repo`` endpoint for documentation on the search
        parameters.

        The actual data are in archive_data and a supplementary python code (curl) to
        execute search is in python (curl).
        """
        try:
            # Drop unset arguments so .get() defaults below take effect.
            args = {
                key: value for key, value in _archive_query_parser.parse_args().items()
                if value is not None}
            scroll = args.get('scroll', False)
            scroll_id = args.get('scroll_id', None)
            page = args.get('page', 1)
            # Scrolled requests default to a much larger page size.
            per_page = args.get('per_page', 10 if not scroll else 1000)
            order = args.get('order', -1)
            order_by = 'upload_id'
        except Exception:
            abort(400, message='bad parameter types')

        # Validate pagination before hitting the search backend.
        try:
            assert page >= 1
            assert per_page > 0
        except AssertionError:
            abort(400, message='invalid pagination')

        if order not in [-1, 1]:
            abort(400, message='invalid pagination')

        search_request = search.SearchRequest()
        apply_search_parameters(search_request, _archive_query_parser.parse_args())

        try:
            if scroll:
                results = search_request.execute_scrolled(scroll_id=scroll_id, size=per_page)
            else:
                results = search_request.execute_paginated(
                    per_page=per_page, page=page, order=order, order_by=order_by)
        except search.ScrollIdNotFound:
            abort(400, 'The given scroll_id does not exist.')
        except KeyError as e:
            import traceback
            traceback.print_exc()
            abort(400, str(e))

        # build python code and curl snippet
        uri = os.path.join(api.base_url, ns.name, 'query')
        results['python'] = query_api_python(args, uri)
        results['curl'] = query_api_curl(args, uri)

        # Replace the search hits with the actual archive file contents.
        # Upload file handles are cached per upload_id since hits are not
        # guaranteed to be grouped by upload here.
        data = []
        calcs = results['results']
        try:
            upload_files = None
            for entry in calcs:
                upload_id = entry['upload_id']
                calc_id = entry['calc_id']
                if upload_files is None or upload_files.upload_id != upload_id:
                    if upload_files is not None:
                        upload_files.close_zipfile_cache()
                    upload_files = UploadFiles.get(
                        upload_id, create_authorization_predicate(upload_id))
                    if upload_files is None:
                        # Treated like a missing calculation (404 below).
                        raise KeyError
                    upload_files.open_zipfile_cache()
                fo = upload_files.archive_file(calc_id, 'rb')
                data.append(json.loads(fo.read()))
            if upload_files is not None:
                upload_files.close_zipfile_cache()
        except Restricted:
            abort(401, message='Not authorized to access %s/%s.' % (upload_id, calc_id))
        except KeyError:
            abort(404, message='Calculation %s/%s does not exist.' % (upload_id, calc_id))

        results['results'] = data

        return results, 200
@ns.route('/metainfo/<string:metainfo_package_name>')
......
......@@ -15,14 +15,17 @@
"""
Common data, variables, decorators, models used throughout the API.
"""
from typing import Callable, IO, Set, Tuple, Iterable
from flask_restplus import fields, abort
from typing import Callable, IO, Set, Tuple, Iterable, Dict, Any
from flask_restplus import fields
import zipstream
from flask import stream_with_context, Response
from flask import stream_with_context, Response, g, abort
import sys
import json
import os.path
from nomad.app.utils import RFC3339DateTime
from nomad import search
from nomad.app.optimade import filterparser
from nomad.app.utils import RFC3339DateTime, rfc3339DateTime
from nomad.files import Restricted
from .api import api
......@@ -52,18 +55,104 @@ pagination_model = api.model('Pagination', {
})
""" Model used in responses with pagination. """
search_model = api.model('Search', {
    'pagination': fields.Nested(pagination_model, skip_none=True),
    'scroll': fields.Nested(allow_null=True, skip_none=True, model=api.model('Scroll', {
        'total': fields.Integer(description='The total amount of hits for the search.'),
        'scroll_id': fields.String(allow_null=True, description='The scroll_id that can be used to retrieve the next page.'),
        # NOTE(review): `help=` looks unusual here — flask_restplus fields use
        # `description=`; confirm whether this text is rendered at all.
        'size': fields.Integer(help='The size of the returned scroll page.')})),
    # Typo fixed in user-facing description: "quantitie" -> "quantity".
    'results': fields.List(fields.Raw, description=(
        'A list of search results. Each result is a dict with quantity names as key and '
        'values as values')),
})
""" Model used in responses with search results, pagination, and scroll info. """
def add_pagination_parameters(request_parser):
    """ Add pagination parameters to Flask querystring parser.

    Adds ``page`` (1-based page number), ``per_page`` (page size),
    ``order_by`` (field name to sort by) and ``order`` (-1 for descending,
    1 for ascending) querystring arguments.
    """
    request_parser.add_argument(
        'page', type=int, help='The page, starting with 1.', location='args')
    request_parser.add_argument(
        'per_page', type=int, help='Desired calcs per page.', location='args')
    request_parser.add_argument(
        'order_by', type=str, help='The field to sort by.', location='args')
    # Typos fixed in user-facing help text: "decending"/"acending".
    request_parser.add_argument(
        'order', type=int, help='Use -1 for descending and 1 for ascending order.', location='args')
# Shared parser carrying only the pagination arguments; exported as a copy so
# endpoint-specific parsers can extend their own instance independently.
request_parser = api.parser()
add_pagination_parameters(request_parser)
pagination_request_parser = request_parser.copy()
def add_scroll_parameters(request_parser):
    """ Add scroll parameters to Flask querystring parser.

    Adds the ``scroll`` toggle and the ``scroll_id`` of the scrolling
    window to continue from.
    """
    scroll_arguments = (
        ('scroll', dict(type=bool, help='Enable scrolling')),
        ('scroll_id', dict(type=str, help='The id of the current scrolling window to use.')),
    )
    for argument_name, argument_kwargs in scroll_arguments:
        request_parser.add_argument(argument_name, **argument_kwargs)
def add_search_parameters(request_parser):
    """ Add search parameters to Flask querystring parser.

    Adds the owner filter, the RFC3339 time-range bounds, and one argument
    per quantity known to the search index.
    """
    # ownership filter
    request_parser.add_argument(
        'owner', type=str,
        help='Specify which calcs to return: ``all``, ``public``, ``user``, ``staging``, default is ``all``')

    # entry time window, both bounds parsed as RFC3339 datetimes
    for time_argument, bound_word in (('from_time', 'minimum'), ('until_time', 'maximum')):
        request_parser.add_argument(
            time_argument, type=rfc3339DateTime.parse,
            help='A yyyy-MM-ddTHH:mm:ss (RFC3339) %s entry time (e.g. upload time)' % bound_word)

    # one querystring argument per searchable quantity
    for quantity in search.quantities.values():
        multi_action = quantity.argparse_action if quantity.multi else None
        request_parser.add_argument(
            quantity.name, help=quantity.description, action=multi_action)
def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str, Any]):
    """
    Helper that adds query relevant request args to the given SearchRequest.

    (Stale pre-merge ``pagination_request_parser`` lines that the diff left
    interleaved inside this function body were removed.)

    Arguments:
        search_request: The SearchRequest to configure in place.
        args: Parsed request arguments; ``None`` values are ignored.

    Aborts the request with 400/401 on invalid owner, datetime, or optimade
    parameters.
    """
    args = {key: value for key, value in args.items() if value is not None}

    # owner
    owner = args.get('owner', 'all')
    try:
        search_request.owner(
            owner,
            g.user.user_id if g.user is not None else None)
    except ValueError as e:
        abort(401, getattr(e, 'message', 'Invalid owner parameter: %s' % owner))
    except Exception as e:
        abort(400, getattr(e, 'message', 'Invalid owner parameter'))

    # time range
    # NOTE(review): parsers built with add_search_parameters already convert
    # from_time/until_time to datetimes; re-parsing here assumes strings —
    # confirm rfc3339DateTime.parse tolerates datetime input.
    from_time_str = args.get('from_time', None)
    until_time_str = args.get('until_time', None)

    try:
        from_time = rfc3339DateTime.parse(from_time_str) if from_time_str is not None else None
        until_time = rfc3339DateTime.parse(until_time_str) if until_time_str is not None else None
        search_request.time_range(start=from_time, end=until_time)
    except Exception:
        abort(400, message='bad datetime format')

    # optimade
    try:
        optimade = args.get('optimade', None)
        if optimade is not None:
            q = filterparser.parse_filter(optimade)
            search_request.query(q)
    except filterparser.FilterException:
        abort(400, message='could not parse optimade query')

    # search parameter
    search_request.search_parameters(**{
        key: value for key, value in args.items()
        if key not in ['optimade'] and key in search.quantities})
def calc_route(ns, prefix: str = ''):
......@@ -156,25 +245,32 @@ def streamed_zipfile(
return response
def to_json(files: Iterable[Tuple[str, str, Callable[[str], IO], Callable[[str], int]]]):
    """
    Collect the JSON contents of the given files, keyed by file id.

    Files whose open callback raises KeyError are skipped silently;
    restricted files abort the request with 401.
    """
    contents = {}
    for _, file_id, open_io, _ in files:
        try:
            contents[file_id] = json.loads(open_io(file_id).read())
        except KeyError:
            continue
        except Restricted:
            abort(401, message='Not authorized to access %s.' % file_id)
    return contents
def resolve_query_api_url(args: Dict[str, Any], base_url: str):
    """
    Generates a uri from query parameters and base url.

    Returns a *python expression* as a string: a quoted literal url when the
    args are exactly a single upload_id/calc_id pair, otherwise the
    unevaluated expression ``"%s?%s" % (base_url, urlencode(args))`` that is
    meant to be embedded in generated client code.
    """
    if sorted(args.keys()) == ['calc_id', 'upload_id']:
        return '"%s"' % os.path.join(base_url, args['upload_id'], args['calc_id'])
    return '"%s?%s" % (base_url, urlencode(args))'
def build_snippet(args, base_url):
def query_api_python(args: Dict[str, Any], base_url: str):
"""
Creates a string of python code to execute a search query to the repository using
the requests library.
Arguments:
args: A dict of search parameters that will be encoded in the uri
base_url: The resource url which is prepended to the uri
"""
str_code = 'import requests\n'
str_code += 'from urllib.parse import urlencode\n'
str_code += '\n\n'
str_code += 'def query_repository(args, base_url):\n'
str_code += ' url = "%s?%s" % (base_url, urlencode(args))\n'
str_code += ' url = %s\n' % resolve_query_api_url(args, base_url)
str_code += ' response = requests.get(url)\n'
str_code += ' if response.status_code != 200:\n'
str_code += ' raise Exception("nomad return status %d" % response.status_code)\n'
......@@ -193,3 +289,15 @@ def build_snippet(args, base_url):
str_code += 'JSON_DATA = query_repository(args, base_url)\n'
return str_code
def query_api_curl(args: Dict[str, Any], base_url: str):
    """
    Creates a string of curl command to execute a search query to the repository.

    Arguments:
        args: A dict of search parameters that will be encoded in the uri
        base_url: The resource url which is prepended to the uri
    """
    from urllib.parse import urlencode

    args = {key: val for key, val in args.items() if val is not None}
    # Bug fix: resolve_query_api_url returns an *unevaluated python
    # expression* for general queries (meant for the generated python
    # snippet), which previously ended up literally inside the curl command.
    # Build a concrete url here instead.
    if sorted(args.keys()) == ['calc_id', 'upload_id']:
        uri = '"%s"' % os.path.join(base_url, args['upload_id'], args['calc_id'])
    else:
        uri = '"%s?%s"' % (base_url, urlencode(args))
    return 'curl -X GET %s -H "accept: application/json" --output "nomad.json"' % uri
......@@ -34,8 +34,7 @@ from nomad.processing import Calc
from .api import api
from .auth import authenticate, create_authorization_predicate
from .repo import search_request_parser, add_query
from .common import streamed_zipfile
from .common import streamed_zipfile, add_search_parameters, apply_search_parameters
ns = api.namespace('raw', description='Downloading raw data files.')
......@@ -56,16 +55,16 @@ raw_file_strip_argument = dict(
name='strip', type=bool, help='Removes a potential common path prefix from all file paths.',
location='args')
raw_file_from_path_parser = api.parser()
raw_file_from_path_parser.add_argument(**raw_file_compress_argument)
raw_file_from_path_parser.add_argument(**raw_file_strip_argument)
raw_file_from_path_parser.add_argument(
# Parser for single-file / path-based raw downloads: shared compress/strip
# arguments plus partial-read (length/offset) and on-the-fly decompression.
_raw_file_from_path_parser = api.parser()
_raw_file_from_path_parser.add_argument(**raw_file_compress_argument)
_raw_file_from_path_parser.add_argument(**raw_file_strip_argument)
_raw_file_from_path_parser.add_argument(
    name='length', type=int, help='Download only x bytes from the given file.',
    location='args')
_raw_file_from_path_parser.add_argument(
    name='offset', type=int, help='Start downloading a file\' content from the given offset.',
    location='args')
_raw_file_from_path_parser.add_argument(
    name='decompress', type=int, help='Automatically decompress the file if compressed. Only supports .gz',
    location='args')
......@@ -194,7 +193,7 @@ class RawFileFromUploadPathResource(Resource):
@api.response(404, 'The upload or path does not exist')
@api.response(401, 'Not authorized to access the requested files.')
@api.response(200, 'File(s) send')
@api.expect(raw_file_from_path_parser, validate=True)
@api.expect(_raw_file_from_path_parser, validate=True)
@authenticate(signature_token=True)
def get(self, upload_id: str, path: str):
""" Get a single raw calculation file, directory contents, or whole directory sub-tree
......@@ -255,7 +254,7 @@ class RawFileFromCalcPathResource(Resource):
@api.response(404, 'The upload or path does not exist')
@api.response(401, 'Not authorized to access the requested files.')
@api.response(200, 'File(s) send')
@api.expect(raw_file_from_path_parser, validate=True)
@api.expect(_raw_file_from_path_parser, validate=True)
@authenticate(signature_token=True)
def get(self, upload_id: str, calc_id: str, path: str):
""" Get a single raw calculation file, calculation contents, or all files for a
......@@ -295,7 +294,7 @@ class RawFileFromCalcEmptyPathResource(RawFileFromCalcPathResource):
@api.response(404, 'The upload or path does not exist')
@api.response(401, 'Not authorized to access the requested files.')
@api.response(200, 'File(s) send')
@api.expect(raw_file_from_path_parser, validate=True)
@api.expect(_raw_file_from_path_parser, validate=True)
@authenticate(signature_token=True)
def get(self, upload_id: str, calc_id: str):
""" Get calculation contents.
......@@ -306,7 +305,7 @@ class RawFileFromCalcEmptyPathResource(RawFileFromCalcPathResource):
return super().get(upload_id, calc_id, None)
raw_files_request_model = api.model('RawFilesRequest', {
_raw_files_request_model = api.model('RawFilesRequest', {
'files': fields.List(
fields.String, default=[], description='List of files to download.'),
'compress': fields.Boolean(
......@@ -314,11 +313,11 @@ raw_files_request_model = api.model('RawFilesRequest', {
description='Enable compression, default is not compression.')
})
raw_files_request_parser = api.parser()
raw_files_request_parser.add_argument(
_raw_files_request_parser = api.parser()
_raw_files_request_parser.add_argument(
'files', required=True, type=str, help='Comma separated list of files to download.', location='args')
raw_files_request_parser.add_argument(**raw_file_strip_argument)
raw_files_request_parser.add_argument(**raw_file_compress_argument)
_raw_files_request_parser.add_argument(**raw_file_strip_argument)
_raw_files_request_parser.add_argument(**raw_file_compress_argument)
@ns.route('/<string:upload_id>')
......@@ -329,7 +328,7 @@ class RawFilesResource(Resource):
@api.doc('get_files')
@api.response(404, 'The upload or path does not exist')
@api.response(200, 'File(s) send', headers={'Content-Type': 'application/zip'})
@api.expect(raw_files_request_model, validate=True)
@api.expect(_raw_files_request_model, validate=True)
@authenticate()
def post(self, upload_id):
""" Download multiple raw calculation files in a .zip file.
......@@ -346,7 +345,7 @@ class RawFilesResource(Resource):
@api.doc('get_files_alternate')
@api.response(404, 'The upload or path does not exist')
@api.response(200, 'File(s) send', headers={'Content-Type': 'application/zip'})
@api.expect(raw_files_request_parser, validate=True)
@api.expect(_raw_files_request_parser, validate=True)
@authenticate(signature_token=True)
def get(self, upload_id):
"""
......@@ -355,7 +354,7 @@ class RawFilesResource(Resource):
Zip files are streamed; instead of 401 errors, the zip file will just not contain
any files that the user is not authorized to access.
"""
args = raw_files_request_parser.parse_args()
args = _raw_files_request_parser.parse_args()
files_str = args.get('files')
compress = args.get('compress', False)
strip = args.get('strip', False)
......@@ -367,12 +366,13 @@ class RawFilesResource(Resource):
return respond_to_get_raw_files(upload_id, files, compress=compress, strip=strip)
raw_file_from_query_parser = search_request_parser.copy()
raw_file_from_query_parser.add_argument(
_raw_file_from_query_parser = api.parser()
add_search_parameters(_raw_file_from_query_parser)
_raw_file_from_query_parser.add_argument(
name='compress', type=bool, help='Use compression on .zip files, default is not.',
location='args')
raw_file_from_query_parser.add_argument(**raw_file_strip_argument)
raw_file_from_query_parser.add_argument(
_raw_file_from_query_parser.add_argument(**raw_file_strip_argument)
_raw_file_from_query_parser.add_argument(
name='file_pattern', type=str,
help=(
'A wildcard pattern. Only filenames that match this pattern will be in the '
......@@ -386,7 +386,7 @@ class RawFileQueryResource(Resource):
@api.doc('raw_files_from_query')
@api.response(400, 'Invalid requests, e.g. wrong owner type or bad search parameters')
@api.expect(raw_file_from_query_parser, validate=True)
@api.expect(_raw_file_from_query_parser, validate=True)
@api.response(200, 'File(s) send', headers={'Content-Type': 'application/zip'})
@authenticate(signature_token=True)
def get(self):
......@@ -403,7 +403,7 @@ class RawFileQueryResource(Resource):
"""
patterns: List[str] = None
try:
args = raw_file_from_query_parser.parse_args()
args = _raw_file_from_query_parser.parse_args()
compress = args.get('compress', False)
strip = args.get('strip', False)
pattern = args.get('file_pattern', None)
......@@ -417,7 +417,7 @@ class RawFileQueryResource(Resource):
abort(400, message='bad parameter types')
search_request = search.SearchRequest()
add_query(search_request, search_request_parser.parse_args())
apply_search_parameters(search_request, _raw_file_from_query_parser.parse_args())
def path(entry):
return '%s/%s' % (entry['upload_id'], entry['mainfile'])
......
This diff is collapsed.
......@@ -637,8 +637,8 @@ class TestArchive(UploadFilesBasedTests):
assert len(metainfo) > 0
@pytest.mark.parametrize('compress', [False, True])
def test_archive_zip_dowload_upload_id(self, api, non_empty_processed, test_user_auth, compress):
    # TODO: fix typo in test name, 'dowload' -> 'download'
    # Stale pre-merge lines (old name / '/archive/query' url) removed.
    url = '/archive/download?upload_id=%s&compress=%s' % (non_empty_processed.upload_id, 'true' if compress else 'false')
    rv = api.get(url, headers=test_user_auth)

    assert rv.status_code == 200
......@@ -648,9 +648,9 @@ class TestArchive(UploadFilesBasedTests):
{'atoms': 'Si'},
{'authors': 'Sheldon Cooper'}
])
def test_archive_zip_dowload(self, api, processeds, test_user_auth, query_params):
    # TODO: fix typo in test name, 'dowload' -> 'download'
    # Stale pre-merge lines (old name / '/archive/query' url) removed.
    url = '/archive/download?%s' % urlencode(query_params)
    rv = api.get(url, headers=test_user_auth)

    assert rv.status_code == 200
......@@ -660,22 +660,22 @@ class TestArchive(UploadFilesBasedTests):
manifest = json.load(f)
assert len(manifest) == len(processeds)
def test_archive_zip_dowload_empty(self, api, elastic):
    # TODO: fix typo in test name, 'dowload' -> 'download'
    # Stale pre-merge lines (old name / '/archive/query' url) removed.
    # An empty result set still yields a zip containing only the manifest.
    url = '/archive/download?upload_id=doesNotExist'
    rv = api.get(url)

    assert rv.status_code == 200
    assert_zip_file(rv, files=1)
def test_code_snippet(self, api, processeds, test_user_auth):
query_params = {'atoms': 'Si', 'res_type': 'json'}
def test_get_code_from_query(self, api, processeds, test_user_auth):
query_params = {'atoms': 'Si', 'res_type': 'json', 'order': 1, 'per_page':