From e2d16c2e67d0467f4ea1c9e623c6851a3f67c717 Mon Sep 17 00:00:00 2001
From: Alvin Noe Ladines <ladinesa@physik.hu-berlin.de>
Date: Wed, 15 Jan 2020 21:37:49 +0100
Subject: [PATCH] Implemented building of python code snippets in archive and
 repo

---
 nomad/app/api/archive.py | 19 +++++++++++++++---
 nomad/app/api/common.py  | 42 +++++++++++++++++++++++++++++++++++++++-
 nomad/app/api/repo.py    |  9 ++++++++-
 tests/app/test_api.py    | 18 +++++++++++++++++
 4 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/nomad/app/api/archive.py b/nomad/app/api/archive.py
index f620a35cc9..dafee37069 100644
--- a/nomad/app/api/archive.py
+++ b/nomad/app/api/archive.py
@@ -33,7 +33,7 @@ from nomad import utils, search
 from .auth import authenticate, create_authorization_predicate
 from .api import api
 from .repo import search_request_parser, add_query
-from .common import calc_route, streamed_zipfile
+from .common import calc_route, streamed_zipfile, build_snippet, to_json
 
 ns = api.namespace(
     'archive',
@@ -112,6 +112,10 @@ archives_from_query_parser = search_request_parser.copy()
 archives_from_query_parser.add_argument(
     name='compress', type=bool, help='Use compression on .zip files, default is not.',
     location='args')
+# without a default the parser stores None for a missing res_type, which is no valid type
+archives_from_query_parser.add_argument(
+    name='res_type', type=str, default='zip', location='args',
+    help='Type of return value, can be zip or json.')
 
 
 @ns.route('/query')
@@ -138,6 +142,7 @@ class ArchiveQueryResource(Resource):
         try:
             args = archives_from_query_parser.parse_args()
             compress = args.get('compress', False)
+            res_type = args.get('res_type', 'zip')
         except Exception:
             abort(400, message='bad parameter types')
 
@@ -192,8 +197,16 @@ class ArchiveQueryResource(Resource):
                 lambda *args: BytesIO(manifest_contents),
                 lambda *args: len(manifest_contents))
 
-        return streamed_zipfile(
-            generator(), zipfile_name='nomad_archive.zip', compress=compress)
+        if res_type == 'zip':
+            return streamed_zipfile(
+                generator(), zipfile_name='nomad_archive.zip', compress=compress)
+        elif res_type == 'json':
+            archive_data = to_json(generator())
+            code_snippet = build_snippet(args, os.path.join(api.base_url, ns.name, 'query'))
+            data = {'archive_data': archive_data, 'code_snippet': code_snippet}
+            return data, 200
+        else:
+            abort(400, message='Unknown res_type %s' % res_type)  # client error, not a 500
 
 
 @ns.route('/metainfo/<string:metainfo_package_name>')
diff --git a/nomad/app/api/common.py b/nomad/app/api/common.py
index f4cf761dcb..9eff6bf433 100644
--- a/nomad/app/api/common.py
+++ b/nomad/app/api/common.py
@@ -16,10 +16,11 @@
 Common data, variables, decorators, models used throughout the API.
 """
 from typing import Callable, IO, Set, Tuple, Iterable
-from flask_restplus import fields
+from flask_restplus import fields, abort
 import zipstream
 from flask import stream_with_context, Response
 import sys
+import json
 
 from nomad.app.utils import RFC3339DateTime
 from nomad.files import Restricted
@@ -153,3 +154,55 @@ def streamed_zipfile(
     response = Response(stream_with_context(generator()), mimetype='application/zip')
     response.headers['Content-Disposition'] = 'attachment; filename={}'.format(zipfile_name)
     return response
+
+
+def to_json(files: Iterable[Tuple[str, str, Callable[[str], IO], Callable[[str], int]]]):
+    """
+    Parses the contents of the given files as JSON and collects them in a dict.
+
+    Arguments:
+        files: 4-tuples of which only the second element (the file id) and the third
+            (a function that opens the file when called with the file id) are used.
+
+    Returns: A dict that maps each file id to its parsed JSON contents. Files for
+        which opening or reading raises a KeyError are left out.
+
+    Aborts the request with 401 if a file raises Restricted.
+    """
+    data = {}
+    for _, file_id, open_io, _ in files:
+        try:
+            # the context manager closes the file, which was left open before
+            with open_io(file_id) as f:
+                data[file_id] = json.loads(f.read())
+        except KeyError:
+            pass
+        except Restricted:
+            abort(401, message='Not authorized to access %s.' % file_id)
+    return data
+
+
+def build_snippet(args, base_url):
+    """
+    Builds and returns the source of a python script that repeats an API request
+    with the ``requests`` library.
+
+    Arguments:
+        args: Mapping of request parameters to values; None values are left out.
+        base_url: URL of the endpoint that the generated script queries.
+    """
+    # repr() quotes and escapes, so no key or value can break out of its literal
+    # NOTE(review): assumes values are plain literals (str, numbers, bool, lists)
+    items = ', '.join('%r: %r' % item for item in args.items() if item[1] is not None)
+    return '\n'.join([
+        'import requests',
+        'from urllib.parse import urlencode',
+        '', '',
+        'def query_repository(args, base_url):',
+        '    url = "%s?%s" % (base_url, urlencode(args, doseq=True))',  # lists repeat the key
+        '    response = requests.get(url)',
+        '    if response.status_code != 200:',
+        '        raise Exception("nomad return status %d" % response.status_code)',
+        '    return response.json()',
+        '', '',
+        'args = {%s}' % items, 'base_url = %r' % base_url,
+        'JSON_DATA = query_repository(args, base_url)', ''])
diff --git a/nomad/app/api/repo.py b/nomad/app/api/repo.py
index c98cdfacd5..d1ee22a524 100644
--- a/nomad/app/api/repo.py
+++ b/nomad/app/api/repo.py
@@ -24,6 +24,7 @@ from elasticsearch_dsl import Q
 from elasticsearch.exceptions import NotFoundError
 import elasticsearch.helpers
 from datetime import datetime
+import os.path
 
 from nomad import search, utils, datamodel, processing as proc, infrastructure
 from nomad.app.utils import rfc3339DateTime, RFC3339DateTime, with_logger
@@ -32,7 +33,7 @@ from nomad.datamodel import UserMetadata, Dataset, User
 
 from .api import api
 from .auth import authenticate
-from .common import pagination_model, pagination_request_parser, calc_route
+from .common import pagination_model, pagination_request_parser, calc_route, build_snippet
 
 ns = api.namespace('repo', description='Access repository metadata.')
 
@@ -80,6 +81,8 @@ repo_calcs_model_fields = {
         'value and quantity value as key. The possible metrics are code runs(calcs), %s. '
         'There is a pseudo quantity "total" with a single value "all" that contains the '
         ' metrics over all results. ' % ', '.join(datamodel.Domain.instance.metrics_names))),
+    'code_snippet': fields.String(description=(
+        'A string of python code snippet which can be executed to reproduce the api result.')),
 }
 for group_name, (group_quantity, _) in search.groups.items():
     repo_calcs_model_fields[group_name] = fields.Nested(api.model('RepoDatasets', {
@@ -300,6 +303,10 @@ class RepoCalcsResource(Resource):
                     if args.get(group_name, False):
                         results[group_name] = quantities[group_quantity]
 
+            # build python code snippet
+            snippet = build_snippet(args, os.path.join(api.base_url, ns.name, ''))
+            results['code_snippet'] = snippet
+
             return results, 200
         except search.ScrollIdNotFound:
             abort(400, 'The given scroll_id does not exist.')
diff --git a/tests/app/test_api.py b/tests/app/test_api.py
index 5e69ff1bf8..c962235af3 100644
--- a/tests/app/test_api.py
+++ b/tests/app/test_api.py
@@ -667,6 +667,16 @@ class TestArchive(UploadFilesBasedTests):
         assert rv.status_code == 200
         assert_zip_file(rv, files=1)
 
+    def test_code_snippet(self, api, processeds, test_user_auth):
+        """ The json result type of the archive query contains a code snippet. """
+        query = urlencode({'atoms': 'Si', 'res_type': 'json'})
+        rv = api.get('/archive/query?%s' % query, headers=test_user_auth)
+
+        assert rv.status_code == 200
+        data = json.loads(rv.data)
+        assert isinstance(data, dict)
+        assert data['code_snippet'] is not None
+
 
 class TestRepo():
     @pytest.fixture(scope='class')
@@ -1058,6 +1068,14 @@ class TestRepo():
         data = json.loads(rv.data)
         assert data['pagination']['total'] > 0
 
+    def test_code_snippet(self, api, example_elastic_calcs, test_user_auth):
+        """ The repo search result contains a code snippet. """
+        rv = api.get('/repo/?per_page=10', headers=test_user_auth)
+        assert rv.status_code == 200
+        snippet = json.loads(rv.data)['code_snippet']
+        assert snippet is not None
+        # the snippet is not executed here; exec did not seem to work in this test
+
 
 class TestEditRepo():
 
-- 
GitLab