Commit 05949cb0 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Merged nosql into master.

parents 58b7900e b58496cc
......@@ -5,3 +5,4 @@ __pycache__
.*~
.cache/
.idea/
*.iml
## Limitations of this version compared to prior SQL-based API
- no `calculations_list_matching_criteria` in search results
## REST API Implementation
Folder structure:
~~~
.
|____app
| |______init__.py
| |____api_v1_0
| | |______init__.py
| | |____errors.py
| | |____routes.py
| |____config.py
| |____decorators
| | |______init__.py
| | |____caching.py
| | |____crossdomain.py
| | |____json.py
| | |____paginate.py
| |____mockup
| | |____mockup.py
| | |____README.md
| | |____requirements.txt
| |____models.py
| |____static
| |____templates
| |____utils.py
|____nomadapprun.py
|____README.md
|____requirements.txt
|____tests
| |______init__.py
~~~
## REST API Implementation
This folder structure has two top-level folders:
- The Flask application lives inside a package generically named app.
- Unit tests are written in a tests package.
- [app/api_v1_0/routes.py](app/api_v1_0/routes.py) contains the actual API endpoint
implementations.
- [app/dmss.py](app/dmss.py) contains a dmss abstraction to access mongodb
- Unit tests are written in the [tests](tests) package.
There are also a few files:
- `requirements.txt` lists the package dependencies so that it is easy to regenerate an identical virtual environment on a different computer/server
- `config.py` stores the configuration settings (database, ...)
- `config.py` stores the configuration settings (database, ...). **note** you can also use the environment variables defined there.
- `nomadapprun.py` launches the application (and perhaps other application tasks in future)
**Note - This directory is not used anymore since there is a functional API**: the `mockup` directory, inside the `app` directory, contains a quick mockup server which returns the corresponding JSON output. It is needed for the GUI team to be able to test their requests live. The server returns _fake_ JSON output because there is no data available yet in the database, so no real queries are made against the database.
......@@ -12,16 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from flask import Flask, g
from flask_sqlalchemy import SQLAlchemy
from flask_marshmallow import Marshmallow
import logging
from elasticsearch import Elasticsearch
from flask import Flask
from flask_httpauth import HTTPBasicAuth
db = SQLAlchemy()
ma = Marshmallow()
from app.dmss import DMSS
from app.config import dmss_config, es_config
es = Elasticsearch([es_config['host']], port=es_config['port'])
dmss = DMSS()
http_auth = HTTPBasicAuth()
def create_app():
    """Create and configure the Flask application instance.

    Sets up basic logging, loads the configuration from ``app.config``,
    logs the effective DMSS/ES settings, and registers the v1.0 API
    blueprint under the ``/v1.0`` URL prefix.
    """
    # NOTE: the docstring was previously placed after the first statement,
    # which made it an ordinary (discarded) string literal, not a docstring.
    logging.basicConfig(level=logging.INFO)
    app = Flask(__name__)
    app.config.from_object('app.config')
    app.logger.info("Used DMSS config: %s " % str(dmss_config))
    app.logger.info("Used ES config: %s" % str(es_config))
    # register blueprints; imported here to avoid a circular import at module load
    from .api_v1_0 import api as api_blueprint
    app.register_blueprint(api_blueprint, url_prefix='/v1.0')
    return app


# module-level application instance used by the WSGI entry point
app = create_app()
......@@ -12,14 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from flask import Blueprint
from flask import Blueprint, jsonify
from flask_cors import CORS
from ..decorators import etag
from ..errors import InvalidEncRequest
api = Blueprint('api', __name__)
# enable CORS for entire Blueprint with default options
CORS(api)
from . import routes
# don't forget to uncomment if caching/etag needed
@api.after_request
......@@ -30,4 +33,46 @@ def after_request(rv):
"""
return rv
from . import routes, errors
@api.errorhandler(InvalidEncRequest)
def invalid_enc_request(e):
    """Serialize an InvalidEncRequest exception into a JSON error response."""
    payload = jsonify(e.to_dict())
    payload.status_code = e.status_code
    return payload
# registered app-wide (not just on this blueprint)
@api.app_errorhandler(404)
def not_found(e):
    """Return a JSON 404 response for unknown resource URIs."""
    body = {'status': 404, 'error': 'not found',
            'message': 'invalid resource URI'}
    response = jsonify(body)
    response.status_code = 404
    return response
# registered app-wide (not just on this blueprint)
@api.app_errorhandler(405)
def method_not_supported(e):
    """Return a JSON 405 response for disallowed HTTP methods."""
    body = {'status': 405, 'error': 'method not supported',
            'message': 'the method is not supported'}
    response = jsonify(body)
    response.status_code = 405
    return response
# this has to be an app-wide handler
@api.app_errorhandler(500)
def internal_server_error(e):
    """Return a JSON 500 response.

    Guards against exceptions raised without arguments: the previous
    ``e.args[0]`` would itself raise ``IndexError`` inside the error
    handler when ``e.args`` is empty, masking the original error.
    """
    message = e.args[0] if e.args else str(e)
    response = jsonify({'status': 500, 'error': 'internal server error',
                        'message': message})
    response.status_code = 500
    return response
# needed for example when searching by elements/formula
# but not using the exclusive parameter
@api.app_errorhandler(400)
def bad_request(e):
    """Return a JSON 400 response for malformed requests."""
    body = {'status': 400, 'error': 'bad request',
            'message': 'Bad request'}
    response = jsonify(body)
    response.status_code = 400
    return response
This diff is collapsed.
......@@ -12,13 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# needed just for testing
# SQLALCHEMY_DATABASE_URI = 'postgresql://nomadapp:Nomad_Flask@localhost:8000/nomaddb'
SQLALCHEMY_DATABASE_URI = 'postgresql://nomadapp:Nomad_Flask@localhost/nomaddb'
SQLALCHEMY_TRACK_MODIFICATIONS = False
# SQLALCHEMY_ECHO = True
import os
SECRET_KEY = 'InVzZXIxQGdtlsLmNvbSI.YH6iB4wE5dDMNjpf-cX2Q9MIjyY'
# not used right now
# SAML_PATH = 'saml'
schema = 'nd_p32'
# MongoDB (DMSS) connection settings; each value can be overridden through
# the corresponding ENC_DMSS_* environment variable.
dmss_config = {
    'host': os.environ.get('ENC_DMSS_HOST', 'localhost'),
    'port': int(os.environ.get('ENC_DMSS_PORT', '27017')),
    'db': os.environ.get('ENC_DMSS_DB', 'nomad_dmss'),
}

# Elasticsearch connection settings; overridable via ENC_ES_* env variables.
es_config = {
    'host': os.environ.get('ENC_ES_HOST', 'localhost'),
    'port': int(os.environ.get('ENC_ES_PORT', '9200')),
    'index': os.environ.get('ENC_ES_INDEX', 'nomad'),
}
# Copyright 2016-2018 Ioan Vancea, Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
# A script to create a new schema and a new table to contain
# the data history statistics for main DB. The same database is used.
# RUN: python create_stats_table.py
# - will create a new schema: data_history
# - will create a new table: stats
# - id: primary key
# - date: the date when the statistics were collected
# - summary: a json which contains the collected data
# Note: Marshmallow related data is not needed here, could be removed
##
import datetime
from flask import Flask
from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.dialects.postgresql import JSON
from flask_marshmallow import Marshmallow
# DB schema that will hold the statistics table
tb_schema = 'data_history'

# standalone Flask app used only to run this one-off creation script
app = Flask(__name__)
# apply configuration
app.config['SQLALCHEMY_DATABASE_URI'] = 'postgresql://nomadapp:Nomad_Flask@localhost/nomaddb'
# app.config.from_object('app.config')
# app.config['SQLALCHEMY_DATABASE_URI'] = 'postgresql://localhost/nomaddb'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
# echo every generated SQL statement (useful while running the script)
app.config['SQLALCHEMY_ECHO'] = True

db = SQLAlchemy(app)
ma = Marshmallow(app)
class Stats(db.Model):
    """One row of collected data-history statistics."""
    __tablename__ = 'stats'
    __table_args__ = {'schema': tb_schema}
    # primary key
    id = db.Column(db.Integer, primary_key=True)
    # date when the statistics were collected. The default must be the
    # callable itself: the previous ``datetime.date.today()`` was evaluated
    # once at import time, freezing that day's date into every future row.
    date = db.Column(db.Date, default=datetime.date.today,
                     nullable=False, index=True)
    # JSON blob containing the collected data
    summary = db.Column(JSON, nullable=False)
class StatsSchema(ma.ModelSchema):
    """Marshmallow schema for serializing ``Stats`` rows."""
    class Meta:
        # derive fields from the Stats model
        model = Stats
    # render the date field as day-month-year
    date = ma.DateTime(format="%d-%m-%Y")


# serializer for a single Stats row
stats_schema = StatsSchema()
# serializer for a list of Stats rows
m_stats_schema = StatsSchema(many=True)
if __name__ == '__main__':
    # One-off setup: create the schema and the stats table in the database.
    from sqlalchemy.schema import CreateSchema
    # create the new DB schema
    db.engine.execute(CreateSchema(tb_schema))
    # create the new table
    db.create_all()
    # app.run()
......@@ -13,5 +13,5 @@
# limitations under the License.
from .json import json
from .paginate import paginate
from .paginate import paginate, paginate_queryparams
from .caching import cache_control, no_cache, etag
......@@ -37,11 +37,15 @@ def json(f):
# if the response was a database model, then convert it to a
# dictionary
if not isinstance(rv, dict):
rv = rv.export_data()
# TODO deprecated
# if not isinstance(rv, dict):
# rv = rv.export_data()
# rv = rv.id
# generate the JSON response
if rv is None:
return jsonify({"message": "Query without result."}), 500
rv = jsonify(rv)
if status is not None:
rv.status_code = status
......
......@@ -12,88 +12,91 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import functools
from flask import url_for, request
import urllib.parse as urlparse
from flask import request
from urllib.parse import urlencode
def paginate_queryparams(func):
    """Paginate *func* based on the request's URL query parameters.

    Reads ``pagination``, ``page`` and ``per_page`` from the query string
    and delegates the actual work to :func:`paginate`. Passing
    ``pagination=off`` disables paging entirely.
    """
    def analyze_queryparams():
        if request.args.get('pagination') == 'off':
            return False, None, None
        else:
            return True, request.args.get('page'), request.args.get('per_page')

    return paginate(analyze_queryparams)(func)
The output of this decorator is a Python dictionary with the paginated
results. The application must ensure that this result is converted to a
response object, either by chaining another decorator or by using a
custom response object that accepts dictionaries.
"""
def decorator(f):
@functools.wraps(f)
def paginate(analyze_request):
    """Decorator factory that paginates a resource-returning function.

    ``analyze_request`` is a callable returning a tuple
    ``(paginate, page, per_page)`` where *paginate* says whether paging is
    enabled and *page*/*per_page* may be ``None`` or string values taken
    from the request.

    The wrapped function is invoked with ``page`` and ``per_page`` keyword
    arguments and must return either ``(results, total_results)`` or
    ``(results, total_results, additional_keys)`` where *additional_keys*
    is a dict merged into the response dictionary.
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapped(*args, **kwargs):
            # renamed from `paginate` to avoid shadowing the outer function
            do_paginate, page, per_page = analyze_request()
            if page is not None:
                page = int(page)
            if per_page is not None:
                per_page = int(per_page)
            additional_keys = {}
            if do_paginate:
                if page is None or page < 1:
                    page = 1
                if per_page is None or per_page < 1:
                    per_page = 25
                kwargs["page"] = page
                kwargs["per_page"] = per_page
                func_results = func(*args, **kwargs)
                # NOTE(review): `assert` is stripped under -O; kept to
                # preserve behavior, but a ValueError would be more robust.
                assert len(func_results) == 2 or len(func_results) == 3
                results, total_results = [], 0
                if len(func_results) == 2:
                    results, total_results = func_results
                elif len(func_results) == 3:
                    results, total_results, additional_keys = func_results
                total_pages = math.ceil(total_results / per_page)
                pages = {
                    "page": page,
                    "per_page": per_page,
                    "pages": total_pages,
                    "total": total_results
                }

                def url(page):
                    # rebuild the current URL with the `page` query
                    # parameter replaced
                    url = request.url
                    url_parts = list(urlparse.urlparse(url))
                    query = dict(urlparse.parse_qsl(url_parts[4]))
                    query["page"] = page
                    url_parts[4] = urlencode(query)
                    return urlparse.urlunparse(url_parts)

                # navigation URLs only make sense for GET requests
                if request.method == 'GET':
                    if page > 1:
                        pages['prev_url'] = url(page - 1)
                    if page < total_pages:
                        pages['next_url'] = url(page + 1)
                    pages['first_url'] = url(1)
                    pages['last_url'] = url(total_pages)
                result_dict = dict(results=results, pages=pages, total_results=total_results)
            else:
                # pagination disabled: fetch (up to) everything in one page
                kwargs["page"] = 1
                kwargs["per_page"] = 10000
                results, total_results = func(*args, **kwargs)
                result_dict = dict(results=results, total_results=total_results)
            for key, value in additional_keys.items():
                result_dict[key] = value
            return result_dict
        return wrapped
    return decorator
# Copyright 2016-2018 Ioan Vancea, Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from pymongo import MongoClient
from app.config import dmss_config
_default_per_page = 25
# transforms
def json_str(value):
    """Parse a JSON-encoded string into the corresponding Python value."""
    parsed = json.loads(value)
    return parsed
def int_id(value):
    """Coerce an id value (e.g. a numeric string) to ``int``."""
    return int(value)
def vector(value):
    """Format a sequence as ``(a, b, c)`` with JSON-rendered, unquoted items."""
    rendered = [json.dumps(item).strip('"') for item in value]
    return "(%s)" % ", ".join(rendered)
# Known diagram-style data type identifiers (presumably the values of
# `data_content` entries in the diagram_datas collection -- TODO confirm
# against the ingestion pipeline).
diagram_data_types = ['dos', 'band_structure', 'phonon_dos', 'phonon_dispersion', 'specific_heat_cv',
                      'helmholtz_free_energy', 'qha_bulk_modulus', 'qha_helmholtz_free_energy', 'qha_mass_density',
                      'qha_specific_heat_cv', 'qha_thermal_expansion', 'gw_spectral_function', 'elastic_toolbox_data']
# Declarative description of the mongo collections, keyed by entity key.
# Per entry:
#   key          - entity identifier (same string as the dict key)
#   collection   - mongo collection name
#   label        - human-readable name
#   parent /
#   parent_key   - owning entity and its foreign-key field (None for roots)
#   transforms   - (field, callable) pairs; a (entity, callable) tuple as the
#                  second element presumably marks a reference to another
#                  entity -- TODO confirm against DMSS query code
#   defaults     - (field, value) pairs, presumably used when a field is
#                  missing from a document -- TODO confirm
#   aggregate    - extra mongo aggregation stages (e.g. a $lookup join)
model = {
    value["key"]: value for value in [
        {
            "key": "material",
            "collection": "material",
            "label": "material",
            "parent": None,
            "parent_key": None,
            "transforms": [],
            "defaults": [
                ("periodicity", None)
            ]
        },
        {
            "key": "calc",
            "collection": "calc",
            "label": "calculation",
            "parent": "material",
            "parent_key": "material_id",
            "transforms": [
                ("brillouin_zone_json", json_str),
                ("lattice_parameters", vector),
                ("settings_basis_set_json", json_str),
                ("wyckoff_groups_json", json_str),
                ("smearing", vector),
                ("material_id", ("material", int_id))
            ],
            "defaults": [
                ("energy", [])
            ],
            # join the energys collection into each calc document
            "aggregate": [
                {
                    "$lookup": {
                        "from": "energys",
                        "localField": "_id",
                        "foreignField": "calc_id",
                        "as": "energy"
                    }
                }
            ]
        },
        {
            "key": "element",
            "collection": "elements",
            "label": "element",
            "parent": "material",
            "parent_key": "material_id",
            "transforms": [
                ("position", vector)
            ]
        },
        {
            "key": "cell",
            "collection": "cells",
            "label": "cell",
            "parent": "material",
            "parent_key": "material_id",
            # the three lattice vectors are all rendered with `vector`
            "transforms": [(i, vector) for i in ["a", "b", "c"]]
        },
        {
            "key": "contributor",
            "collection": "contributors",
            "label": "contributor",
            "parent": "calc",
            "parent_key": "calc_id",
            "transforms": []
        },
        {
            "key": "energy",
            "collection": "energys",
            "label": "energy",
            "parent": "calc",
            "parent_key": "calc_id",
            "transforms": [
                ("calc_id", ("calc", int_id))
            ]
        },
        {
            "key": "diagram_data",
            "collection": "diagram_datas",
            "label": "diagram data",
            "parent": "calc",
            "parent_key": "calc_id",
            "transforms": [("data_content", json_str)]
        }
    ]
}
class DMSS:
"""
This class provides an abstraction for accessing data in the mongo dmss without having to use the
mongo client directly. Allows to create complex aggregations that are executed right within the
mongodb server. Those aggregation allow to match, join, paginate, project (and exclude single properties).
It is basically a SQLAlchemy/SQL replacement.
"""
def __init__(self):
super().__init__()
self._mongo_client = MongoClient(dmss_config['host'], dmss_config['port'])
self._mongo_dmss = self</