Commit 843414f9 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Refactored datamodel to use metainfo2. #221

parent 29e58b1c
......@@ -216,9 +216,6 @@ There are three catergories of metadata:
Those sets of metadata along with the actual raw and archive data are often transformed,
passed, stored, etc. by the various nomad modules.
.. figure:: datamodel_metadataflow.png
:alt: nomad's metadata flow
### Implementation
The different entities often have multiple implementations for different storage systems.
For example, aspects of calculations are stored in files (raw files, calc metadata, archive data),
......
from nomad import datamodel
print(datamodel.CalcWithMetadata(domain='DFT', calc_id='test').__class__.__name__)
print(datamodel.CalcWithMetadata(calc_id='test').__class__.__name__)
print(datamodel.CalcWithMetadata(domain='EMS', calc_id='test').__class__.__name__)
print(datamodel.EntryMetadata(domain='DFT', calc_id='test').__class__.__name__)
print(datamodel.EntryMetadata(calc_id='test').__class__.__name__)
print(datamodel.EntryMetadata(domain='EMS', calc_id='test').__class__.__name__)
......@@ -89,7 +89,7 @@ class MetaInfoBrowser extends Component {
update(pkg) {
this.props.api.getInfo().then(info => {
const domain = info.domains.find(domain => domain.name === 'dft') // TODO deal with domains
const domain = info.domains.find(domain => domain.name === 'dft') // TODO deal with domains
this.props.api.getMetaInfo(pkg || domain.metainfo.all_package).then(metainfos => {
const metainfoName = this.props.metainfo || domain.metainfo.root_sections[0]
const definition = metainfos.get(metainfoName)
......@@ -108,7 +108,7 @@ class MetaInfoBrowser extends Component {
init() {
this.props.api.getInfo().then(info => {
const domain = info.domains.find(domain => domain.name === 'dft') // TODO deal with domains
const domain = info.domains.find(domain => domain.name === 'dft') // TODO deal with domains
this.props.api.getMetaInfo(domain.metainfo.all_package).then(metainfos => {
const metainfoName = this.props.metainfo || domain.metainfo.root_sections[0]
const definition = metainfos.get(metainfoName)
......
......@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
'''
This module comprises the nomad@FAIRDI APIs. Currently there is NOMAD's official api, and
we will soon add the optimade api. The app module also serves documentation, gui, and
alive.
"""
'''
from flask import Flask, Blueprint, jsonify, url_for, abort, request
from flask_restplus import Api
from flask_cors import CORS
......@@ -36,11 +36,11 @@ from . import common
@property # type: ignore
def specs_url(self):
"""
'''
Fixes issue where swagger-ui makes a call to swagger.json over HTTP.
This can ONLY be used on servers that actually use HTTPS. On servers that use HTTP,
this code should not be used at all.
"""
'''
return url_for(self.endpoint('specs'), _external=True, _scheme='https')
......@@ -49,7 +49,7 @@ if config.services.https:
app = Flask(__name__)
""" The Flask app that serves all APIs. """
''' The Flask app that serves all APIs. '''
app.config.APPLICATION_ROOT = common.base_path # type: ignore
app.config.RESTPLUS_MASK_HEADER = False # type: ignore
......@@ -105,7 +105,7 @@ def handle(error: Exception):
@app.route('/alive')
def alive():
""" Simple endpoint to utilize kubernetes liveness/readiness probing. """
''' Simple endpoint to utilize kubernetes liveness/readiness probing. '''
return "I am, alive!"
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
'''
The official NOMAD API.
There is a separate documentation for the API endpoints from a client perspective.
......@@ -22,7 +22,7 @@ There is a separate documentation for the API endpoints from a client perspectiv
.. automodule:: nomad.app.api.upload
.. automodule:: nomad.app.api.repo
.. automodule:: nomad.app.api.archive
"""
'''
from .api import blueprint
from . import info, auth, upload, repo, archive, raw, mirror, dataset
......@@ -23,7 +23,7 @@ api = Api(
version='1.0', title='NOMAD API',
description='Official NOMAD API',
validate=True)
""" Provides the flask restplus api instance for the regular NOMAD api"""
''' Provides the flask restplus api instance for the regular NOMAD api'''
# For some unknown reason it is necessary for each fr api to have a handler.
# Otherwise the global app error handler won't be called.
......
......@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
'''
The archive API of the nomad@FAIRDI APIs. This API is about serving processed
(parsed and normalized) calculation data in nomad's *meta-info* format.
"""
'''
from typing import Dict, Any
from io import BytesIO
......@@ -51,11 +51,11 @@ class ArchiveCalcLogResource(Resource):
@api.response(200, 'Archive data send', headers={'Content-Type': 'application/plain'})
@authenticate(signature_token=True)
def get(self, upload_id, calc_id):
"""
'''
Get calculation processing log.
Calcs are references via *upload_id*, *calc_id* pairs.
"""
'''
archive_id = '%s/%s' % (upload_id, calc_id)
upload_files = UploadFiles.get(
......@@ -85,11 +85,11 @@ class ArchiveCalcResource(Resource):
@api.response(200, 'Archive data send')
@authenticate(signature_token=True)
def get(self, upload_id, calc_id):
"""
'''
Get calculation data in archive form.
Calcs are references via *upload_id*, *calc_id* pairs.
"""
'''
archive_id = '%s/%s' % (upload_id, calc_id)
upload_file = UploadFiles.get(
......@@ -128,7 +128,7 @@ class ArchiveDownloadResource(Resource):
@api.response(200, 'File(s) send', headers={'Content-Type': 'application/zip'})
@authenticate(signature_token=True)
def get(self):
"""
'''
Get calculation data in archive form from all query results.
See ``/repo`` endpoint for documentation on the search
......@@ -138,7 +138,7 @@ class ArchiveDownloadResource(Resource):
any files that the user is not authorized to access.
The zip file will contain a ``manifest.json`` with the repository meta data.
"""
'''
try:
args = _archive_download_parser.parse_args()
compress = args.get('compress', False)
......@@ -229,7 +229,7 @@ class ArchiveQueryResource(Resource):
@api.marshal_with(_archive_query_model, skip_none=True, code=200, description='Search results sent')
@authenticate()
def post(self):
"""
'''
Post a query schema and return it filled with archive data.
See ``/repo`` endpoint for documentation on the search
......@@ -237,7 +237,7 @@ class ArchiveQueryResource(Resource):
The actual data are in results and a supplementary python code (curl) to
execute search is in python (curl).
"""
'''
try:
data_in = request.get_json()
scroll = data_in.get('scroll', None)
......@@ -323,9 +323,9 @@ class MetainfoResource(Resource):
@api.response(404, 'The metainfo does not exist')
@api.response(200, 'Metainfo data send')
def get(self, metainfo_package_name):
"""
'''
Get a metainfo definition file.
"""
'''
try:
return load_metainfo(metainfo_package_name), 200
except FileNotFoundError:
......@@ -345,7 +345,7 @@ metainfo_main_path = os.path.dirname(os.path.abspath(nomad_meta_info.__file__))
def load_metainfo(
package_name_or_dependency: str, dependency_source: str = None,
loaded_packages: Dict[str, Any] = None) -> Dict[str, Any]:
"""
'''
Loads the given metainfo package and all its dependencies. Returns a dict with
all loaded package_names and respective packages.
......@@ -354,7 +354,7 @@ def load_metainfo(
dependency_source: The path of the metainfo that uses this function to load a relative dependency.
loaded_packages: Give a dict and the function will add freshly loaded packages
to it and return it.
"""
'''
if loaded_packages is None:
loaded_packages = {}
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
'''
The API is protected with *keycloak* and *OpenIDConnect*. All API endpoints that require
or support authentication accept OIDC bearer tokens via HTTP header (``Authentication``).
These tokens can be acquired from the NOMAD keycloak server or through the ``/auth`` endpoint
......@@ -29,7 +29,7 @@ decorator.
To allow authentication with signed urls, use this decorator:
.. autofunction:: with_signature_token
"""
'''
from flask import g, request
from flask_restplus import abort, Resource, fields
import functools
......@@ -69,11 +69,11 @@ api.authorizations = {
def _verify_upload_token(token) -> str:
"""
'''
Verifies the upload token generated with :func:`generate_upload_token`.
Returns: The user UUID or None if the token could not be verified.
"""
'''
payload, signature = token.split('.')
payload = utils.base64_decode(payload)
signature = utils.base64_decode(signature)
......@@ -92,7 +92,7 @@ def _verify_upload_token(token) -> str:
def authenticate(
basic: bool = False, upload_token: bool = False, signature_token: bool = False,
required: bool = False, admin_only: bool = False):
"""
'''
A decorator to protect API endpoints with authentication. Uses keycloak access
token to authenticate users. Other methods might apply. Will abort with 401
if necessary.
......@@ -103,7 +103,7 @@ def authenticate(
signature_token: Also allow signed urls
required: Authentication is required
admin_only: Only the admin user is allowed to use the endpoint.
"""
'''
methods = ['OpenIDConnect Bearer Token']
if basic:
methods.append('HTTP Basic Authentication')
......@@ -192,7 +192,7 @@ class AuthResource(Resource):
@api.marshal_with(auth_model, skip_none=True, code=200, description='Auth info send')
@authenticate(required=True, basic=True)
def get(self):
"""
'''
Provides authentication information. This endpoint requires authentification.
Like all endpoints the OIDC access token based authentification. In additional,
basic HTTP authentification can be used. This allows to login and acquire an
......@@ -202,7 +202,7 @@ class AuthResource(Resource):
URLs with a ``signature_token`` query parameter, e.g. for file downloads on the
raw or archive api endpoints; a short ``upload_token`` that is used in
``curl`` command line based uploads; and the OIDC JWT access token.
"""
'''
def signature_token():
expires_at = datetime.datetime.utcnow() + datetime.timedelta(seconds=10)
......@@ -239,7 +239,7 @@ class UsersResource(Resource):
@api.marshal_with(users_model, code=200, description='User suggestions send')
@api.expect(users_parser, validate=True)
def get(self):
""" Get existing users. """
''' Get existing users. '''
args = users_parser.parse_args()
return dict(users=infrastructure.keycloak.search_user(args.get('query')))
......@@ -248,7 +248,7 @@ class UsersResource(Resource):
@api.marshal_with(user_model, code=200, skip_none=True, description='User invited')
@api.expect(user_model, validate=True)
def put(self):
""" Invite a new user. """
''' Invite a new user. '''
if config.keycloak.oasis:
abort(400, 'User invide does not work this NOMAD OASIS')
......@@ -273,10 +273,10 @@ class UsersResource(Resource):
def with_signature_token(func):
"""
'''
A decorator for API endpoint implementations that validates signed URLs. Token to
sign URLs can be retrieved via the ``/auth`` endpoint.
"""
'''
@functools.wraps(func)
@api.response(401, 'Invalid or expired signature token')
def wrapper(*args, **kwargs):
......@@ -302,10 +302,10 @@ def with_signature_token(func):
def create_authorization_predicate(upload_id, calc_id=None):
"""
'''
Returns a predicate that determines if the logged in user has the authorization
to access the given upload and calculation.
"""
'''
def func():
if g.user is None:
# guest users don't have authorized access to anything
......
......@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
'''
Common data, variables, decorators, models used throughout the API.
"""
'''
from typing import Callable, IO, Set, Tuple, Iterable, Dict, Any
from flask_restplus import fields
import zipstream
......@@ -24,8 +24,7 @@ from urllib.parse import urlencode
import sys
import os.path
from nomad import search, config
from nomad.datamodel import Domain
from nomad import search, config, datamodel
from nomad.app.optimade import filterparser
from nomad.app.common import RFC3339DateTime, rfc3339DateTime
from nomad.files import Restricted
......@@ -57,7 +56,7 @@ pagination_model = api.model('Pagination', {
'order_by': fields.String(description='Sorting criterion.'),
'order': fields.Integer(description='Sorting order -1 for descending, 1 for asceding.')
})
""" Model used in responses with pagination. """
''' Model used in responses with pagination. '''
scroll_model = api.model('Scroll', {
'scroll': fields.Boolean(default=False, description='Flag if scrolling is enables.'),
......@@ -79,13 +78,13 @@ search_model_fields = {
search_model = api.model('Search', search_model_fields)
query_model_fields = {
quantity.qualified_name: fields.Raw(description=quantity.description)
for quantity in Domain.all_quantities()}
qualified_name: fields.Raw(description=quantity.description)
for qualified_name, quantity in search.search_quantities.items()}
query_model_fields.update(**{
'owner': fields.String(description='The group the calculations belong to.', allow_null=True, skip_none=True),
'domain': fields.String(description='Specify the domain to search in: %s, default is ``%s``' % (
', '.join(['``%s``' % key for key in Domain.instances.keys()]), config.default_domain)),
', '.join(['``%s``' % domain for domain in datamodel.domains]), config.default_domain)),
'from_time': fields.Raw(description='The minimum entry time.', allow_null=True, skip_none=True),
'until_time': fields.Raw(description='The maximum entry time.', allow_null=True, skip_none=True)
})
......@@ -94,7 +93,7 @@ query_model = api.model('Query', query_model_fields)
def add_pagination_parameters(request_parser):
""" Add pagination parameters to Flask querystring parser. """
''' Add pagination parameters to Flask querystring parser. '''
request_parser.add_argument(
'page', type=int, help='The page, starting with 1.', location='args')
request_parser.add_argument(
......@@ -111,7 +110,7 @@ pagination_request_parser = request_parser.copy()
def add_scroll_parameters(request_parser):
""" Add scroll parameters to Flask querystring parser. """
''' Add scroll parameters to Flask querystring parser. '''
request_parser.add_argument(
'scroll', type=bool, help='Enable scrolling')
request_parser.add_argument(
......@@ -119,12 +118,12 @@ def add_scroll_parameters(request_parser):
def add_search_parameters(request_parser):
""" Add search parameters to Flask querystring parser. """
''' Add search parameters to Flask querystring parser. '''
# more search parameters
request_parser.add_argument(
'domain', type=str,
help='Specify the domain to search in: %s, default is ``%s``' % (
', '.join(['``%s``' % key for key in Domain.instances.keys()]),
', '.join(['``%s``' % domain for domain in datamodel.domains]),
config.default_domain))
request_parser.add_argument(
'owner', type=str,
......@@ -137,20 +136,18 @@ def add_search_parameters(request_parser):
help='A yyyy-MM-ddTHH:mm:ss (RFC3339) maximum entry time (e.g. upload time)')
# main search parameters
for quantity in Domain.all_quantities():
for qualified_name, quantity in search.search_quantities.items():
request_parser.add_argument(
quantity.qualified_name, help=quantity.description,
action=quantity.argparse_action if quantity.multi else None)
qualified_name, help=quantity.description, action=quantity.argparse_action)
_search_quantities = set([
domain.qualified_name for domain in Domain.all_quantities()])
_search_quantities = set(search.search_quantities.keys())
def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str, Any]):
"""
'''
Helper that adds query-relevant request args to the given SearchRequest.
"""
'''
args = {key: value for key, value in args.items() if value is not None}
# domain
......@@ -196,7 +193,7 @@ def apply_search_parameters(search_request: search.SearchRequest, args: Dict[str
def calc_route(ns, prefix: str = ''):
""" A resource decorator for /<upload>/<calc> based routes. """
''' A resource decorator for /<upload>/<calc> based routes. '''
def decorator(func):
ns.route('%s/<string:upload_id>/<string:calc_id>' % prefix)(
api.doc(params={
......@@ -208,7 +205,7 @@ def calc_route(ns, prefix: str = ''):
def upload_route(ns, prefix: str = ''):
""" A resource decorator for /<upload> based routes. """
''' A resource decorator for /<upload> based routes. '''
def decorator(func):
ns.route('%s/<string:upload_id>' % prefix)(
api.doc(params={
......@@ -221,7 +218,7 @@ def upload_route(ns, prefix: str = ''):
def streamed_zipfile(
files: Iterable[Tuple[str, str, Callable[[str], IO], Callable[[str], int]]],
zipfile_name: str, compress: bool = False):
"""
'''
Creates a response that streams the given files as a streamed zip file. Ensures that
each given file is only streamed once, based on its filename in the resulting zipfile.
......@@ -232,17 +229,17 @@ def streamed_zipfile(
zipfile_name: A name that will be used in the content disposition attachment
used as an HTTP response.
compress: Uses compression. Default is stored only.
"""
'''
streamed_files: Set[str] = set()
def generator():
""" Stream a zip file with all files using zipstream. """
''' Stream a zip file with all files using zipstream. '''
def iterator():
"""
'''
Replace the directory based iter of zipstream with an iter over all given
files.
"""
'''
# the actual contents
for zipped_filename, file_id, open_io, file_size in files:
if zipped_filename in streamed_files:
......@@ -286,12 +283,12 @@ def streamed_zipfile(
def query_api_url(*args, query_string: Dict[str, Any] = None):
"""
'''
Creates an API URL.
Arguments:
*args: URL path segments after the API base URL
query_string: A dict with query string parameters
"""
'''
url = os.path.join(config.api_url(False), *args)
if query_string is not None:
url = '%s?%s' % (url, urlencode(query_string, doseq=True))
......@@ -300,10 +297,10 @@ def query_api_url(*args, query_string: Dict[str, Any] = None):
def query_api_python(*args, **kwargs):
"""
'''
Creates a string of python code to execute a search query to the repository using
the requests library.
"""
'''
url = query_api_url(*args, **kwargs)
return '''import requests
response = requests.post("{}")
......@@ -311,8 +308,8 @@ data = response.json()'''.format(url)
def query_api_curl(*args, **kwargs):
"""
'''
Creates a string of curl command to execute a search query to the repository.
"""
'''
url = query_api_url(*args, **kwargs)
return 'curl -X POST %s -H "accept: application/json" --output "nomad.json"' % url
......@@ -49,7 +49,7 @@ class DatasetListResource(Resource):
@api.expect(list_datasets_parser)
@authenticate(required=True)
def get(self):
""" Retrieve a list of all datasets of the authenticated user. """
''' Retrieve a list of all datasets of the authenticated user. '''
args = {
key: value for key, value in list_datasets_parser.parse_args().items()
if value is not None}
......@@ -76,7 +76,7 @@ class DatasetListResource(Resource):
@api.expect(dataset_model)
@authenticate(required=True)
def put(self):
""" Creates a new dataset. """
''' Creates a new dataset. '''
data = request.get_json()
if data is None:
data = {}
......@@ -112,7 +112,7 @@ class DatasetResource(Resource):
@api.marshal_with(dataset_model, skip_none=True, code=200, description='Dateset send')
@authenticate(required=True)
def get(self, name: str):
""" Retrieve a dataset by name. """
''' Retrieve a dataset by name. '''
try:
result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
except KeyError:
......@@ -126,7 +126,7 @@ class DatasetResource(Resource):
@api.marshal_with(dataset_model, skip_none=True, code=200, description='DOI assigned')
@authenticate(required=True)
def post(self, name: str):
""" Assign a DOI to the dataset. """
''' Assign a DOI to the dataset. '''
try:
result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
except KeyError:
......@@ -168,7 +168,7 @@ class DatasetResource(Resource):
@api.marshal_with(dataset_model, skip_none=True, code=200, description='Dateset deleted')
@authenticate(required=True)
def delete(self, name: str):
""" Delete the dataset. """
''' Delete the dataset. '''
try:
result = Dataset.m_def.m_x('me').get(user_id=g.user.user_id, name=name)
except KeyError:
......
......@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
'''
API endpoint that delivers backend configuration details.
"""
'''
from flask_restplus import Resource, fields
......@@ -69,7 +69,7 @@ class InfoResource(Resource):
@api.doc('get_info')
@api.marshal_with(info_model, skip_none=True, code=200, description='Info send')
def get(self):
""" Return information about the nomad backend and its configuration. """
''' Return information about the nomad backend and its configuration. '''
codes = [
parser.code_name
for parser in parsing.parser_dict.values()
......@@ -83,16 +83,13 @@ class InfoResource(Resource):
'normalizers': [normalizer.__name__ for normalizer in normalizing.normalizers],
'domains': [
{
'name': domain.name,
'quantities': [quantity for quantity in domain.quantities.values()],
'metrics_names': domain.metrics_names,
'aggregations_names': domain.aggregations_names,
'name': domain_name,
'metainfo': {
'all_package': domain.metainfo_all_package,
'root_sections': domain.root_sections
'all_package': domain['metainfo_all_package'],
'root_section': domain['root_section']
}
}
for domain in datamodel.Domain.instances.values()
for domain_name, domain in datamodel.domains.items()
],
'version': config.version,
'release': config.release,
......
......@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
'''
The mirror API of the nomad@FAIRDI APIs. Allows to export upload metadata.
"""
'''
from flask import request
from flask_restplus import Resource, abort, fields
......@@ -82,9 +82,9 @@ class MirrorUploadResource(Resource):
@api.doc('get_upload_mirror')
@authenticate(admin_only=True)
def get(self, upload_id):
"""
'''
Export upload (and all calc) metadata for mirrors.
"""
'''