Commit a063d537 authored by Markus Scheidgen

Merge branch 'v0.7.6' into 'master'

V0.7.6

See merge request !94
parents 8444bcd6 90a4d543
Pipeline #69272 passed with stages in 14 minutes and 12 seconds
Subproject commit 15d0110cbeda05aaea05e4d30ba3aeb0874dafef
Subproject commit 022a2af6bad45364dbdfac6b6c913f04186ac7d4
Subproject commit b39569c5fa69254c90f91ec430d28a0941efbe95
Subproject commit fe15759f080e8176d88af91447949243608b0d7e
......@@ -17,8 +17,13 @@ We do not assume many specific python packages. Only the *bravado* package (available
via PyPI) is required. It allows us to use the nomad REST API in a more friendly and
pythonic way. You can simply install it the usual way.
Optionally, if you need to access your private data, the *python-keycloak* package is
required to conveniently acquire the tokens necessary to authenticate yourself with
NOMAD:
```
pip install bravado
pip install python-keycloak
```
For the code snippets below, we need the following imports:
......@@ -33,6 +38,12 @@ import os.path
import sys
```
And optionally:
```python
from bravado.requests_client import RequestsClient, Authenticator
from keycloak import KeycloakOpenID
```
### An example file
Let's assume you have an example upload file ready. It's a `.zip` (`.tgz` would also work)
with some *VASP* data from a single run at `/example/AcAg/vasprun.xml`, `/example/AcAg/OUTCAR`, ...
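If you want to create such a file from local data for testing, here is a minimal sketch
(the paths are the hypothetical example paths from above, not an API requirement):
```python
# Hedged sketch: zip a local VASP run so it matches the layout described above.
import zipfile

with zipfile.ZipFile('example.zip', 'w') as zf:
    zf.write('example/AcAg/vasprun.xml')
    zf.write('example/AcAg/OUTCAR')
```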
......@@ -55,13 +66,47 @@ password = 'password'
### Using bravado
Bravado reads a ReST API's definition from a `swagger.json` as it is provided by
many APIs, including nomad's of course. Bravado also allows using authentication,
which makes it even easier. The following would be a typical setup:
many APIs, including nomad's of course.
```python
host = urlparse(nomad_url).netloc.split(':')[0]
http_client = RequestsClient()
http_client.set_basic_auth(host, user, password)
client = SwaggerClient.from_url('%s/swagger.json' % nomad_url, http_client=http_client)
```
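As a quick smoke test for the client, you can call a search endpoint (a sketch; it
assumes the `repo.search` operation as used later in this tutorial, and that the
response carries a `pagination` object with a `total` field):
```python
# Hedged: list a single entry to verify that the client and credentials work.
result = client.repo.search(per_page=1).response().result
print(result.pagination.total)
```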
Bravado also supports authentication, if required. The following would be a typical setup:
```python
class KeycloakAuthenticator(Authenticator):
    """ A bravado authenticator for NOMAD's keycloak-based user management. """
    def __init__(self, user, password):
        super().__init__(host=urlparse(nomad_url).netloc.split(':')[0])
        self.user = user
        self.password = password
        self.token = None
        self.__oidc = KeycloakOpenID(
            server_url='https://repository.nomad-coe.eu/fairdi/keycloak/auth/',
            realm_name='fairdi_nomad_prod',
            client_id='nomad_public')

    def apply(self, request):
        if self.token is None:
            self.token = self.__oidc.token(username=self.user, password=self.password)
            self.token['time'] = time()
        elif self.token['expires_in'] < int(time()) - self.token['time'] + 10:
            try:
                self.token = self.__oidc.refresh_token(self.token['refresh_token'])
                self.token['time'] = time()
            except Exception:
                self.token = self.__oidc.token(username=self.user, password=self.password)
                self.token['time'] = time()
        request.headers.setdefault('Authorization', 'Bearer %s' % self.token['access_token'])
        return request
http_client = RequestsClient()
http_client.authenticator = KeycloakAuthenticator(user=user, password=password)
client = SwaggerClient.from_url('%s/swagger.json' % nomad_url, http_client=http_client)
```
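Note how the authenticator caches the token: it performs a password login on first use,
refreshes the token via its refresh token once the reported `expires_in` is about to
elapse, and falls back to a fresh password login if the refresh fails.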
......@@ -192,7 +237,30 @@ print('%s/raw/%s/%s/*' % (nomad_url, calc['upload_id'], os.path.dirname(calc['ma
There are different options to download individual files or zip archives with multiple files.
## Using *curl* to access the API
The shell tool *curl* can be used to call most API endpoints. Most endpoints for searching
or downloading data are plain **GET** operations controlled by URL parameters. For example,
to download data:
```
curl http://repository.nomad-coe.eu/app/api/raw/query?upload_id=<your_upload_id> -o download.zip
```
It is a little bit trickier if you need to authenticate yourself, e.g. to download
not yet published or embargoed data. All endpoints support, and most require, the use of
an access token. To acquire an access token from our user management system with curl:
```
curl --data 'grant_type=password&client_id=nomad_public&username=<your_username>&password=<your password>' \
https://repository.nomad-coe.eu/fairdi/keycloak/auth/realms/fairdi_nomad_prod/protocol/openid-connect/token
```
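The response is a JSON object that carries the token in its `access_token` field. One way
to extract it (a sketch that assumes a local `python`; any JSON tool works as well):
```
curl -s --data 'grant_type=password&client_id=nomad_public&username=<your_username>&password=<your password>' \
    https://repository.nomad-coe.eu/fairdi/keycloak/auth/realms/fairdi_nomad_prod/protocol/openid-connect/token \
    | python -c 'import sys, json; print(json.load(sys.stdin)["access_token"])'
```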
You can then use the access token with:
```
curl -H 'Authorization: Bearer <your_access_token>' \
http://repository.nomad-coe.eu/app/api/raw/query?upload_id=<your_upload_id> -o download.zip
```
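For repeated calls it can be convenient to keep the token in a shell variable (a sketch;
`TOKEN` is just a hypothetical variable name):
```
TOKEN='<your_access_token>'
curl -H "Authorization: Bearer $TOKEN" \
    'http://repository.nomad-coe.eu/app/api/raw/query?upload_id=<your_upload_id>' -o download.zip
```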
## Conclusions
This was just a small glimpse into the nomad API. You should check out our swagger documentation
for more details on all the API endpoints and their parameters. You can explore the
API via swagger-ui and even try it in your browser. Just visit the API URL.
This was just a small glimpse into the nomad API. You should check out our [swagger-ui](https://repository.nomad-coe.eu/app/api/) for more details on all the API endpoints and their parameters. You can explore the
API via the swagger-ui and even try it in your browser.
{
  "name": "nomad-fair-gui",
  "version": "0.7.5",
  "version": "0.7.6",
  "commit": "nomad-gui-commit-placeholder",
  "private": true,
  "dependencies": {
......
......@@ -20,6 +20,8 @@ from flask import request
from flask_restplus import Resource, abort, fields
from nomad import processing as proc
from nomad.datamodel import Dataset
from nomad.doi import DOI
from .api import api
from .auth import authenticate
......@@ -32,6 +34,8 @@ mirror_upload_model = api.model('MirrorUpload', {
    'upload_id': fields.String(description='The id of the exported upload'),
    'upload': fields.String(description='The upload metadata as mongoengine json string'),
    'calcs': fields.List(fields.Raw, description='All upload calculation metadata as mongo SON'),
    'datasets': fields.Raw(description='All upload datasets as dict id->mongo SON'),
    'dois': fields.Raw(description='All upload dois as dict id->mongo SON'),
    'upload_files_path': fields.String(description='The path to the local uploads file folder')
})
......@@ -40,6 +44,8 @@ mirror_query_model = api.model('MirrorQuery', {
    description='Mongoengine query that is used to search for uploads to mirror.')
})
_Dataset = Dataset.m_def.m_x('me').me_cls
@ns.route('/')
class MirrorUploadsResource(Resource):
......@@ -87,9 +93,23 @@ class MirrorUploadResource(Resource):
        if upload.tasks_running or upload.process_running:
            abort(400, message='Only non processing uploads can be exported')

        calcs = []
        datasets = {}
        dois = {}
        for calc in proc.Calc._get_collection().find(dict(upload_id=upload_id)):
            calcs.append(calc)
            for dataset in calc['metadata'].get('datasets', []):
                if dataset not in datasets:
                    datasets[dataset] = _Dataset._get_collection().find_one(dict(_id=dataset))
                    doi = datasets[dataset].get('doi', None)
                    if doi is not None:
                        dois[doi] = DOI._get_collection().find_one(dict(_id=doi))

        return {
            'upload_id': upload_id,
            'upload': upload.to_json(),
            'calcs': [calc for calc in proc.Calc._get_collection().find(dict(upload_id=upload_id))],
            'calcs': calcs,
            'datasets': datasets,
            'dois': dois,
            'upload_files_path': upload.upload_files.os_path
        }, 200
......@@ -23,6 +23,7 @@ import datetime
from nomad import utils, processing as proc, search, config, files, infrastructure
from nomad.datamodel import Dataset, User
from nomad.doi import DOI
from nomad.cli.admin.uploads import delete_upload
from .client import client
......@@ -31,12 +32,12 @@ from .client import client
__in_test = False
""" Will be monkeypatched by tests to alter behavior for testing. """
__Dataset = Dataset.m_def.m_x('me').me_cls
_Dataset = Dataset.m_def.m_x('me').me_cls
__logger = utils.get_logger(__name__)
def fix_time(data):
    for key in ['upload_time', 'last_processing']:
def fix_time(data, keys):
    for key in keys:
        time = data.get(key)
        if isinstance(time, int):
            data[key] = datetime.datetime.utcfromtimestamp(time)
......@@ -53,11 +54,11 @@ def tarnsform_user_id(source_user_id):
def transform_dataset(source_dataset):
    pid = str(source_dataset['id'])
    target_dataset = __Dataset.objects(pid=pid).first()
    target_dataset = _Dataset.objects(pid=pid).first()
    if target_dataset is not None:
        return target_dataset.dataset_id

    target_dataset = __Dataset(
    target_dataset = _Dataset(
        dataset_id=utils.create_uuid(),
        pid=pid,
        name=source_dataset['name'])
......@@ -248,6 +249,8 @@ def mirror(
        # In tests, we mirror from ourselves; remove the upload so it is not there for import
        proc.Calc.objects(upload_id=upload_id).delete()
        proc.Upload.objects(upload_id=upload_id).delete()
        _Dataset.objects().delete()
        DOI.objects().delete()
        search.delete_upload(upload_id)
    else:
        n_calcs = 0
......@@ -300,8 +303,17 @@ def mirror(
    if not files_only:
        # create mongo
        upload = proc.Upload.from_json(upload_data.upload, created=True).save()
        if upload_data.datasets is not None:
            for dataset in upload_data.datasets.values():
                fix_time(dataset, ['created'])
                _Dataset._get_collection().insert(dataset)
        if upload_data.dois is not None:
            for doi in upload_data.dois.values():
                fix_time(doi, ['create_time'])
                DOI._get_collection().insert(doi)

        for calc in upload_data.calcs:
            fix_time(calc['metadata'])
            fix_time(calc, ['create_time', 'complete_time'])
            fix_time(calc['metadata'], ['upload_time', 'last_processing'])
        proc.Calc._get_collection().insert(upload_data.calcs)

        # index es
......
......@@ -24,7 +24,9 @@ import numpy as np
import click
import json
from datetime import datetime
import subprocess
from nomad import config
from .client import client
......@@ -359,7 +361,8 @@ def statistics_plot(errors, title, x_axis, y_axis, cumulate, total, save, power,
@client.command(help='Generate table with basic statistics summary.')
@click.option('--html', is_flag=True, help='Output HTML instead of plain text table.')
@click.option('--geometries', is_flag=True, help='Use geometries not unique geometries.')
def statistics_table(html, geometries):
@click.option('--public-path', type=str, default=config.fs.public, help='The path to the public data. Default is %s.' % config.fs.public)
def statistics_table(html, geometries, public_path):
    # get more stats for files
    # uploads: find . -maxdepth 2 | wc -l
    # public archive: find . -regex '.*archive.*public.*zip' -type f -print0 | du --files0-from=- -ch | grep total$
......@@ -392,6 +395,8 @@ def statistics_table(html, geometries):
        + get_statistic(data_all, 'system', 'molecule / cluster', 'calculations')
    calculations_2d = get_statistic(data_all, 'system', '2D / surface', 'calculations')
    calculations_2d += get_statistic(data_all, 'system', '2D', 'calculations')
    calculations_2d += get_statistic(data_all, 'system', 'surface', 'calculations')
    calculations_3d = get_statistic(data_all, 'system', 'bulk', 'calculations')

    metrics_all = client.repo.search(per_page=1, metrics=[geometry_metric, 'quantities']).response().result
......@@ -412,17 +417,43 @@ def statistics_table(html, geometries):
        client.repo.search(per_page=1, code_name='Phonopy').response().result,
        'total', 'all', 'code_runs')
    # files and sizes
    def run_shell_command(command):
        process = subprocess.run(['bash', '-c', command], stdout=subprocess.PIPE)
        out = process.stdout.decode('utf-8')
        if process.stderr is not None:
            err = process.stderr.decode('utf-8')
            print('There is an error: %s' % str(err.strip()))
        return out.split('\t')[0].strip()

    archive_data = run_shell_command((
        'find %s -regex \'.*archive.*public.*zip\' '
        '-type f -print0 | du --files0-from=- -ch | grep total$') % public_path)
    raw_data = run_shell_command((
        'find %s -regex \'.*raw.*public.*zip\' '
        '-type f -print0 | du --files0-from=- -ch | grep total$') % public_path)
    n_uploads = run_shell_command(
        'find %s -regex \'.*raw.*public.*zip\' -type f | wc -l' % public_path)
    try:
        n_uploads = '{:,}'.format(int(n_uploads))
    except Exception:
        pass
    if not html:
        print('''
Entries: {:,.0f},
Calculations, e.g. total energies: {:,.0f},
Geometries: {:,.0f},
Bulk crystals: {:,.0f},
2D / Surfaces: {:,.0f},
Atoms / Molecules: {:,.0f},
DOS: {:,.0f},
Entries: {:,.0f}
Calculations, e.g. total energies: {:,.0f}
Geometries: {:,.0f}
Bulk crystals: {:,.0f}
2D / Surfaces: {:,.0f}
Atoms / Molecules: {:,.0f}
DOS: {:,.0f}
Band structures: {:,.0f}
Total parsed quantities: {:,.0f}
Public raw data: {}B
Public archive data: {}B
Number of uploads: {}
'''.format(
            entries,
            calculations,
......@@ -432,7 +463,10 @@ def statistics_table(html, geometries):
            calculations_1d,
            dos,
            band_structures,
            quantities
            quantities,
            raw_data,
            archive_data,
            n_uploads
        ))
else:
......@@ -497,8 +531,8 @@ def statistics_table(html, geometries):
Furthermore:
</p>
<ul>
<li><b>5,053</b> Uploads with <b>41 TB</b> of raw data</li>
<li><b>15 TB</b> of archive data</li>
<li><b>{}</b> Uploads with <b>{}B</b> of raw data</li>
<li><b>{}B</b> of archive data</li>
<li>Data classified using <b>168</b> public metadata of the NOMAD Meta Info and <b>2,360</b> code-specific metadata</li>
</ul>
<p>
......@@ -551,5 +585,8 @@ def statistics_table(html, geometries):
            dos,
            band_structures,
            phonons,
            quantities
            quantities,
            n_uploads,
            raw_data,
            archive_data
        ))
......@@ -208,7 +208,7 @@ datacite = NomadConfig(
    password='*'
)
version = '0.7.5'
version = '0.7.6'
commit = gitinfo.commit
release = 'devel'
domain = 'DFT'
......
apiVersion: v1
appVersion: "0.7.5"
appVersion: "0.7.6"
description: A Helm chart for Kubernetes that only runs nomad services and uses externally hosted databases.
name: nomad
version: 0.7.5
version: 0.7.6
......@@ -16,7 +16,7 @@ data:
keys=console, access, error
[formatters]
keys=generic
keys=generic, json
[logger_root]
level=INFO
......@@ -38,14 +38,14 @@ data:
args=(sys.stdout, )
[handler_access]
class=logstash.TCPLogstashHandler
class=StreamHandler
formatter=json
args=('{{ .Values.logstash.host }}',{{ .Values.logstash.port }})
args=(sys.stdout, )
[handler_error]
class=logstash.TCPLogstashHandler
class=StreamHandler
formatter=json
args=('{{ .Values.logstash.host }}',{{ .Values.logstash.port }})
args=(sys.stdout, )
[formatter_generic]
format=%(asctime)s [%(process)d] [%(levelname)s] %(message)s
......
......@@ -12,7 +12,7 @@ reqs = [str(ir.req) for ir in install_reqs if 'sphinxcontrib.httpdomain' not in
setup(
    name='nomad',
    version='0.7.5',
    version='0.7.6',
    description='The nomad@FAIRDI infrastructure python package',
    py_modules=['nomad'],
    install_requires=reqs,
......
......@@ -1624,6 +1624,36 @@ class TestMirror:
        data = json.loads(rv.data)
        assert data[0]['upload_id'] == published.upload_id
    def test_dataset(self, api, published_wo_user_metadata, admin_user_auth, test_user_auth):
        rv = api.post(
            '/repo/edit', headers=test_user_auth, content_type='application/json',
            data=json.dumps({
                'actions': {
                    'datasets': [{
                        'value': 'test_dataset'
                    }]
                }
            }))
        assert rv.status_code == 200

        rv = api.post('/datasets/test_dataset', headers=test_user_auth)
        assert rv.status_code == 200

        rv = api.post(
            '/mirror/',
            content_type='application/json', data='{"query":{}}', headers=admin_user_auth)
        assert rv.status_code == 200, rv.data

        url = '/mirror/%s' % published_wo_user_metadata.upload_id
        rv = api.get(url, headers=admin_user_auth)
        assert rv.status_code == 200

        data = json.loads(rv.data)
        assert len(data['datasets']) == 1
        dataset = data['calcs'][0]['metadata']['datasets'][0]
        assert dataset in data['datasets']
        assert data['datasets'][dataset]['doi'] is not None
        assert data['datasets'][dataset]['doi'] in data['dois']
class TestDataset:
......
......@@ -22,6 +22,8 @@ from nomad import utils, search, processing as proc, files
from nomad.cli import cli
from nomad.processing import Upload, Calc
from tests.app.test_app import BlueprintClient
# TODO there is much more to test
......@@ -322,6 +324,35 @@ class TestClient:
published.upload_files.exists
    def test_mirror_datasets(self, client, published_wo_user_metadata, test_user_auth, admin_user_bravado_client, monkeypatch):
        # use the API to create dataset and DOI
        api = BlueprintClient(client, '/api')
        rv = api.post(
            '/repo/edit', headers=test_user_auth, content_type='application/json',
            data=json.dumps({
                'actions': {
                    'datasets': [{
                        'value': 'test_dataset'
                    }]
                }
            }))
        assert rv.status_code == 200
        rv = api.post('/datasets/test_dataset', headers=test_user_auth)
        assert rv.status_code == 200

        # perform the mirror
        monkeypatch.setattr('nomad.cli.client.mirror.__in_test', True)
        result = click.testing.CliRunner().invoke(
            cli, ['client', 'mirror'], catch_exceptions=False, obj=utils.POPO())
        assert result.exit_code == 0, result.output
        assert published_wo_user_metadata.upload_id in result.output
        assert published_wo_user_metadata.upload_files.os_path in result.output
        published_wo_user_metadata.upload_files.exists
    def test_statistics(self, client, proc_infra, admin_user_bravado_client):
        result = click.testing.CliRunner().invoke(
......