Commit c458aad3 authored by Cuauhtemoc Salazar's avatar Cuauhtemoc Salazar
Browse files

Merge branch 'v0.8.0' into parser_fixes (after checkout v0.8.0; git pull)

parents 46cc63ae 76488bd1
......@@ -10,11 +10,12 @@
.volumes/
.git/
.coverage
**/.ipynb_checkpoints
**/.volumes
data/
local/
target/
examples/
ops/
build/
dist/
......
......@@ -25,4 +25,4 @@ nomad.yaml
./gunicorn.conf
build/
dist/
setup.json
\ No newline at end of file
setup.json
......@@ -94,6 +94,24 @@ tests:
- $CI_COMMIT_REF_NAME =~ /^dev-.*$/
- $CI_COMMIT_MESSAGE =~ /\[skip[ _-]tests?\]/i
install_tests:
stage: test
image: python:3.6
before_script:
- git submodule sync
- git submodule update --init --jobs=4
script:
- pip install --upgrade pip
- pip install fastentrypoints
- pip install pyyaml
- pip install numpy
- python setup.py compile
- python setup.py sdist
- pip install dist/nomad-0.8.0.tar.gz
- python -c "import nomad.datamodel, nomad.datamodel.metainfo, nomad.client"
- pip install dist/nomad-0.8.0.tar.gz[parsing]
- python -m nomad.cli parse tests/data/parsers/vasp/vasp.xml
deploy:
stage: deploy
before_script:
......
......@@ -22,7 +22,19 @@
# We use slim for the final image
FROM python:3.6-slim as final
# First, build all python stuff in a python build image
# First, build the GUI in a gui build image
FROM node:latest as gui_build
RUN mkdir -p /app
WORKDIR /app
ENV PATH /app/node_modules/.bin:$PATH
COPY gui/package.json /app/package.json
COPY gui/yarn.lock /app/yarn.lock
RUN yarn
COPY gui /app
RUN yarn run build
RUN yarn run --silent react-docgen src/components --pretty > react-docgen.out
# Second, build all python stuff in a python build image
FROM python:3.6-stretch as build
RUN mkdir /install
......@@ -56,23 +68,13 @@ RUN python setup.py compile
RUN pip install .[all]
RUN python setup.py sdist
WORKDIR /install/docs
COPY --from=gui_build /app/react-docgen.out /install/docs
RUN make html
RUN \
find /usr/local/lib/python3.6/ -name 'tests' ! -path '*/networkx/*' -exec rm -r '{}' + && \
find /usr/local/lib/python3.6/ -name 'test' -exec rm -r '{}' + && \
find /usr/local/lib/python3.6/site-packages/ -name '*.so' -print -exec sh -c 'file "{}" | grep -q "not stripped" && strip -s "{}"' \;
# Second built the GUI in a gui build image
FROM node:latest as gui_build
RUN mkdir -p /app
WORKDIR /app
ENV PATH /app/node_modules/.bin:$PATH
COPY gui/package.json /app/package.json
COPY gui/yarn.lock /app/yarn.lock
RUN yarn
COPY gui /app
RUN yarn run build
# Third, create a slim final image
FROM final
......
Subproject commit 7a7c46ddeac23591b8d10aa6bbb7a7fa987df51c
Subproject commit c424e75671e8c09c2f29c90ec63feafd0a2a706e
Subproject commit c0e34029160eb179dbb80d7d66b81a657602a11a
Subproject commit 4674d1ad944d181aab12695503154d839f40b5b0
Subproject commit 2e2a4cc93fe2f2f91b8ce44f1da983f315cf1453
Subproject commit 3d51c6f61e2c8b8eaa026dd613c23e9524c9e9ea
Subproject commit 09bc61058470381a9d590e70cf9ee0c76fb120f2
Subproject commit d054d15c08b6da05d8afded4410474bdea999717
Subproject commit cc782ec0acec9cbbc38c0c717e2439ed2aaae92d
Subproject commit ba060d7c8cb902d16e47ca382ef986662a5f3fa4
.build/
*.graffle/
\ No newline at end of file
*.graffle/
react-docgen.out
\ No newline at end of file
API(s) Documentation
====================
This is just a brief summary of all API endpoints of the NOMAD API. For more comprehensive documentation,
consult our *swagger* dashboards:
- (NOMAD API)[swagger dashboard](https://repository.nomad-coe.eu/app/api/)
- (NOMAD's Optimade API)[swagger dashboard](https://repository.nomad-coe.eu/app/optimade/)
Summary
-------
......
# API Tutorial
# API Tutorials
The NOMAD Repository and Archive offers all its functionality through an application
programming interface (API). More specifically a [RESTful HTTP API](https://en.wikipedia.org/wiki/Representational_state_transfer) that allows you
to use NOMAD as a set of resources (think data) that can be uploaded, accessed, downloaded,
searched for, etc. via [HTTP requests](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol).
There are different tools and libraries to use the NOMAD API that come with different
trade-offs between expressiveness, learning curve, and convenience:
- use an HTTP program like *curl* or *wget* to directly use NOMAD from within a shell
- use a generic Python HTTP library like [requests](https://requests.readthedocs.io/en/master/)
- use more specific Python libraries like [bravado](https://github.com/Yelp/bravado) that turn HTTP requests into NOMAD
specific function calls based on an [OpenAPI spec](https://swagger.io/specification/) that NOMAD offers and that describes our API
- directly in the browser via our generated [swagger dashboard](https://repository.nomad-coe.eu/app/api/)
- use the NOMAD Python client library, which offers custom and more powerful
implementations for certain tasks (currently only for accessing the NOMAD Archive)
This set of tutorials provides a few examples for common NOMAD tasks using the various
options.
## Using *curl* (or *wget*)
Terminal programs like *curl* act as an HTTP client and allow you to send requests and
display or store the respective responses. HTTP basically allows you to GET, POST, PUT,
and DELETE "resources" on a remote server. These resources are identified via URLs (=uniform
resource locator). URLs usually consist of a protocol (e.g. HTTP), a domain (our servers),
a path (a place on our servers), and query parameters (additional options).
NOMAD provides three main sets of resources: **repo** (i.e. the NOMAD Repository), **raw**
(raw uploaded files), **archive** (i.e. the NOMAD Archive). Within all these resource sets
you have endpoints that either allow you to directly locate a NOMAD entry (i.e. an
uploaded code run) or to ask a query to locate many NOMAD entries at the same time. Here,
the **repo** will return the repository metadata for said entries, **archive** the archive
data, ...
Let's say you want to see the repository metadata (i.e. the information that you see in
our gui) for entries that fit search criteria, like compounds having atoms *Si* and *O* in
it:
This tutorial assumes that you want to
```
curl -X GET "http://repository.nomad-coe.eu/app/api/repo/?atoms=Si&atoms=O"
```
Here we used curl to send an HTTP GET request to return the resource located by the given URL.
In practice you can omit the `-X GET` (which is the default) and you might want to format
the output:
```
curl "http://repository.nomad-coe.eu/app/api/repo/?atoms=Si&atoms=O" | python -m json.tool
```
You'll see the metadata of the first 10 entries that match your criteria. There
are various other query parameters. You find a full list in the generated [swagger dashboard
of our API](https://repository.nomad-coe.eu/app/api/).
Besides search criteria you can determine how many results (`per_page`) and what page of
results should be returned (`page`). If you want to go beyond the first 10.000 results
you can use our *scroll* API (`scroll=true`, `scroll_after`). You can limit what properties
should be returned (`include`, `exclude`). See the generated [swagger dashboard
of our API](https://repository.nomad-coe.eu/app/api/) for more parameters.
If you use the [NOMAD Repository and Archive search interface](https://repository.nomad-coe.eu/app/gui/search)
and create a query, you can click the **<>** button (to the right, above the result list).
This will give you some code examples with URLs for your search query.
Similar functionality is offered to download archive or raw data. Let's say you have
identified an entry (given via a `upload_id`/`calc_id`, see the query output), and
you want to download it:
```
curl "http://repository.nomad-coe.eu/app/api/raw/calc/f0KQE2aiSz2KRE47QtoZtw/6xe9fZ9xoxBYZOq5lTt8JMgPa3gX/*" -o download.zip
```
This basically requests all the files (`*`) that belong to this entry. If you have a query
that is more selective, you can also download all results. Here all compounds that only
consist of Si, O, bulk material simulations of cubic systems (currently ~100 entries):
```
curl "http://repository.nomad-coe.eu/app/api/raw/query?only_atoms=Si&only_atoms=O&system=bulk&crystal_system=cubic" -o download.zip
```
In a similar way you can see the archive of an entry:
```
curl "http://repository.nomad-coe.eu/app/api/archive/f0KQE2aiSz2KRE47QtoZtw/6xe9fZ9xoxBYZOq5lTt8JMgPa3gX" | python -m json.tool
```
Or query and display the first page of 10 archives:
```
curl "http://repository.nomad-coe.eu/app/api/archive/query?only_atoms=Si&only_atoms=O" | python -m json.tool
```
## Using Python's *requests* library
Similar to *curl* in the shell, you can use *requests* in Python. It's a generic HTTP
client library that allows you to send requests:
```python
import requests
import json
response = requests.get("http://repository.nomad-coe.eu/app/api/archive/query?only_atoms=Si&only_atoms=O")
data = response.json()
print(json.dumps(data, indent=2))
```
## Using bravado and our OpenAPI spec
The Python library *bravado* is also an HTTP client, but instead of generic *GET URL*
style functions, it takes a formal specification of the NOMAD API and provides NOMAD
specific functions for you.
```python
from bravado.client import SwaggerClient
nomad_url = 'http://repository.nomad-coe.eu/app/api'
# create the bravado client
client = SwaggerClient.from_url('%s/swagger.json' % nomad_url)
# perform the search request to print number of public entries
data = client.repo.search(atoms=['Si', 'O']).response().result
# print the total amount of search results
print(data.pagination.total)
# print the data of the first result
print(data.results[0])
```
Read on and learn how to install bravado and perform various tasks, like:
- upload some data
- publish the data
- find it
- download it again
## Prerequisites
### Python
The tutorial was tested with Python 3, but it might as well work with Python 2.
### Python packages
We do not assume many specific python packages. Only the *bravado* package (available
via PyPI) is required. It allows us to use the nomad ReST API in a more friendly and
......@@ -110,7 +231,7 @@ http_client.authenticator = KeycloakAuthenticator(user=user, password=password)
client = SwaggerClient.from_url('%s/swagger.json' % nomad_url, http_client=http_client)
```
## Uploading data
### Uploading data
Now, we can look at actually using the nomad API. The API is divided into several
modules: *uploads*, *repo*, *archive*, *raw*, etc. Each provides functionality for
a certain aspect of nomad.
......@@ -119,7 +240,7 @@ The *uploads* endpoints can be used to, you guessed it, upload your data. But th
also allow you to get progress on the upload processing; inspect, delete, and publish uploads;
and get details about the uploaded data, which code input/output files were found, etc.
### Uploading a file
#### Uploading a file
It's simple, since bravado supports uploading files:
......@@ -134,7 +255,7 @@ you can skip the actual upload and say:
upload = client.uploads.upload(local_path='/nomad/my_files/example.zip').response().result
```
### Supervising the processing
#### Supervising the processing
Once uploaded, nomad will extract the file, identify code data, parse and normalize the
data. We call this *processing* and *processing* consists of *tasks* (uploading, extracting, parsing).
......@@ -165,7 +286,7 @@ Of course, you can also visit the nomad GUI
to inspect your uploads. (You might click reload, if you had the page already open.)
### Publishing your upload
#### Publishing your upload
The uploaded data is only visible to you. We call this *staging*. After the processing
was successful and you are satisfied with our processing, you have to publish the upload.
This also allows you to add additional meta-data to your upload (e.g. comments, references, coauthors, etc.).
......@@ -203,7 +324,7 @@ This time we needed some exception handling, since the upload will be removed fr
staging area, and you will get a 404 on the `uploads` endpoint.
## Searching for data
### Searching for data
The *repo* part of the API contains a *search* endpoint that supports many different
quantities to search for. These include `formula` (e.g. *AcAg*), `system` (e.g. *bulk/2D/atom*), `spacegroup`, `authors`, `code` (e.g. *VASP*), etc.
In the following example, we search for the specific path segment `AcAg`.
......@@ -223,7 +344,7 @@ the type of search and their is no formal swagger model for it, therefore you ge
dictionaries.
## Downloading data
### Downloading data
The *raw* api allows you to download data. You can do that either via bravado:
```python
client.raw.get(upload_id=calc['upload_id'], path=calc['mainfile']).response()
......@@ -237,7 +358,7 @@ print('%s/raw/%s/%s/*' % (nomad_url, calc['upload_id'], os.path.dirname(calc['ma
There are different options to download individual files, or zips with multiple files.
## Using *curl* to access the API
### Using *curl* to access the API
The shell tool *curl* can be used to call most API endpoints. Most endpoints for searching
or downloading data are only **GET** operations controlled by URL parameters. For example:
......@@ -261,6 +382,15 @@ curl -H 'Authorization: Bearer <you_access_token>' \
http://repository.nomad-coe.eu/app/api/raw/query?upload_id=<your_upload_id> -o download.zip
```
## Conclusions
This was just a small glimpse into the nomad API. You should checkout our [swagger-ui](https://repository.nomad-coe.eu/app/api/) for more details on all the API endpoints and their parameters. You can explore the
### Conclusions
This was just a small glimpse into the nomad API. You should check out our
[swagger-ui](https://repository.nomad-coe.eu/app/api/)
for more details on all the API endpoints and their parameters. You can explore the
API via the swagger-ui and even try it in your browser.
## NOMAD's Python client library
This library is developed as part of NOMAD. It is supposed to provide more powerful
access to common yet complex tasks. It currently only supports access to the NOMAD
Archive. It has its separate documentation [here](archive.html).
\ No newline at end of file
Accessing the Archive
=====================
Of course, you can access the NOMAD Archive directly via the NOMAD API (see the `API tutorial <api_tutorial.html>`_
and `API reference <api.html>`_). But, it is more effective and convenient to use NOMAD's Python client
library.
.. automodule:: nomad.client
\ No newline at end of file
......@@ -13,8 +13,12 @@
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import sys
sys.path.insert(0, os.path.abspath('.'))
# from recommonmark.transform import AutoStructify
# import docutils_react_docgen
# docutils_react_docgen.SETTINGS['react_docgen'] = 'cat'
sys.path.insert(0, os.path.abspath('..'))
......
# =====================
# The MIT License (MIT)
# =====================
# Copyright (c) 2015 Paul Wexler
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
"""
docutils_react_docgen
=====================
docutils extension for documenting React modules.
Requires react-docgen
"""
from docutils import statemachine
from docutils.parsers import rst
import json
import os
import re
import subprocess
# Command used as the default of the legacy `react_docgen()` helper's
# `react_docgen` parameter.
REACT_DOCGEN = 'react-docgen' # **Deprecated** Use SETTINGS['react_docgen']
# Default values referenced by DEFAULT_OPTIONS below.
MODULE_DESCRIPTION_MISSING = 'Module doc string is missing!'
MODULE_PROP_DESCRIPTION_MISSING = 'Property doc string is missing!'
MODULE_UNDERLINE_CHARACTER = '-'
TAB_SIZE = 4
# Default directive options; also the default `options` of run_react_docgen().
DEFAULT_OPTIONS = {
# 'exclude' / 'include': regular expressions compiled by the Formatter and
# searched against each module's description; an empty string disables
# the corresponding filter.
'exclude': '',
'include': '',
# NOTE(review): presumably the placeholder texts and heading underline used
# when rendering modules; their use is not visible in this part of the file.
'module_description_missing': MODULE_DESCRIPTION_MISSING,
'module_prop_description_missing': MODULE_PROP_DESCRIPTION_MISSING,
'module_underline_character': MODULE_UNDERLINE_CHARACTER,
# Position, within the whitespace-split argument string, of the path argument
# that run_react_docgen() resolves against SETTINGS['project_base'].
'path_index': 0,
'show_prop_type': False,
'src': '',
# Number of spaces the Formatter uses for one indentation step.
'tab_size': TAB_SIZE,
# When true, get_dirname() returns the directory of the first documented
# module so the Formatter can look up its CommonJS package.
'use_commonjs_module_name': True,
}
# Module-wide settings read by run_react_docgen().
SETTINGS = {
'project_base': None, # absolute path to project base.
'react_docgen': 'react-docgen', # react-docgen command to run.
'rst_output': None, # output filename or no rst output.
}
def find_package(dirname):
    """Locate the nearest CommonJS package manifest at or above `dirname`.

    Every directory from `dirname` upwards is inspected, checking for
    bower.json first and package.json second. The search ends at the
    first manifest found, or when the top of the tree is reached.

    Returns a tuple (dirname, package)

    dirname
        The directory the .json file was found in, or None.

    package
        A dict loaded from the .json file, or None.
    """
    current = dirname
    while current:
        for manifest_name in ('bower.json', 'package.json'):
            manifest_path = os.path.join(current, manifest_name)
            if os.path.exists(manifest_path):
                with open(manifest_path, 'r') as manifest:
                    return current, json.load(manifest)
        parent = os.path.dirname(current)
        if parent == current:
            # Reached the filesystem root; there is nothing further up.
            break
        current = parent
    return None, None
def get_dirname(doc_dict, options):
    """Return the directory of the first module filename in `doc_dict`.

    An empty string is returned when the `use_commonjs_module_name`
    option is off or when `doc_dict` is empty.
    """
    if not (options['use_commonjs_module_name'] and doc_dict):
        return ''
    first_filename = next(iter(doc_dict.keys()))
    return os.path.dirname(first_filename)
def react_docgen(args, react_docgen=REACT_DOCGEN):
    """Run `react-docgen` and return its parsed JSON output.

    `args` is a string of whitespace-separated arguments.

    `react_docgen` is the command to run, likewise a string that may
    contain whitespace-separated words.

    The command's standard output is parsed as JSON; it is expected to be
    a dict whose keys are module filenames (strings)
    and whose values are module metadata (dicts).

    WARNING
    The default for react_docgen is bound when the function is defined,
    so it always evaluates to the initial value of REACT_DOCGEN.
    """
    command = react_docgen.split()
    command.extend(args.split())
    raw_output = subprocess.check_output(command, stderr=subprocess.PIPE)
    return json.loads(raw_output)
def react_doc_to_rst(doc_dict, options, formatter_class, args=''):
    """Render the react-docgen output dict `doc_dict` as ReStructuredText.

    `options` is the dict of directive options and `formatter_class` the
    class that is instantiated to produce the text.

    `args` is the string of arguments passed to the directive.
    The default is ''. Only required when using absolute addressing
    so the Formatter can recover the path_argument.

    Returns whatever the formatter's `run` method returns for `doc_dict`.
    """
    package_search_dir = get_dirname(doc_dict, options)
    return formatter_class(options, package_search_dir, args=args).run(doc_dict)
def run_react_docgen(args, options=DEFAULT_OPTIONS):
    """Execute `SETTINGS['react_docgen']` with the given args.

    `args` is a string which may contain spaces; it is split on whitespace.

    `SETTINGS['react_docgen']` is also a string which may contain spaces.

    `options` is a dict of directive options. Only `path_index` is read
    here, and only when `SETTINGS['project_base']` is set.

    The command output is expected to be a JSON blob representing
    a dict whose keys are the module filenames (strings),
    and whose values are the module metadata (dicts).
    However, the blob is simply converted into a python object and returned.

    Implements the `project_base` setting and the `path_index` option:
    when a project base is configured, the argument at `path_index` is
    resolved against it unless it already starts with the path separator.

    Raises `subprocess.CalledProcessError` if the command exits non-zero,
    and `IndexError` if `path_index` is outside the split argument list.
    """
    arg_list = args.split()
    project_base = SETTINGS['project_base']
    # Identity check: None is a singleton and must not go through __ne__.
    if project_base is not None:
        path_index = options['path_index']
        path_argument = arg_list[path_index]
        if not path_argument.startswith(os.path.sep):
            arg_list[path_index] = os.path.abspath(os.path.join(
                project_base,
                path_argument))
    cmd = SETTINGS['react_docgen'].split() + arg_list
    # stderr is captured so it does not leak into the build output.
    return json.loads(subprocess.check_output(cmd, stderr=subprocess.PIPE))
class Formatter(object):
""" Formatter(options, dirname).run(doc_dict) returns a string.
options
a dict of options.
dirname
the directory to search for the CommonJS package
if the use_commonjs_module_name option is True
doc_dict
a dict of react-docgen module metadata
args
Default is ''. The string of arguments passed to the directive.
Required when using absolute addressing.
"""
def __init__(self, options, dirname, args=''):
    """Store the options and resolve the CommonJS package for `dirname`.

    `package_name` is only set when a package manifest was found;
    `package_dirname_len` is 0 otherwise.
    """
    self.options = options
    self.tab = ' ' * options['tab_size']
    found_in, manifest = find_package(dirname)
    self.package_dirname_len = len(found_in) if found_in else 0
    if found_in:
        self.package_name = manifest['name']
    self.args = args
    self._compile_filters()
def _compile_filters(self):
include = self.options['include']
self.include = re.compile(include) if include else None
exclude = self.options['exclude']
self.exclude = re.compile(exclude) if exclude else None
def _filter(self, filename, module_blob):
"""returns True/False to include/exclude the given module
from the output.
"""
description = module_blob.get('description', '')
return ((not self.include or self.include.search(description))
and
(not self.exclude or not self.exclude.search(description)))
def _get_module_name(self, filename):
if self.package_dirname_len:
module_name = '%s%s' % (
self.package_name,
filename[self.package_dirname_len:])
if module_name.endswith('.js'):
module_name = module_name[:-3]
else:
module_name = filename
return module_name
def _get_object_name(self, obj):
if 'value' in obj:
value = obj['value']
if isinstance(value, str):
return value
elif isinstance(value, list):
return '[%s]' % ', '.join(
self._get_object_name(item) for item in value)
else:
return str(value)
elif 'name' in obj:
return obj['name']
else:
# if this happens show the obj instead of raising an error.
return str(obj)
def _make_definition(self, term, term_definition):
definition = '\n'.join((self.tab + line
for line in term_definition.split('\n'))
if term_definition else [self.tab])
return term + '\n' + definition
def _make_emphasis(self, text, style):
s = ''
s += style + text + style
return s
def _make_heading(self, text, underline_char):
s = ''
s += text + '\n'
s += underline_char * len(text) + '\n\n'
return s
def _make_module(self, filename, module_blob):
s = ''
s += self.<