Commit 5e8153e3 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Added classes to maintain parsers at runtime.

parent a64d4b13
...@@ -3,4 +3,4 @@ __pycache__ ...@@ -3,4 +3,4 @@ __pycache__
*.pyc *.pyc
*.egg-info/ *.egg-info/
data data
build .dependencies/
\ No newline at end of file \ No newline at end of file
...@@ -14,6 +14,13 @@ ...@@ -14,6 +14,13 @@
"test/examples/oqmd/relaxation/vasprun.xml", "test/examples/oqmd/relaxation/vasprun.xml" "test/examples/oqmd/relaxation/vasprun.xml", "test/examples/oqmd/relaxation/vasprun.xml"
] ]
}, },
"name": "Python: nomad/",
"type": "python",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/nomad/"
{ {
"name": "Python: tests/", "name": "Python: tests/",
"type": "python", "type": "python",
...@@ -30,7 +30,7 @@ make html ...@@ -30,7 +30,7 @@ make html
Continue with reading the documentation for further setup and contribution guidelines: Continue with reading the documentation for further setup and contribution guidelines:
``` ```
cd .build/html cd .build/html
python -m SimpleHTTPServer .build -p 8888 python -m http.server 8888
``` ```
Open [http://localhost:8888/html/setup.html](http://localhost:8888/html/setup.html) in Open [http://localhost:8888/html/setup.html](http://localhost:8888/html/setup.html) in
your browser. your browser.
import abc # Copyright 2018 Markus Scheidgen
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
Integration of parsers into the processing
Parsers are developed as independent, individual python programs in their own GIT repositories.
They are built on a common module called *python-common*, also in a separate GIT.
All parsers depend on the *meta-info*, which is also maintained in its own GIT.
Assumption about parsers
For now, we make a few assumptions about parsers
- they always work on the same *meta-info*
- they have no conflicting python requirements
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
- their version is uniquely identified by a GIT commit SHA
Preparing dependencies and parsers during python run-time
To make GIT maintained python modules available, we use:
.. autoclass:: nomad.parsers.PythonGitRepository
Parsers, as a special case of GIT maintained python modules, can be used via:
.. autoclass:: nomad.parsers.Parser
import re import re
import os
import os.path
from git import Repo, Git
from pip import main as pip
from pip._internal import main as pip
import importlib
from nomadcore.parser_backend import JsonParseEventsWriterBackend from nomadcore.parser_backend import JsonParseEventsWriterBackend
from vaspparser import VASPParser
_meta_info_path = './submodules/nomad-meta-info/meta_info/nomad_meta_info/' _meta_info_path = './submodules/nomad-meta-info/meta_info/nomad_meta_info/'
base_dir = './.dependencies'
class PythonGitRepositoryError(Exception):
def __init__(self, msg, repo):
msg = '%s [%s]' % (msg, repo)
class Parser(abc.ABC): class PythonGitRepository():
"""Represents a python module in a git repository.
It allows to fetch a specific commit, install all requirements to
the current python environment, and check the installation via module import.
def __init__(self, name, git_url, git_commit, modules=[]):
name: A name that determines the download path, can contain '/' for sub dirs.
git_url: A publicly available and fetchable url to the GIT repository.
git_commit: The full commit SHA of the desired commit.
modules: A list of python module names that is used to confirm the installation.
super().__init__() = name
self.git_url = git_url
self.git_commit = git_commit
self.modules = modules
def prepare(self, force_install=False):
"""Makes sure that the repository is fetched, at the right commit, and installed.
force_install: default is *False*. Allows to force install, e.g. after git commit or
url change.
PythonGitRepositoryError: if something went wrong.
# check/change working directory
old_cwd = os.getcwd()
cwd = os.path.join(base_dir,
if not os.path.exists(cwd):
# check git/do init
if os.path.exists('.git'):
git = Repo('./')
git_cmd = Git('./')
git = Repo('./')
origin = git.create_remote('origin', self.git_url)
# check commit/checkout
if 'master' not in git.heads:
origin = git.remote('origin')
git.create_head('master', self.git_commit)
elif self.git_commit != git.heads.master.commit:
origin = git.remote('origin')
assert self.git_commit != git.heads.master.commit, \
'Actual and desired commit do not match'
# check install
def is_installed():
for module in self.modules:
module_spec = importlib.util.find_spec(module)
if module_spec is None:
return False
return True
if is_installed() and not force_install:
# check/install requirements.txt
if os.path.exists('requirements.txt'):
# try twice to support circular dependencies
for _ in range(1, 2):
pipcode = pip(['install', '-r', 'requirements.txt'])
if pipcode == 0:
if pipcode != 0:
raise PythonGitRepositoryError(
'Could not install requirements (pip code=%s)' % pipcode, self)
# check/install
if os.path.exists(''):
pipcode = pip(['install', '-e', '.'])
if pipcode != 0:
raise PythonGitRepositoryError(
'Could not install (pip code=%s)' % pipcode, repo=self)
# check install again
if not is_installed():
raise PythonGitRepositoryError(
'Some modules are not installed after install', repo=self)
# reload, loaded modules when installed because of force_install
except PythonGitRepositoryError as e:
raise e
except Exception as e:
raise PythonGitRepositoryError(
'Unexpected exception during preparation: %s' % e, repo=self)
class Parser(PythonGitRepository):
""" """
Instances specify a parser. It allows to find *main files* from given uploaded Instances specify a parser. It allows to find *main files* from given uploaded
and extracted files. Further, allows to run the parser on those 'main files'. and extracted files. Further, allows to run the parser on those 'main files'.
""" """
def __init__(self, name, main_file_re, main_contents_re): def __init__(self, name, git_url, git_commit, parser, main_file_re, main_contents_re):
super().__init__() modules = ['.'.join(parser.split('.')[:-1])] = name super().__init__(
os.path.join('parsers', name), git_url, git_commit, modules=modules)
self.parser = parser
self._main_file_re = re.compile(main_file_re) self._main_file_re = re.compile(main_file_re)
self._main_contents_re = re.compile(main_contents_re) self._main_contents_re = re.compile(main_contents_re)
...@@ -28,15 +189,22 @@ class Parser(abc.ABC): ...@@ -28,15 +189,22 @@ class Parser(abc.ABC):
if file: if file:
file.close() file.close()
def run(self, mainfile): def run(self, mainfile):
pass module_name = self.parser.split('.')[:-1]
parser_class = self.parser.split('.')[1]
module = importlib.import_module('.'.join(module_name))
Parser = getattr(module, parser_class)
parser = Parser(backend=JsonParseEventsWriterBackend)
class VASPRunParser(Parser): class VASPRunParser(Parser):
def __init__(self): def __init__(self):
super().__init__( super().__init__(
name='VASPRunParser', name='VASPRunParser',
main_file_re=r'^.*\.xml$', main_file_re=r'^.*\.xml$',
main_contents_re=( main_contents_re=(
r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*' r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
...@@ -46,11 +214,16 @@ class VASPRunParser(Parser): ...@@ -46,11 +214,16 @@ class VASPRunParser(Parser):
r'?') r'?')
) )
def run(self, mainfile):
parser = VASPParser(backend=JsonParseEventsWriterBackend)
parsers = [ parsers = [
VASPRunParser() VASPRunParser()
] ]
parser_dict = { parser for parser in parsers} parser_dict = { parser for parser in parsers}
def prepare_parsers(force_install=False):
for parser in parsers:
if __name__ == '__main__':
...@@ -22,7 +22,7 @@ import logstash ...@@ -22,7 +22,7 @@ import logstash
import nomad.config as config import nomad.config as config
import nomad.files as files import nomad.files as files
from nomad.parsers import parsers, parser_dict from nomad.parsers import parsers, parser_dict, prepare_parsers
if config.logstash.enabled: if config.logstash.enabled:
...@@ -49,6 +49,8 @@ app.conf.update( ...@@ -49,6 +49,8 @@ app.conf.update(
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
@app.task() @app.task()
def open_upload(upload_id): def open_upload(upload_id):
Subproject commit ddf8495944fbbcb62801f69b2c2c6c3d6099129d
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment