Commit 5e8153e3 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Added classes to maintain parsers at runtime.

parent a64d4b13
...@@ -3,4 +3,4 @@ __pycache__ ...@@ -3,4 +3,4 @@ __pycache__
*.pyc *.pyc
*.egg-info/ *.egg-info/
data data
build .dependencies/
\ No newline at end of file \ No newline at end of file
...@@ -14,6 +14,13 @@ ...@@ -14,6 +14,13 @@
"test/examples/oqmd/relaxation/vasprun.xml", "test/examples/oqmd/relaxation/vasprun.xml" "test/examples/oqmd/relaxation/vasprun.xml", "test/examples/oqmd/relaxation/vasprun.xml"
] ]
}, },
"name": "Python: nomad/",
"type": "python",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/nomad/"
{ {
"name": "Python: tests/", "name": "Python: tests/",
"type": "python", "type": "python",
...@@ -30,7 +30,7 @@ make html ...@@ -30,7 +30,7 @@ make html
Continue with reading the documentation for further setup and contribution guidelines: Continue with reading the documentation for further setup and contribution guidelines:
``` ```
cd .build/html cd .build/html
python -m SimpleHTTPServer .build -p 8888 python -m http.server 8888
``` ```
Open [http://localhost:8888/html/setup.html](http://localhost:8888/html/setup.html) in Open [http://localhost:8888/html/setup.html](http://localhost:8888/html/setup.html) in
your browser. your browser.
import abc # Copyright 2018 Markus Scheidgen
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
Integration of parsers into the processing
Parsers are developed as independent, individual python programs in their own GIT repositories.
They are built on a common module called *python-common*, which is also maintained in a separate GIT.
All parsers depend on the *meta-info*, which is also maintained in its own GIT.
Assumptions about parsers
For now, we make a few assumptions about parsers
- they always work on the same *meta-info*
- they have no conflicting python requirements
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
- their version is uniquely identified by a GIT commit SHA
Preparing dependencies and parsers during python run-time
To make GIT maintained python modules available, we use:
.. autoclass:: nomad.parsers.PythonGitRepository
Parsers, as a special case of GIT maintained python modules, can be used via:
.. autoclass:: nomad.parsers.Parser
import re import re
import os
import os.path
from git import Repo, Git
from pip import main as pip
from pip._internal import main as pip
import importlib
from nomadcore.parser_backend import JsonParseEventsWriterBackend from nomadcore.parser_backend import JsonParseEventsWriterBackend
from vaspparser import VASPParser
_meta_info_path = './submodules/nomad-meta-info/meta_info/nomad_meta_info/' _meta_info_path = './submodules/nomad-meta-info/meta_info/nomad_meta_info/'
base_dir = './.dependencies'
class PythonGitRepositoryError(Exception):
def __init__(self, msg, repo):
msg = '%s [%s]' % (msg, repo)
class PythonGitRepository():
"""Represents a python module in a git repository.
class Parser(abc.ABC): It allows to fetch a specific commit, install all requirements to
the current python environment, and check the installation via module import.
""" """
Instances specify a parser. It allows to find *main files* from given uploaded def __init__(self, name, git_url, git_commit, modules=[]):
and extracted files. Further, allows to run the parser on those 'main files'. """
name: A name that determines the download path, can contain '/' for sub dirs.
git_url: A publicly available and fetchable url to the GIT repository.
git_commit: The full commit SHA of the desired commit.
modules: A list of python module names that is used to confirm the installation.
""" """
def __init__(self, name, main_file_re, main_contents_re):
super().__init__() super().__init__() = name = name
self.git_url = git_url
self.git_commit = git_commit
self.modules = modules
def prepare(self, force_install=False):
"""Makes sure that the repository is fetched, at the right commit, and installed.
force_install: default is *False*. Allows to force install, e.g. after git commit or
url change.
PythonGitRepositoryError: if something went wrong.
# check/change working directory
old_cwd = os.getcwd()
cwd = os.path.join(base_dir,
if not os.path.exists(cwd):
# check git/do init
if os.path.exists('.git'):
git = Repo('./')
git_cmd = Git('./')
git = Repo('./')
origin = git.create_remote('origin', self.git_url)
# check commit/checkout
if 'master' not in git.heads:
origin = git.remote('origin')
git.create_head('master', self.git_commit)
elif self.git_commit != git.heads.master.commit:
origin = git.remote('origin')
assert self.git_commit != git.heads.master.commit, \
'Actual and desired commit do not match'
# check install
def is_installed():
for module in self.modules:
module_spec = importlib.util.find_spec(module)
if module_spec is None:
return False
return True
if is_installed() and not force_install:
# check/install requirements.txt
if os.path.exists('requirements.txt'):
# try twice to support circular dependencies
for _ in range(1, 2):
pipcode = pip(['install', '-r', 'requirements.txt'])
if pipcode == 0:
if pipcode != 0:
raise PythonGitRepositoryError(
'Could not install requirements (pip code=%s)' % pipcode, self)
# check/install
if os.path.exists(''):
pipcode = pip(['install', '-e', '.'])
if pipcode != 0:
raise PythonGitRepositoryError(
'Could not install (pip code=%s)' % pipcode, repo=self)
# check install again
if not is_installed():
raise PythonGitRepositoryError(
'Some modules are not installed after install', repo=self)
# reload, loaded modules when installed because of force_install
except PythonGitRepositoryError as e:
raise e
except Exception as e:
raise PythonGitRepositoryError(
'Unexpected exception during preparation: %s' % e, repo=self)
class Parser(PythonGitRepository):
Instances specify a parser. It allows to find *main files* from given uploaded
and extracted files. Further, allows to run the parser on those 'main files'.
def __init__(self, name, git_url, git_commit, parser, main_file_re, main_contents_re):
modules = ['.'.join(parser.split('.')[:-1])]
os.path.join('parsers', name), git_url, git_commit, modules=modules)
self.parser = parser
self._main_file_re = re.compile(main_file_re) self._main_file_re = re.compile(main_file_re)
self._main_contents_re = re.compile(main_contents_re) self._main_contents_re = re.compile(main_contents_re)
...@@ -28,15 +189,22 @@ class Parser(abc.ABC): ...@@ -28,15 +189,22 @@ class Parser(abc.ABC):
if file: if file:
file.close() file.close()
def run(self, mainfile): def run(self, mainfile):
pass module_name = self.parser.split('.')[:-1]
parser_class = self.parser.split('.')[1]
module = importlib.import_module('.'.join(module_name))
Parser = getattr(module, parser_class)
parser = Parser(backend=JsonParseEventsWriterBackend)
class VASPRunParser(Parser): class VASPRunParser(Parser):
def __init__(self): def __init__(self):
super().__init__( super().__init__(
name='VASPRunParser', name='VASPRunParser',
main_file_re=r'^.*\.xml$', main_file_re=r'^.*\.xml$',
main_contents_re=( main_contents_re=(
r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*' r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
...@@ -46,11 +214,16 @@ class VASPRunParser(Parser): ...@@ -46,11 +214,16 @@ class VASPRunParser(Parser):
r'?') r'?')
) )
def run(self, mainfile):
parser = VASPParser(backend=JsonParseEventsWriterBackend)
parsers = [ parsers = [
VASPRunParser() VASPRunParser()
] ]
parser_dict = { parser for parser in parsers} parser_dict = { parser for parser in parsers}
def prepare_parsers(force_install=False):
for parser in parsers:
if __name__ == '__main__':
...@@ -22,7 +22,7 @@ import logstash ...@@ -22,7 +22,7 @@ import logstash
import nomad.config as config import nomad.config as config
import nomad.files as files import nomad.files as files
from nomad.parsers import parsers, parser_dict from nomad.parsers import parsers, parser_dict, prepare_parsers
if config.logstash.enabled: if config.logstash.enabled:
...@@ -49,6 +49,8 @@ app.conf.update( ...@@ -49,6 +49,8 @@ app.conf.update(
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
@app.task() @app.task()
def open_upload(upload_id): def open_upload(upload_id):
Subproject commit ddf8495944fbbcb62801f69b2c2c6c3d6099129d
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment