Commit 5e8153e3 authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Added classes to maintain parsers at runtime.

parent a64d4b13
...@@ -3,4 +3,4 @@ __pycache__ ...@@ -3,4 +3,4 @@ __pycache__
*.pyc *.pyc
*.egg-info/ *.egg-info/
data data
build .dependencies/
\ No newline at end of file \ No newline at end of file
...@@ -14,6 +14,13 @@ ...@@ -14,6 +14,13 @@
"test/examples/oqmd/relaxation/vasprun.xml", "test/examples/oqmd/relaxation/vasprun.xml" "test/examples/oqmd/relaxation/vasprun.xml", "test/examples/oqmd/relaxation/vasprun.xml"
] ]
}, },
{
"name": "Python: nomad/parsers.py",
"type": "python",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/nomad/parsers.py"
},
{ {
"name": "Python: tests/test_files.py", "name": "Python: tests/test_files.py",
"type": "python", "type": "python",
......
...@@ -30,7 +30,7 @@ make html ...@@ -30,7 +30,7 @@ make html
Continue with reading the documentation for further setup and contribution guidelines: Continue with reading the documentation for further setup and contribution guidelines:
``` ```
cd .build/html cd .build/html
python -m SimpleHTTPServer .build -p 8888 python -m http.server 8888
``` ```
Open [http://localhost:8888/html/setup.html](http://localhost:8888/html/setup.html) in Open [http://localhost:8888/html/setup.html](http://localhost:8888/html/setup.html) in
your browser. your browser.
import abc # Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Integration of parsers into the processing
==========================================
Parsers are developed as independent, individual python programs in their own GIT repositories.
They are built on a common module called *python-common*, also in a separate GIT.
All parsers depend on the *meta-info*, which is also maintained in its own GIT.
Assumption about parsers
------------------------
For now, we make a few assumptions about parsers
- they always work on the same *meta-info*
- they have no conflicting python requirements
- they can be loaded at the same time and can be used within the same python process
- they are uniquely identified by a GIT URL and publicly accessible
- their version is uniquely identified by a GIT commit SHA
Preparing dependencies and parsers during python run-time
---------------------------------------------------------
To make GIT maintained python modules available, we use:
.. autoclass:: nomad.parsers.PythonGitRepository
Parsers, as a special case of GIT maintained python modules, can be used via:
.. autoclass:: nomad.parsers.Parser
"""
import re import re
import os
import os.path
from git import Repo, Git
try:
from pip import main as pip
except:
from pip._internal import main as pip
import importlib
from nomadcore.parser_backend import JsonParseEventsWriterBackend from nomadcore.parser_backend import JsonParseEventsWriterBackend
from vaspparser import VASPParser
_meta_info_path = './submodules/nomad-meta-info/meta_info/nomad_meta_info/' _meta_info_path = './submodules/nomad-meta-info/meta_info/nomad_meta_info/'
base_dir = './.dependencies'
class PythonGitRepositoryError(Exception):
    """Error raised when preparing a git maintained python module fails.

    The exception message is the given message followed by the string
    representation of the affected repository in square brackets.
    """

    def __init__(self, msg, repo):
        """
        Args:
            msg: Description of what went wrong.
            repo: The repository the error relates to; it is only used for
                building the message.
        """
        full_message = '%s [%s]' % (msg, repo)
        super().__init__(full_message)
class PythonGitRepository():
    """Represents a python module in a git repository.

    It allows to fetch a specific commit, install all requirements to
    the current python environment, and check the installation via module import.
    """

    def __init__(self, name, git_url, git_commit, modules=None):
        """
        Args:
            name: A name that determines the download path, can contain '/' for sub dirs.
            git_url: A publicly available and fetchable url to the GIT repository.
            git_commit: The full commit SHA of the desired commit.
            modules: A list of python module names that is used to confirm the installation.
                Defaults to no modules.
        """
        super().__init__()
        self.name = name
        self.git_url = git_url
        self.git_commit = git_commit
        # Copy the argument: a literal list default would be shared (and
        # mutated) across all instances created without ``modules``.
        self.modules = list(modules) if modules is not None else []

    def __repr__(self):
        # Used when the repository is formatted into error messages.
        return '%s(name=%r, git_url=%r, git_commit=%r)' % (
            type(self).__name__, self.name, self.git_url, self.git_commit)

    def _is_installed(self):
        """Returns True if all of ``self.modules`` can be found for import."""
        # ``import importlib`` alone does not guarantee that the ``util``
        # submodule is loaded.
        import importlib.util

        # Modules might have been installed after the interpreter started.
        importlib.invalidate_caches()
        for module in self.modules:
            try:
                if importlib.util.find_spec(module) is None:
                    return False
            except (ImportError, ValueError):
                # Raised for a missing parent package of a dotted name, or for
                # an empty module name; both mean "not importable".
                return False
        return True

    def _checkout_commit(self):
        """Makes sure the repository in the current directory is at ``git_commit``.

        Expects the current working directory to be the repository directory.
        """
        if os.path.exists('.git'):
            git = Repo('./')
        else:
            Git('./').init()
            git = Repo('./')
            git.create_remote('origin', self.git_url)

        moved = False
        if 'master' not in git.heads:
            git.remote('origin').fetch(self.git_commit)
            git.create_head('master', self.git_commit)
            moved = True
        elif git.heads.master.commit.hexsha != self.git_commit:
            # Compare SHA strings; a string never equals a Commit object.
            git.remote('origin').fetch(self.git_commit)
            git.heads.master.set_commit(self.git_commit)
            if git.heads.master.commit.hexsha != self.git_commit:
                raise PythonGitRepositoryError(
                    'Actual and desired commit do not match', repo=self)
            moved = True

        # When the branch was created or moved, the working tree does not yet
        # reflect it and has to be overwritten. An unchanged checkout is left
        # alone so that local modifications survive.
        git.heads.master.checkout(force=moved)

    def _install(self):
        """Installs requirements.txt and setup.py of the current directory via pip."""
        if os.path.exists('requirements.txt'):
            # try twice to support circular dependencies
            for _ in range(2):
                pipcode = pip(['install', '-r', 'requirements.txt'])
                if pipcode == 0:
                    break
            if pipcode != 0:
                raise PythonGitRepositoryError(
                    'Could not install requirements (pip code=%s)' % pipcode, repo=self)

        if os.path.exists('setup.py'):
            pipcode = pip(['install', '-e', '.'])
            if pipcode != 0:
                raise PythonGitRepositoryError(
                    'Could not install (pip code=%s)' % pipcode, repo=self)

    def prepare(self, force_install=False):
        """Makes sure that the repository is fetched, at the right commit, and installed.

        Args:
            force_install: default is *False*. Allows to force install, e.g. after git commit or
                url change.

        Raises:
            PythonGitRepositoryError: if something went wrong.
        """
        # All steps work relative to the repository directory; the previous
        # working directory is always restored.
        old_cwd = os.getcwd()
        try:
            cwd = os.path.join(base_dir, self.name)
            os.makedirs(cwd, exist_ok=True)
            os.chdir(cwd)

            self._checkout_commit()

            if self._is_installed() and not force_install:
                return

            self._install()

            if not self._is_installed():
                raise PythonGitRepositoryError(
                    'Some modules are not installed after install', repo=self)

            # TODO: reload already loaded modules when installed because of force_install
        except PythonGitRepositoryError:
            raise
        except Exception as e:
            raise PythonGitRepositoryError(
                'Unexpected exception during preparation: %s' % e, repo=self) from e
        finally:
            os.chdir(old_cwd)
class Parser(PythonGitRepository):
"""
Instances specify a parser. It allows to find *main files* from given uploaded
and extracted files. Further, allows to run the parser on those 'main files'.
"""
def __init__(self, name, git_url, git_commit, parser, main_file_re, main_contents_re):
modules = ['.'.join(parser.split('.')[:-1])]
super().__init__(
os.path.join('parsers', name), git_url, git_commit, modules=modules)
self.parser = parser
self._main_file_re = re.compile(main_file_re) self._main_file_re = re.compile(main_file_re)
self._main_contents_re = re.compile(main_contents_re) self._main_contents_re = re.compile(main_contents_re)
...@@ -28,15 +189,22 @@ class Parser(abc.ABC): ...@@ -28,15 +189,22 @@ class Parser(abc.ABC):
if file: if file:
file.close() file.close()
@abc.abstractmethod
def run(self, mainfile):
    """Runs the configured parser class on the given main file.

    ``self.parser`` is a dotted path whose last component is the parser class
    name and whose leading components name the module that provides it.

    Args:
        mainfile: The main file to parse; passed on to the parser's ``parse``.
    """
    # Split at the last dot, so module paths with several package levels
    # resolve to the right class name.
    module_name, _, class_name = self.parser.rpartition('.')
    module = importlib.import_module(module_name)
    parser_class = getattr(module, class_name)
    parser = parser_class(backend=JsonParseEventsWriterBackend)
    parser.parse(mainfile)
class VASPRunParser(Parser): class VASPRunParser(Parser):
def __init__(self): def __init__(self):
super().__init__( super().__init__(
name='VASPRunParser', name='VASPRunParser',
git_url='git@gitlab.mpcdf.mpg.de:nomad-lab/parser-vasp.git',
git_commit='ddf8495944fbbcb62801f69b2c2c6c3d6099129d',
parser='vaspparser.VASPParser',
main_file_re=r'^.*\.xml$', main_file_re=r'^.*\.xml$',
main_contents_re=( main_contents_re=(
r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*' r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
...@@ -46,11 +214,16 @@ class VASPRunParser(Parser): ...@@ -46,11 +214,16 @@ class VASPRunParser(Parser):
r'?') r'?')
) )
def run(self, mainfile):
parser = VASPParser(backend=JsonParseEventsWriterBackend)
parser.parse(mainfile)
parsers = [ parsers = [
VASPRunParser() VASPRunParser()
] ]
parser_dict = {parser.name: parser for parser in parsers} parser_dict = {parser.name: parser for parser in parsers}
def prepare_parsers(force_install=False):
    """Prepares every parser in the module level ``parsers`` list.

    Args:
        force_install: Passed on to the ``prepare`` method of each parser.
    """
    for registered_parser in parsers:
        registered_parser.prepare(force_install=force_install)
if __name__ == '__main__':
prepare_parsers(force_install=True)
...@@ -22,7 +22,7 @@ import logstash ...@@ -22,7 +22,7 @@ import logstash
import nomad.config as config import nomad.config as config
import nomad.files as files import nomad.files as files
from nomad.parsers import parsers, parser_dict from nomad.parsers import parsers, parser_dict, prepare_parsers
if config.logstash.enabled: if config.logstash.enabled:
...@@ -49,6 +49,8 @@ app.conf.update( ...@@ -49,6 +49,8 @@ app.conf.update(
logger = get_task_logger(__name__) logger = get_task_logger(__name__)
prepare_parsers(force_install=True)
@app.task() @app.task()
def open_upload(upload_id): def open_upload(upload_id):
......
Subproject commit ddf8495944fbbcb62801f69b2c2c6c3d6099129d
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment