From d70ce4eafcac169a7e3be0fd114c633677d2df89 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 11:17:43 +0100 Subject: [PATCH 1/9] Moved from dependencies.py to git submodules. --- .gitignore | 1 - .gitmodules | 53 +++++-- README.md | 3 +- dependencies.sh | 11 ++ dependencies/nomad-meta-info | 1 + dependencies/parsers/bigdft | 1 + dependencies/parsers/cp2k | 1 + dependencies/parsers/cpmd | 1 + dependencies/parsers/crystal | 1 + dependencies/parsers/exciting | 1 + dependencies/parsers/fhi-aims | 1 + dependencies/parsers/nwchem | 1 + dependencies/parsers/vasp | 1 + dependencies/parsers/wien2k | 1 + dependencies/python_common | 1 + docs/dev_guidelines.rst | 18 ++- docs/parser_tutorial.md | 13 +- docs/reference.rst | 5 - docs/setup.md | 4 +- nomad/dependencies.py | 261 ---------------------------------- nomad/parsing/__init__.py | 26 ++-- nomad/parsing/parser.py | 14 +- tests/profile_vasp.py | 15 -- tests/test_parsing.py | 2 +- 24 files changed, 101 insertions(+), 336 deletions(-) create mode 100755 dependencies.sh create mode 160000 dependencies/nomad-meta-info create mode 160000 dependencies/parsers/bigdft create mode 160000 dependencies/parsers/cp2k create mode 160000 dependencies/parsers/cpmd create mode 160000 dependencies/parsers/crystal create mode 160000 dependencies/parsers/exciting create mode 160000 dependencies/parsers/fhi-aims create mode 160000 dependencies/parsers/nwchem create mode 160000 dependencies/parsers/vasp create mode 160000 dependencies/parsers/wien2k create mode 160000 dependencies/python_common delete mode 100644 nomad/dependencies.py delete mode 100644 tests/profile_vasp.py diff --git a/.gitignore b/.gitignore index 8fe1d8860..84252b0a8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,6 @@ __pycache__ *.pyc *.egg-info/ /data/ -.dependencies/ .volumes/ .pytest_cache/ .coverage diff --git a/.gitmodules b/.gitmodules index 268198241..c3ac1c7f4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,44 @@ -[submodule 
"submodules/python-common"] - path = submodules/python-common - url = git@gitlab.mpcdf.mpg.de:nomad-lab/python-common.git -[submodule "submodules/meta-info"] - path = submodules/nomad-meta-info - url = git@gitlab.mpcdf.mpg.de:nomad-lab/nomad-meta-info.git -[submodule "submodules/parsers/vasp"] - path = submodules/parsers/vasp - url = git@gitlab.mpcdf.mpg.de:nomad-lab/parser-vasp.git +[submodule "dependencies/nomad-meta-info"] + path = dependencies/nomad-meta-info + url = https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info.git + branch = nomad-fair +[submodule "dependencies/python_common"] + path = dependencies/python_common + url = https://gitlab.mpcdf.mpg.de/nomad-lab/python-common.git + branch = nomad-fair +[submodule "dependencies/parsers/vasp"] + path = dependencies/parsers/vasp + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-vasp.git + branch = nomad-fair +[submodule "dependencies/parsers/exciting"] + path = dependencies/parsers/exciting + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-exciting.git + branch = nomad-fair +[submodule "dependencies/parsers/fhi-aims"] + path = dependencies/parsers/fhi-aims + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-fhi-aims.git + branch = nomad-fair +[submodule "dependencies/parsers/cp2k"] + path = dependencies/parsers/cp2k + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-cp2k + branch = nomad-fair +[submodule "dependencies/parsers/crystal"] + path = dependencies/parsers/crystal + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-crystal + branch = nomad-fair +[submodule "dependencies/parsers/cpmd"] + path = dependencies/parsers/cpmd + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-cpmd + branch = nomad-fair +[submodule "dependencies/parsers/nwchem"] + path = dependencies/parsers/nwchem + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-nwchem + branch = nomad-fair +[submodule "dependencies/parsers/bigdft"] + path = dependencies/parsers/bigdft + url = 
https://gitlab.mpcdf.mpg.de/nomad-lab/parser-big-dft + branch = nomad-fair +[submodule "dependencies/parsers/wien2k"] + path = dependencies/parsers/wien2k + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-wien2k + branch = nomad-fair \ No newline at end of file diff --git a/README.md b/README.md index bc1e84904..5af532592 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,11 @@ You can access the running system and its documentation here: ### Generate the docs from the source -First, clone this repo: +First, clone this repo and init its submodules: ``` git clone git@gitlab.mpcdf.mpg.de:nomad-lab/nomad-FAIR.git cd nomad-FAIR +git submodule update --init --depth 1 ``` Second, create and source your own virtual python environment: diff --git a/dependencies.sh b/dependencies.sh new file mode 100755 index 000000000..14894051f --- /dev/null +++ b/dependencies.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -e + +git config -f .gitmodules --get-regexp '^submodule\..*\.path$' | + while read path_key path + do + echo $path + [ -f $path/requirements.txt ] && pip install -r $path/requirements.txt + [ -f $path/setup.py ] && pip install $1 $path + done diff --git a/dependencies/nomad-meta-info b/dependencies/nomad-meta-info new file mode 160000 index 000000000..716f34954 --- /dev/null +++ b/dependencies/nomad-meta-info @@ -0,0 +1 @@ +Subproject commit 716f3495427527446b693e62bfdf8bd30290d1d0 diff --git a/dependencies/parsers/bigdft b/dependencies/parsers/bigdft new file mode 160000 index 000000000..4114f96be --- /dev/null +++ b/dependencies/parsers/bigdft @@ -0,0 +1 @@ +Subproject commit 4114f96be4985a288b293be1ef26f830d6065eec diff --git a/dependencies/parsers/cp2k b/dependencies/parsers/cp2k new file mode 160000 index 000000000..b87cd29be --- /dev/null +++ b/dependencies/parsers/cp2k @@ -0,0 +1 @@ +Subproject commit b87cd29be9fe1637c20a23087ce98855ff1fea44 diff --git a/dependencies/parsers/cpmd b/dependencies/parsers/cpmd new file mode 160000 index 000000000..a8b323f8f --- /dev/null +++ 
b/dependencies/parsers/cpmd @@ -0,0 +1 @@ +Subproject commit a8b323f8f8b24a5cead4c7e0cca15a78193b12b5 diff --git a/dependencies/parsers/crystal b/dependencies/parsers/crystal new file mode 160000 index 000000000..a824f0f72 --- /dev/null +++ b/dependencies/parsers/crystal @@ -0,0 +1 @@ +Subproject commit a824f0f72303bb8e39c8ae7ee710210edc287331 diff --git a/dependencies/parsers/exciting b/dependencies/parsers/exciting new file mode 160000 index 000000000..3028201af --- /dev/null +++ b/dependencies/parsers/exciting @@ -0,0 +1 @@ +Subproject commit 3028201afe3f5b839afe3212769f193c9f634069 diff --git a/dependencies/parsers/fhi-aims b/dependencies/parsers/fhi-aims new file mode 160000 index 000000000..cb54c37d0 --- /dev/null +++ b/dependencies/parsers/fhi-aims @@ -0,0 +1 @@ +Subproject commit cb54c37d0c1b85cd57486f572de7bc2dbc294123 diff --git a/dependencies/parsers/nwchem b/dependencies/parsers/nwchem new file mode 160000 index 000000000..179c4ce3e --- /dev/null +++ b/dependencies/parsers/nwchem @@ -0,0 +1 @@ +Subproject commit 179c4ce3e0990b431869a7d644590689e91f8fe2 diff --git a/dependencies/parsers/vasp b/dependencies/parsers/vasp new file mode 160000 index 000000000..05e5f6081 --- /dev/null +++ b/dependencies/parsers/vasp @@ -0,0 +1 @@ +Subproject commit 05e5f608178df11fe5563dcb88c3e475d51cd230 diff --git a/dependencies/parsers/wien2k b/dependencies/parsers/wien2k new file mode 160000 index 000000000..038997516 --- /dev/null +++ b/dependencies/parsers/wien2k @@ -0,0 +1 @@ +Subproject commit 03899751624694c7099d5bc3e5b48971d924bc32 diff --git a/dependencies/python_common b/dependencies/python_common new file mode 160000 index 000000000..f03817034 --- /dev/null +++ b/dependencies/python_common @@ -0,0 +1 @@ +Subproject commit f03817034886e2c7225806e284a1eb8ecd8b3e25 diff --git a/docs/dev_guidelines.rst b/docs/dev_guidelines.rst index e27b948e4..6ae4202f3 100644 --- a/docs/dev_guidelines.rst +++ b/docs/dev_guidelines.rst @@ -115,16 +115,14 @@ said entities. 
NOMAD-coe Dependencies ---------------------- -We currently clone and install NOMAD-coe dependencies *"outside"* the nomad-FAIRDI project -(see :py:mod:`nomad.dependencies`). The installed projects become part of the python -environment and all dependencies are used like regular pipy packages and python modules. - -This allows us to target (e.g. install) individual commits. In theory, these might -change during runtime, allowing to update parsers or normalizers on a running nomad. -More importantly, we can address commit hashes to identify exact parser/normalizer versions. -On the downside, common functions for all dependencies (e.g. the python-common package, -or nomad_meta_info) cannot be part of the nomad-FAIRDI project. In general, it is hard -to simultaneously develop nomad-FAIRDI and NOMAD-coe dependencies. +We currently use git submodules to maintain references to NOMAD-coe dependencies. +All dependencies are python packages and installed via pip to your python environment. + +This allows us to target (e.g. install) individual commits. More importantly, we can address +commit hashes to identify exact parser/normalizer versions. On the downside, common functions +for all dependencies (e.g. the python-common package, or nomad_meta_info) cannot be part +of the nomad-FAIRDI project. In general, it is hard to simultaneously develop nomad-FAIRDI +and NOMAD-coe dependencies. Another approach is to integrate the NOMAD-coe sources with nomad-FAIRDI. The lacking availability of individual commit hashes, could be replaces with hashes of source-code diff --git a/docs/parser_tutorial.md b/docs/parser_tutorial.md index 882770a61..8d684cd8f 100644 --- a/docs/parser_tutorial.md +++ b/docs/parser_tutorial.md @@ -315,21 +315,16 @@ You a writing a python program. You know what to do. 
## Added the parser to nomad@FAIRDI -First, you add your project to the list of :py:mod:`nomad.dependencies`: -```python -dependencies = [ - PythonGit( - name='parsers/vasp', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-vasp.git', - git_branch='nomad-fair'), -] +First, you add your parser to the dependencies. Put it into the dependencies folder, then: +``` +git submodule add dependencies/parsers/vasp ``` Second, you add your parser to the list of parsers :py:mod:`nomad.parsing`: ```python parsers = [ LegacyParser( - python_git=dependencies['parsers/vasp'], + name='parsers/vasp', parser_class_name='vaspparser.VaspOutcarParser', main_file_re=r'^OUTCAR(\.[^\.]*)?$', main_contents_re=(r'^\svasp\..*$') diff --git a/docs/reference.rst b/docs/reference.rst index a4476e539..e745ee8f4 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -11,11 +11,6 @@ nomad.infrastructure .. automodule:: nomad.infrastructure :members: -nomad.dependencies ------------------- -.. automodule:: nomad.dependencies - :members: - nomad.datamodel --------------- .. automodule:: nomad.datamodel diff --git a/docs/setup.md b/docs/setup.md index dd668cd0f..baeadd8b1 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -63,10 +63,10 @@ different parser, normalizer versions from within the running nomad infrastructu To run the dependencies script and install all dependencies into your environment: ``` -python nomad/dependencies.py --dev +./dependencies.sh ``` This will checkout the proper version of the respective NOMAD-coe modules, install -further requirements, and install the modules themselves. The `--dev` option will install +further requirements, and install the modules themselves. The `-e` option will install the NOMAD-coe dependencies with symbolic links allowing you to change the downloaded dependency code without having to reinstall after. 
diff --git a/nomad/dependencies.py b/nomad/dependencies.py deleted file mode 100644 index 23dd0e19b..000000000 --- a/nomad/dependencies.py +++ /dev/null @@ -1,261 +0,0 @@ -# Copyright 2018 Markus Scheidgen -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an"AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -This module allows to configure and install all necessary legecy nomad GIT repositories -to process (parser, normalizer, etc.) uploaded calculations. - -Parsers are developed as independent, individual python programs in their own GIT repositories. -They are built on a common modules called *python-common*, also in a separate GIT. -All parsers depend on the *meta-info*, which is also maintained in its own GIT. - -Using dependencies ------------------- - -To install all dependencies (clone, install requirements, install the dependency as python package) -use the following: - -.. code-block:: sh - - python -m nomad.dependencies --dev - -With the `--dev` parameter *pip* will use `-e` to install. The dependencies are cloned -into `.dependencies` and installed into your current python environment. - - -Adding dependencies ----------------------- - -To make GIT maintained python modules available, we use: - -.. autoclass:: PythonGit - :members: - - -Dependencies are listed in - -.. autodata:: dependencies - - -Dependencies can be programmatically installed with: - -.. 
autofunction:: prepare -""" -import sys -import os -import os.path -import logging -import subprocess -import shutil - -_logger = logging.getLogger(__name__) -base_dir = './.dependencies' - - -class PythonGitError(Exception): - def __init__(self, msg, repo): - msg = '%s [%s]' % (msg, repo) - super().__init__(msg) - - -class PythonGit(): - """ - Represents a python module in a git repository. It allows to fetch a specific commit, - install all requirements to the current python environment, and check the installation - via module import. - - This is only useful before you want to use the respective module in a different - python process, because it will not try to reload any already loaded modules into - the current python process. - - Arguments: - name: A name that determines the download path, can contain '/' for sub dirs. - Names are important, because modules might use relatives paths between - them. - git_url: A publically available and fetchable url to the GIT repository. - git_branch: The branch that should be used. - """ - def __init__(self, name: str, git_url: str, git_branch: str) -> None: - self.name = name - self.git_url = git_url - self.git_branch = git_branch - - def _run_pip_install(self, *args): - pipcode = 0 - - # some weird interaction of pip and virtualenv causes a bug that does - # not allow to install in docker due to a wrong PIP_REQ_TRACKER path. This - # is a workarround. - pip_req_tracker_key = 'PIP_REQ_TRACKER' - env = dict(os.environ) - if pip_req_tracker_key in env: - del(env['PIP_REQ_TRACKER']) - pipcode = subprocess.call( - [sys.executable, '-m', 'pip', 'install'] + list(args), - env=env) - - if pipcode != 0: - raise PythonGitError( - 'Could not install (pip return code=%s)' % pipcode, repo=self) - - def prepare(self, dev: bool = False) -> None: - """ - Makes sure that the repository is fetched, at the right commit, and installed. - - Arguments: - dev (bool): Indicate dev install (uses pip with -e). Default is False. 
- - Raises: - PythonGitError: if something went wrong. - """ - # import late because git will not be available in production - from git import Repo, Git - - # check/change working directory - old_cwd = os.getcwd() - try: - cwd = os.path.join(base_dir, self.name) - if not os.path.exists(cwd): - os.makedirs(cwd) - os.chdir(cwd) - - _logger.info('check git/do init with origin %s for %s' % (self.git_url, self.name)) - if os.path.exists('.git'): - git = Repo('./') - else: - git_cmd = Git('./') - git_cmd.init() - git = Repo('./') - origin = git.create_remote('origin', self.git_url) - - _logger.info('pull %s for %s' % (self.git_branch, self.name)) - origin = git.remote('origin') - origin.pull(self.git_branch, depth=1) - if git.head.name != self.git_branch: - new_branch = git.create_head(self.git_branch) - git.head.reference = new_branch - - try: - git.submodule_update(init=True) - except Exception: - pass - - if os.path.exists('requirements.txt'): - _logger.info('install requirements.txt for %s' % self.name) - self._run_pip_install('-r', 'requirements.txt') - - if os.path.exists('setup.py'): - _logger.info('install setup.py for %s' % self.name) - if dev: - self._run_pip_install('-e', '.') - else: - self._run_pip_install('.') - - except PythonGitError as e: - raise e - except Exception as e: - raise PythonGitError( - 'Unexpected exception during preparation: %s' % e, repo=self) - finally: - os.chdir(old_cwd) - pass - - def __repr__(self): - return self.name - - -dependencies = [ - # repository api is not really usuable, because it is written in python 2.x - # PythonGit( - # name='repository-api', - # git_url='https://gitlab.mpcdf.mpg.de/NoMaD/NomadRepositoryParser.git', - # git_branch='v2.1' - # ), - # not strictly necessary or useful for the common build - # PythonGit( - # name='nomad-lab-base', - # git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-lab-base.git', - # git_branch='nomad-FAIR'), - PythonGit( - name='nomad-meta-info', - 
git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-meta-info.git', - git_branch='nomad-fair'), - PythonGit( - name='python_common', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/python-common.git', - git_branch='nomad-fair'), - PythonGit( - name='parsers/vasp', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-vasp.git', - git_branch='nomad-fair'), - PythonGit( - name='parsers/exciting', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-exciting.git', - git_branch='nomad-fair'), - PythonGit( - name='parsers/fhi-aims', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-fhi-aims.git', - git_branch='nomad-fair'), - PythonGit( - name='parsers/cp2k', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-cp2k', - git_branch='nomad-fair'), - PythonGit( - name='parsers/crystal', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-crystal', - git_branch='nomad-fair'), - PythonGit( - name='parsers/cpmd', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-cpmd', - git_branch='nomad-fair'), - PythonGit( - name='parsers/nwchem', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-nwchem', - git_branch='nomad-fair'), - PythonGit( - name='parsers/bigdft', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-big-dft', - git_branch='nomad-fair'), - PythonGit( - name='parsers/wien2k', - git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-wien2k', - git_branch='nomad-fair') -] - - -dependencies_dict = {dependency.name: dependency for dependency in dependencies} - - -def prepare(*args, **kwargs) -> None: - """ - Installs all dependencies from :data:`dependencies` and :data:`parsers`. 
- """ - for python_git in dependencies: - python_git.prepare(*args, **kwargs) - - -if __name__ == '__main__': - import argparse - - parser = argparse.ArgumentParser(description='Install dependencies from NOMAD-coe.') - parser.add_argument('--dev', help='pip install with -e', action='store_true') - parser.add_argument('--clean', help='remove old deps first', action='store_true') - - args = parser.parse_args() - - _logger.setLevel(logging.DEBUG) - if args.clean and os.path.exists(base_dir): - shutil.rmtree(base_dir) - - prepare(dev=args.dev) diff --git a/nomad/parsing/__init__.py b/nomad/parsing/__init__.py index d3f9af24c..3b91d08c9 100644 --- a/nomad/parsing/__init__.py +++ b/nomad/parsing/__init__.py @@ -14,9 +14,8 @@ """ The *parsing* module is an interface for the existing NOMAD-coe parsers. -The parser code is used via :mod:`nomad.dependencies`. This module redefines -some of the old NOMAD-coe python-common functionality to create a more coherent -interface to the parsers. +This module redefines some of the old NOMAD-coe python-common functionality to create a +more coherent interface to the parsers. Assumption about parsers ------------------------ @@ -59,7 +58,6 @@ based on NOMAD-coe's *python-common* module. 
from nomad.parsing.backend import AbstractParserBackend, LocalBackend, LegacyLocalBackend, JSONStreamWriter, BadContextURI, WrongContextState from nomad.parsing.parser import Parser, LegacyParser, VaspOutcarParser from nomad.parsing.artificial import TemplateParser, GenerateRandomParser, ChaosParser -from nomad.dependencies import dependencies_dict as dependencies parsers = [ @@ -67,7 +65,7 @@ parsers = [ TemplateParser(), ChaosParser(), LegacyParser( - python_git=dependencies['parsers/vasp'], + name='parsers/vasp', parser_class_name='vaspparser.VASPRunParserInterface', main_file_re=r'^.*\.xml(\.[^\.]*)?$', main_contents_re=( @@ -78,13 +76,13 @@ parsers = [ r'?') ), VaspOutcarParser( - python_git=dependencies['parsers/vasp'], + name='parsers/vasp', parser_class_name='vaspparser.VaspOutcarParser', main_file_re=r'^OUTCAR(\.[^\.]*)?$', main_contents_re=(r'^\svasp\..*$') ), LegacyParser( - python_git=dependencies['parsers/exciting'], + name='parsers/exciting', parser_class_name='excitingparser.ExcitingParser', main_file_re=r'^.*/INFO\.OUT?', main_contents_re=( @@ -93,7 +91,7 @@ parsers = [ r'\s*\|\s*version hash id:\s*\S*\s*=') ), LegacyParser( - python_git=dependencies['parsers/fhi-aims'], + name='parsers/fhi-aims', parser_class_name='fhiaimsparser.FHIaimsParser', main_file_re=r'^.*\.out$', main_contents_re=( @@ -102,7 +100,7 @@ parsers = [ r'?\s*Version') ), LegacyParser( - python_git=dependencies['parsers/cp2k'], + name='parsers/cp2k', parser_class_name='cp2kparser.CP2KParser', main_file_re=r'^.*\.out$', # This looks for files with .out main_contents_re=( @@ -114,7 +112,7 @@ parsers = [ ) ), LegacyParser( - python_git=dependencies['parsers/crystal'], + name='parsers/crystal', parser_class_name='crystalparser.CrystalParser', main_file_re=r'^.*\.out$', main_contents_re=( @@ -128,7 +126,7 @@ parsers = [ # when searching through the first 500 bytes of main files. We decided # to use only a portion of the regex to avoid that issue. 
LegacyParser( - python_git=dependencies['parsers/cpmd'], + name='parsers/cpmd', parser_class_name='cpmdparser.CPMDParser', main_file_re=r'^.*\.out$', main_contents_re=( @@ -143,7 +141,7 @@ parsers = [ ) ), LegacyParser( - python_git=dependencies['parsers/nwchem'], + name='parsers/nwchem', parser_class_name='nwchemparser.NWChemParser', main_file_re=r'^.*\.out$', main_contents_re=( @@ -155,7 +153,7 @@ parsers = [ ) ), LegacyParser( - python_git=dependencies['parsers/bigdft'], + name='parsers/bigdft', parser_class_name='bigdftparser.BigDFTParser', main_file_re=r'^.*\.out$', main_contents_re=( @@ -187,7 +185,7 @@ parsers = [ ) ), LegacyParser( - python_git=dependencies['parsers/wien2k'], + name='parsers/wien2k', parser_class_name='wien2kparser.Wien2kParser', main_file_re=r'^.*\.scf$', # This looks for files with .scf main_contents_re=r':ITE[0-9]+: 1. ITERATION' diff --git a/nomad/parsing/parser.py b/nomad/parsing/parser.py index 8e452d240..307b3bf64 100644 --- a/nomad/parsing/parser.py +++ b/nomad/parsing/parser.py @@ -25,7 +25,6 @@ import glob from nomad import utils from nomad.parsing.backend import LocalBackend -from nomad.dependencies import PythonGit class Parser(metaclass=ABCMeta): @@ -34,7 +33,7 @@ class Parser(metaclass=ABCMeta): and extracted files. Further, allows to run the parser on those 'main files'. Arguments: - python_git: The :class:`PythonGit` that describes the parser code. + name: The name of the parser parser_class_name: Full qualified name of the main parser class. We assume it have one parameter for the backend. main_file_re: A regexp that matches main file paths that this parser can handle. @@ -63,8 +62,8 @@ class Parser(metaclass=ABCMeta): class LegacyParser(Parser): """ - A parser implementation for legacy NOMAD-coe parsers. Uses a - :class:`nomad.dependencies.PythonGit` to specify the old parser repository. It + A parser implementation for legacy NOMAD-coe parsers. It assumes that parsers + are installed to the python environment. 
It uses regular expessions to match parsers to mainfiles. Arguments: @@ -76,11 +75,10 @@ class LegacyParser(Parser): potential mainfile. """ def __init__( - self, python_git: PythonGit, parser_class_name: str, main_file_re: str, + self, name: str, parser_class_name: str, main_file_re: str, main_contents_re: str) -> None: - self.name = python_git.name - self.python_git = python_git + self.name = name self.parser_class_name = parser_class_name self._main_file_re = re.compile(main_file_re) self._main_contents_re = re.compile(main_contents_re) @@ -134,7 +132,7 @@ class LegacyParser(Parser): return backend def __repr__(self): - return self.python_git.__repr__() + return self.name class VaspOutcarParser(LegacyParser): diff --git a/tests/profile_vasp.py b/tests/profile_vasp.py deleted file mode 100644 index bcb3619f9..000000000 --- a/tests/profile_vasp.py +++ /dev/null @@ -1,15 +0,0 @@ -from nomad.parsing import parser_dict -from nomad import utils - - -def run_parser(parser_name, mainfile): - parser = parser_dict[parser_name] - return parser.run(mainfile, logger=utils.get_logger(__name__)) - - -if __name__ == '__main__': - run_parser('parsers/vasp', '.dependencies/parsers/vasp/test/examples/xml/perovskite.xml') - run_parser('parsers/vasp', '.dependencies/parsers/vasp/test/examples/xml/perovskite.xml') - run_parser('parsers/vasp', '.dependencies/parsers/vasp/test/examples/xml/perovskite.xml') - run_parser('parsers/vasp', '.dependencies/parsers/vasp/test/examples/xml/perovskite.xml') - run_parser('parsers/vasp', '.dependencies/parsers/vasp/test/examples/xml/perovskite.xml') diff --git a/tests/test_parsing.py b/tests/test_parsing.py index a15b1f839..afad55358 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -247,7 +247,7 @@ def run_parser(parser_name, mainfile): @pytest.fixture def parsed_vasp_example() -> LocalBackend: return run_parser( - 'parsers/vasp', '.dependencies/parsers/vasp/test/examples/xml/perovskite.xml') + 'parsers/vasp', 
'dependencies/parsers/vasp/test/examples/xml/perovskite.xml') @pytest.fixture -- GitLab From edae7a5a86cdaf36f6b060b28f3bb0490ca6ef4d Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 11:22:49 +0100 Subject: [PATCH 2/9] Adopted to new git submodule dependencies. --- .gitlab-ci.yml | 3 +++ Dockerfile | 5 ++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 690e24989..da7b5fbdd 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -29,6 +29,9 @@ variables: build: stage: build + before_script: + - git submodule sync + - git submodule update --init --depth 1 script: - docker login -u gitlab-ci-token -p $CI_BUILD_TOKEN gitlab-registry.mpcdf.mpg.de - docker build --no-cache -t $TEST_IMAGE . diff --git a/Dockerfile b/Dockerfile index ab5afad53..6df850bce 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,9 +37,8 @@ RUN pip install -r requirements.txt # Use docker build --build-args CACHEBUST=2 to not cache this (e.g. when you know deps have changed) ARG CACHEBUST=1 -COPY nomad/dependencies.py /install/nomad/dependencies.py -COPY nomad/config.py /install/nomad/config.py -RUN python nomad/dependencies.py +COPY dependencies.sh /install/dependencies.sh +RUN sh dependencies.sh # do that after the dependencies to use docker's layer caching COPY . /install -- GitLab From 50845db311cf0e12a0fbc2669c6672d66d752dd5 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 11:29:20 +0100 Subject: [PATCH 3/9] Further adaptatios for new submodules strategy. 
--- .dockerignore | 1 - Dockerfile | 7 +++---- integration/build_coe_container.sh | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.dockerignore b/.dockerignore index ca2159c15..e4a48ff66 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,3 @@ -.dependencies/ .pyenv/ .vscode/ .volumes/ diff --git a/Dockerfile b/Dockerfile index 6df850bce..df355020c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,11 +37,10 @@ RUN pip install -r requirements.txt # Use docker build --build-args CACHEBUST=2 to not cache this (e.g. when you know deps have changed) ARG CACHEBUST=1 -COPY dependencies.sh /install/dependencies.sh -RUN sh dependencies.sh -# do that after the dependencies to use docker's layer caching +# Install all NOMAD-CoE dependencies and nomad@FAIRDI COPY . /install +RUN sh dependencies.sh RUN pip install . WORKDIR /install/docs RUN make html @@ -61,7 +60,7 @@ WORKDIR /app # transfer installed packages from dependency stage COPY --from=build /usr/local/lib/python3.6/site-packages /usr/local/lib/python3.6/site-packages # copy the meta-info, since it files are loaded via relative paths. TODO that should change. -COPY --from=build /install/.dependencies/nomad-meta-info /app/.dependencies/nomad-meta-info +COPY --from=build /install/dependencies/nomad-meta-info /app/dependencies/nomad-meta-info # copy the documentation, its files will be served by the API COPY --from=build /install/docs/.build /app/docs/.build # copy the nomad command diff --git a/integration/build_coe_container.sh b/integration/build_coe_container.sh index ac6f1003d..3badfd366 100755 --- a/integration/build_coe_container.sh +++ b/integration/build_coe_container.sh @@ -6,7 +6,7 @@ echo "log into docker registry..." docker login gitlab-registry.mpcdf.mpg.de -u $1 -p $2 echo "building images..." 
-cd .dependencies/nomad-lab-base +cd dependencies/nomad-lab-base sbt "project repoTool" docker sbt "project repoWebservice" docker -- GitLab From f93071f350128a420dd83381cb6e30366d8cfa44 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 13:03:37 +0100 Subject: [PATCH 4/9] Fixed redundent pkey index on source index. Performance imporovements. --- nomad/coe_repo/calc.py | 2 +- nomad/migration.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nomad/coe_repo/calc.py b/nomad/coe_repo/calc.py index 1e1ecd4a9..8f5d2966b 100644 --- a/nomad/coe_repo/calc.py +++ b/nomad/coe_repo/calc.py @@ -41,7 +41,7 @@ class Calc(Base, datamodel.Calc): # type: ignore owners = relationship('User', secondary=ownership, lazy='joined') coauthors = relationship('User', secondary=co_authorship, lazy='joined') shared_with = relationship('User', secondary=shareship, lazy='joined') - tags = relationship('Tag', lazy='joined') + tags = relationship('Tag', lazy='subquery', join_depth=1) spacegroup = relationship('Spacegroup', lazy='joined', uselist=False) parents = relationship( diff --git a/nomad/migration.py b/nomad/migration.py index da7b470cd..9a5b1749a 100644 --- a/nomad/migration.py +++ b/nomad/migration.py @@ -57,7 +57,7 @@ class SourceCalc(Document): sites = ['/data/nomad/extracted/', '/nomad/repository/extracted/'] prefixes = [extracted_prefix] + sites - meta = dict(indexes=['pid', 'upload']) + meta = dict(indexes=['upload']) _dataset_cache: dict = {} -- GitLab From d95da2faf671a0bae5b29cbdce8a2e9fea37615b Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 13:38:52 +0100 Subject: [PATCH 5/9] Added error recovery for indexing. 
--- nomad/migration.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/nomad/migration.py b/nomad/migration.py index 9a5b1749a..af7a8ab47 100644 --- a/nomad/migration.py +++ b/nomad/migration.py @@ -81,6 +81,7 @@ class SourceCalc(Document): Returns: yields tuples (:class:`SourceCalc`, #calcs_total[incl. datasets]) """ + logger = utils.get_logger(__name__) if drop: SourceCalc.drop_collection() @@ -97,24 +98,27 @@ class SourceCalc(Document): source_calcs = [] for calc in calcs: - if calc.calc_metadata is None or calc.calc_metadata.filenames is None: - continue # dataset case - - filenames = json.loads(calc.calc_metadata.filenames.decode('utf-8')) - filename = filenames[0] - for prefix in SourceCalc.prefixes: - filename = filename.replace(prefix, '') - segments = [file.strip('\\') for file in filename.split('/')] - - source_calc = SourceCalc(pid=calc.pid) - source_calc.upload = segments[0] - source_calc.mainfile = os.path.join(*segments[1:]) - if with_metadata: - source_calc.metadata = calc.to(CalcWithMetadata) - source_calcs.append(source_calc) - start_pid = source_calc.pid - - yield source_calc, total + try: + if calc.calc_metadata is None or calc.calc_metadata.filenames is None: + continue # dataset case + + filenames = json.loads(calc.calc_metadata.filenames.decode('utf-8')) + filename = filenames[0] + for prefix in SourceCalc.prefixes: + filename = filename.replace(prefix, '') + segments = [file.strip('\\') for file in filename.split('/')] + + source_calc = SourceCalc(pid=calc.pid) + source_calc.upload = segments[0] + source_calc.mainfile = os.path.join(*segments[1:]) + if with_metadata: + source_calc.metadata = calc.to(CalcWithMetadata) + source_calcs.append(source_calc) + start_pid = source_calc.pid + + yield source_calc, total + except Exception as e: + logger.error('could not index', pid=calc.pid, exc_info=e) if len(source_calcs) == 0: break -- GitLab From 85d9bd765b8c45347b0820c0bbf400367c395ecd 
Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 13:40:54 +0100 Subject: [PATCH 6/9] Set service to client for client. --- nomad/client/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nomad/client/main.py b/nomad/client/main.py index 55f16f56b..1add809aa 100644 --- a/nomad/client/main.py +++ b/nomad/client/main.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import sys import requests import click @@ -73,6 +74,8 @@ def cli(host: str, port: int, verbose: bool, user: str, password: str): else: config.console_log_level = logging.WARNING + config.service = os.environ.get('NOMAD_SERVICE', 'client') + global api_base api_base = 'http://%s:%d/nomad/api' % (host, port) -- GitLab From 41f4362303866a51a09665487427a0a2ec8f97f4 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 13:53:03 +0100 Subject: [PATCH 7/9] Added timer for index operations. --- nomad/migration.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nomad/migration.py b/nomad/migration.py index af7a8ab47..05cf3527e 100644 --- a/nomad/migration.py +++ b/nomad/migration.py @@ -88,13 +88,14 @@ class SourceCalc(Document): last_source_calc = SourceCalc.objects().order_by('-pid').first() start_pid = last_source_calc.pid if last_source_calc is not None else 0 source_query = source.query(Calc) - total = source_query.count() + total = source_query.count() - SourceCalc.objects.count() while True: - calcs = source_query \ - .filter(Calc.coe_calc_id > start_pid) \ - .order_by(Calc.coe_calc_id) \ - .limit(per_query) + with utils.timer(logger, 'query source db'): + calcs = source_query \ + .filter(Calc.coe_calc_id > start_pid) \ + .order_by(Calc.coe_calc_id) \ + .limit(per_query) source_calcs = [] for calc in calcs: @@ -123,7 +124,8 @@ class SourceCalc(Document): if len(source_calcs) == 0: break else: - SourceCalc.objects.insert(source_calcs) + with 
utils.timer(logger, 'write index'): + SourceCalc.objects.insert(source_calcs) class NomadCOEMigration: -- GitLab From bb22fa56d441eb055b7798859ea4457d2c89601b Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 14:05:22 +0100 Subject: [PATCH 8/9] Fixed timer. Added another dataset case to calc processing. --- nomad/migration.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/nomad/migration.py b/nomad/migration.py index 05cf3527e..f4a0718ce 100644 --- a/nomad/migration.py +++ b/nomad/migration.py @@ -91,20 +91,25 @@ class SourceCalc(Document): total = source_query.count() - SourceCalc.objects.count() while True: - with utils.timer(logger, 'query source db'): - calcs = source_query \ - .filter(Calc.coe_calc_id > start_pid) \ - .order_by(Calc.coe_calc_id) \ - .limit(per_query) + query_timer = utils.timer(logger, 'query source db') + query_timer.__enter__() # pylint: disable=E1101 + calcs = source_query \ + .filter(Calc.coe_calc_id > start_pid) \ + .order_by(Calc.coe_calc_id) \ + .limit(per_query) source_calcs = [] for calc in calcs: + query_timer.__exit__(None, None, None) # pylint: disable=E1101 try: if calc.calc_metadata is None or calc.calc_metadata.filenames is None: continue # dataset case filenames = json.loads(calc.calc_metadata.filenames.decode('utf-8')) filename = filenames[0] + if len(filenames) == 1 and (filename.endswith('.tgz') or filename.endswith('.zip')): + continue # also a dataset, some datasets have a downloadable archive + for prefix in SourceCalc.prefixes: filename = filename.replace(prefix, '') segments = [file.strip('\\') for file in filename.split('/')] -- GitLab From e14802aea8da1bd5baa0ca482104320ac5c0bad1 Mon Sep 17 00:00:00 2001 From: Markus Scheidgen Date: Thu, 7 Feb 2019 14:10:40 +0100 Subject: [PATCH 9/9] Removed service from log tags. 
--- nomad/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nomad/utils.py b/nomad/utils.py index 7a02fa7a9..ed8c9a517 100644 --- a/nomad/utils.py +++ b/nomad/utils.py @@ -167,7 +167,7 @@ def add_logstash_handler(logger): logstash_handler = LogstashHandler( config.logstash.host, config.logstash.tcp_port, version=1) - logstash_handler.formatter = LogstashFormatter(tags=['nomad', config.service, config.release]) + logstash_handler.formatter = LogstashFormatter(tags=['nomad', config.release]) logstash_handler.setLevel(config.logstash.level) logger.addHandler(logstash_handler) -- GitLab