From 1aa7f08a9f0306066e7d424abf63f6f2c93c491b Mon Sep 17 00:00:00 2001 From: Markus Scheidgen <markus.scheidgen@gmail.com> Date: Thu, 4 Jun 2020 16:51:00 +0200 Subject: [PATCH] Merged more parsers to support 40+ parsers. #331 --- .dockerignore | 1 + .gitignore | 1 + .gitmodules | 36 ++++++++++++++++++ .python-version | 2 +- Dockerfile | 72 ++++++++++++++--------------------- dependencies/nomad-meta-info | 2 +- dependencies/parsers/amber | 1 + dependencies/parsers/asap | 1 + dependencies/parsers/charmm | 1 + dependencies/parsers/dftbplus | 1 + dependencies/parsers/fplo | 1 + dependencies/parsers/gamess | 2 +- dependencies/parsers/gromacs | 1 + dependencies/parsers/gromos | 1 + dependencies/parsers/lammps | 1 + dependencies/parsers/mopac | 1 + dependencies/parsers/namd | 1 + dependencies/parsers/openkim | 1 + dependencies/parsers/tinker | 1 + dependencies/python_common | 2 +- docs/archive.rst | 2 +- docs/dev/setup.md | 12 +++--- gui/src/searchQuantities.json | 20 ++++++++-- nomad/parsing/__init__.py | 66 ++++++++++++++++++++++++++++++++ nomad/processing/data.py | 14 +++++-- requirements.txt | 4 +- setup.py | 2 +- 27 files changed, 183 insertions(+), 67 deletions(-) create mode 160000 dependencies/parsers/amber create mode 160000 dependencies/parsers/asap create mode 160000 dependencies/parsers/charmm create mode 160000 dependencies/parsers/dftbplus create mode 160000 dependencies/parsers/fplo create mode 160000 dependencies/parsers/gromacs create mode 160000 dependencies/parsers/gromos create mode 160000 dependencies/parsers/lammps create mode 160000 dependencies/parsers/mopac create mode 160000 dependencies/parsers/namd create mode 160000 dependencies/parsers/openkim create mode 160000 dependencies/parsers/tinker diff --git a/.dockerignore b/.dockerignore index 97f4f45f8c..4ca4d01cd1 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,6 +5,7 @@ **/*.pyc **/NOMAD.egg-info .*env/ +.pyenv*/ .pytest_cache .vscode/ .volumes/ diff --git a/.gitignore b/.gitignore index 
493920a7ed..b8df0fecc6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .DS_Store .*env/ +.pyenv*/ .pytest/ .python-version .ipynb_checkpoints/ diff --git a/.gitmodules b/.gitmodules index b76dc3c1d5..6ffd31428e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -155,3 +155,39 @@ [submodule "dependencies/parsers/eels"] path = dependencies/parsers/eels url = https://github.com/markus1978/eels.git +[submodule "dependencies/parsers/namd"] + path = dependencies/parsers/namd + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-namd.git +[submodule "dependencies/parsers/charmm"] + path = dependencies/parsers/charmm + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-charmm.git +[submodule "dependencies/parsers/dftbplus"] + path = dependencies/parsers/dftbplus + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-dftb-plus.git +[submodule "dependencies/parsers/asap"] + path = dependencies/parsers/asap + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-asap.git +[submodule "dependencies/parsers/fplo"] + path = dependencies/parsers/fplo + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-fplo.git +[submodule "dependencies/parsers/mopac"] + path = dependencies/parsers/mopac + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-mopac.git +[submodule "dependencies/parsers/amber"] + path = dependencies/parsers/amber + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-amber.git +[submodule "dependencies/parsers/gromacs"] + path = dependencies/parsers/gromacs + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-gromacs.git +[submodule "dependencies/parsers/gromos"] + path = dependencies/parsers/gromos + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-gromos.git +[submodule "dependencies/parsers/lammps"] + path = dependencies/parsers/lammps + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-lammps.git +[submodule "dependencies/parsers/openkim"] + path = dependencies/parsers/openkim + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-openkim.git +[submodule 
"dependencies/parsers/tinker"] + path = dependencies/parsers/tinker + url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-tinker.git diff --git a/.python-version b/.python-version index cff2619cfb..bec3a35ee8 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.6.9 +system diff --git a/Dockerfile b/Dockerfile index 7bbf82b123..7b2271ad8e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,8 +19,10 @@ # The dockerfile is multistaged to use a fat, more convinient build image and # copy only necessities to a slim final image -# We use slim for the final image -FROM python:3.6-slim as final +# We use stretch instead of slim to allow compoilation of some dependencies. We +# do not bother to have an additional copy to a final slim image, because the space +# savings are minimal +FROM python:3.7-stretch as final # First built the GUI in a gui build image FROM node:latest as gui_build @@ -32,11 +34,15 @@ COPY gui/yarn.lock /app/yarn.lock RUN yarn COPY gui /app RUN yarn run build -# RUN yarn run --silent react-docgen src/components --pretty > react-docgen.out # Second, build all python stuff in a python build image -FROM python:3.6-stretch as build -RUN mkdir /install +FROM final +RUN mkdir /app + +# Install linux package dependencies +RUN apt-get update +RUN apt-get install -y --no-install-recommends libgomp1 +RUN apt-get install -y libmagic-dev curl vim make cmake swig libnetcdf-dev # Install some specific dependencies necessary for the build process RUN pip install --upgrade pip @@ -54,55 +60,33 @@ RUN pip install scikit-learn==0.20.2 RUN pip install ase==3.19.0 RUN pip install Pint RUN pip install matid -RUN pip install mdtraj==1.9.1 -RUN pip install mdanalysis==0.16.2 +RUN pip install mdtraj +RUN pip install mdanalysis -# Make will be necessary to build the docs with sphynx -RUN apt-get update && apt-get install -y make -RUN apt-get update && apt-get install -y vim +# Install pymolfile (required by some parsers) +RUN git clone -b nomad-fair 
https://gitlab.mpcdf.mpg.de/nomad-lab/pymolfile.git +WORKDIR /pymolfile/ +RUN python3 setup.py install +RUN rm -rf /pymolfile # Copy files and install nomad@FAIRDI -WORKDIR /install -COPY . /install +WORKDIR /app +COPY . /app RUN python setup.py compile RUN pip install .[all] RUN python setup.py sdist -WORKDIR /install/docs -# COPY --from=gui_build /app/react-docgen.out /install/docs -RUN make html -RUN \ - find /usr/local/lib/python3.6/ -name 'tests' ! -path '*/networkx/*' -exec rm -r '{}' + && \ - find /usr/local/lib/python3.6/ -name 'test' -exec rm -r '{}' + && \ - find /usr/local/lib/python3.6/site-packages/ -name '*.so' -print -exec sh -c 'file "{}" | grep -q "not stripped" && strip -s "{}"' \; -# Third, create a slim final image -FROM final - -RUN apt-get update && apt-get install -y --no-install-recommends libgomp1 && apt-get install -y libmagic-dev curl +WORKDIR /app/docs +RUN make html -# copy the sources for tests, coverage, qa, etc. -COPY . /app +# Remove unnecessary files WORKDIR /app -# transfer installed packages from dependency stage -COPY --from=build /usr/local/lib/python3.6/site-packages /usr/local/lib/python3.6/site-packages -RUN echo "copy 1" -# copy the meta-info, since it files are loaded via relative paths. TODO that should change. -COPY --from=build /install/dependencies/nomad-meta-info /app/dependencies/nomad-meta-info -RUN echo "copy 2" -# copy the documentation, its files will be served by the API -COPY --from=build /install/docs/.build /app/docs/.build -RUN echo "copy 3" -# copy the source distribution, its files will be served by the API -COPY --from=build /install/dist /app/dist -RUN echo "copy 4" -# copy the nomad command -COPY --from=build /usr/local/bin/nomad /usr/bin/nomad -RUN echo "copy 5" -# copy the gui -RUN mkdir -p /app/gui -COPY --from=gui_build /app/build /app/gui/build -RUN echo "copy 6" +RUN \ + find /usr/local/lib/python3.7/ -name 'tests' ! 
-path '*/networkx/*' -exec rm -r '{}' + && \ + find /usr/local/lib/python3.7/ -name 'test' -exec rm -r '{}' + && \ + find /usr/local/lib/python3.7/site-packages/ -name '*.so' -print -exec sh -c 'file "{}" | grep -q "not stripped" && strip -s "{}"' \; +# Setup directories, users, rights RUN mkdir -p /app/.volumes/fs RUN useradd -ms /bin/bash nomad RUN chown -R nomad /app diff --git a/dependencies/nomad-meta-info b/dependencies/nomad-meta-info index c424e75671..a2df84b2df 160000 --- a/dependencies/nomad-meta-info +++ b/dependencies/nomad-meta-info @@ -1 +1 @@ -Subproject commit c424e75671e8c09c2f29c90ec63feafd0a2a706e +Subproject commit a2df84b2dfc2c96344e7261580a40d655c904047 diff --git a/dependencies/parsers/amber b/dependencies/parsers/amber new file mode 160000 index 0000000000..09b52a7494 --- /dev/null +++ b/dependencies/parsers/amber @@ -0,0 +1 @@ +Subproject commit 09b52a7494b9655c53bbe3dcab2719823e5f2e27 diff --git a/dependencies/parsers/asap b/dependencies/parsers/asap new file mode 160000 index 0000000000..dd555aad1c --- /dev/null +++ b/dependencies/parsers/asap @@ -0,0 +1 @@ +Subproject commit dd555aad1ce925b5b56be7e55b6650f959ea5261 diff --git a/dependencies/parsers/charmm b/dependencies/parsers/charmm new file mode 160000 index 0000000000..dc2b8d5a08 --- /dev/null +++ b/dependencies/parsers/charmm @@ -0,0 +1 @@ +Subproject commit dc2b8d5a08fb27e53057f27d9854b22ffee28f06 diff --git a/dependencies/parsers/dftbplus b/dependencies/parsers/dftbplus new file mode 160000 index 0000000000..c447c6b07d --- /dev/null +++ b/dependencies/parsers/dftbplus @@ -0,0 +1 @@ +Subproject commit c447c6b07d8528210975337571c98096caf26770 diff --git a/dependencies/parsers/fplo b/dependencies/parsers/fplo new file mode 160000 index 0000000000..e4bd0a1bd7 --- /dev/null +++ b/dependencies/parsers/fplo @@ -0,0 +1 @@ +Subproject commit e4bd0a1bd72092de4f41c88ebf843265c162a84c diff --git a/dependencies/parsers/gamess b/dependencies/parsers/gamess index 2b49f65218..b830cf96f3 160000 
--- a/dependencies/parsers/gamess +++ b/dependencies/parsers/gamess @@ -1 +1 @@ -Subproject commit 2b49f652182adb2139f9bc7503c25212c5ad35a7 +Subproject commit b830cf96f325c521c232ce8ccba83073eb5f3bab diff --git a/dependencies/parsers/gromacs b/dependencies/parsers/gromacs new file mode 160000 index 0000000000..ae0068cd75 --- /dev/null +++ b/dependencies/parsers/gromacs @@ -0,0 +1 @@ +Subproject commit ae0068cd75c1b0f6bb818e8cbd5a6a72fe7722f4 diff --git a/dependencies/parsers/gromos b/dependencies/parsers/gromos new file mode 160000 index 0000000000..4a74602f7d --- /dev/null +++ b/dependencies/parsers/gromos @@ -0,0 +1 @@ +Subproject commit 4a74602f7dd287da835cf6a64228b5ad53fd09be diff --git a/dependencies/parsers/lammps b/dependencies/parsers/lammps new file mode 160000 index 0000000000..433f848030 --- /dev/null +++ b/dependencies/parsers/lammps @@ -0,0 +1 @@ +Subproject commit 433f848030759d28bacb780d800877445490da70 diff --git a/dependencies/parsers/mopac b/dependencies/parsers/mopac new file mode 160000 index 0000000000..aac03d990b --- /dev/null +++ b/dependencies/parsers/mopac @@ -0,0 +1 @@ +Subproject commit aac03d990b7823776c51a1976b2d5a304e4a1cce diff --git a/dependencies/parsers/namd b/dependencies/parsers/namd new file mode 160000 index 0000000000..3d225e6858 --- /dev/null +++ b/dependencies/parsers/namd @@ -0,0 +1 @@ +Subproject commit 3d225e6858fc449e12c5ca45d9597562011f6589 diff --git a/dependencies/parsers/openkim b/dependencies/parsers/openkim new file mode 160000 index 0000000000..e38d2615a2 --- /dev/null +++ b/dependencies/parsers/openkim @@ -0,0 +1 @@ +Subproject commit e38d2615a243c775a378e47a461c731babebf3e6 diff --git a/dependencies/parsers/tinker b/dependencies/parsers/tinker new file mode 160000 index 0000000000..f9822b6611 --- /dev/null +++ b/dependencies/parsers/tinker @@ -0,0 +1 @@ +Subproject commit f9822b6611dc5c8f816c76405923d4973e797d93 diff --git a/dependencies/python_common b/dependencies/python_common index 5023497ce1..5c98d3fc34 
160000 --- a/dependencies/python_common +++ b/dependencies/python_common @@ -1 +1 @@ -Subproject commit 5023497ce1651f41de44cb35e953432d450b29f5 +Subproject commit 5c98d3fc345beb43ecb8c163fb2f1ad9678f45a2 diff --git a/docs/archive.rst b/docs/archive.rst index 06d321811b..258d09ea58 100644 --- a/docs/archive.rst +++ b/docs/archive.rst @@ -7,4 +7,4 @@ Of course, you can access the NOMAD Archive directly via the NOMAD API (see the and `API reference <api.html>`_). But, it is more effective and convenient to use NOMAD's Python client library. -.. automodule:: nomad.client \ No newline at end of file +.. automodule:: nomad.client diff --git a/docs/dev/setup.md b/docs/dev/setup.md index 31cc96fd98..9bee9630a4 100644 --- a/docs/dev/setup.md +++ b/docs/dev/setup.md @@ -38,7 +38,7 @@ cd nomad-FAIR ### C libs Even though the NOMAD infrastructure is written in python, there is a C library -required by one of our pyhton dependencies. +required by one of our python dependencies. #### libmagic @@ -52,11 +52,9 @@ brew install libmagic ### Virtual environment #### pyenv -The nomad code currently targets python 3.6. If you host machine has 3.7 or later installed, -you can use [pyenv](https://github.com/pyenv/pyenv) to use python 3.6 in parallel. -To use 3.7 there is a slight issue about the `enum34` which fails the compilation of the -`mdtraj` and `mdanalysis` packages. A possible work arround is to uninstall and tham re-install -`enum34` once the other packages are installed. +The nomad code currently targets python 3.7. If your host machine has an older version installed, +you can use [pyenv](https://github.com/pyenv/pyenv) to use python 3.7 in parallel to your +system's python. #### virtualenv We strongly recommend to use *virtualenv* to create a virtual environment. It will allow you @@ -73,7 +71,7 @@ source .pyenv/bin/activate If you are a conda user, there is an equivalent, but you have to install pip and the right python version while creating the environment. 
``` -conda create --name nomad_env pip python=3.6 +conda create --name nomad_env pip python=3.7 conda activate nomad_env ``` diff --git a/gui/src/searchQuantities.json b/gui/src/searchQuantities.json index 15f889e986..b9853c22ba 100644 --- a/gui/src/searchQuantities.json +++ b/gui/src/searchQuantities.json @@ -71,17 +71,17 @@ }, "labels.label": { "name": "labels.label", - "description": null, + "description": "The label as a string", "many": false }, "labels.type": { "name": "labels.type", - "description": null, + "description": "The type of the label", "many": false }, "labels.source": { "name": "labels.source", - "description": null, + "description": "The source that this label was taken from.", "many": false }, "optimade.elements": { @@ -234,16 +234,20 @@ "name": "dft.code_name", "description": "The name of the used code.", "many": false, - "statistic_size": 34, + "statistic_size": 46, "statistic_values": [ "ABINIT", + "Amber", + "ASAP", "ATK", "BAND", "BigDFT", "CASTEP", + "Charmm", "CP2K", "CPMD", "Crystal", + "DFTb plus", "DL_POLY", "DMol3", "elastic", @@ -251,20 +255,28 @@ "exciting", "FHI-aims", "fleur", + "fplo", "GAMESS", "Gaussian", "GPAW", + "Gromacs", + "Gromos", "gulp", + "lammps", "libAtoms", "MOLCAS", + "MOPAC", + "Namd", "NWChem", "Octopus", "ONETEP", + "OpenKIM", "ORCA", "Phonopy", "qbox", "Quantum Espresso", "Siesta", + "TINKER", "turbomole", "VASP", "WIEN2k", diff --git a/nomad/parsing/__init__.py b/nomad/parsing/__init__.py index 5ca0195a82..7edbc88cf0 100644 --- a/nomad/parsing/__init__.py +++ b/nomad/parsing/__init__.py @@ -464,6 +464,72 @@ parsers = [ name='parsers/onetep', code_name='ONETEP', code_homepage='https://www.onetep.org/', domain='dft', parser_class_name='onetepparser.OnetepParser', mainfile_contents_re=r'####### # # ####### ####### ####### ######' + ), + LegacyParser( + name='parsers/openkim', code_name='OpenKIM', domain='dft', + parser_class_name='openkimparser.OpenKIMParser', + mainfile_contents_re=r'OPENKIM' + ), + 
LegacyParser( + name='parsers/tinker', code_name='TINKER', domain='dft', + parser_class_name='tinkerparser.TinkerParser', + mainfile_contents_re=r'TINKER --- Software Tools for Molecular Design' + ), + LegacyParser( + name='parsers/lammps', code_name='lammps', domain='dft', + parser_class_name='lammpsparser.LammpsParser', + mainfile_contents_re=r'^LAMMPS' + ), + LegacyParser( + name='parsers/amber', code_name='Amber', domain='dft', + parser_class_name='amberparser.AMBERParser', + mainfile_contents_re=r'\s*Amber\s[0-9]+\s[A-Z]+\s*[0-9]+' + ), + LegacyParser( + name='parsers/gromacs', code_name='Gromacs', domain='dft', + parser_class_name='gromacsparser.GROMACSParser', + mainfile_contents_re=r'GROMACS - gmx mdrun' + ), + LegacyParser( + name='parsers/gromos', code_name='Gromos', domain='dft', + parser_class_name='gromosparser.GromosParser', + mainfile_contents_re=r'Bugreports to http://www.gromos.net' + ), + LegacyParser( + name='parsers/namd', code_name='Namd', domain='dft', + parser_class_name='namdparser.NamdParser', + mainfile_contents_re=r'\s*Info:\s*NAMD\s*[0-9.]+\s*for\s*', + mainfile_mime_re=r'text/.*', + ), + LegacyParser( + name='parsers/charmm', code_name='Charmm', domain='dft', + parser_class_name='charmmparser.CharmmParser', + mainfile_contents_re=r'\s*Chemistry\s*at\s*HARvard\s*Macromolecular\s*Mechanics\s*', + mainfile_mime_re=r'text/.*', + ), + LegacyParser( + name='parsers/dftbplus', code_name='DFTb plus', domain='dft', + parser_class_name='dftbplusparser.DFTBPlusParser', + mainfile_contents_re=r'^ Fermi distribution function\s*', + mainfile_mime_re=r'text/.*', + ), + LegacyParser( + name='parsers/asap', code_name='ASAP', domain='dft', + parser_class_name='asapparser.AsapParser', + mainfile_name_re=r'.*.traj$', + mainfile_mime_re=r'application/octet-stream', + ), + LegacyParser( + name='parsers/fplo', code_name='fplo', domain='dft', + parser_class_name='fploparser.FploParser', + mainfile_contents_re=r'\s*\|\s*FULL-POTENTIAL LOCAL-ORBITAL MINIMUM 
BASIS BANDSTRUCTURE CODE\s*\|\s*', + mainfile_mime_re=r'text/.*', + ), + LegacyParser( + name='parsers/mopac', code_name='MOPAC', domain='dft', + parser_class_name='mopacparser.MopacParser', + mainfile_contents_re=r'\s*\*\*\s*MOPAC\s*([0-9a-zA-Z]*)\s*\*\*\s*', + mainfile_mime_re=r'text/.*', ) ] diff --git a/nomad/processing/data.py b/nomad/processing/data.py index fe60a55c05..b9782abff6 100644 --- a/nomad/processing/data.py +++ b/nomad/processing/data.py @@ -296,6 +296,12 @@ class Calc(Proc): except Exception as e: logger.error('could unload processing results', exc_info=e) + def _setup_fallback_metadata(self): + self._entry_metadata = self.create_metadata() + self._entry_metadata.calc_hash = self.upload_files.calc_hash(self.mainfile) + self._entry_metadata.last_processing = datetime.utcnow() + self._entry_metadata.files = self.upload_files.calc_files(self.mainfile) + @process def process_calc(self): ''' @@ -309,10 +315,7 @@ class Calc(Proc): try: # save preliminary minimum calc metadata in case processing fails # successful processing will replace it with the actual metadata - self._entry_metadata = self.create_metadata() - self._entry_metadata.calc_hash = self.upload_files.calc_hash(self.mainfile) - self._entry_metadata.last_processing = datetime.utcnow() - self._entry_metadata.files = self.upload_files.calc_files(self.mainfile) + self._setup_fallback_metadata() if len(self._entry_metadata.files) >= config.auxfile_cutoff: self.warning( @@ -334,6 +337,9 @@ class Calc(Proc): # in case of failure, index a minimum set of metadata and mark # processing failure try: + if self._entry_metadata is None: + self._setup_fallback_metadata() + self._entry_metadata.processed = False self.apply_entry_metadata(self._entry_metadata) diff --git a/requirements.txt b/requirements.txt index 41a323f7e4..b7199b04e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,8 +31,8 @@ matid==0.6.0 python-magic panedr==0.2 parmed==3.0.0 -mdtraj==1.9.1 -mdanalysis==0.16.2 +mdtraj 
+mdanalysis nomadcore # [infrastructure] diff --git a/setup.py b/setup.py index c16a3817d2..07358a0ad4 100644 --- a/setup.py +++ b/setup.py @@ -250,7 +250,7 @@ def setup_kwargs(): if __name__ == '__main__': if len(sys.argv) == 2 and sys.argv[1] == 'compile': kwargs = compile_dependency_setup_kwargs(['dependencies'], **setup_kwargs()) - kwargs['packages'].remove('nomadcore.md_data_access') + # kwargs['packages'].remove('nomadcore.md_data_access') with open('setup.json', 'wt') as f: json.dump(kwargs, f, indent=2) sys.exit(0) -- GitLab