Commit d443a581 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Added nomadtoolkit package for local analysis, refactoring of the cp2kparser code.

parent c98f4988
[submodule "cp2kparser/cp2kparser/metainfo"]
path = cp2kparser/cp2kparser/metainfo
[submodule "nomadtoolkit/submodules/nomad-meta-info"]
path = nomadtoolkit/submodules/nomad-meta-info
url = git@gitlab.mpcdf.mpg.de:nomad-lab/nomad-meta-info.git
[submodule "nomadtoolkit/submodules/python-common"]
path = nomadtoolkit/submodules/python-common
url = git@gitlab.mpcdf.mpg.de:nomad-lab/python-common.git
......@@ -30,49 +30,9 @@
```
# Structure
Currently the python package is divided into three subpackages:
- Engines: Classes for parsing different types of files
- Generics: Generic utility classes and base classes
- Implementation: The classes that actually define the parser functionality.
## Engines
Basically all the "engines", that is, the modules that parse certain types of
files, are reusable in other parsers. They could be put into a common
repository where other developers can improve and extend them. One should also
write tests for the engines that would validate their behaviour and ease the
performance analysis.
The engine classes work also as interfaces. You can change the engine behaviour
while maintaining the same API in the parsers. For example one might improve
the performance of an engine but if the function calls remain the same no other
code has to be changed.
Currently implemented engines that could be reused (not tested properly yet):
- AtomsEngine: For reading various atomic coordinate files. Currently uses ASE
to read the files.
- RegexEngine: For parsing text files with regular expressions. Uses the re2
library if available (falls back to default python regex implementation if
re2 not found).
- CSVEngine: For parsing CSV-like files. Has a very
flexible nature as you can specify comments, column delimiters, column
indices and the patterns used to separate different configurations.
- XMLEngine: For parsing XML files using XPath syntax.
## Generics
In the generics folder there is a module called nomadparser.py that defines a
class called NomadParser. This acts as a base class for the cp2k parser defined
in the implementation folder.
The NomadParser class defines the interface which is eventually used by e.g.
the scala code (will be modified later to conform to the common interface).
This class is also responsible for some common tasks that are present in all
parsers:
- Unit conversion
- JSON encoding
- Caching
- Time measurement for performance analysis
- Providing file contents, sizes and handles
Currently the python package is divided into the following subpackages:
- utils: Generic utility classes and base classes
- implementation: The classes that actually define the parser functionality.
# Tools and Methods
......
from .implementation.cp2kparser import CP2KParser
# import cp2kparser.generics.logconfig
# import cp2kparser.implementation.cp2kparser
# Import classes for easier access. NOTE: Typically the __init__ file is empty.
# This signals to Python that the user can import anything from the
# subdirectories. Now that there are imports in the init file, you have to
# explicitly state all imports that should be available.
# import generics
# import implementation
# import engines
# from implementation.cp2kparser import CP2KParser
import cp2kparser.utils.logconfig
from cp2kparser.implementation.parsing import CP2KParser
import os
import logging
from abc import ABCMeta, abstractmethod
from nomadanalysis.local_backend import LocalBackend
logger = logging.getLogger(__name__)
#===============================================================================
class Parser(object):
    """Abstract base class for parsers that pick a version-specific
    implementation.

    The constructor gathers the files to parse, the metainfo selection and the
    backend into a ParserContext. Subclasses implement setup() to choose the
    implementation that matches the software version.

    Attributes:
        default_metainfo_path: Metainfo definition file that is loaded when no
            backend is given to the constructor. Subclasses may override it.
        parser_context: The ParserContext filled in by the constructor.
        implementation: Initialised to None by the constructor.
    """
    # Python 2 style metaclass declaration.
    __metaclass__ = ABCMeta

    # NOTE(review): developer-specific absolute path; kept as the default so
    # existing behaviour is preserved, but it should be made configurable.
    default_metainfo_path = "/home/lauri/Dropbox/nomad-dev/nomad-meta-info/meta_info/nomad_meta_info/cp2k.nomadmetainfo.json"

    def __init__(self, dirpath=None, files=None, metainfo_to_keep=None, backend=None):
        """Store the parsing inputs in a ParserContext.

        Args:
            dirpath: Optional directory. When given, the result of
                search_path(dirpath) replaces the files argument.
            files: Files to parse; stored on the context as given.
            metainfo_to_keep: Stored on the context as given.
            backend: Backend stored on the context. When None, a LocalBackend
                is created from default_metainfo_path.
        """
        self.parser_context = ParserContext(
            files=files,
            metainfo_to_keep=metainfo_to_keep,
            backend=backend,
        )
        self.implementation = None

        # If directory provided, the interesting files are first identified
        if dirpath:
            self.parser_context.files = self.search_path(dirpath)

        # If no backend provided, create one with default metainfos
        if backend is None:
            self.parser_context.backend = self._create_default_backend()

    def _create_default_backend(self):
        """Build a LocalBackend from the metainfo in default_metainfo_path."""
        # Imported locally: this module has no top-level nomadcore import.
        from nomadcore.local_meta_info import loadJsonFile

        # The second return value (loader warnings) is not used here.
        metainfoenv, _warnings = loadJsonFile(self.default_metainfo_path)
        return LocalBackend(metainfoenv)

    @abstractmethod
    def setup(self):
        """Deduce the version of the software that was used and setup a correct
        implementation. The implementations should subclass NomadParser.

        Returns:
            A NomadParser object that is ready to do the parsing.
        """
        pass

    def search_path(self, dirpath):
        """Collect the entries of a directory for this parser.

        Every entry returned by os.listdir() is included; no filtering is
        applied.

        Args:
            dirpath: Path of the directory to list.

        Returns:
            A list of path strings, each entry name joined with dirpath.
        """
        return [os.path.join(dirpath, filename) for filename in os.listdir(dirpath)]
#===============================================================================
class ParserContext(object):
    """Plain container for the values used to instantiate a parser
    implementation.

    Attributes:
        files: The files to parse, stored as given.
        version_id: Identifier of the software version, stored as given.
        metainfo_to_keep: Metainfo selection, stored as given.
        backend: The backend object, stored as given.
    """
    def __init__(self, files=None, metainfo_to_keep=None, backend=None, version_id=None):
        # Every argument is optional and is kept without validation.
        self.backend = backend
        self.metainfo_to_keep = metainfo_to_keep
        self.version_id = version_id
        self.files = files
......@@ -17,7 +17,7 @@ else:
#===============================================================================
class CSVEngine(object):
class CSVParser(object):
"""Used to parse out freeform CSV-like content.
Currently only can parse floating point information.
......
import re
import os
import logging
from ..engines.csvengine import CSVEngine
from ..implementation.cp2kinputparsers import CP2KInputParser
from ..implementation.cp2kinputenginedata.input_tree import CP2KInput
from ..implementation.cp2koutputparsers import *
from ..generics.parserimplementation import ParserImplementation
from cp2kparser.implementation.csvparsing import CSVParser
from cp2kparser.implementation.inputparsing import CP2KInputParser
from cp2kparser.implementation.cp2kinputenginedata.input_tree import CP2KInput
from cp2kparser.implementation.outputparsing import *
from cp2kparser.utils.parserimplementation import ParserImplementation
from nomadcore.coordinate_reader import CoordinateReader
from nomadcore.unit_conversion.unit_conversion import convert_unit
logger = logging.getLogger(__name__)
......@@ -22,11 +21,11 @@ class CP2KImplementation262(ParserImplementation):
# Initialize the parsing tools. The input and output parsers need to
# know the version id.
self.csvengine = CSVEngine(self)
self.csvengine = CSVParser(self)
self.atomsengine = CoordinateReader()
self.inputparser = CP2KInputParser()
self.inputparser.setup_version(self.version_id)
self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](self, self.metainfos)
self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](self, self.metainfo_to_keep)
self.input_tree = None
self.extended_input = None
......@@ -273,11 +272,9 @@ class CP2KImplementation262(ParserImplementation):
# Use the SimpleMatcher to extract most of the results
parserInfo = {"name": "cp2k-parser", "version": "1.0"}
outputfilename = self.get_file_handle("output").name
backend = self.backend
metainfos = self.metainfos
outputstructure = self.outputparser.outputstructure
cachingLevelForMetaName = self.outputparser.cachingLevelForMetaName
self.parse_file(outputfilename, outputstructure, metainfos, backend, parserInfo, cachingLevelForMetaName, superContext=self.outputparser)
self.parse_file(outputfilename, outputstructure, parserInfo, cachingLevelForMetaName, superContext=self.outputparser)
# Then extract the things that cannot be extracted by the SimpleMatcher
......
......@@ -76,7 +76,7 @@ class CP2KOutputParser262(object):
startReStr=" FUNCTIONAL\|",
forwardMatch=True,
sections=["section_method", "cp2k_section_functionals"],
otherMetaInfo=["XC_functional"],
otherMetaInfo=["XC_functional_scf"],
subMatchers=[
SM(
repeats=True,
......@@ -244,7 +244,7 @@ class CP2KOutputParser262(object):
functionals = "_".join(sorted(functionals))
# Push the functional string into the backend
backend.addValue('XC_functional', functionals)
backend.addValue('XC_functional_scf', functionals)
def onClose_cp2k_section_atom_position(self, backend, gIndex, section):
"""Get the initial atomic positions from cp2kparser.
......
import re
import logging
from ..generics.parser import Parser
from ..implementation.cp2kimplementations import *
from cp2kparser.utils.parser import Parser
from cp2kparser.implementation.implementations import *
logger = logging.getLogger(__name__)
......@@ -17,8 +17,9 @@ class CP2KParser(Parser):
After the implementation has been setup, you can parse the files with
parse().
"""
def __init__(self, dirpath=None, files=None, metainfo_path=None, backend=None):
Parser.__init__(self, dirpath, files, metainfo_path, backend)
def __init__(self, contents=None, metainfo_to_keep=None, backend=None):
Parser.__init__(self, contents, metainfo_to_keep, backend)
def setup(self):
"""Setups the version by looking at the output file and the version
......@@ -57,6 +58,25 @@ class CP2KParser(Parser):
self.parser_context.version_id = "262"
self.implementation = globals()["CP2KImplementation262"](self.parser_context)
def search_parseable_files(self, files):
    """Return the files that are of interest to this parser.

    No filtering is applied: the given files are handed back unchanged.

    Args:
        files: The candidate files.

    Returns:
        The same object that was passed in as files.
    """
    return files
def parse(self):
    """Set up the implementation and then run its parse() method."""
    # setup() has to run first: the implementation is read only afterwards.
    self.setup()
    implementation = self.implementation
    implementation.parse()
def get_metainfo_filename(self):
    """Return the name of the metainfo file that is specific to this parser.

    According to the original note, the Analyzer class in the nomadtoolkit
    uses this name.
    """
    metainfo_filename = "cp2k.nomadmetainfo.json"
    return metainfo_filename
#===============================================================================
# This is what gets run when the scala layer calls for this parser
if __name__ == "__main__":
    # Parenthesised form: prints the same text under Python 2 and is also
    # valid Python 3 syntax (the bare print statement is not).
    print("Moi")
Subproject commit 163501eabba0fa385f28edcb55aa577de96e7624
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment