From d443a581a055ec6057fc59da831eefe6bfd8fce0 Mon Sep 17 00:00:00 2001 From: "Himanen, Lauri (himanel1)" <lauri.himanen@aalto.fi> Date: Sat, 16 Jan 2016 23:00:21 +0200 Subject: [PATCH] Added nomadtoolkit package for local analysis, refactoring of the cp2kparser code. --- .gitmodules | 8 +- cp2kparser/README.md => README.md | 46 +------- cp2kparser/cp2kparser/__init__.py | 14 +-- cp2kparser/cp2kparser/generics/__init__.py | 1 - cp2kparser/cp2kparser/generics/parser.py | 64 ----------- .../csvparsing.py} | 2 +- ...kimplementations.py => implementations.py} | 19 ++-- .../{cp2kinputparsers.py => inputparsing.py} | 0 ...{cp2koutputparsers.py => outputparsing.py} | 4 +- .../{cp2kparser.py => parsing.py} | 28 ++++- cp2kparser/cp2kparser/metainfo | 1 - .../cp2kparser/{engines => old}/__init__.py | 0 .../{engines => old}/csvengine/tests.py | 0 .../{engines => old}/csvengine/xyz/10.xyz | 0 .../{engines => old}/csvengine/xyz/100.xyz | 0 .../{engines => old}/csvengine/xyz/1000.xyz | Bin .../{engines => old}/csvengine/xyz/10000.xyz | Bin .../{engines => old}/regexengine.py | 0 .../cp2kparser/{engines => old}/xmlengine.py | 0 .../cp2kparser}/utils/__init__.py | 0 .../{generics => utils}/logconfig.py | 0 cp2kparser/cp2kparser/utils/parser.py | 106 ++++++++++++++++++ .../parserimplementation.py | 8 +- .../cp2kparser/{generics => utils}/testing.py | 0 nomadanalysis/README.md | 1 - nomadanalysis/nomadanalysis.egg-info/PKG-INFO | 10 -- .../nomadanalysis.egg-info/SOURCES.txt | 9 -- .../dependency_links.txt | 1 - .../nomadanalysis.egg-info/not-zip-safe | 1 - .../nomadanalysis.egg-info/requires.txt | 2 - .../nomadanalysis.egg-info/top_level.txt | 1 - nomadanalysis/nomadanalysis/__init__.py | 7 -- .../nomadanalysis/examples/1_basics.py | 11 -- nomadtoolkit/README.md | 27 +++++ nomadtoolkit/nomadtoolkit/__init__.py | 1 + .../nomadtoolkit/analysis.py | 2 +- nomadtoolkit/nomadtoolkit/config.py | 28 +++++ .../nomadtoolkit/examples/1_basics.py | 14 +++ .../nomadtoolkit}/local_backend.py | 6 
+- nomadtoolkit/nomadtoolkit/utils/__init__.py | 0 .../nomadtoolkit}/utils/log.py | 0 {nomadanalysis => nomadtoolkit}/setup.py | 12 +- nomadtoolkit/submodules/nomad-meta-info | 1 + nomadtoolkit/submodules/python-common | 1 + 44 files changed, 243 insertions(+), 193 deletions(-) rename cp2kparser/README.md => README.md (68%) delete mode 100644 cp2kparser/cp2kparser/generics/__init__.py delete mode 100644 cp2kparser/cp2kparser/generics/parser.py rename cp2kparser/cp2kparser/{engines/csvengine.py => implementation/csvparsing.py} (99%) rename cp2kparser/cp2kparser/implementation/{cp2kimplementations.py => implementations.py} (97%) rename cp2kparser/cp2kparser/implementation/{cp2kinputparsers.py => inputparsing.py} (100%) rename cp2kparser/cp2kparser/implementation/{cp2koutputparsers.py => outputparsing.py} (99%) rename cp2kparser/cp2kparser/implementation/{cp2kparser.py => parsing.py} (71%) delete mode 160000 cp2kparser/cp2kparser/metainfo rename cp2kparser/cp2kparser/{engines => old}/__init__.py (100%) rename cp2kparser/cp2kparser/{engines => old}/csvengine/tests.py (100%) rename cp2kparser/cp2kparser/{engines => old}/csvengine/xyz/10.xyz (100%) rename cp2kparser/cp2kparser/{engines => old}/csvengine/xyz/100.xyz (100%) rename cp2kparser/cp2kparser/{engines => old}/csvengine/xyz/1000.xyz (100%) rename cp2kparser/cp2kparser/{engines => old}/csvengine/xyz/10000.xyz (100%) rename cp2kparser/cp2kparser/{engines => old}/regexengine.py (100%) rename cp2kparser/cp2kparser/{engines => old}/xmlengine.py (100%) rename {nomadanalysis/nomadanalysis => cp2kparser/cp2kparser}/utils/__init__.py (100%) rename cp2kparser/cp2kparser/{generics => utils}/logconfig.py (100%) create mode 100644 cp2kparser/cp2kparser/utils/parser.py rename cp2kparser/cp2kparser/{generics => utils}/parserimplementation.py (99%) rename cp2kparser/cp2kparser/{generics => utils}/testing.py (100%) delete mode 100644 nomadanalysis/README.md delete mode 100644 nomadanalysis/nomadanalysis.egg-info/PKG-INFO delete 
mode 100644 nomadanalysis/nomadanalysis.egg-info/SOURCES.txt delete mode 100644 nomadanalysis/nomadanalysis.egg-info/dependency_links.txt delete mode 100644 nomadanalysis/nomadanalysis.egg-info/not-zip-safe delete mode 100644 nomadanalysis/nomadanalysis.egg-info/requires.txt delete mode 100644 nomadanalysis/nomadanalysis.egg-info/top_level.txt delete mode 100644 nomadanalysis/nomadanalysis/__init__.py delete mode 100644 nomadanalysis/nomadanalysis/examples/1_basics.py create mode 100644 nomadtoolkit/README.md create mode 100644 nomadtoolkit/nomadtoolkit/__init__.py rename nomadanalysis/nomadanalysis/analyzer.py => nomadtoolkit/nomadtoolkit/analysis.py (96%) create mode 100644 nomadtoolkit/nomadtoolkit/config.py create mode 100644 nomadtoolkit/nomadtoolkit/examples/1_basics.py rename {nomadanalysis/nomadanalysis => nomadtoolkit/nomadtoolkit}/local_backend.py (91%) create mode 100644 nomadtoolkit/nomadtoolkit/utils/__init__.py rename {nomadanalysis/nomadanalysis => nomadtoolkit/nomadtoolkit}/utils/log.py (100%) rename {nomadanalysis => nomadtoolkit}/setup.py (58%) create mode 160000 nomadtoolkit/submodules/nomad-meta-info create mode 160000 nomadtoolkit/submodules/python-common diff --git a/.gitmodules b/.gitmodules index 59f7d05..0ace117 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,7 @@ -[submodule "cp2kparser/cp2kparser/metainfo"] - path = cp2kparser/cp2kparser/metainfo + +[submodule "nomadtoolkit/submodules/nomad-meta-info"] + path = nomadtoolkit/submodules/nomad-meta-info url = git@gitlab.mpcdf.mpg.de:nomad-lab/nomad-meta-info.git +[submodule "nomadtoolkit/submodules/python-common"] + path = nomadtoolkit/submodules/python-common + url = git@gitlab.mpcdf.mpg.de:nomad-lab/python-common.git diff --git a/cp2kparser/README.md b/README.md similarity index 68% rename from cp2kparser/README.md rename to README.md index 2ed3ac7..d4c8b07 100644 --- a/cp2kparser/README.md +++ b/README.md @@ -30,49 +30,9 @@ ``` # Structure -Currently the python package is divided 
into three subpackages: -- Engines: Classes for parsing different type of files -- Generics: Generic utility classes and base classes -- Implementation: The classes that actually define the parser functionality. - -## Engines -Basically all the "engines", that is the modules that parse certain type of -files, are reusable in other parsers. They could be put into a common -repository where other developers can improve and extend them. One should also -write tests for the engines that would validate their behaviour and ease the -performance analysis. - -The engine classes work also as interfaces. You can change the engine behaviour -while maintaining the same API in the parsers. For example one might improve -the performance of an engine but if the function calls remain the same no other -code has to be changed. - -Currently implemented engines that could be reused (not tested properly yet): -- AtomsEngine: For reading various atomic coordinate files. Currently uses ASE - to read the files. -- RegexEngine: For parsing text files with regular expressions. Uses the re2 -library if available (falls back to default python regex implementation if -re2 not found). -- CSVEngine: For parsing CSV-like files. Has a very -flexible nature as you can specify comments, column delimiters, column -indices and the patterns used to separate different configurations. -- XMLEngine: For parsing XML files using XPath syntax. - -## Generics -In the generics folder there is a module called nomadparser.py that defines a -class called NomadParser. This acts as a base class for the cp2k parser defined -in the implementation folder. - -The NomadParser class defines the interface which is eventually used by e.g. -the scala code (will be modified later to conform to the common interface). 
-This class is also responsible for some common tasks that are present in all -parsers: - -- Unit conversion -- JSON encoding -- Caching -- Time measurement for performance analysis -- Providing file contents, sizes and handles +Currently the python package is divided into the following subpackages: +- utils: Generic utility classes and base classes +- implementation: The classes that actually define the parser functionality. # Tools and Methods diff --git a/cp2kparser/cp2kparser/__init__.py b/cp2kparser/cp2kparser/__init__.py index 8c2d19d..21526fc 100644 --- a/cp2kparser/cp2kparser/__init__.py +++ b/cp2kparser/cp2kparser/__init__.py @@ -1,12 +1,2 @@ -from .implementation.cp2kparser import CP2KParser -# import cp2kparser.generics.logconfig -# import cp2kparser.implementation.cp2kparser - -# Import classes for easier access. NOTE: Typically the __init__ file is empty. -# This signifies python that the user can import anything from the -# subdirectiories. Now that there are imports in the init file, you have to -# explicitly state all imports that should be available. -# import generics -# import implementation -# import engines -# from implementation.cp2kparser import CP2KParser +import cp2kparser.utils.logconfig +from cp2kparser.implementation.parsing import CP2KParser diff --git a/cp2kparser/cp2kparser/generics/__init__.py b/cp2kparser/cp2kparser/generics/__init__.py deleted file mode 100644 index ee3ecd2..0000000 --- a/cp2kparser/cp2kparser/generics/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#! 
/usr/bin/env python diff --git a/cp2kparser/cp2kparser/generics/parser.py b/cp2kparser/cp2kparser/generics/parser.py deleted file mode 100644 index 0396ac9..0000000 --- a/cp2kparser/cp2kparser/generics/parser.py +++ /dev/null @@ -1,64 +0,0 @@ -import os -import logging -from abc import ABCMeta, abstractmethod -from nomadanalysis.local_backend import LocalBackend -logger = logging.getLogger(__name__) - - -#=============================================================================== -class Parser(object): - """ - """ - __metaclass__ = ABCMeta - - def __init__(self, dirpath=None, files=None, metainfo_to_keep=None, backend=None): - """ - """ - self.parser_context = ParserContext() - self.parser_context.backend = backend - self.parser_context.files = files - self.parser_context.backend = backend - self.parser_context.metainfo_to_keep = metainfo_to_keep - self.implementation = None - - # If directory provided, the interesting files are first identified - if dirpath: - files = self.search_path(dirpath) - self.parser_context.files = files - - # If no backend provided, create one with default metainfos - if not backend: - metainfo_path = "/home/lauri/Dropbox/nomad-dev/nomad-meta-info/meta_info/nomad_meta_info/cp2k.nomadmetainfo.json" - metainfoenv, warnings = loadJsonFile(metainfo_path) - backend = LocalBackend(metainfoenv) - self.parser_context.backend = LocalBackend() - - @abstractmethod - def setup(self): - """Deduce the version of the software that was used and setup a correct - implementation. The implementations should subclass NomadParser. - - Returns: - A NomadParser object that is ready to do the parsing. - """ - pass - - def search_path(self, dirpath): - """Searches the given path for files that are of interest to this - parser. Returns them as a list of path strings. 
- """ - files = [] - for filename in os.listdir(dirpath): - files.append(os.path.join(dirpath, filename)) - return files - - -#=============================================================================== -class ParserContext(object): - """Contains everything needed to instantiate a parser implementation. - """ - def __init__(self, files=None, metainfo_to_keep=None, backend=None, version_id=None): - self.files = files - self.version_id = version_id - self.metainfo_to_keep = metainfo_to_keep - self.backend = backend diff --git a/cp2kparser/cp2kparser/engines/csvengine.py b/cp2kparser/cp2kparser/implementation/csvparsing.py similarity index 99% rename from cp2kparser/cp2kparser/engines/csvengine.py rename to cp2kparser/cp2kparser/implementation/csvparsing.py index 7435327..324d08c 100644 --- a/cp2kparser/cp2kparser/engines/csvengine.py +++ b/cp2kparser/cp2kparser/implementation/csvparsing.py @@ -17,7 +17,7 @@ else: #=============================================================================== -class CSVEngine(object): +class CSVParser(object): """Used to parse out freeform CSV-like content. Currently only can parse floating point information. 
diff --git a/cp2kparser/cp2kparser/implementation/cp2kimplementations.py b/cp2kparser/cp2kparser/implementation/implementations.py similarity index 97% rename from cp2kparser/cp2kparser/implementation/cp2kimplementations.py rename to cp2kparser/cp2kparser/implementation/implementations.py index b738f01..2684756 100644 --- a/cp2kparser/cp2kparser/implementation/cp2kimplementations.py +++ b/cp2kparser/cp2kparser/implementation/implementations.py @@ -1,13 +1,12 @@ import re import os import logging -from ..engines.csvengine import CSVEngine -from ..implementation.cp2kinputparsers import CP2KInputParser -from ..implementation.cp2kinputenginedata.input_tree import CP2KInput -from ..implementation.cp2koutputparsers import * -from ..generics.parserimplementation import ParserImplementation +from cp2kparser.implementation.csvparsing import CSVParser +from cp2kparser.implementation.inputparsing import CP2KInputParser +from cp2kparser.implementation.cp2kinputenginedata.input_tree import CP2KInput +from cp2kparser.implementation.outputparsing import * +from cp2kparser.utils.parserimplementation import ParserImplementation from nomadcore.coordinate_reader import CoordinateReader -from nomadcore.unit_conversion.unit_conversion import convert_unit logger = logging.getLogger(__name__) @@ -22,11 +21,11 @@ class CP2KImplementation262(ParserImplementation): # Initialize the parsing tools. The input and output parsers need to # know the version id. 
- self.csvengine = CSVEngine(self) + self.csvengine = CSVParser(self) self.atomsengine = CoordinateReader() self.inputparser = CP2KInputParser() self.inputparser.setup_version(self.version_id) - self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](self, self.metainfos) + self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](self, self.metainfo_to_keep) self.input_tree = None self.extended_input = None @@ -273,11 +272,9 @@ class CP2KImplementation262(ParserImplementation): # Use the SimpleMatcher to extract most of the results parserInfo = {"name": "cp2k-parser", "version": "1.0"} outputfilename = self.get_file_handle("output").name - backend = self.backend - metainfos = self.metainfos outputstructure = self.outputparser.outputstructure cachingLevelForMetaName = self.outputparser.cachingLevelForMetaName - self.parse_file(outputfilename, outputstructure, metainfos, backend, parserInfo, cachingLevelForMetaName, superContext=self.outputparser) + self.parse_file(outputfilename, outputstructure, parserInfo, cachingLevelForMetaName, superContext=self.outputparser) # Then extract the things that cannot be extracted by the SimpleMatcher diff --git a/cp2kparser/cp2kparser/implementation/cp2kinputparsers.py b/cp2kparser/cp2kparser/implementation/inputparsing.py similarity index 100% rename from cp2kparser/cp2kparser/implementation/cp2kinputparsers.py rename to cp2kparser/cp2kparser/implementation/inputparsing.py diff --git a/cp2kparser/cp2kparser/implementation/cp2koutputparsers.py b/cp2kparser/cp2kparser/implementation/outputparsing.py similarity index 99% rename from cp2kparser/cp2kparser/implementation/cp2koutputparsers.py rename to cp2kparser/cp2kparser/implementation/outputparsing.py index 3b599b3..b9b5d31 100644 --- a/cp2kparser/cp2kparser/implementation/cp2koutputparsers.py +++ b/cp2kparser/cp2kparser/implementation/outputparsing.py @@ -76,7 +76,7 @@ class CP2KOutputParser262(object): startReStr=" FUNCTIONAL\|", 
forwardMatch=True, sections=["section_method", "cp2k_section_functionals"], - otherMetaInfo=["XC_functional"], + otherMetaInfo=["XC_functional_scf"], subMatchers=[ SM( repeats=True, @@ -244,7 +244,7 @@ class CP2KOutputParser262(object): functionals = "_".join(sorted(functionals)) # Push the functional string into the backend - backend.addValue('XC_functional', functionals) + backend.addValue('XC_functional_scf', functionals) def onClose_cp2k_section_atom_position(self, backend, gIndex, section): """Get the initial atomic positions from cp2kparser. diff --git a/cp2kparser/cp2kparser/implementation/cp2kparser.py b/cp2kparser/cp2kparser/implementation/parsing.py similarity index 71% rename from cp2kparser/cp2kparser/implementation/cp2kparser.py rename to cp2kparser/cp2kparser/implementation/parsing.py index 48fa28b..8f88d1a 100644 --- a/cp2kparser/cp2kparser/implementation/cp2kparser.py +++ b/cp2kparser/cp2kparser/implementation/parsing.py @@ -1,7 +1,7 @@ import re import logging -from ..generics.parser import Parser -from ..implementation.cp2kimplementations import * +from cp2kparser.utils.parser import Parser +from cp2kparser.implementation.implementations import * logger = logging.getLogger(__name__) @@ -17,8 +17,9 @@ class CP2KParser(Parser): After the implementation has been setup, you can parse the files with parse(). """ - def __init__(self, dirpath=None, files=None, metainfo_path=None, backend=None): - Parser.__init__(self, dirpath, files, metainfo_path, backend) + + def __init__(self, contents=None, metainfo_to_keep=None, backend=None): + Parser.__init__(self, contents, metainfo_to_keep, backend) def setup(self): """Setups the version by looking at the output file and the version @@ -57,6 +58,25 @@ class CP2KParser(Parser): self.parser_context.version_id = "262" self.implementation = globals()["CP2KImplementation262"](self.parser_context) + def search_parseable_files(self, files): + """Searches the given path for files that are of interest to this + parser. 
Returns them as a list of path strings. + """ + return files + def parse(self): self.setup() self.implementation.parse() + + def get_metainfo_filename(self): + """This function should return the name of the metainfo file that is + specific for this parser. This name is used by the Analyzer class in + the nomadtoolkit. + """ + return "cp2k.nomadmetainfo.json" + + +#=============================================================================== +# This is what gets run when the scala layer calls for this parser +if __name__ == "__main__": + print "Moi" diff --git a/cp2kparser/cp2kparser/metainfo b/cp2kparser/cp2kparser/metainfo deleted file mode 160000 index 163501e..0000000 --- a/cp2kparser/cp2kparser/metainfo +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 163501eabba0fa385f28edcb55aa577de96e7624 diff --git a/cp2kparser/cp2kparser/engines/__init__.py b/cp2kparser/cp2kparser/old/__init__.py similarity index 100% rename from cp2kparser/cp2kparser/engines/__init__.py rename to cp2kparser/cp2kparser/old/__init__.py diff --git a/cp2kparser/cp2kparser/engines/csvengine/tests.py b/cp2kparser/cp2kparser/old/csvengine/tests.py similarity index 100% rename from cp2kparser/cp2kparser/engines/csvengine/tests.py rename to cp2kparser/cp2kparser/old/csvengine/tests.py diff --git a/cp2kparser/cp2kparser/engines/csvengine/xyz/10.xyz b/cp2kparser/cp2kparser/old/csvengine/xyz/10.xyz similarity index 100% rename from cp2kparser/cp2kparser/engines/csvengine/xyz/10.xyz rename to cp2kparser/cp2kparser/old/csvengine/xyz/10.xyz diff --git a/cp2kparser/cp2kparser/engines/csvengine/xyz/100.xyz b/cp2kparser/cp2kparser/old/csvengine/xyz/100.xyz similarity index 100% rename from cp2kparser/cp2kparser/engines/csvengine/xyz/100.xyz rename to cp2kparser/cp2kparser/old/csvengine/xyz/100.xyz diff --git a/cp2kparser/cp2kparser/engines/csvengine/xyz/1000.xyz b/cp2kparser/cp2kparser/old/csvengine/xyz/1000.xyz similarity index 100% rename from cp2kparser/cp2kparser/engines/csvengine/xyz/1000.xyz rename 
to cp2kparser/cp2kparser/old/csvengine/xyz/1000.xyz diff --git a/cp2kparser/cp2kparser/engines/csvengine/xyz/10000.xyz b/cp2kparser/cp2kparser/old/csvengine/xyz/10000.xyz similarity index 100% rename from cp2kparser/cp2kparser/engines/csvengine/xyz/10000.xyz rename to cp2kparser/cp2kparser/old/csvengine/xyz/10000.xyz diff --git a/cp2kparser/cp2kparser/engines/regexengine.py b/cp2kparser/cp2kparser/old/regexengine.py similarity index 100% rename from cp2kparser/cp2kparser/engines/regexengine.py rename to cp2kparser/cp2kparser/old/regexengine.py diff --git a/cp2kparser/cp2kparser/engines/xmlengine.py b/cp2kparser/cp2kparser/old/xmlengine.py similarity index 100% rename from cp2kparser/cp2kparser/engines/xmlengine.py rename to cp2kparser/cp2kparser/old/xmlengine.py diff --git a/nomadanalysis/nomadanalysis/utils/__init__.py b/cp2kparser/cp2kparser/utils/__init__.py similarity index 100% rename from nomadanalysis/nomadanalysis/utils/__init__.py rename to cp2kparser/cp2kparser/utils/__init__.py diff --git a/cp2kparser/cp2kparser/generics/logconfig.py b/cp2kparser/cp2kparser/utils/logconfig.py similarity index 100% rename from cp2kparser/cp2kparser/generics/logconfig.py rename to cp2kparser/cp2kparser/utils/logconfig.py diff --git a/cp2kparser/cp2kparser/utils/parser.py b/cp2kparser/cp2kparser/utils/parser.py new file mode 100644 index 0000000..7e3a097 --- /dev/null +++ b/cp2kparser/cp2kparser/utils/parser.py @@ -0,0 +1,106 @@ +import os +import logging +from abc import ABCMeta, abstractmethod +import nomadtoolkit.config +from nomadcore.local_meta_info import loadJsonFile +from nomadtoolkit.local_backend import LocalBackend +logger = logging.getLogger(__name__) + + +#=============================================================================== +class Parser(object): + """ + Attributes: + self.implementation: an object that actually does the parsing and is + setup by this class based on the given contents. 
+ """ + __metaclass__ = ABCMeta + parser_name = None + + def __init__(self, contents, metainfo_to_keep=None, backend=None): + """ + Args: + contents: list of absolute filepaths as strings + metainfo_to_keep: list of metainfo names to parse as strings. + backend: the backend where the parsing results are outputted + """ + self.parser_context = ParserContext() + self.parser_context.backend = backend + self.parser_context.metainfo_to_keep = metainfo_to_keep + self.implementation = None + + # If single path provided, make it into a list + if isinstance(contents, basestring): + contents = [contents] + + # Figure out all the files from the contents + files = set() + for content in contents: + if os.path.isdir(content): + dir_files = set() + for filename in os.listdir(content): + dir_files.add(os.path.join(content, filename)) + files |= dir_files + elif os.path.isfile(content): + files.add(content) + else: + logger.error("The string '{}' is not a valid path.".format(content)) + + # Filter the files leaving only the parseable ones. Each parser can + # specify which files are of interest or to include them all. + self.parser_context.files = self.search_parseable_files(files) + + # If no backend provided, create Local one with default metainfos + if not backend: + metadir = nomadtoolkit.config.get_config("metaInfoPath") + default_metainfo_path = os.path.realpath(os.path.join(metadir, self.get_metainfo_filename())) + metainfoenv, warnings = loadJsonFile(default_metainfo_path) + backend = LocalBackend(metainfoenv) + self.parser_context.backend = backend + + @abstractmethod + def setup(self): + """Deduce the version of the software that was used and setup a correct + implementation. The implementations should subclass + ParserImplementation and be stored to the 'implementation' attribute of + this class. You can give the parser_context wrapper object in the + parser implementation constructor to pass all the relevant data onto + the implementation. 
+ """ + pass + + @abstractmethod + def search_parseable_files(self, files): + """From a list of filenames tries to guess which files are relevant to + the parsing process. Essentially filters the files before they are sent + to the parser implementation. + """ + return files + + @abstractmethod + def get_metainfo_filename(self): + """This function should return the name of the metainfo file that is + specific for this parser. This name is used by the Analyzer class in + the nomadtoolkit. + """ + return None + + @abstractmethod + def parse(self): + """Starts the actual parsing process outputting the results to the + backend. + """ + self.setup() + if not self.implementation: + logger.error("No parser implementation has been setup.") + + +#=============================================================================== +class ParserContext(object): + """Contains everything needed to instantiate a parser implementation. + """ + def __init__(self, files=None, metainfo_to_keep=None, backend=None, version_id=None): + self.files = files + self.version_id = version_id + self.metainfo_to_keep = metainfo_to_keep + self.backend = backend diff --git a/cp2kparser/cp2kparser/generics/parserimplementation.py b/cp2kparser/cp2kparser/utils/parserimplementation.py similarity index 99% rename from cp2kparser/cp2kparser/generics/parserimplementation.py rename to cp2kparser/cp2kparser/utils/parserimplementation.py index 61b90c4..ca2fb80 100644 --- a/cp2kparser/cp2kparser/generics/parserimplementation.py +++ b/cp2kparser/cp2kparser/utils/parserimplementation.py @@ -66,8 +66,6 @@ class ParserImplementation(object): self, fileToParse, mainFileDescription, - metainfos, - backend, parserInfo, cachingLevelForMetaName={}, defaultDataCachingLevel=CachingLevel.ForwardAndCache, @@ -79,8 +77,12 @@ class ParserImplementation(object): Args: Returns: """ + + metainfo_to_keep = self.metainfo_to_keep + backend = self.backend + # Initialize the parser builder - parserBuilder = 
SimpleParserBuilder(mainFileDescription, backend.metaInfoEnv(), metainfos) + parserBuilder = SimpleParserBuilder(mainFileDescription, backend.metaInfoEnv(), metainfo_to_keep) if logger.isEnabledFor(logging.DEBUG): s = StringIO.StringIO() s.write("matchers:") diff --git a/cp2kparser/cp2kparser/generics/testing.py b/cp2kparser/cp2kparser/utils/testing.py similarity index 100% rename from cp2kparser/cp2kparser/generics/testing.py rename to cp2kparser/cp2kparser/utils/testing.py diff --git a/nomadanalysis/README.md b/nomadanalysis/README.md deleted file mode 100644 index bf317aa..0000000 --- a/nomadanalysis/README.md +++ /dev/null @@ -1 +0,0 @@ -# Nomad Analysis diff --git a/nomadanalysis/nomadanalysis.egg-info/PKG-INFO b/nomadanalysis/nomadanalysis.egg-info/PKG-INFO deleted file mode 100644 index 082b26c..0000000 --- a/nomadanalysis/nomadanalysis.egg-info/PKG-INFO +++ /dev/null @@ -1,10 +0,0 @@ -Metadata-Version: 1.0 -Name: nomadanalysis -Version: 0.1 -Summary: Tools for analysing calculation results parsed by NOMAD parsers. 
-Home-page: UNKNOWN -Author: Lauri Himanen -Author-email: lauri.himanen@gmail.com -License: GPL3 -Description: UNKNOWN -Platform: UNKNOWN diff --git a/nomadanalysis/nomadanalysis.egg-info/SOURCES.txt b/nomadanalysis/nomadanalysis.egg-info/SOURCES.txt deleted file mode 100644 index e5e018a..0000000 --- a/nomadanalysis/nomadanalysis.egg-info/SOURCES.txt +++ /dev/null @@ -1,9 +0,0 @@ -setup.py -nomadanalysis/__init__.py -nomadanalysis/analyzer.py -nomadanalysis.egg-info/PKG-INFO -nomadanalysis.egg-info/SOURCES.txt -nomadanalysis.egg-info/dependency_links.txt -nomadanalysis.egg-info/not-zip-safe -nomadanalysis.egg-info/requires.txt -nomadanalysis.egg-info/top_level.txt \ No newline at end of file diff --git a/nomadanalysis/nomadanalysis.egg-info/dependency_links.txt b/nomadanalysis/nomadanalysis.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/nomadanalysis/nomadanalysis.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/nomadanalysis/nomadanalysis.egg-info/not-zip-safe b/nomadanalysis/nomadanalysis.egg-info/not-zip-safe deleted file mode 100644 index 8b13789..0000000 --- a/nomadanalysis/nomadanalysis.egg-info/not-zip-safe +++ /dev/null @@ -1 +0,0 @@ - diff --git a/nomadanalysis/nomadanalysis.egg-info/requires.txt b/nomadanalysis/nomadanalysis.egg-info/requires.txt deleted file mode 100644 index e076fe3..0000000 --- a/nomadanalysis/nomadanalysis.egg-info/requires.txt +++ /dev/null @@ -1,2 +0,0 @@ -pint -numpy \ No newline at end of file diff --git a/nomadanalysis/nomadanalysis.egg-info/top_level.txt b/nomadanalysis/nomadanalysis.egg-info/top_level.txt deleted file mode 100644 index 5f449b7..0000000 --- a/nomadanalysis/nomadanalysis.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -nomadanalysis diff --git a/nomadanalysis/nomadanalysis/__init__.py b/nomadanalysis/nomadanalysis/__init__.py deleted file mode 100644 index 786cde3..0000000 --- a/nomadanalysis/nomadanalysis/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ 
-#! /usr/bin/env python - -# This will activate the logging utilities for nomadanalysis -import utils.log - -# Import the common classes here for less typing -from .analyzer import Analyzer diff --git a/nomadanalysis/nomadanalysis/examples/1_basics.py b/nomadanalysis/nomadanalysis/examples/1_basics.py deleted file mode 100644 index 1592424..0000000 --- a/nomadanalysis/nomadanalysis/examples/1_basics.py +++ /dev/null @@ -1,11 +0,0 @@ -from nomadanalysis import Analyzer -from cp2kparser import CP2KParser - -# Initialize the parser you want to use -parser = CP2KParser() -parser.dirpath = "/home/lauri/Dropbox/nomad-dev/parser-cp2k/cp2kparser/cp2kparser/tests/cp2k_2.6.2/forces/outputfile/n" -parser.metainto_to_keep = ["section_run"] - -# Initialize the analyzer -analyzer = Analyzer(parser) -results = analyzer.parse() diff --git a/nomadtoolkit/README.md b/nomadtoolkit/README.md new file mode 100644 index 0000000..6adc1a1 --- /dev/null +++ b/nomadtoolkit/README.md @@ -0,0 +1,27 @@ +# Nomad Toolkit +This is a package that contains the necessary tools for running the nomad parsers +locally. It contains the python-common and nomad-meta-info repositories as +submodules for easier installation. + +This package does not contain any of the parsers themselves. You should +download and install them separately. The parsers should have one main class +that inherits the 'Parser' baseclass and implements its interface. + +# Download +Currently this package is contained inside the parser-cp2k repository because +it is only used by it. If someone else wants to adopt these tools, this +package can maybe be separated into its own repository. + +Use git to copy this repository to your local machine. You will also have to +recursively download the submodules. 
All this can be achieved with the command: + +```sh +git clone --recursive git@gitlab.mpcdf.mpg.de:nomad-lab/parser-cp2k.git +``` + +# Installation +To install this toolkit run the included nomadtoolkit/setup.py file as follows: + +```sh +python setup.py develop --user +``` diff --git a/nomadtoolkit/nomadtoolkit/__init__.py b/nomadtoolkit/nomadtoolkit/__init__.py new file mode 100644 index 0000000..f1e7f55 --- /dev/null +++ b/nomadtoolkit/nomadtoolkit/__init__.py @@ -0,0 +1 @@ +from nomadtoolkit.analysis import Analyzer diff --git a/nomadanalysis/nomadanalysis/analyzer.py b/nomadtoolkit/nomadtoolkit/analysis.py similarity index 96% rename from nomadanalysis/nomadanalysis/analyzer.py rename to nomadtoolkit/nomadtoolkit/analysis.py index de1336f..9219086 100644 --- a/nomadanalysis/nomadanalysis/analyzer.py +++ b/nomadtoolkit/nomadtoolkit/analysis.py @@ -2,7 +2,7 @@ import sys import logging from nomadcore.local_meta_info import loadJsonFile from nomadcore.parser_backend import JsonParseEventsWriterBackend -from nomadanalysis.local_backend import LocalBackend +from nomadtoolkit.local_backend import LocalBackend logger = logging.getLogger(__name__) diff --git a/nomadtoolkit/nomadtoolkit/config.py b/nomadtoolkit/nomadtoolkit/config.py new file mode 100644 index 0000000..ce75639 --- /dev/null +++ b/nomadtoolkit/nomadtoolkit/config.py @@ -0,0 +1,28 @@ +import os +import json + + +def open_config_file(): + return open(os.path.join(os.path.dirname(__file__), "config.json"), "r+") + + +def open_config_json(): + configfile = open_config_file() + contents = configfile.read() + jsonobject = json.loads(contents) + return jsonobject + + +def set_config(name, value): + configfile = open_config_file() + contents = configfile.read() + jsonobject = json.loads(contents) + jsonobject[name] = value + configfile.seek(0) + configfile.truncate() + configfile.write(json.dumps(jsonobject, configfile)) + + +def get_config(name): + jsonobj = open_config_json() + return jsonobj.get(name) diff 
--git a/nomadtoolkit/nomadtoolkit/examples/1_basics.py b/nomadtoolkit/nomadtoolkit/examples/1_basics.py new file mode 100644 index 0000000..77fb385 --- /dev/null +++ b/nomadtoolkit/nomadtoolkit/examples/1_basics.py @@ -0,0 +1,14 @@ +from nomadtoolkit import Analyzer +from cp2kparser import CP2KParser + +# Initialize the parser you want to use. By default the parser will use the +# local backend. The local backend uses the metainfo files that come together +# with the nomadtoolkit repository and it outputs results in a python +# dictionary. +dirpaths = "/home/lauri/Dropbox/nomad-dev/parser-cp2k/cp2kparser/cp2kparser/tests/cp2k_2.6.2/forces/outputfile/n" +parser = CP2KParser(contents=dirpaths) + +# Initialize the analyzer +analyzer = Analyzer(parser) +results = analyzer.parse() +# print results diff --git a/nomadanalysis/nomadanalysis/local_backend.py b/nomadtoolkit/nomadtoolkit/local_backend.py similarity index 91% rename from nomadanalysis/nomadanalysis/local_backend.py rename to nomadtoolkit/nomadtoolkit/local_backend.py index b8fa931..f5a63bf 100644 --- a/nomadanalysis/nomadanalysis/local_backend.py +++ b/nomadtoolkit/nomadtoolkit/local_backend.py @@ -24,7 +24,7 @@ class LocalBackend(object): gIndex should be unique (no reopening of a closed section)""" self.__lastIndex[metaName] = gIndex self.__openSections.add((metaName, gIndex)) - self.__jsonOutput({"event":"openSection", "metaName":metaName, "gIndex":gIndex}) + self.__jsonOutput({"event": "openSection", "metaName": metaName, "gIndex": gIndex}) def __jsonOutput(self, dic): pass @@ -50,10 +50,10 @@ class LocalBackend(object): def metaInfoEnv(self): return self.__metaInfoEnv - def startedParsingSession(self, mainFileUri, parserInfo, parsingStatus = None, parsingErrors = None): + def startedParsingSession(self, mainFileUri, parserInfo, parsingStatus=None, parsingErrors=None): pass - def finishedParsingSession(self, parsingStatus, parsingErrors, mainFileUri = None, parserInfo = None): + def 
finishedParsingSession(self, parsingStatus, parsingErrors, mainFileUri=None, parserInfo=None): pass diff --git a/nomadtoolkit/nomadtoolkit/utils/__init__.py b/nomadtoolkit/nomadtoolkit/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/nomadanalysis/nomadanalysis/utils/log.py b/nomadtoolkit/nomadtoolkit/utils/log.py similarity index 100% rename from nomadanalysis/nomadanalysis/utils/log.py rename to nomadtoolkit/nomadtoolkit/utils/log.py diff --git a/nomadanalysis/setup.py b/nomadtoolkit/setup.py similarity index 58% rename from nomadanalysis/setup.py rename to nomadtoolkit/setup.py index 60a18a9..311144a 100644 --- a/nomadanalysis/setup.py +++ b/nomadtoolkit/setup.py @@ -1,17 +1,18 @@ from setuptools import setup +import os #=============================================================================== def main(): # Start package setup setup( - name="nomadanalysis", + name="nomadtoolkit", version="0.1", description="Tools for analysing calculation results parsed by NOMAD parsers.", author="Lauri Himanen", author_email="lauri.himanen@gmail.com", license="GPL3", - packages=["nomadanalysis"], + packages=["nomadtoolkit"], install_requires=[ 'pint', 'numpy', @@ -21,4 +22,11 @@ def main(): # Run main function by default if __name__ == "__main__": + + # Install the toolkit package main() + + # Save the path where the metainfo are saved for further use + import nomadtoolkit.config + metapath = os.path.realpath(os.path.join(os.path.dirname(__file__), "submodules/nomad-meta-info/meta_info/nomad_meta_info")) + nomadtoolkit.config.set_config("metaInfoPath", metapath) diff --git a/nomadtoolkit/submodules/nomad-meta-info b/nomadtoolkit/submodules/nomad-meta-info new file mode 160000 index 0000000..c53dee7 --- /dev/null +++ b/nomadtoolkit/submodules/nomad-meta-info @@ -0,0 +1 @@ +Subproject commit c53dee7ab6a8d40b9b4c4c2d70262aec79fc05e5 diff --git a/nomadtoolkit/submodules/python-common b/nomadtoolkit/submodules/python-common new file mode 160000 index 
0000000..1ec174e --- /dev/null +++ b/nomadtoolkit/submodules/python-common @@ -0,0 +1 @@ +Subproject commit 1ec174e5558f1ee46aca3a44a2205f3af86f23f8 -- GitLab