diff --git a/parser/parser-cp2k/cp2kparser/generic/baseclasses.py b/parser/parser-cp2k/cp2kparser/generic/baseclasses.py deleted file mode 100644 index 35b020b9203aeda2765a87a7265709bf72056c0c..0000000000000000000000000000000000000000 --- a/parser/parser-cp2k/cp2kparser/generic/baseclasses.py +++ /dev/null @@ -1,592 +0,0 @@ -""" -This module contains the base classes that help in building parsers for the -NoMaD project. -""" - -import os -import logging -from abc import ABCMeta, abstractmethod -from nomadcore.unit_conversion import unit_conversion -from nomadcore.simple_parser import AncillaryParser, mainFunction -from nomadcore.local_backend import LocalBackend -from nomadcore.local_meta_info import load_metainfo -from nomadcore.caching_backend import CachingLevel -import nomadcore.ActivateLogging -logger = logging.getLogger("nomad") - - -#=============================================================================== -class ParserInterface(object): - """This class provides the interface parsing. The end-user will typically - only interact with this class. All the input is given to this class (or - typically a subclass) and the parsing is done by calling the parse() - method. The parsing output is determined by the backend object that is - given in the constructor as a dependency. - - Attributes: - main_parser: Object that actually does the parsing and is - setup by this class based on the given contents. - parser_context: A wrapper class for all the parser related information. - This is contructed here and then passed onto the different - subparsers. - """ - __metaclass__ = ABCMeta - - def __init__(self, main_file, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None): - """ - Args: - main_file: A special file that can be considered the main file of the - calculation. - metainfo_to_keep: A list of metainfo names. This list is used to - optimize the parsing process as optimally only the information - relevant to these metainfos will be parsed. - backend: An object to which the parser will give all the parsed data. - The backend will then determine where and when to output that data. - """ - self.initialize(main_file, metainfo_to_keep, backend, default_units, metainfo_units) - - def initialize(self, main_file, metainfo_to_keep, backend, default_units, metainfo_units): - """Initialize the parser with the given environment. - """ - self.parser_context = ParserContext() - self.parser_context.metainfo_to_keep = metainfo_to_keep - self.parser_context.main_file = main_file - self.parser_context.file_service = FileService() - self.parser_context.parser_info = self.get_parser_info() - self.main_parser = None - - # Check that the main file exists - if not os.path.isfile(main_file): - logger.error("Couldn't find the main file {}. Check that the path is valid and the file exists on this path.".format(main_file)) - - # Load metainfo environment - metainfo_env, warn = load_metainfo(self.get_metainfo_filename()) - self.parser_context.metainfo_env = metainfo_env - - # Initialize the backend. 
Use local backend if none given - if backend is not None: - self.parser_context.super_backend = backend(metainfo_env) - else: - self.parser_context.super_backend = LocalBackend(metainfo_env) - - # Check the list of default units - default_unit_map = {} - if default_units is not None: - for unit in default_units: - dimension = unit_conversion.ureg(unit).dimensionality - old_value = default_unit_map.get(str(dimension)) - if old_value is not None: - raise LookupError("You can only specify one default value per dimension in the 'default_units' list. There are two different units given for the dimension '{}'".format(dimension)) - default_unit_map[str(dimension)] = unit - - # Check the list of metainfo units - if metainfo_units is not None: - for metaname, unit in metainfo_units.iteritems(): - - # Check that the unit is OK - unit_conversion.ureg(unit) - - # Check that the metaname is OK - meta = metainfo_env.infoKinds.get(metaname) - if meta is None: - raise KeyError("The metainfo name '{}' could not be found. Check for typos or try updating the metainfo repository.".format(metaname)) - - # Save the default units - self.parser_context.default_units = default_unit_map - self.parser_context.metainfo_units = metainfo_units - - @abstractmethod - def setup_version(self): - """Deduce the version of the software that was used and setup a correct - main parser. The main parser should subclass MainParser and be stored - to the 'main_parser' attribute of this class. You can give the - 'parser_context' wrapper object in the main parser constructor to pass - all the relevant data for it. - """ - pass - - @abstractmethod - def get_metainfo_filename(self): - """This function should return the name of the metainfo file that is - specific for this parser. When the parser is started, the metainfo - environment is loaded from this file that is located within a separate - repository (nomad-meta-info). - - Returns: - A string containing the metainfo filename for this parser. - """ - return None - - @abstractmethod - def get_parser_info(self): - """This function should return a dictionary containing the parser info. - This info is printed to the JSON backend. it should be of the form: - - {'name': 'softwarename-parser', 'version': '1.0'} - - Returns: - A dictionary containing information about this parser. - """ - return None - - def parse(self): - """Starts the actual parsing process, and outputs the results to the - backend specified in the constructor. - """ - self.setup_version() - if not self.main_parser: - logger.error("The main parser has not been set up.") - - self.main_parser.parse() - - # If using a local backend, the results will have been saved to a - # separate results dictionary which should be returned. - try: - return self.parser_context.super_backend.results - except AttributeError: - return None - - -#=============================================================================== -class FileService(object): - """Provides the interface to accessing files related to a calculation. - - Before you can use the service you must setup the root path, where every - file related to this calculation resides. All queries filepaths will be - handled as relative to this root folder. - - You can also setup ids that point to a certain path. This helps in querying - the files as you don't have to remember the exact path and you can store - paths for later use. - - Used to map file paths to certain ID's. 
This helps in setting up the - Secondary parsers as you can associate file paths to simpler ID's that are - easier to use. - - Attributes: - root_folder: A path to the root folder - _file_ids: A dictionary containing the mapping between file ids and filepaths - _file_handles: A "private" dictionary containing the cached file handles - _file_contents: A "private" dictionary containing the cached file contents - _file_sizes: A "private" dictionary containing the cached file sizes - """ - def __init__(self, root_folder=None): - """ - Args: - root_folder: a path to the root folder as a string. - """ - self.map_id_to_path = {} - if root_folder is not None: - self.setup_root_folder(root_folder) - - def setup_root_folder(self, root_folder): - """Setup the path to the root folder. Every filepath you set or get - through this service should be relative to this root folder. - """ - if os.path.isdir(root_folder): - self.root_folder = root_folder - else: - raise IOError("Could not find the folder '{}'".format(root_folder)) - - def get_absolute_path_to_file(self, relative_path): - """ - Returns: - If the given . Return none if no file with the given path can - be found. - """ - path = os.path.join(self.root_folder, relative_path) - if os.path.isfile(path): - return path - else: - logger.error("Could not open the file '{}'.".format(path)) - return None - - def get_file_by_id(self, file_id): - """ - Returns: - Handle to the file. Return none if no file with the given id has - been set. - """ - path = self.map_id_to_path.get(file_id) - if path is None: - logger.error("The id '{}' has no path associated with it.".format(file_id)) - return None - else: - return self.get_file_by_path(path) - - def set_file_id(self, path, file_id): - """Used to map a simple identifier string to a file path. When a file - id has been setup, you can easily access the file by using the - functions get_file_handle() or get_file_contents() - """ - # Check if there is an old definition - old = self.map_id_to_path.get(file_id) - if old is not None: - raise LookupError("The path '{}' is already associated with id '{}'".format(old, file_id)) - - # Check that the file exists - path = os.path.join(self.root_folder, path) - if not os.path.isfile(path): - logger.error("Could not set the id for file '{}' as it cannot be found.".format(path)) - else: - self.map_id_to_path[file_id] = path - - -#=============================================================================== -class BasicParser(object): - """A base class for all parsers. - - Attributes: - file_path: Path to a file that is parsed by this class. - parser_context: The ParserContext object that contains various in-depth - information about the parsing environment. - backend: The backend that will cache things according to the rules - given in the main parser. - super_backend: The final backend where things are Forwarded to by the - caching backend. - """ - __metaclass__ = ABCMeta - - def __init__(self, file_path, parser_context): - self.file_path = file_path - self.parser_context = parser_context - self.backend = parser_context.caching_backend - self.super_backend = parser_context.super_backend - - @abstractmethod - def parse(self): - """Used to do the actual parsing. Inside this function you should push - the parsing results into the Caching backend, or directly to the - superBackend. You will also have to open new sections, but remember - that certain sections may already be opened by other parsers. 
- """ - pass - - -#=============================================================================== -class HierarchicalParser(BasicParser): - """A base class for all parsers that parse a file using a hierarchy of - SimpleMatcher objects. - - Attributes: - root_matcher: The SimpleMatcher object at the root of this parser. - caching_level_for_metaname: A dicionary containing the caching options - that the ActiveBackend will use for individual metanames. - - Example: - self.caching_level_for_metaname = { - 'section_XC_functionals': CachingLevel.ForwardAndCache, - } - default_data_caching_level: A default caching level for data, i.e. - metainfo with kindStr=type_document_content or type_dimension - default_section_caching_level: A default caching level for sections. - """ - def __init__(self, file_path, parser_context): - super(HierarchicalParser, self).__init__(file_path, parser_context) - self.root_matcher = None - self.caching_level_for_metaname = {} - self.default_data_caching_level = CachingLevel.ForwardAndCache - self.default_section_caching_level = CachingLevel.Forward - - -#=============================================================================== -class SecondaryParser(HierarchicalParser): - """A base class for ancillary file parsers. Instantiated and run by a - MainParser. - - Attributes: - ancillary_parser: An AncillaryParser object - """ - def __init__(self, file_path, parser_context, simple_parser): - """ - Args: - file_path: The path of the file to parse. Can be absolute or relative path. - simple_parser: The SimpleParser object that is does the actual - parsing. Shared by the SecondaryParsers and the MainParser. - """ - super(SecondaryParser, self).__init__(file_path, parser_context) - self.simple_parser = simple_parser - self.ancillary_parser = None - - def parse(self): - """Parser the given ancillary file in place. - """ - self.ancillary_parser = AncillaryParser(self.root_matcher, self.simple_parser, self.caching_levels, self) - - # Try opening the given file - try: - with open(self.file_path) as fIn: - self.ancillary_parser.parseFile(fIn) - except IOError: - dir_name, file_name = os.path.split(os.path.abspath(self.file_path)) - logger.warning("Could not find file '{}' in directory '{}'. No data will be parsed from this file".format(dir_name, file_name)) - - -#=============================================================================== -class MainParser(HierarchicalParser): - """Base class for parsers that parse the main file of a calculation using - SimpleMatchers. There should only be one main parser, and all other parsers - are instantiated by this one. - """ - - def __init__(self, file_path, parser_context): - """ - Args: - file_path: Path to the main file as a string. - parser_context: The ParserContext object that contains various - in-depth information about the parsing environment. - """ - super(MainParser, self).__init__(file_path, parser_context) - self.simple_parser = None - - def parse(self): - """Starts the parsing. 
By default uses the SimpleParser scheme, if you - want to use something else or customize the process just override this - method - """ - mainFunction( - mainFileDescription=self.root_matcher, - metaInfoEnv=self.parser_context.metainfo_env, - parserInfo=self.parser_context.parser_info, - outF=self.parser_context.super_backend.fileOut, - cachingLevelForMetaName=self.caching_level_for_metaname, - superContext=self, - onClose={}, - default_units=self.parser_context.default_units, - metainfo_units=self.parser_context.metainfo_units, - superBackend=self.parser_context.super_backend, - mainFile=self.parser_context.main_file) - - def startedParsing(self, fInName, parser): - """Function is called when the parsing starts. - - Get compiled parser. - Later one can compile a parser for parsing an external file. - """ - self.parser_context.caching_backend = parser.backend - - -#=============================================================================== -class ParserContext(object): - """A container class for storing and moving information about the parsing - environment. A single ParserContext object is initialized by the Parser - class, or it's subclass. - """ - def __init__(self): - self.main_file = None - self.version_id = None - self.metainfo_to_keep = None - self.super_backend = None - self.caching_backend = None - self.default_units = None - self.metainfo_units = None - self.file_service = None - self.metainfo_env = None - self.parser_info = None - - -#=============================================================================== -# class ParserImplementation(object): - # """The base class for a version specific parser implementation in. Provides - # some useful tools for setting up file access. - - # Attributes: - # parser_context: ParserContext object - # file_storage: FileStorage object - # main_parser: MainParser object - # """ - # def __init__(self, parser_context): - - # self.parser_context = parser_context - # self.file_storage = FileStorage() - # self.main_parser = None - - # # Copy all the attributes from the ParserContext object for quick access - # attributes = dir(parser_context) - # for attribute in attributes: - # if not attribute.startswith("__"): - # setattr(self, attribute, getattr(parser_context, attribute)) - - # # self.file_parsers = [] - - # # def setup_given_file_ids(self): - # # """Saves the file id's that were given in the JSON input. - # # """ - # # for path, file_id in self.files.iteritems(): - # # if file_id: - # # self.file_storage.setup_file_id(path, file_id) - - # def parse(self): - # """Start the parsing. 
Will try to parse everything unless given special - # rules (metaInfoToKeep).""" - # self.main_parser.parse() - # for file_parser in self.file_parsers: - # file_parser.parse() - - - - # Initialize the parser builder - # default_units = self.parser_context.default_units - # metainfo_units = self.parser_context.metainfo_units - # parserBuilder = SimpleParserBuilder(self.root_matcher, self.backend.metaInfoEnv(), self.metainfo_to_keep, default_units=default_units, metainfo_units=metainfo_units) - - # # Verify the metainfo - # if not parserBuilder.verifyMetaInfo(sys.stderr): - # sys.exit(1) - - # # Gather onClose functions from supercontext - # onClose = dict(self.onClose) - # for attr, callback in extractOnCloseTriggers(self).items(): - # oldCallbacks = onClose.get(attr, None) - # if oldCallbacks: - # oldCallbacks.append(callback) - # else: - # onClose[attr] = [callback] - - # # Setup the backend that caches ond handles triggers - # self.caching_backend = ActiveBackend.activeBackend( - # metaInfoEnv=self.backend.metaInfoEnv(), - # cachingLevelForMetaName=self.caching_level_for_metaname, - # defaultDataCachingLevel=self.default_data_caching_level, - # defaultSectionCachingLevel=self.default_section_caching_level, - # onClose=onClose, - # superBackend=self.backend, - # default_units=default_units, - # metainfo_units=metainfo_units) - - # # Compile the SimpleMatcher tree - # parserBuilder.compile() - - # self.backend.fileOut.write("[") - # uri = "file://" + self.file_path - # parserInfo = {'name': 'cp2k-parser', 'version': '1.0'} - # self.caching_backend.startedParsingSession(uri, parserInfo) - # with open(self.file_path, "r") as fIn: - # parser = parserBuilder.buildParser(PushbackLineFile(fIn), self.caching_backend, superContext=self) - # parser.parse() - # self.caching_backend.finishedParsingSession("ParseSuccess", None) - # self.backend.fileOut.write("]\n") - # def add_file_id(self, path, file_id): - # """ - # """ - # value = self.file_ids.get(file_id) - # if value: - # if isinstance(value, list): - # value.append(path) - # else: - # raise LookupError("You have already setup an unique file_path '{}' to this id.".format(value)) - # else: - # pathlist = [] - # pathlist.append(path) - # self.file_ids[file_id] = pathlist - - # def get_filepath_by_id(self, file_id, show_warning=True): - # """Get the file paths that were registered with the given id. - # """ - # value = self.file_ids.get(file_id) - # if value: - # if isinstance(value, list): - # n = len(value) - # if n == 0: - # if show_warning: - # logger.warning("No files set with id '{}'".format(file_id)) - # return None - # else: - # if show_warning: - # logger.debug("Multiple files set with id '{}'".format(file_id)) - # return value - # else: - # return value - # else: - # if show_warning: - # logger.warning("No files set with id '{}'".format(file_id)) - - # def get_file_handle(self, file_id, show_warning=True): - # """Get the handle for a single file with the given id. Uses cached result - # if available. Always seeks to beginning of file before returning it. - # """ - # # Get the filepath(s) - # path = self.get_filepath_by_id(file_id, show_warning) - # if not path: - # if show_warning: - # logger.warning("No filepaths registered to id '{}'. Register id's with setup_file_id().".format(file_id)) - # return - - # if isinstance(path, list): - # if len(path) == 0: - # return - # elif len(path) != 1: - # logger.error("Multiple filepaths found with id '{}'. 
Use get_file_handles() instead if you expect to have multiple files.".format(file_id))
-                # return
-            # else:
-                # path = path[0]
-
-        # # Search for filehandles, if not present create one
-        # handle = self._file_handles.get(path)
-        # if not handle:
-            # try:
-                # handle = open(path, "r")
-            # except (OSError, IOError):
-                # logger.error("Could not open file: '{}'".format(path))
-            # else:
-                # self._file_handles[file_id] = handle
-        # handle.seek(0, os.SEEK_SET)
-        # return handle
-
-    # def get_file_handles(self, file_id, show_warning=True):
-        # """Get the handles for multiple files with the given id. Uses cached result
-        # if available. Always seeks to beginning of files before returning them.
-        # """
-        # # Get the filepath(s)
-        # paths = self.get_filepath_by_id(file_id, show_warning)
-        # if not paths:
-            # return
-        # if not isinstance(paths, list):
-            # paths = [paths]
-
-        # # Search for filehandles, if not present create one
-        # handles = []
-        # for path in paths:
-            # handle = self._file_handles.get(path)
-            # if not handle:
-                # try:
-                    # handle = open(path, "r")
-                # except (OSError, IOError):
-                    # logger.error("Could not open file: '{}'".format(path))
-                # else:
-                    # self._file_handles[file_id] = handle
-            # handle.seek(0, os.SEEK_SET)
-            # handles.append(handle)
-
-        # # Return handles
-        # if len(handles) == 0:
-            # return None
-        # else:
-            # return handles
-
-    # def get_file_contents(self, file_id):
-        # """Get the contents for the file with the given id. Uses cached result
-        # if available. Does not cache files that are bigger than a certain
-        # limit.
-        # """
-        # cache_limit = 10000
-        # contents = self._file_contents.get(file_id)
-        # if not contents:
-            # fh = self.get_file_handle(file_id)
-            # fh.seek(0)
-            # contents = fh.read()
-            # if self.get_file_size(file_id) <= cache_limit:
-                # self._file_contents[file_id] = contents
-        # return contents
-
-    # def get_file_size(self, file_id):
-        # """Get the size of a file with the given id. Uses cached result
-        # if available.
-        # """
-        # size = self._file_sizes.get(file_id)
-        # if not size:
-            # fh = self.get_file_handle(file_id)
-            # fh.seek(0, os.SEEK_END)
-            # size = fh.tell()
-            # self._file_sizes[file_id] = size
-        # return size
diff --git a/parser/parser-cp2k/cp2kparser/parser.py b/parser/parser-cp2k/cp2kparser/parser.py
index 5104e23097f671f5e6fbcc2ec584b14bb3c1c23e..9cfea52de60f2dde2160a9bbf34b6cb27fee914e 100644
--- a/parser/parser-cp2k/cp2kparser/parser.py
+++ b/parser/parser-cp2k/cp2kparser/parser.py
@@ -1,7 +1,7 @@
 import os
 import re
 import logging
-from cp2kparser.generic.baseclasses import ParserInterface
+from nomadcore.baseclasses import ParserInterface
 from cp2kparser.versions.versionsetup import get_main_parser
 
 logger = logging.getLogger(__name__)
@@ -10,7 +10,7 @@ class CP2KParser(ParserInterface):
     """This class handles the initial setup before any parsing can happen. It
     determines which version of CP2K was used to generate the output and then
-    sets up a correct implementation.
+    sets up a correct main parser.
 
     After the implementation has been setup, you can parse the files with
     parse().
 
diff --git a/parser/parser-cp2k/cp2kparser/setup_paths.py b/parser/parser-cp2k/cp2kparser/setup_paths.py
index bb2e19394a321f358a0cc0bb943f85c498d09ba4..51e83a6360978d9d4115ce4091009c4e512a72f9 100644
--- a/parser/parser-cp2k/cp2kparser/setup_paths.py
+++ b/parser/parser-cp2k/cp2kparser/setup_paths.py
@@ -1,5 +1,5 @@
 """
-Setups the python-common lirary in the PYTHONPATH system variable.
+Setups the python-common library in the PYTHONPATH system variable.
 """
 import sys
 import os
diff --git a/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py b/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py
index 30d433cd6b6c95f9a182f6c65591169bb6ed0734..699a554ba1be712a4f9c5ca27e05c5a9172690b1 100644
--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/inputparser.py
@@ -2,7 +2,7 @@ import os
 import logging
 import cPickle as pickle
 import numpy as np
-from cp2kparser.generic.baseclasses import BasicParser
+from nomadcore.baseclasses import BasicParser
 from cp2kparser.generic.inputparsing import *
 
 logger = logging.getLogger("nomad")
diff --git a/parser/parser-cp2k/cp2kparser/versions/cp2k262/mainparser.py b/parser/parser-cp2k/cp2kparser/versions/cp2k262/mainparser.py
index ab31ba099009663524489f38ffd292e57a2af791..607b04dad81eed640d42049fd082cd41f29c39e9 100644
--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/mainparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/mainparser.py
@@ -1,7 +1,7 @@
 import re
 from nomadcore.simple_parser import SimpleMatcher as SM
 from nomadcore.caching_backend import CachingLevel
-from cp2kparser.generic.baseclasses import MainParser
+from nomadcore.baseclasses import MainHierarchicalParser
 from inputparser import CP2KInputParser
 import numpy as np
 import logging
@@ -9,7 +9,7 @@ logger = logging.getLogger("nomad")
 
 
 #===============================================================================
-class CP2KMainParser(MainParser):
+class CP2KMainParser(MainHierarchicalParser):
     """The main parser class.
     """
     def __init__(self, file_path, parser_context):
diff --git a/parser/parser-cp2k/cp2kparser/versions/versionsetup.py b/parser/parser-cp2k/cp2kparser/versions/versionsetup.py
index 0ccce8987af0cdf0cd8c7eeaeb2cc4b1107ce743..ccde2c7c458901535c8ef429c3d1ad30dc1e463e 100644
--- a/parser/parser-cp2k/cp2kparser/versions/versionsetup.py
+++ b/parser/parser-cp2k/cp2kparser/versions/versionsetup.py
@@ -1,10 +1,10 @@
-"""Returns the implementation classes based on the given version identifier.
-The different version are grouped into subpackages.
+"""Returns the main parser class based on the given version identifier. The
+different version are grouped into subpackages.
 """
 import importlib
 import logging
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("nomad")
 
 
 def get_main_parser(version_id):
diff --git a/parser/parser-cp2k/setup.py b/parser/parser-cp2k/setup.py
index cbd18ae1e426912ccbc5880e63564f98df002f48..ccb9cf6f26782a23da2ea8aa1b6d1f87bcfa25bd 100644
--- a/parser/parser-cp2k/setup.py
+++ b/parser/parser-cp2k/setup.py
@@ -13,7 +13,7 @@ def main():
         version="0.1",
         include_package_data=True,
         package_data={
-            '': ['*.json', '*.pickle'],
+            '': ['*.pickle'],
         },
         description="NoMaD parser implementation for CP2K",
         author="Lauri Himanen",
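
For reference, the snippet below is a minimal sketch, not part of the commit, of how a parser is built on the relocated base class. It assumes that nomadcore.baseclasses keeps the interface of the deleted local module: setup_version(), get_metainfo_filename() and get_parser_info() remain the abstract overrides, and parse() drives the version-specific parser stored in self.main_parser. The class name, version id and metainfo filename are illustrative only.

from cp2kparser.versions.versionsetup import get_main_parser
from nomadcore.baseclasses import ParserInterface


class MinimalParser(ParserInterface):
    """Hypothetical parser that only shows the required overrides."""

    def get_metainfo_filename(self):
        # Metainfo definition file in the nomad-meta-info repository (assumed name).
        return "cp2k.nomadmetainfo.json"

    def get_parser_info(self):
        # Written to the backend at the start of the parsing session.
        return {'name': 'cp2k-parser', 'version': '1.0'}

    def setup_version(self):
        # Deduce the code version from the output file and attach the matching
        # main parser; "262" is a placeholder version id for illustration.
        version_id = "262"
        main_parser_class = get_main_parser(version_id)
        self.main_parser = main_parser_class(
            self.parser_context.main_file, self.parser_context)

Calling MinimalParser(main_file).parse() would then run setup_version() and forward the parsed data to the configured backend, mirroring ParserInterface.parse() in the removed module.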
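
A corresponding sketch for the renamed main-parser base class, assuming MainHierarchicalParser exposes the same attributes as the removed MainParser/HierarchicalParser (root_matcher and caching_level_for_metaname) and is constructed with (file_path, parser_context). The SimpleMatcher tree and its regular expression are illustrative, not taken from the CP2K parser.

from nomadcore.simple_parser import SimpleMatcher as SM
from nomadcore.caching_backend import CachingLevel
from nomadcore.baseclasses import MainHierarchicalParser


class MinimalMainParser(MainHierarchicalParser):
    """Hypothetical main parser that only defines a root matcher."""

    def __init__(self, file_path, parser_context):
        super(MinimalMainParser, self).__init__(file_path, parser_context)
        # Root of the SimpleMatcher hierarchy used to scan the main file.
        self.root_matcher = SM(
            r" placeholder start line",  # illustrative pattern only
            sections=["section_run"],
        )
        # Per-metaname caching overrides, as documented in the old
        # HierarchicalParser base class.
        self.caching_level_for_metaname = {
            'section_XC_functionals': CachingLevel.ForwardAndCache,
        }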