Commit c8ab8bd0 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Added new base classes that help in structuring the code. Added initial...

Added new base classes that help in structuring the code. Added initial support for ancillary files.
parent 30b1c39e
......@@ -4,7 +4,7 @@ import logging
from cp2kparser.parsing.csvparsing import CSVParser
from .inputparsing import CP2KInputParser
from .outputparser import CP2KOutputParser
from cp2kparser.parsing.cp2kinputenginedata.input_tree import CP2KInput
# from cp2kparser.parsing.cp2kinputenginedata.input_tree import CP2KInput
from cp2kparser.utils.baseclasses import ParserImplementation
from nomadcore.coordinate_reader import CoordinateReader
logger = logging.getLogger(__name__)
......@@ -36,18 +36,17 @@ class CP2KImplementation(ParserImplementation):
"""Resolve the input and output files based on extension and the
include files by looking for @INCLUDE commands in the input file.
"""
# Input and output files
for file_path in self.files:
if file_path.endswith(".inp"):
self.setup_file_id(file_path, "input")
if file_path.endswith(".out"):
self.setup_file_id(file_path, "output")
outputparser = CP2KOutputParser(file_path, self.parser_context)
self.file_parsers.append(outputparser)
self.file_storage.setup_file_id(file_path, "output")
outputparser = CP2KOutputParser(file_path, self.file_storage, self.parser_context)
self.main_parser = outputparser
if file_path.endswith(".inp"):
self.file_storage.setup_file_id(file_path, "input")
# Include files
input_file = self.get_file_contents("input")
input_file = self.file_storage.get_file_contents("input")
for line in input_file.split("\n"):
line = line.strip()
if line.startswith("@INCLUDE") or line.startswith("@include"):
......@@ -56,7 +55,7 @@ class CP2KImplementation(ParserImplementation):
if filename.startswith(('\"', '\'')) and filename.endswith(('\"', '\'')):
filename = filename[1:-1]
filepath = self.search_file(filename)
self.setup_file_id(filepath, "include")
self.file_storage.add_file_id(filepath, "include")
# def determine_output_file(self):
# """Determine which of the given files is the output file.
......@@ -79,8 +78,8 @@ class CP2KImplementation(ParserImplementation):
input file and explicitly state all variables.
"""
# Merge include files to input
include_files = self.get_file_handles("include", show_warning=False)
input_file = self.get_file_contents("input")
include_files = self.file_storage.get_file_handles("include", show_warning=False)
input_file = self.file_storage.get_file_contents("input")
input_lines = input_file.split("\n")
extended_input = input_lines[:] # Make a copy
if include_files:
......@@ -177,7 +176,7 @@ class CP2KImplementation(ParserImplementation):
# Check against the given files
file_path = self.search_file(force_path)
self.setup_file_id(file_path, "forces")
self.file_storage.setup_file_id(file_path, "forces")
# Determine the presence of an initial coordinate file
init_coord_file = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_NAME")
......@@ -185,7 +184,7 @@ class CP2KImplementation(ParserImplementation):
logger.debug("Initial coordinate file found.")
# Check against the given files
file_path = self.search_file(init_coord_file)
self.setup_file_id(file_path, "initial_coordinates")
self.file_storage.setup_file_id(file_path, "initial_coordinates")
# Determine the presence of a trajectory file
traj_file = self.input_tree.get_keyword("MOTION/PRINT/TRAJECTORY/FILENAME")
......@@ -201,7 +200,7 @@ class CP2KImplementation(ParserImplementation):
logger.debug("Trajectory file found.")
normalized_path = self.normalize_cp2k_path(traj_file, extension, "pos")
file_path = self.search_file(normalized_path)
self.setup_file_id(file_path, "trajectory")
self.file_storage.setup_file_id(file_path, "trajectory")
# Determine the presence of a cell output file
cell_motion_file = self.input_tree.get_keyword("MOTION/PRINT/CELL/FILENAME")
......@@ -210,13 +209,13 @@ class CP2KImplementation(ParserImplementation):
extension = "cell"
normalized_path = self.normalize_cp2k_path(cell_motion_file, extension)
file_path = self.search_file(normalized_path)
self.setup_file_id(file_path, "cell_output")
self.file_storage.setup_file_id(file_path, "cell_output")
# Determine the presence of a cell input file
cell_input_file = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/CELL/CELL_FILE_NAME")
if cell_input_file is not None:
file_path = self.search_file(cell_input_file)
self.setup_file_id(file_path, "cell_input")
self.file_storage.setup_file_id(file_path, "cell_input")
def normalize_cp2k_path(self, path, extension, name=""):
"""The paths in CP2K input can be given in many ways. This function
......
import re
from nomadcore.simple_parser import SimpleMatcher as SM
from nomadcore.caching_backend import CachingLevel
from cp2kparser.utils.baseclasses import FileParser
from cp2kparser.utils.baseclasses import MainParser
import numpy as np
#===============================================================================
class CP2KOutputParser(FileParser):
"""The object that goes through the CP2K output file and parses everything
it can using the SimpleParser architecture.
class CP2KOutputParser(MainParser):
"""The main parser class.
"""
def __init__(self, files, parser_context):
def __init__(self, files, file_storage, parser_context):
"""Initialize an output parser.
"""
FileParser.__init__(self, files, parser_context)
super(CP2KOutputParser, self).__init__(files, parser_context)
self.file_storage = file_storage
self.f_regex = "-?\d+\.\d+(?:E(?:\+|-)\d+)?" # Regex for a floating point value
self.i_regex = "-?\d+" # Regex for an integer
......
......@@ -7,10 +7,8 @@ from cp2kparser.parsing.versions.cp2k262.outputparser import CP2KOutputParser
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
from nomadcore.simple_parser import mainFunction
# This is what gets run when the scala layer calls for this parser. Currently
# only the outputparser is used because the scala layer doesn't support
# auxiliary files. Also the version identification is skipped and the structure
# used in CP2K 2.6.2 is assumed.
# This is what gets run when the scala layer calls for this parser. Version
# identification is skipped and the structure used in CP2K 2.6.2 is assumed.
if __name__ == "__main__":
# Get the outputparser class
......
......@@ -2,24 +2,24 @@ import os
import sys
import logging
from abc import ABCMeta, abstractmethod
from nomadcore.simple_parser import SimpleParserBuilder, extractOnCloseTriggers, PushbackLineFile
from nomadcore.simple_parser import SimpleParserBuilder, extractOnCloseTriggers, PushbackLineFile, AncillaryParser
from nomadcore.caching_backend import CachingLevel, ActiveBackend
logger = logging.getLogger(__name__)
#===============================================================================
class Parser(object):
"""This class provides the interface for parsing. All the input is given to
this class (or typically a subclass) and the parsing is done by calling the
parse() method. The parsing output is determined by the backend object that
is given in the constructor as a dependency.
"""This class provides the interface for local parsing. All the input is
given to this class (or typically a subclass) and the parsing is done by
calling the parse() method. The parsing output is determined by the backend
object that is given in the constructor as a dependency.
Attributes:
implementation: an object that actually does the parsing and is
setup by this class based on the given contents.
parser_context: A wrapper class for all the parser related information.
This is constructed here and then passed on to the different
implementations and FileParsers.
implementations.
"""
__metaclass__ = ABCMeta
......@@ -115,61 +115,44 @@ class Parser(object):
#===============================================================================
class ParserImplementation(object):
"""The base class for a version specific parser implementation in. Provides
some useful tools for setting up file access.
class FileStorage(object):
"""Used to map file paths to certain IDs. This helps in setting up the
secondary parsers, as you can associate file paths with simpler IDs that are
easier to use.
Attributes:
See the ParserContext class for more details about the attributes.
_file_handles: A "private" dictionary containing the cached file handles
_file_contents: A "private" dictionary containing the cached file contents
_file_sizes: A "private" dictionary containing the cached file sizes
file_ids: A dictionary containing the mapping between file ids and filepaths
"""
def __init__(self, parser_context):
self.parser_context = parser_context
# Copy all the attributes from the ParserContext object for quick access
attributes = dir(parser_context)
for attribute in attributes:
if not attribute.startswith("__"):
setattr(self, attribute, getattr(parser_context, attribute))
def __init__(self):
self._file_handles = {}
self._file_contents = {}
self._file_sizes = {}
self.file_ids = {}
self.file_parsers = []
def setup_given_file_ids(self):
"""Saves the file id's that were given in the JSON input.
"""
for path, file_id in self.files.iteritems():
if file_id:
self.setup_file_id(path, file_id)
def parse(self):
"""Start the parsing. Will try to parse everything unless given special
rules (metaInfoToKeep)."""
for file_parser in self.file_parsers:
file_parser.parse()
def setup_file_id(self, path, file_id):
"""Used to map a simple identifier string to a file path. When a file
id has been setup, you can easily access the file by using the
functions get_file_handle() or get_file_contents()
"""
if path in self.files:
value = self.file_ids.get(file_id)
if value:
old = self.file_ids.get(file_id)
if old is not None:
raise LookupError("The path '{}' is already associated with id '{}'".format(old, file_id))
self.file_ids[file_id] = path
def add_file_id(self, path, file_id):
value = self.file_ids.get(file_id)
if value:
if isinstance(value, list):
value.append(path)
else:
pathlist = []
pathlist.append(path)
self.file_ids[file_id] = pathlist
raise LookupError("You have already setup an unique file_path '{}' to this id.".format(value))
else:
logger.error("Trying to setup an id for an undefined path. See that the path was written correctly and it was given in the files attribute of the JSON string.")
pathlist = []
pathlist.append(path)
self.file_ids[file_id] = pathlist
def get_filepath_by_id(self, file_id, show_warning=True):
"""Get the file paths that were registered with the given id.
......@@ -178,9 +161,7 @@ class ParserImplementation(object):
if value:
if isinstance(value, list):
n = len(value)
if n == 1:
return value[0]
elif n == 0:
if n == 0:
if show_warning:
logger.warning("No files set with id '{}'".format(file_id))
return None
......@@ -188,6 +169,8 @@ class ParserImplementation(object):
if show_warning:
logger.debug("Multiple files set with id '{}'".format(file_id))
return value
else:
return value
else:
if show_warning:
logger.warning("No files set with id '{}'".format(file_id))
......@@ -284,30 +267,120 @@ class ParserImplementation(object):
#===============================================================================
class FileParser(object):
"""Base class for objects that parse certain type of files. Typically a
subclass of ParserImplementation will initialize one FileParser per parsed
file. You can also assign a list of files to a FileParser if they are of
similar type or are otherwise connected to each other.
class ParserImplementation(object):
"""The base class for a version-specific parser implementation. Provides
some useful tools for setting up file access.
Attributes:
parser_context: ParserContext object
file_storage: FileStorage object
main_parser: MainParser object
"""
__metaclass__ = ABCMeta
def __init__(self, parser_context):
self.parser_context = parser_context
self.file_storage = FileStorage()
self.main_parser = None
# Copy all the attributes from the ParserContext object for quick access
attributes = dir(parser_context)
for attribute in attributes:
if not attribute.startswith("__"):
setattr(self, attribute, getattr(parser_context, attribute))
# self.file_parsers = []
def __init__(self, files, parser_context):
# def setup_given_file_ids(self):
# """Saves the file id's that were given in the JSON input.
# """
# for path, file_id in self.files.iteritems():
# if file_id:
# self.file_storage.setup_file_id(path, file_id)
def parse(self):
"""Start the parsing. Will try to parse everything unless given special
rules (metaInfoToKeep)."""
self.main_parser.parse()
# for file_parser in self.file_parsers:
# file_parser.parse()
#===============================================================================
class HierarchicalParser(object):
    """Common base for parsers that work through a SimpleMatcher hierarchy.

    Attributes:
        file_path: The file path handed to the constructor, stored as given.
        root_matcher: The root of this parser's SimpleMatcher tree. Starts
            out as None.
        simple_parser: Starts out as None.
        caching_levels: Starts out as an empty dictionary.
        onClose: Starts out as an empty dictionary.
        default_data_caching_level: Initialized to
            CachingLevel.ForwardAndCache.
        default_section_caching_level: Initialized to CachingLevel.Forward.
    """
    def __init__(self, file_path):
        """Store the file path and reset all parsing state to its defaults.

        Args:
            file_path: Path of the file associated with this parser.
        """
        self.file_path = file_path

        # Nothing is attached yet: both the matcher tree and the parser
        # object start out unset.
        self.root_matcher = None
        self.simple_parser = None

        # Per-instance containers, empty until something fills them in.
        self.caching_levels = {}
        self.onClose = {}

        # Default caching behaviour.
        self.default_section_caching_level = CachingLevel.Forward
        self.default_data_caching_level = CachingLevel.ForwardAndCache
#===============================================================================
class SecondaryParser(HierarchicalParser):
    """A base class for ancillary file parsers. Instantiated and run by a
    MainParser.

    Attributes:
        ancillary_parser: A nomadcore.simple_parser.AncillaryParser object.
            None until parse() is called; parse() creates a fresh one on
            every call.
    """
    def __init__(self, file_path, simple_parser):
        """
        Args:
            file_path: The path of the file to parse. Can be an absolute or
                a relative path.
            simple_parser: The SimpleParser object that does the actual
                parsing. Shared with the other SecondaryParsers and the
                MainParser.
        """
        super(SecondaryParser, self).__init__(file_path)
        self.simple_parser = simple_parser
        self.ancillary_parser = None

    def parse(self):
        """Parse the given ancillary file in place.

        An IOError raised while opening or parsing the file is not
        propagated: a warning naming the file and its directory is logged
        instead and the method returns normally.
        """
        self.ancillary_parser = AncillaryParser(self.root_matcher, self.simple_parser, self.caching_levels, self)

        # Try opening the given file
        try:
            with open(self.file_path) as fIn:
                self.ancillary_parser.parseFile(fIn)
        except IOError:
            dir_name, file_name = os.path.split(os.path.abspath(self.file_path))
            # The placeholders are file first, directory second, so the
            # arguments must be passed in that order.
            logger.warning("Could not find file '{}' in directory '{}'. No data will be parsed from this file".format(file_name, dir_name))
#===============================================================================
class MainParser(HierarchicalParser):
"""Base class for main parsers. Will call AncillaryParsers to parse additional
files. Typically this main parser is also tied to a file ("main file").
A subclass of ParserImplementation will initialize only one MainParser.
Attributes:
files: A list of file or directory paths that are used by this parser.
root_matcher
"""
def __init__(self, file_path, parser_context):
"""
Args:
files: A list of filenames that are parsed and analyzed by this
object.
parser_context: The parsing context that contains e.g. the backend.
"""
if not isinstance(files, list):
files = [files]
self.files = files
super(MainParser, self).__init__(file_path)
if parser_context:
self.parser_context = parser_context
self.backend = parser_context.backend
self.metainfo_to_keep = parser_context.metainfo_to_keep
self.version_id = parser_context.version_id
self.root_matcher = None
self.caching_level_for_metaName = {}
self.default_data_caching_level = CachingLevel.ForwardAndCache
self.default_section_caching_level = CachingLevel.Forward
......@@ -319,13 +392,6 @@ class FileParser(object):
SimpleParser scheme, if you want to use something else or customize the
process just override this method.
"""
# If there is only one file assigned to this FileParser, and a
# root_matcher has been assigned, parse with the SimpleParser. Otherwise
# halt.
if len(self.files) != 1 or self.root_matcher is None:
logger.error("Could not use the default parsing implementation. If you want to use it wou must specify a root_matcher and only assign one file to the FileParser. If you need custom parsing you should override the parse() method.")
return
# Initialize the parser builder
parserBuilder = SimpleParserBuilder(self.root_matcher, self.backend.metaInfoEnv(), self.metainfo_to_keep)
......@@ -354,12 +420,11 @@ class FileParser(object):
# Compile the SimpleMatcher tree
parserBuilder.compile()
fileToParse = self.files[0]
self.backend.fileOut.write("[")
uri = "file://" + fileToParse
uri = "file://" + self.file_path
parserInfo = {'name': 'cp2k-parser', 'version': '1.0'}
self.caching_backend.startedParsingSession(uri, parserInfo)
with open(fileToParse, "r") as fIn:
with open(self.file_path, "r") as fIn:
parser = parserBuilder.buildParser(PushbackLineFile(fIn), self.caching_backend, superContext=self)
parser.parse()
self.caching_backend.finishedParsingSession("ParseSuccess", None)
......@@ -379,7 +444,7 @@ class FileParser(object):
Get compiled parser.
Later one can compile a parser for parsing an external file.
"""
self.parser = parser
self.simple_parser = parser
#===============================================================================
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment