Commit 7a8aece2 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Added a package hierarchy for different versions.

parent e53ff56f
......@@ -17,7 +17,7 @@
# Structure
Currently the python package is divided into the following subpackages:
- utils: Generic utility classes and base classes
- implementation: The classes that actually define the parser functionality.
- parsing: The classes that actually define the parser functionality.
# Tools and Methods
......
import re
import logging
from cp2kparser.utils.baseclasses import Parser
from cp2kparser.parsing.implementations import *
from cp2kparser.parsing.versions.versionsetup import get_implementation_class
logger = logging.getLogger(__name__)
#===============================================================================
class CP2KParser(Parser):
"""Builds the correct parser by looking at the given files and the given
input.
This class handles the initial setup before any parsing can happen. It
"""This class handles the initial setup before any parsing can happen. It
determines which version of CP2K was used to generate the output and then
sets up a correct implementation.
......@@ -18,25 +15,30 @@ class CP2KParser(Parser):
parse().
"""
def __init__(self, contents=None, metainfo_to_keep=None, backend=None):
Parser.__init__(self, contents, metainfo_to_keep, backend)
def __init__(self, contents=None, metainfo_to_keep=None, backend=None, main_file=None):
Parser.__init__(self, contents, metainfo_to_keep, backend, main_file)
def setup(self):
"""Setups the version by looking at the output file and the version
specified in it.
"""
# Search for the output file
count = 0
for filepath in self.parser_context.files:
if filepath.endswith(".out"):
count += 1
outputpath = filepath
if count > 1:
logger("Could not determine the correct outputfile because multiple files with extension '.out' were found.")
return
elif count == 0:
logger.error("No output file could be found. The outputfile should have a '.out' extension.")
return
# If a main file is provided, search it for a version number.
if self.parser_context.main_file is not None:
outputpath = self.parser_context.main_file
else:
# Search for the output file
count = 0
for filepath in self.parser_context.files:
if filepath.endswith(".out"):
count += 1
outputpath = filepath
if count > 1:
logger("Could not determine the correct outputfile because multiple files with extension '.out' were found.")
return
elif count == 0:
logger.error("No output file could be found. The outputfile should have a '.out' extension.")
return
# Search for the version specification
outputfile = open(outputpath, 'r')
......@@ -48,15 +50,7 @@ class CP2KParser(Parser):
break
# Search and initialize a version specific implementation
class_name = "CP2KImplementation{}".format(self.parser_context.version_id)
class_object = globals().get(class_name)
if class_object:
logger.debug("Using version specific implementation '{}'.".format(class_name))
self.implementation = class_object(self.parser_context)
else:
logger.debug("No version specific implementation found. Using the default implementation: {}".format(class_name))
self.parser_context.version_id = "262"
self.implementation = globals()["CP2KImplementation262"](self.parser_context)
self.implementation = get_implementation_class(self.parser_context.version_id)(self.parser_context)
def search_parseable_files(self, files):
"""Searches the given path for files that are of interest to this
......
......@@ -2,16 +2,16 @@ import re
import os
import logging
from cp2kparser.parsing.csvparsing import CSVParser
from cp2kparser.parsing.inputparsing import CP2KInputParser
from .inputparsing import CP2KInputParser
from .outputparser import CP2KOutputParser
from cp2kparser.parsing.cp2kinputenginedata.input_tree import CP2KInput
from cp2kparser.parsing.outputparsing import *
from cp2kparser.utils.baseclasses import ParserImplementation
from nomadcore.coordinate_reader import CoordinateReader
logger = logging.getLogger(__name__)
#===============================================================================
class CP2KImplementation262(ParserImplementation):
class CP2KImplementation(ParserImplementation):
"""The default implementation for a CP2K parser based on version 2.6.2.
"""
def __init__(self, parser_context):
......@@ -25,7 +25,6 @@ class CP2KImplementation262(ParserImplementation):
self.atomsengine = CoordinateReader()
self.inputparser = CP2KInputParser()
self.inputparser.setup_version(self.version_id)
self.outputparser = None #globals()["CP2KOutputParser{}".format(self.version_id)](file_path, self.parser_context)
self.input_tree = None
self.extended_input = None
......@@ -37,14 +36,15 @@ class CP2KImplementation262(ParserImplementation):
"""Resolve the input and output files based on extension and the
include files by looking for @INCLUDE commands in the input file.
"""
# Input and output files
for file_path in self.files:
if file_path.endswith(".inp"):
self.setup_file_id(file_path, "input")
if file_path.endswith(".out"):
self.setup_file_id(file_path, "output")
self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](file_path, self.parser_context)
self.file_parsers.append(self.outputparser)
outputparser = CP2KOutputParser(file_path, self.parser_context)
self.file_parsers.append(outputparser)
# Include files
input_file = self.get_file_contents("input")
......@@ -58,6 +58,22 @@ class CP2KImplementation262(ParserImplementation):
filepath = self.search_file(filename)
self.setup_file_id(filepath, "include")
# def determine_output_file(self):
# """Determine which of the given files is the output file.
# """
# # If a main file has been specified it is the output file.
# if self.parser_context.main_file is not None:
# self.setup_file_id(file_path, "output")
# # Otherwise try to determine by the file extension
# else:
# n_outfiles = 0
# for file_path in self.files:
# if file_path.endswith(".out"):
# n_outfiles += 1
# self.setup_file_id(file_path, "output")
# self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](file_path, self.parser_context)
# self.file_parsers.append(self.outputparser)
def input_preprocessor(self):
"""Preprocess the input file. Concatenate .inc files into the main
input file and explicitly state all variables.
......
......@@ -84,6 +84,6 @@ class CP2KInputParser(object):
cp2k_input.xml is version specific. By calling this function before
parsing the correct file can be found.
"""
pickle_path = os.path.dirname(__file__) + "/cp2kinputenginedata/cp2k_{}/cp2k_input_tree.pickle".format(version_number)
pickle_path = os.path.dirname(__file__) + "/input_xml/cp2k_input_tree.pickle".format(version_number)
input_tree_pickle_file = open(pickle_path, 'rb')
self.input_tree = pickle.load(input_tree_pickle_file)
......@@ -6,7 +6,7 @@ import numpy as np
#===============================================================================
class CP2KOutputParser262(FileParser):
class CP2KOutputParser(FileParser):
"""The object that goes through the CP2K output file and parses everything
it can using the SimpleParser architecture.
"""
......
"""Returns the implementation classes based on the given version identifier.
The different versions are grouped into subpackages.
"""
import importlib
def get_implementation_class(version_id, default_version_id="262"):
    """Return the CP2KImplementation class matching the given version.

    Every supported version lives in its own subpackage named
    ``cp2kparser.parsing.versions.cp2k<version_id>``, whose ``implementation``
    module defines a class called ``CP2KImplementation``.

    If no subpackage can be imported for ``version_id`` (unknown version, or
    no version could be detected at all), the implementation of
    ``default_version_id`` is used instead and a warning is logged. This
    function only returns a class; it does not modify any version id stored
    by the caller.

    Args:
        version_id: The version identifier. Currently a pure integer, so it
            can be mapped directly into a package name.
        default_version_id: The version whose implementation is used when
            nothing can be imported for ``version_id``.

    Returns:
        The ``CP2KImplementation`` class of the resolved version.

    Raises:
        ImportError: If neither the requested nor the default implementation
            module can be imported.
    """
    # Imported locally so that this module needs no additional top-level
    # imports besides importlib.
    import logging
    logger = logging.getLogger(__name__)

    module_template = "cp2kparser.parsing.versions.cp2k{}.implementation"
    requested = module_template.format(version_id)
    fallback = module_template.format(default_version_id)

    try:
        module = importlib.import_module(requested)
    except ImportError as err:
        # Nothing to fall back to if the default itself is what failed.
        if requested == fallback:
            raise
        # NOTE: an ImportError raised from inside an existing version module
        # (e.g. a missing dependency) also ends up here, which is why the
        # original error is included in the log message.
        logger.warning(
            "No version specific implementation found for version '%s' (%s). "
            "Using the default implementation for version '%s'.",
            version_id, err, default_version_id
        )
        module = importlib.import_module(fallback)

    return module.CP2KImplementation
......@@ -3,7 +3,7 @@ This is the access point to the parser for the scala layer in the nomad project.
"""
import os
from cp2kparser import CP2KParser
from cp2kparser.parsing.outputparsing import CP2KOutputParser262
from cp2kparser.parsing.versions.cp2k262.outputparser import CP2KOutputParser
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
from nomadcore.simple_parser import mainFunction
......@@ -16,7 +16,7 @@ if __name__ == "__main__":
cp2kparser = CP2KParser()
# Get the outputparser class
outputparser = globals()["CP2KOutputParser262"](None, None)
outputparser = CP2KOutputParser(None, None)
# Setup the metainfos
metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../../nomad-meta-info/meta_info/nomad_meta_info/{}".format(cp2kparser.get_metainfo_filename())))
......
import os
import sys
import logging
import StringIO
from abc import ABCMeta, abstractmethod
from nomadcore.simple_parser import SimpleParserBuilder, defaultParseFile, extractOnCloseTriggers, PushbackLineFile
from nomadcore.simple_parser import SimpleParserBuilder, extractOnCloseTriggers, PushbackLineFile
from nomadcore.caching_backend import CachingLevel, ActiveBackend
logger = logging.getLogger(__name__)
......@@ -20,42 +19,54 @@ class Parser(object):
setup by this class based on the given contents.
parser_context: A wrapper class for all the parser related information.
This is constructed here and then passed onto the different
implementations.
backend: An object to which the parser will give all the parsed data.
The backend will then determine where and when to output that data.
implementations and FileParsers.
"""
__metaclass__ = ABCMeta
def __init__(self, contents, metainfo_to_keep=None, backend=None):
def __init__(self, contents, metainfo_to_keep=None, backend=None, main_file=None):
"""
Args:
contents: list of absolute filepaths as strings
metainfo_to_keep: list of metainfo names to parse as strings.
backend: the backend where the parsing results are outputted
Args:
contents: The contents to parse as a list of file and directory paths.
The given directory paths will be searched recursively for interesting
files.
metainfo_to_keep: A list of metainfo names. This list is used to
optimize the parsing process as optimally only the information relevant
to these metainfos will be parsed.
backend: An object to which the parser will give all the parsed data.
The backend will then determine where and when to output that data.
main_file: A special file that can be considered the main file.
Currently used when interfacing with the scala environment in the
nomad project.
"""
self.initialize(contents, metainfo_to_keep, backend)
self.initialize(contents, metainfo_to_keep, backend, main_file)
def initialize(self, contents, metainfo_to_keep, backend):
def initialize(self, contents, metainfo_to_keep, backend, main_file):
"""Initialize the parser with the given environment.
"""
self.parser_context = ParserContext()
self.parser_context.backend = backend
self.parser_context.metainfo_to_keep = metainfo_to_keep
self.parser_context.main_file = main_file
self.implementation = None
# If single path provided, make it into a list
if isinstance(contents, basestring):
contents = [contents]
# Figure out all the files from the contents
if contents:
# Use a set as it will automatically ignore duplicates (nested
# folders may have been included)
files = set()
for content in contents:
# Add all files recursively from a directory
found_files = []
if os.path.isdir(content):
dir_files = set()
for filename in os.listdir(content):
dir_files.add(os.path.join(content, filename))
files |= dir_files
for root, dirnames, filenames in os.walk(content):
for filename in filenames:
filename = os.path.join(root, filename)
found_files.append(filename)
files |= set(found_files)
elif os.path.isfile(content):
files.add(content)
else:
......@@ -80,7 +91,7 @@ class Parser(object):
def search_parseable_files(self, files):
"""From a list of filenames tries to guess which files are relevant to
the parsing process. Essentially filters the files before they are sent
to the parser implementation.
to the parser implementation. By default does not do any filtering.
"""
return files
......@@ -282,6 +293,12 @@ class FileParser(object):
__metaclass__ = ABCMeta
def __init__(self, files, parser_context):
"""
Args:
files: A list of filenames that are parsed and analyzed by this
object.
parser_context: The parsing context that contains e.g. the backend.
"""
if not isinstance(files, list):
files = [files]
self.files = files
......@@ -360,8 +377,9 @@ class FileParser(object):
class ParserContext(object):
    """Contains everything needed to instantiate a parser implementation.

    This is a plain container: the constructor only stores the given values
    as attributes without validating or copying them.

    Attributes:
        files: The files that are available to the parser.
        version_id: The identifier of the version that the implementation is
            selected by.
        metainfo_to_keep: A list of metainfo names that should be parsed.
        backend: The object to which the parser gives all the parsed data.
        main_file: A special file that can be considered the main file, or
            None if no such file was specified.
    """
    def __init__(self, files=None, metainfo_to_keep=None, backend=None, version_id=None, main_file=None):
        self.files = files
        self.version_id = version_id
        self.metainfo_to_keep = metainfo_to_keep
        self.backend = backend
        self.main_file = main_file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment