Commit 68f73d31 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Refactored logging tools

parent 6c24d973
# CP2K
# CP2K NoMaD Parser
The NoMaD parser for CP2K. Under development. Will be modified to conform to
the common parser structure when it is available.
---
## QuickStart
- Clone repository
- Run setup by running the setup.py script. For local, user specific install
without sudo permissions use:
```shell
python setup.py install --user
git clone git@gitlab.mpcdf.mpg.de:nomad-lab/parser-cp2k.git
```
- For a system-wide install use:
- Run setup by running the setup.py script. For local, user specific install
without sudo permissions use (omit --user for a system-wide install):
```shell
python setup.py install
python setup.py install --user
```
- You can test if everything is running fine by running the test script in tests folder:
......@@ -31,17 +31,19 @@ the common parser structure when it is available.
```shell
python -m cp2kparser
```
---
## Structure
Currently the python package is divided into three subpackages:
- Engines: Classes for parsing different types of files
- Generics: Generic utility classes and base classes
- Implementation: The classes that actually define the parser functionality.
---
## Reusable components and ideas for other parsers
Some components and ideas could be reused in other parsers as well. If you find
any of the following useful in you parser, you are welcome to do so.
any of the following ideas useful in your parser, you are welcome to reuse
them.
### Engines
Basically all the "engines", that is the modules that parse certain type of
......@@ -54,7 +56,7 @@ Currently implemented engines that could be reused (not tested properly yet):
- RegexEngine: For parsing text files with regular expressions. Uses the re2
library if available (falls back to default python regex implementation if
re2 not found).
- XyzEngine: For parsing XYZ files and files with similar structure. Has a very
- XYZEngine: For parsing XYZ files and files with similar structure. Has a very
flexible nature as you can specify comments, column delimiters, column
indices and the patterns used to separate different configurations.
......@@ -74,5 +76,13 @@ parsers:
- Time measurement for performance analysis
- Providing file contents, sizes and handles
### Logging
Python has a great [logging package](https://docs.python.org/2/library/logging.html) which helps in
following the program flow and catching different errors and warnings. In
cp2kparser the file cp2kparser/generics/logconfig.py defines the behaviour of
the logger. There you can set up the log levels even on a per-module basis. A more
easily readable formatting is also provided for the log messages.
---
## Lessons learned
#! /usr/bin/env python
import cp2kparser.generics.logconfig
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from collections import defaultdict
from cp2kparser.generics.nomadlogging import *
import logging
logger = logging.getLogger(__name__)
#===============================================================================
......@@ -117,14 +118,14 @@ class InputSection(object):
parts = path.upper().split('/', 1)
candidates = self.subsections.get(parts[0]) # [s for s in self.subsections if s.name == parts[0]]
if not candidates:
print_debug("Subsection '{}' not found.".format(parts[0]))
logger.debug("Subsection '{}' not found.".format(parts[0]))
return None
elif len(candidates) > 1:
print_warning("Multiple subsections with the same name found with name '{}' If no index is given, the first occurence in the input file is returned.".format(parts[0]))
logger.warning("Multiple subsections with the same name found with name '{}' If no index is given, the first occurence in the input file is returned.".format(parts[0]))
try:
subsection = candidates[index]
except IndexError:
print_error("Invalid subsection index given.")
logger.error("Invalid subsection index given.")
if len(parts) == 1:
return subsection
......@@ -146,17 +147,17 @@ class InputSection(object):
"""
candidates = self.keywords.get(keyword) # [s for s in self.subsections if s.name == parts[0]]
if not candidates:
print_debug("No keywords with name '{}' found in subsection '{}'".format(keyword, self.name))
logger.debug("No keywords with name '{}' found in subsection '{}'".format(keyword, self.name))
return None
elif len(candidates) > 1:
print_warning("Multiple keywords with the same name found with name '{}' If no index is given, the first occurence in the input file is returned.".format(parts[0]))
logger.warning("Multiple keywords found with name '{}'. If no index is given, the first occurence in the input file is returned.".format(keyword))
try:
result = candidates[index]
except IndexError:
print_error("Invalid keyword index given.")
logger.error("Invalid keyword index given.")
return result
def get_parameter(self):
if self.params is None:
print_debug("The section '{}' has no parameters set".format(self.name))
logger.debug("The section '{}' has no parameters set".format(self.name))
return self.params
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os
from cp2kparser.generics.nomadlogging import *
import logging
logger = logging.getLogger(__name__)
try:
import re2 as re
except ImportError:
import re
print_warning((
logger.warning((
"re2 package not found. Using re package instead. "
"If you wan't to use re2 please see the following links:"
" https://github.com/google/re2"
......@@ -71,7 +72,7 @@ class Regex(object):
def check_input(self):
if self.direction != "down" and self.direction != "up":
print_error("Unsupported direction value '{}' in a regex".format(self.direction))
logger.error("Unsupported direction value '{}' in a regex".format(self.direction))
def match(self, string):
return self.compiled_regex.match(string)
......@@ -110,13 +111,13 @@ class RegexEngine(object):
"""Use the given regex to parse contents from the given file handle"""
file_name = file_handle.name
print_debug("Searching regex in file '{}'".format(file_name))
logger.debug("Searching regex in file '{}'".format(file_name))
result = self.recursive_extraction(regex, file_handle)
if result:
return result
# Couldn't find the quantity
print_debug("Could not find a result for {}.".format(regex.regex_string))
logger.debug("Could not find a result for {}.".format(regex.regex_string))
def recursive_extraction(self, regex, data):
"""Goes through the exctractor tree recursively until the final
......@@ -133,16 +134,16 @@ class RegexEngine(object):
# If separator specified, do a blockwise search
if regex.separator is not None:
print_debug("Going into blockwise regex search")
logger.debug("Going into blockwise regex search")
result = self.regex_block_search(data, regex)
# Regular string search
else:
print_debug("Going into full regex search")
logger.debug("Going into full regex search")
result = self.regex_search_string(data, regex)
# See if the tree continues
if regex.inner_regex is not None:
print_debug("Entering next regex recursion level.")
logger.debug("Entering next regex recursion level.")
return self.recursive_extraction(regex.inner_regex, result)
else:
return result
......@@ -164,15 +165,15 @@ class RegexEngine(object):
result = None
if from_beginning:
print_debug("Doing full string search from beginning.")
logger.debug("Doing full string search from beginning.")
return regex.match(contents)
elif index == "all":
print_debug("Doing full string search for all results.")
logger.debug("Doing full string search for all results.")
result = regex.findall(contents)
if not result:
print_debug("No matches.")
logger.debug("No matches.")
elif index >= 0:
print_debug("Doing full string search with specified index.")
logger.debug("Doing full string search with specified index.")
iter = regex.finditer(contents)
i = 0
while i <= index:
......@@ -180,9 +181,9 @@ class RegexEngine(object):
match = iter.next()
except StopIteration:
if i == 0:
print_debug("No results.")
logger.debug("No results.")
else:
print_debug("Invalid regex index.")
logger.debug("Invalid regex index.")
break
if i == index:
result = match.groups()[0]
......@@ -190,12 +191,12 @@ class RegexEngine(object):
elif index < 0:
matches = regex.findall(contents)
if not matches:
print_debug("No matches.")
logger.debug("No matches.")
else:
try:
result = matches[index]
except IndexError:
print_debug("Invalid regex index.")
logger.debug("Invalid regex index.")
return result
......@@ -208,33 +209,30 @@ class RegexEngine(object):
direction = regex.direction
index = regex.index
from_beginning = regex.from_beginning
print_debug("Doing blockwise search with separator: '{}', direction: '{}', from_beginning: '{}' and index '{}'".format(separator, direction, from_beginning, index))
logger.debug("Doing blockwise search with separator: '{}', direction: '{}', from_beginning: '{}' and index '{}'".format(separator, direction, from_beginning, index))
# Determine the direction in which the blocks are read
if direction == "up":
print_debug("Searching from bottom to up.")
logger.debug("Searching from bottom to up.")
generator = self.reverse_block_generator(file_handle, separator)
elif direction == "down":
print_debug("Searching from up to bottom.")
logger.debug("Searching from up to bottom.")
generator = self.block_generator(file_handle, separator)
else:
print_error("Unknown direction specifier: {}".format(direction))
logger.error("Unknown direction specifier: {}".format(direction))
return
# If all results wanted, just get all results from all blocks
if index == "all":
print_debug("Searchin for all matches.")
logger.debug("Searchin for all matches.")
results = []
for block in generator:
matches = regex.findall(block)
if matches:
print_debug("Found match within block.")
if isinstance(matches, list):
print_debug("Found multiple matches in a block")
for match in matches:
results.append(match)
else:
print_debug("Found single match in a block")
results.append(matches.groups()[0])
return results
......@@ -242,12 +240,12 @@ class RegexEngine(object):
i_result = 0
counter = 0
for block in generator:
print_debug("Searchin for a specific index.")
logger.debug("Searchin for a specific index.")
counter += 1
if from_beginning:
result = regex.match(block)
if result:
print_debug("Found match in beginning of block.")
logger.debug("Found match in beginning of block.")
if index + 1 > i_result + 1:
i_result += 1
else:
......@@ -260,7 +258,7 @@ class RegexEngine(object):
else:
n_results = 1
print_debug("Found results within block.")
logger.debug("Found results within block.")
if index + 1 > i_result + n_results:
i_result += n_results
else:
......
from cp2kparser.generics.nomadlogging import *
# from cp2kparser.generics.nomadlogging import *
import numpy as np
import logging
logger = logging.getLogger(__name__)
from io import StringIO
try:
import re2 as re
except ImportError:
import re
print_warning((
logger.warning((
"re2 package not found. Using re package instead. "
"If you wan't to use re2 please see the following links:"
" https://github.com/google/re2"
......@@ -96,12 +98,12 @@ class XYZEngine(object):
try:
value = vals[column]
except IndexError:
print_warning("The given index '{}' could not be found on the line '{}'. The given delimiter or index could be wrong.".format(column, line))
logger.warning("The given index '{}' could not be found on the line '{}'. The given delimiter or index could be wrong.".format(column, line))
return
try:
value = float(value)
except ValueError:
print_warning("Could not cast value '{}' to float. Currently only floating point values are accepted".format(value))
logger.warning("Could not cast value '{}' to float. Currently only floating point values are accepted".format(value))
return
else:
line_forces.append(value)
......
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Misc. utility functions."""
import textwrap
import logging
# Import-time side effect: configure the root logger to emit only the bare
# message text with an INFO threshold, so records logged at DEBUG level are
# dropped unless the level is lowered.
logging.basicConfig(format='%(message)s', level=logging.INFO)
#===============================================================================
def make_title(title, width=80):
    """Build a one-line title banner for console output.

    The title is surrounded by single spaces, framed by ``=`` rulers and
    closed with ``|`` on both ends.

    Args:
        title: Text placed inside the banner.
        width: Target total width of the banner in characters.

    Returns:
        The banner as a string. A negative ruler length produces an empty
        ruler, so a title that is too long yields a banner wider than
        ``width``.
    """
    # Four characters are reserved: the two '|' borders and the two spaces
    # around the title.
    space = width - len(title) - 4
    # Floor division keeps the ruler lengths integral on Python 3 as well;
    # plain '/' yields a float there and 'float * str' raises TypeError.
    pre_space = space // 2 - 1
    post_space = space - pre_space
    return "|" + pre_space * "=" + " " + title + " " + post_space * "=" + "|"
#===============================================================================
def print_subtitle(title, width=80):
    """Print a one-line subtitle banner to standard output.

    The layout matches a title banner but uses ``-`` rulers: the title is
    surrounded by single spaces, framed by the rulers and closed with ``|``.

    Args:
        title: Text placed inside the banner.
        width: Target total width of the banner in characters.
    """
    # Four characters are reserved: the two '|' borders and the two spaces
    # around the title.
    space = width - len(title) - 4
    # Floor division keeps the ruler lengths integral on Python 3 as well;
    # plain '/' yields a float there and 'float * str' raises TypeError.
    pre_space = space // 2 - 1
    post_space = space - pre_space
    line = "|" + pre_space * "-" + " " + title + " " + post_space * "-" + "|"
    # Parenthesised single-argument form behaves the same under the Python 2
    # print statement and the Python 3 print function.
    print(line)
#===============================================================================
def make_message(message, width=80, spaces=0):
    """Wrap a message into bordered rows followed by a closing rule.

    Args:
        message: Text to wrap; it is wrapped at ``width - 6`` characters.
        width: Nominal box width used for wrapping, padding and the rule.
        spaces: Number of blank characters prepended to every row.

    Returns:
        The bordered rows joined by newlines, then a newline and a
        ``|---...---|`` rule. An empty message yields only the newline and
        the rule.
    """
    indent = spaces * " "
    inner_width = width - 6
    wrapped = textwrap.TextWrapper(width=inner_width).wrap(message)
    # Each row is padded on the right so that all rows have equal length.
    rows = [
        indent + "| " + text + (inner_width - len(text)) * " " + " |"
        for text in wrapped
    ]
    footer = indent + "|" + (width - 2) * "-" + "|"
    return "\n".join(rows) + "\n" + footer
#===============================================================================
def make_titled_message(title, message, width=80, spaces=0):
    """Wrap a message and prefix its first row with a ``>> title:`` tag.

    Args:
        title: Label inserted in front of the first wrapped row.
        message: Text to wrap; it is wrapped at ``width - 6`` characters.
        width: Nominal width used for wrapping and right padding.
        spaces: Number of blank characters prepended to every row.

    Returns:
        The rows joined by newlines, or an empty string when the message
        wraps to no rows.
    """
    indent = spaces * " "
    inner_width = width - 6
    rows = []
    wrapped = textwrap.TextWrapper(width=inner_width).wrap(message)
    for position, text in enumerate(wrapped):
        # Only the first row carries the title tag; later rows get a plain
        # one-character lead.
        lead = " >> {}: ".format(title) if position == 0 else " "
        rows.append(indent + lead + text + (inner_width - len(text)) * " " + " ")
    return "\n".join(rows)
#===============================================================================
def print_title(title, width=80):
    """Print the banner built by ``make_title`` to standard output.

    Args:
        title: Text placed inside the banner.
        width: Target banner width, forwarded to ``make_title``.
    """
    # Parenthesised single-argument form behaves the same under the Python 2
    # print statement and the Python 3 print function.
    print(make_title(title, width=width))
#===============================================================================
def print_message(title, message, width=80):
    """Print a title banner followed by a bordered message.

    Nothing is returned; the text is written to standard output and is
    followed by an extra blank line.

    Args:
        title: Text for the banner built by ``make_title``.
        message: Text for the box built by ``make_message``.
        width: Width forwarded to both helpers. The default equals the
            helpers' own default, so calls that omit it print as before.
    """
    # Parenthesised single-argument form behaves the same under the Python 2
    # print statement and the Python 3 print function.
    print(make_title(title, width=width) + "\n" + make_message(message, width=width) + "\n")
#===============================================================================
def print_debug(message, width=80):
    """Log a ``DEBUG``-tagged message at debug level via the root logger.

    Nothing is returned.

    Args:
        message: Text to format with ``make_titled_message`` and log.
        width: Wrapping width forwarded to ``make_titled_message``. The
            default equals that helper's own default, so calls that omit it
            log the same text as before.
    """
    logging.debug(make_titled_message("DEBUG", message, width=width))
#===============================================================================
def print_info(message, width=80):
    """Log an ``INFO``-tagged message at info level via the root logger.

    Nothing is returned.

    Args:
        message: Text to format with ``make_titled_message`` and log.
        width: Wrapping width forwarded to ``make_titled_message``. The
            default equals that helper's own default, so calls that omit it
            log the same text as before.
    """
    logging.info(make_titled_message("INFO", message, width=width))
#===============================================================================
def print_text(text, spaces=0, width=80):
    """Print wrapped text to standard output without borders, header or footer.

    Args:
        text: Text to wrap; it is wrapped at ``width - 4`` characters.
        spaces: Number of blank characters prepended to every row, in
            addition to a fixed one-character lead.
        width: Nominal width used for wrapping.
    """
    lead = spaces * " " + " "
    wrapped = textwrap.TextWrapper(width=width - 4).wrap(text)
    # An empty wrap result joins to an empty string, so a bare newline is
    # printed in that case.
    print("\n".join(lead + row for row in wrapped))
#===============================================================================
def print_warning(message, width=80):
    """Log a framed warning at warning level via the root logger.

    Nothing is returned. The frame is always laid out with a fixed width of
    64 and a message indent of 8; the ``width`` argument is accepted but not
    used.

    Args:
        message: Text placed inside the frame built by ``make_message``.
        width: Unused.
    """
    banner = make_title("WARNING", width=64)
    body = make_message(message, width=64, spaces=8)
    logging.warning("\n " + banner + "\n" + body + "\n")
#===============================================================================
def print_error(message, width=80):
    """Log a framed error at error level via the root logger.

    Nothing is returned. The frame is always laid out with a fixed width of
    64 and a message indent of 8; the ``width`` argument is accepted but not
    used.

    Args:
        message: Text placed inside the frame built by ``make_message``.
        width: Unused.
    """
    banner = make_title("ERROR", width=64)
    body = make_message(message, width=64, spaces=8)
    logging.error("\n " + banner + "\n" + body + "\n")
......@@ -4,8 +4,8 @@ import json
import os
import time
from abc import ABCMeta, abstractmethod
from cp2kparser.generics.nomadlogging import *
from pint import UnitRegistry
import logging
logger = logging.getLogger(__name__)
#===============================================================================
......@@ -80,7 +80,7 @@ class NomadParser(object):
try:
handle = open(path, "r")
except (OSError, IOError):
print_error("Could not open file: '{}'".format(path))
logger.error("Could not open file: '{}'".format(path))
else:
self.file_handles[file_id] = handle
handle.seek(0, os.SEEK_SET)
......@@ -95,10 +95,10 @@ class NomadParser(object):
# See if the needed attributes exist
self.tmp_dir = self.input_json_object.get("tmpDir")
if self.tmp_dir is None:
print_error("No temporary folder specified.")
logger.error("No temporary folder specified.")
self.files = self.input_json_object.get("files")
if self.files is None:
print_error("No files specified.")
logger.error("No files specified.")
self.metainfo_to_keep = self.input_json_object.get("metainfoToKeep")
self.metainfo_to_skip = self.input_json_object.get("metainfoToSkip")
......@@ -110,14 +110,14 @@ class NomadParser(object):
return the value as json.
"""
# Start timing
print_debug(74*'-')
print_debug("Getting quantity '{}'".format(name))
logger.debug(74*'-')
logger.debug("Getting quantity '{}'".format(name))
start = time.clock()
#Check availability
available = self.check_quantity_availability(name)
if not available:
print_warning("The quantity '{}' is not available for this parser version.".format(name))
logger.warning("The quantity '{}' is not available for this parser version.".format(name))
return
# Check cache
......@@ -127,20 +127,20 @@ class NomadParser(object):
result = self.get_unformatted_quantity(name)
self.results[name] = result
else:
print_debug("Using cached result.")
logger.debug("Using cached result.")
if result is None:
print_debug("The quantity '{}' is not present or could not be succesfully parsed.".format(name))
logger.debug("The quantity '{}' is not present or could not be succesfully parsed.".format(name))
# Check results
if result is None:
print_info("There was an issue in parsing quantity '{}'. It is either not present in the files or could not be succesfully parsed.".format(name))
logger.info("There was an issue in parsing quantity '{}'. It is either not present in the files or could not be succesfully parsed.".format(name))
else:
print_info("Succesfully parsed quantity '{}'. Result:\n{}".format(name, result))
logger.info("Succesfully parsed quantity '{}'. Result:\n{}".format(name, result))
# Do the conversion to SI units based on the given units
stop = time.clock()
print_debug("Elapsed time: {} ms".format((stop-start)*1000))
logger.debug("Elapsed time: {} ms".format((stop-start)*1000))
return result
def get_all_quantities(self):
......
......@@ -42,7 +42,6 @@ def get_parser(path):
#===============================================================================
if __name__ == '__main__':
print __file__
path = os.path.dirname(os.path.realpath(os.path.dirname(__file__)))
parser = get_parser(path)
parser.parse_all()
......@@ -2,13 +2,15 @@
# -*- coding: utf-8 -*-
import os
import re
from cp2kparser.generics.nomadlogging import *
# from cp2kparser.generics.nomadlogging import *
from cp2kparser.generics.nomadparser import NomadParser
from cp2kparser.implementation.regexs import *
from cp2kparser.engines.regexengine import RegexEngine
from cp2kparser.engines.xyzengine import XYZEngine
from cp2kparser.engines.cp2kinputengine import CP2KInputEngine
import numpy as np
import logging
logger = logging.getLogger(__name__)
#===============================================================================
......@@ -20,7 +22,6 @@ class CP2KParser(NomadParser):
implementation. For other versions there should be classes that extend from
this.
"""
def __init__(self, input_json_string):
NomadParser.__init__(self, input_json_string)
......@@ -38,7 +39,6 @@ class CP2KParser(NomadParser):
def setup_version(self):
"""Inherited from NomadParser.
"""
# Determine the CP2K version from the input file
beginning = self.read_part_of_file("output", 2048)
version_regex = re.compile(r"CP2K\|\ version\ string:\s+CP2K\ version\ (\d+\.\d+\.\d+)\n")
......@@ -48,20 +48,20 @@ class CP2KParser(NomadParser):
class_name = "CP2K{}Regexs".format(version_number)
self.regexs = globals().get(class_name)
if self.regexs:
print_debug("Using version specific regexs '{}'.".format(class_name))
logger.debug("Using version specific regexs '{}'.".format(class_name))
self.regexs = self.regexs()
else:
print_debug("Using default regexs.")
logger.debug("Using default regexs.")
self.regexs = globals()["CP2KRegexs"]()
# Search for a version specific implementation
class_name = "CP2K{}Implementation".format(version_number)
class_object = globals().get(class_name)
if class_object:
print_debug("Using version specific implementation '{}'.".format(class_name))
logger.debug("Using version specific implementation '{}'.".format(class_name))
self.implementation = class_object(self)
else:
print_debug("Using default implementation.")
logger.debug("Using default implementation.")
self.implementation = globals()["CP2KImplementation"](self)
def read_part_of_file(self, file_id, size=1024):
......@@ -73,7 +73,6 @@ class CP2KParser(NomadParser):
def determine_file_ids(self):
"""Inherited from NomadParser.
"""
# Determine a list of filepaths that need id resolution
resolved = {}
resolvable = []
......@@ -103,13 +102,13 @@ class CP2KParser(NomadParser):
# The force path is not typically exactly as written in input
if force_path.startswith("="):
print_debug("Using single force file.")
logger.debug("Using single force file.")
force_path = force_path[1:]
elif re.match(r".?/", force_path):
print_debug("Using separate force file for each step.")
logger.debug("Using separate force file for each step.")
force_path = "{}-1_0.xyz".format(force_path)
else:
print_debug("Using separate force file for each step.")
logger.debug("Using separate force file for each step.")