Commit 2e37288a authored by Lauri Himanen

Parsed more: the outputparser can now ask the parser for additional information.

parent 8df634f9
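
The diff wires the output parser and the main parser together in both directions: the output parser is now passed as superContext to parse_file so that its onClose_... callbacks fire during matching, and its constructor receives the parser itself so those callbacks can ask it for additional information. Below is a minimal sketch of that callback-to-parser direction, using made-up Toy* classes rather than the real ParserContext/CP2KOutputParser262 wiring, and assuming the usual onClose_<metaName>(backend, gIndex, section) callback convention.

# Illustrative only: ToyParser/ToyOutputParser are stand-ins, not project classes.
class ToyParser(object):
    def __init__(self, file_ids):
        # e.g. {"trajectory": ["/path/to/traj.xyz"]}
        self.file_ids = file_ids

    def get_file_handle(self, file_id, show_warning=True):
        paths = self.file_ids.get(file_id)
        if not paths:
            if show_warning:
                print("No files set with id '{}'".format(file_id))
            return None
        return open(paths[0])

class ToyOutputParser(object):
    def __init__(self, parser):
        # Keep a reference to the main parser so that onClose callbacks can ask
        # it for information the SimpleMatcher cannot provide on its own.
        self.parser = parser

    def onClose_section_run(self, backend, gIndex, section):
        traj = self.parser.get_file_handle("trajectory", show_warning=False)
        if traj is not None:
            traj.close()

In the real code the callbacks are collected with extractOnCloseTriggers and invoked by the caching backend; the toy classes above only show the shape of the interaction.
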
@@ -3,13 +3,13 @@ import logging
 import StringIO
 import sys
 from abc import ABCMeta, abstractmethod
-from nomadcore.simple_parser import SimpleParserBuilder, defaultParseFile
+from nomadcore.simple_parser import SimpleParserBuilder, defaultParseFile, extractOnCloseTriggers
 from nomadcore.caching_backend import CachingLevel, ActiveBackend
 logger = logging.getLogger(__name__)
 #===============================================================================
-class NomadParser(object):
+class Parser(object):
     """The base class for parsers in the NoMaD project.
     What you can expect from this class:
@@ -38,11 +38,12 @@ class NomadParser(object):
     __metaclass__ = ABCMeta
     def __init__(self, parser_context):
-        self.files = parser_context.files
-        self.metainfoenv = parser_context.metainfoenv
-        self.backend = parser_context.backend
-        self.stream = parser_context.stream
-        self.version_id = parser_context.version_id
+        # Copy all the attributes from the parser_context object
+        attributes = dir(parser_context)
+        for attribute in attributes:
+            if not attribute.startswith("__"):
+                setattr(self, attribute, getattr(parser_context, attribute))
         self._file_handles = {}
         self._file_contents = {}
@@ -91,6 +92,16 @@ class NomadParser(object):
         if not parserBuilder.verifyMetaInfo(sys.stderr):
             sys.exit(1)
+        # Gather onClose functions from supercontext
+        if superContext:
+            onClose = dict(onClose)
+            for attr, callback in extractOnCloseTriggers(superContext).items():
+                oldCallbacks = onClose.get(attr, None)
+                if oldCallbacks:
+                    oldCallbacks.append(callback)
+                else:
+                    onClose[attr] = [callback]
         # Setup the backend that caches ond handles triggers
         backend = ActiveBackend.activeBackend(
             metaInfoEnv=metaInfoEnv,
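
For reference, the merge added in the hunk above, runnable in isolation. extract_onclose_triggers is a stand-in for nomadcore's extractOnCloseTriggers, assumed here to collect every onClose_<metaName> method of the supercontext keyed by the meta name:

def extract_onclose_triggers(super_context):
    # Stand-in: collect bound methods named onClose_<metaName>.
    triggers = {}
    for name in dir(super_context):
        if name.startswith("onClose_"):
            triggers[name[len("onClose_"):]] = getattr(super_context, name)
    return triggers

class ToySuperContext(object):
    def onClose_section_run(self, backend, gIndex, section):
        print("supercontext trigger for section_run")

def existing_trigger(backend, gIndex, section):
    print("previously registered trigger for section_run")

onClose = {"section_run": [existing_trigger]}
superContext = ToySuperContext()

# Same merging logic as in the hunk above.
onClose = dict(onClose)
for attr, callback in extract_onclose_triggers(superContext).items():
    oldCallbacks = onClose.get(attr, None)
    if oldCallbacks:
        oldCallbacks.append(callback)
    else:
        onClose[attr] = [callback]

# onClose["section_run"] now holds both callbacks.

Note that dict(onClose) is only a shallow copy: the append above extends callback lists that are shared with the original dictionary.
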
@@ -137,7 +148,7 @@ class NomadParser(object):
         else:
             logger.error("Trying to setup an id for an undefined path. See that the path was written correctly and it was given in the files attribute of the JSON string.")
-    def get_filepath_by_id(self, file_id):
+    def get_filepath_by_id(self, file_id, show_warning=True):
         """Get the file paths that were registered with the given id.
         """
         value = self.file_ids.get(file_id)
@@ -147,22 +158,26 @@ class NomadParser(object):
             if n == 1:
                 return value[0]
             elif n == 0:
-                logger.warning("No files set with id '{}'".format(file_id))
+                if show_warning:
+                    logger.warning("No files set with id '{}'".format(file_id))
                 return None
             else:
-                logger.debug("Multiple files set with id '{}'".format(file_id))
+                if show_warning:
+                    logger.debug("Multiple files set with id '{}'".format(file_id))
                 return value
         else:
-            logger.warning("No files set with id '{}'".format(file_id))
+            if show_warning:
+                logger.warning("No files set with id '{}'".format(file_id))
-    def get_file_handle(self, file_id):
+    def get_file_handle(self, file_id, show_warning=True):
         """Get the handle for a single file with the given id. Uses cached result
         if available. Always seeks to beginning of file before returning it.
         """
         # Get the filepath(s)
-        path = self.get_filepath_by_id(file_id)
+        path = self.get_filepath_by_id(file_id, show_warning)
         if not path:
-            logger.warning("No filepaths registered to id '{}'. Register id's with setup_file_id().".format(file_id))
+            if show_warning:
+                logger.warning("No filepaths registered to id '{}'. Register id's with setup_file_id().".format(file_id))
             return
         if isinstance(path, list):
@@ -186,12 +201,12 @@ class NomadParser(object):
         handle.seek(0, os.SEEK_SET)
         return handle
-    def get_file_handles(self, file_id):
+    def get_file_handles(self, file_id, show_warning=True):
         """Get the handles for multiple files with the given id. Uses cached result
         if available. Always seeks to beginning of files before returning them.
         """
         # Get the filepath(s)
-        paths = self.get_filepath_by_id(file_id)
+        paths = self.get_filepath_by_id(file_id, show_warning)
         if not paths:
             return
         if not isinstance(paths, list):
@@ -244,19 +259,6 @@ class NomadParser(object):
         self._file_sizes[file_id] = size
         return size
     # @abstractmethod
     # def get_supported_quantities(self):
     # """Return a list of the nomad quantities that this parser supports. The
...
@@ -5,7 +5,7 @@ logger = logging.getLogger(__name__)
 #===============================================================================
-class CP2KInputEngine(object):
+class CP2KInputParser(object):
     """Used to parse out a CP2K input file.
     When given a file handle to a CP2K input file, this class attemts to parse
...
@@ -2,36 +2,30 @@ import re
 import os
 import logging
 from cp2kparser.engines.csvengine import CSVEngine
-from cp2kparser.implementation.cp2kinputparsers import CP2KInputEngine
-from cp2kparser.implementation.cp2kinputenginedata.input_tree import CP2KInput
+from cp2kparser.implementation.cp2kinputparser import CP2KInputParser
 from cp2kparser.implementation.outputparsers import *
 from nomadcore.coordinate_reader import CoordinateReader
-from cp2kparser.generics.nomadparser import NomadParser
+from cp2kparser.generics.parser import Parser
 logger = logging.getLogger(__name__)
 #===============================================================================
-class CP2KImplementation262(NomadParser):
-    """Defines the basic functions that are used to map results to the
-    corresponding NoMaD quantities.
-    This class provides the basic implementations and for a version specific
-    updates and additions please make a new class that inherits from this.
-    The functions that return certain quantities are tagged with a prefix '_Q_'
-    to be able to automatically determine which quantities have at least some
-    level of support. With the tag they can be also looped through.
+class CP2KImplementation262(Parser):
+    """The default implementation for a CP2K parser based on version 2.6.2.
     """
     def __init__(self, parser_context):
         # Initialize the base class
-        NomadParser.__init__(self, parser_context)
-        # Engines are created here
+        Parser.__init__(self, parser_context)
+        # Initialize the parsing tools. The input and output parsers need to
+        # know the version id.
         self.csvengine = CSVEngine(self)
-        self.inputengine = CP2KInputEngine()
         self.atomsengine = CoordinateReader()
-        self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)]()
+        self.inputparser = CP2KInputParser()
+        self.inputparser.setup_version(self.version_id)
+        self.outputparser = globals()["CP2KOutputParser{}".format(self.version_id)](self, self.metainfo_to_keep, self.metainfo_to_skip)
         self.input_tree = None
         self.extended_input = None
@@ -40,7 +34,8 @@ class CP2KImplementation262(NomadParser):
         self.determine_file_ids_post_setup()
     def determine_file_ids_pre_setup(self):
-        """First resolve the files that can be identified by extension.
+        """Resolve the input and output files based on extension and the
+        include files by looking for @INCLUDE commands in the input file.
         """
         # Input and output files
         for file_path in self.files.iterkeys():
@@ -62,12 +57,11 @@ class CP2KImplementation262(NomadParser):
                 self.setup_file_id(filepath, "include")
     def input_preprocessor(self):
-        """Preprocess the input file. Concatenate .inc files into the main input file and
-        explicitly state all variables.
+        """Preprocess the input file. Concatenate .inc files into the main
+        input file and explicitly state all variables.
         """
         # Merge include files to input
-        include_files = self.get_file_handles("include")
+        include_files = self.get_file_handles("include", show_warning=False)
         input_file = self.get_file_contents("input")
         input_lines = input_file.split("\n")
         extended_input = input_lines[:]  # Make a copy
@@ -106,8 +100,6 @@ class CP2KImplementation262(NomadParser):
             else:
                 input_set_removed.append(line)
-        # print '\n'.join(input_set_removed)
         # Place the variables
         variable_pattern = r"\@\{(\w+)\}|@(\w+)"
         compiled = re.compile(variable_pattern)
@@ -142,12 +134,11 @@ class CP2KImplementation262(NomadParser):
                 input_variables_replaced.append(new_line)
         self.extended_input = '\n'.join(input_variables_replaced)
-        self.inputengine.setup_version(self.version_id)
-        self.input_tree = self.inputengine.parse(self.extended_input)
+        self.input_tree = self.inputparser.parse(self.extended_input)
     def determine_file_ids_post_setup(self):
         """Determines the file id's after the CP2K verion has been set
-        up.
+        up. This includes force files, coordinate files, cell files, etc.
         """
         # Determine the presence of force file
         force_path = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME")
@@ -210,6 +201,9 @@ class CP2KImplementation262(NomadParser):
                 self.setup_file_id(file_path, "cell_input")
     def normalize_cp2k_path(self, path, extension, name=""):
+        """The paths in CP2K input can be given in many ways. This function
+        tries to normalize these paths to a common form.
+        """
         if name:
             name = "-" + name
         logger.debug("Normalizing trajectory path")
@@ -226,15 +220,15 @@ class CP2KImplementation262(NomadParser):
         """Searches the list of given files for a file that is defined in the
         CP2K input file.
-        First compares the basenames, and if multiple matches found descends
+        First compares the filename, and if multiple matches found descends
         the path until only only one or zero matches found.
         """
         matches = {}
         resolvable = [x for x in self.files.iterkeys() if x not in self.file_ids.itervalues()]
+        searched_parts = self.split_path(path)
         for file_path in resolvable:
             available_parts = self.split_path(file_path)
-            searched_parts = self.split_path(path)
             for i_part, part in enumerate(searched_parts):
                 if part == available_parts[i_part]:
                     matches[file_path] = i_part
@@ -247,13 +241,14 @@ class CP2KImplementation262(NomadParser):
             return
         else:
             sorted_list = [(k, v) in sorted(mydict.items(), key=lambda (k, v): v[1])]
-            # sorted_list = sorted(mathes, key=lambda k: matches[k])
             if (sorted_list[0][1] == sorted_list[1][1]):
                 logger.error("When searching for file '{}', multiple matches were found. Could not determine which file to use based on their path.")
             else:
                 return sorted_list[0][0]
     def split_path(self, path):
+        """Splits a path into components and returns them in a reversed order.
+        """
         folders = []
         while 1:
             path, folder = os.path.split(path)
@@ -281,89 +276,19 @@ class CP2KImplementation262(NomadParser):
         backend = self.backend
         outputstructure = self.outputparser.outputstructure
         cachingLevelForMetaName = self.outputparser.cachingLevelForMetaName
-        self.parse_file(outputfilename, outputstructure, metainfoenv, backend, parserInfo, cachingLevelForMetaName)
+        self.parse_file(outputfilename, outputstructure, metainfoenv, backend, parserInfo, cachingLevelForMetaName, superContext=self.outputparser)
         # Then extract the things that cannot be extracted by the SimpleMatcher
         # Write the ending bracket
         self.stream.write("]\n")
-    # def dateconverter(datestring):
-    def _Q_energy_total(self):
-        """Return the total energy from the bottom of the input file"""
-        result = Result()
-        result.unit = "hartree"
-        result.value = float(self.regexengine.parse(self.regexs.energy_total, self.parser.get_file_handle("output")))
-        return result
+    # def _Q_energy_total(self):
+    # """Return the total energy from the bottom of the input file"""
+    # result = Result()
+    # result.unit = "hartree"
+    # result.value = float(self.regexengine.parse(self.regexs.energy_total, self.parser.get_file_handle("output")))
+    # return result
-    def _Q_XC_functional(self):
-        """Returns the type of the XC functional.
-        Can currently only determine version if they are declared as parameters
-        for XC_FUNCTIONAL or via activating subsections of XC_FUNCTIONAL.
-        Returns:
-            A string containing the final result that should
-            belong to the list defined in NoMaD wiki.
-        """
-        result = Result()
-        # First try to look at the shortcut
-        xc_shortcut = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL")
-        if xc_shortcut is not None and xc_shortcut != "NONE" and xc_shortcut != "NO_SHORTCUT":
-            logger.debug("Shortcut defined for XC_FUNCTIONAL")
-            # If PBE, check version
-            if xc_shortcut == "PBE":
-                pbe_version = self.input_tree.get_keyword("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/PBE/PARAMETRIZATION")
-                result.value = {
-                    'ORIG': "GGA_X_PBE",
-                    'PBESOL': "GGA_X_PBE_SOL",
-                    'REVPBE': "GGA_X_PBE_R",
-                }.get(pbe_version, "GGA_X_PBE")
-                return result
-            result.value = {
-                'B3LYP': "HYB_GGA_XC_B3LYP",
-                'BEEFVDW': None,
-                'BLYP': "GGA_C_LYP_GGA_X_B88",
-                'BP': None,
-                'HCTH120': None,
-                'OLYP': None,
-                'LDA': "LDA_XC_TETER93",
-                'PADE': "LDA_XC_TETER93",
-                'PBE0': None,
-                'TPSS': None,
-            }.get(xc_shortcut, None)
-            return result
-        else:
-            logger.debug("No shortcut defined for XC_FUNCTIONAL. Looking into subsections.")
-        # Look at the subsections and determine what part have been activated
-        # Becke88
-        xc_components = []
-        becke_88 = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE88")
-        if becke_88 == "TRUE":
-            xc_components.append("GGA_X_B88")
-        # Becke 97
-        becke_97 = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE97")
-        if becke_97 == "TRUE":
-            becke_97_param = self.input_tree.get_keyword("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE97/PARAMETRIZATION")
-            becke_97_result = {
-                'B97GRIMME': None,
-                'B97_GRIMME': None,
-                'ORIG': "GGA_XC_B97",
-                'WB97X-V': None,
-            }.get(becke_97_param, None)
-            if becke_97_result is not None:
-                xc_components.append(becke_97_result)
-        # Return an alphabetically sorted and joined list of the xc components
-        result.value = "_".join(sorted(xc_components))
-        return result
     def _Q_particle_forces(self):
         """Return the forces that are controlled by
@@ -417,67 +342,65 @@ class CP2KImplementation262(NomadParser):
         result.value_iterable = iterator
         return result
-    def _Q_particle_number(self):
-        """Return the number of particles in the system.
-        CP2K output doesn't automatically print the number of atoms. For this
-        reason this function has to look at the initial configuration and
-        calculate the number from it. The initial configuration is something
-        that must be present for all calculations.
+    def get_initial_atom_positions_and_unit(self):
+        """Returns the starting configuration of the atoms in the system.
         """
-        result = Result()
-        result.cache = True
+        unit = "angstrom"
         # Check where the coordinates are specified
         coord_format = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_FORMAT")
         if not coord_format:
             coord_format = self.input_tree.get_keyword_default("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_FORMAT")
-        # Check if the unit cell is multiplied programmatically
-        multiples = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/MULTIPLE_UNIT_CELL")
-        if not multiples:
-            multiples = self.input_tree.get_keyword_default("FORCE_EVAL/SUBSYS/TOPOLOGY/MULTIPLE_UNIT_CELL")
-        factors = [int(x) for x in multiples.split()]
-        factor = np.prod(np.array(factors))
         # See if the coordinates are provided in the input file
         if coord_format == "OFF":
             logger.debug("Using coordinates from the input file.")
             coords = self.input_tree.get_default_keyword("FORCE_EVAL/SUBSYS/COORD")
-            coords.strip()
-            n_particles = coords.count("\n")
-            result.value = factor*n_particles
+            coords = coords.strip().split('\n')
+            positions = []
+            for line in coords:
+                components = [float(x) for x in line.split()[1:]]
+                positions.append(components)
+            positions = np.array(positions)
+            return positions, unit
         elif coord_format in ["CP2K", "G96", "XTL", "CRD"]:
             msg = "Tried to read the number of atoms from the initial configuration, but the parser does not yet support the '{}' format that is used by file '{}'.".format(coord_format, self.parser.file_ids["initial_coordinates"])
             logger.warning(msg)
-            result.error_message = msg
-            result.code = ResultCode.fail
         else:
             # External file, use AtomsEngine
             init_coord_file = self.parser.get_file_handle("initial_coordinates")
             if coord_format == "XYZ":
-                n_particles = self.atomsengine.n_atoms(init_coord_file, format="xyz")
+                iter_pos = self.atomsengine.iread(init_coord_file, format="xyz")
            if coord_format == "CIF":
-                n_particles = self.atomsengine.n_atoms(init_coord_file, format="cif")
+                iter_pos = self.atomsengine.iread(init_coord_file, format="cif")
            if coord_format == "PDB":
-                n_particles = self.atomsengine.n_atoms(init_coord_file, format="pdb")
+                iter_pos = self.atomsengine.iread(init_coord_file, format="pdb")
+            return next(iter_pos), unit
-        result.value = factor*n_particles
-        return result
+        # # Check if the unit cell is multiplied programmatically
+        # multiples = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/MULTIPLE_UNIT_CELL")
+        # if not multiples:
+        #     multiples = self.input_tree.get_keyword_default("FORCE_EVAL/SUBSYS/TOPOLOGY/MULTIPLE_UNIT_CELL")
+        # factors = [int(x) for x in multiples.split()]
+        # factor = np.prod(np.array(factors))
-    def _Q_particle_position(self):
-        """Returns the particle positions (trajectory).
+    def get_atom_positions_and_unit(self):
+        """Returns the atom positions and unit that were calculated during the
+        simulation.
         """
-        result = Result()
         # Determine the unit
         unit_path = "MOTION/PRINT/TRAJECTORY/UNIT"
         unit = self.input_tree.get_keyword(unit_path)
-        unit = unit.lower()
-        result.unit = CP2KInput.decode_cp2k_unit(unit)
+        # unit = unit.lower()
+        unit = CP2KInput.decode_cp2k_unit(unit)
         # Read the trajectory
-        traj_file = self.parser.get_file_handle("trajectory")
+        traj_file = self.get_file_handle("trajectory", show_warning=False)
+        if not traj_file:
+            logger.debug("No trajectory file detected.")
+            return None, None
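
The inline-COORD branch of get_initial_atom_positions_and_unit boils down to the transformation sketched below: the element symbol in the first column is dropped and the remaining columns become an N x 3 array (the unit is assumed to be angstrom, as in the new code). The sample coordinate block is made up; in the parser the string comes from input_tree.get_default_keyword("FORCE_EVAL/SUBSYS/COORD").

import numpy as np

# Made-up FORCE_EVAL/SUBSYS/COORD block used only to illustrate the parsing.
coords = """
O   0.000  0.000  0.117
H   0.000  0.757 -0.470
H   0.000 -0.757 -0.470
"""

positions = []
for line in coords.strip().split("\n"):
    components = [float(x) for x in line.split()[1:]]  # drop the element symbol
    positions.append(components)
positions = np.array(positions)

print(positions.shape)  # (3, 3)
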