Commit 4499b75e authored by Lauri Himanen
Browse files

Added the parsing of every parameter given in the CP2K input.

parent 9d28c018
import numpy as np
import logging
from collections import defaultdict
logger = logging.getLogger("nomad")
......@@ -39,17 +40,30 @@ class CP2KInput(object):
def set_parameter(self, path, value):
    """Store the section parameter given for the section at `path`.

    Args:
        path: Slash-separated path of the section in the input tree.
        value: The section parameter value to store.

    Nothing is stored when the section cannot be found or when it does not
    define a section parameter; a warning is logged instead.
    """
    parameter, section = self.get_parameter_and_section(path)
    if section is None:
        message = "The CP2K input does not contain a section {}".format(path)
        logger.warning(message)
    # Only report a missing section parameter for sections that exist,
    # otherwise the second warning would be misleading.
    elif parameter is None:
        message = "The CP2K input section {} does not contain a SECTION_PARAMETER".format(path)
        logger.warning(message)
    else:
        parameter.value = value
def set_keyword(self, path, value):
    """Store a keyword value for the keyword at `path`.

    Args:
        path: Slash-separated path whose last part is the keyword name and
            whose leading part is the path of the enclosing section.
        value: The keyword value as a string.

    If the section exists but does not declare the keyword, the line is
    appended to the default keyword of the section (as "NAME VALUE\\n").
    A warning is logged when the section has no default keyword either.
    Nothing is done when the section itself is missing.
    """
    keyword, section = self.get_keyword_and_section(path)

    # If keyword found, put data in there
    if keyword and section:
        keyword.value = value

    # Keyword not found in the input tree, assuming it is a default keyword
    elif section is not None:
        # rpartition never raises, even for a path without any "/"
        section_path, _, keyword_name = path.rpartition("/")
        if section.default_keyword is not None:
            section.default_keyword.value += keyword_name + " " + value + "\n"
        else:
            message = "The CP2K input does not contain the keyword {}, and there is no default keyword for the section {}".format(path, section_path)
            logger.warning(message)
def get_section(self, path):
split_path = path.split("/")
......@@ -57,7 +71,8 @@ class CP2KInput(object):
for part in split_path:
section = section.get_subsection(part)
if not section:
print "Error in getting section at path '{}'.".format(path)
message = "The CP2K input does not contain the section {}".format(path)
logger.warning(message)
return None
return section
......@@ -66,12 +81,17 @@ class CP2KInput(object):
keyword = split_path[1]
section_path = split_path[0]
section = self.get_section(section_path)
if section is None:
message = "The CP2K input does not contain the section {}".format(path)
logger.warning(message)
return (None, None)
keyword = section.get_keyword(keyword)
if keyword and section:
return (keyword, section)
elif section:
else:
return (None, section)
return (None, None)
def get_keyword(self, path):
"""Returns the keyword that is specified by the given path.
......@@ -83,15 +103,18 @@ class CP2KInput(object):
if keyword.value is not None:
return keyword.get_value()
else:
# if section.accessed:
return keyword.default_value
def get_default_keyword(self, path):
    """Return the value stored in the default keyword of a section.

    Args:
        path: Slash-separated path of the section.

    Returns:
        The `value` of the section's default keyword, or None when the
        section does not exist or has no default keyword.
    """
    section = self.get_section(path)
    if section is None or section.default_keyword is None:
        return None
    return section.default_keyword.value
def set_section_accessed(self, path):
    """Mark the section at `path` as accessed.

    Args:
        path: Slash-separated path of the section.

    A warning is logged and nothing is changed when the section cannot be
    found.
    """
    section = self.get_section(path)
    if not section:
        message = "The CP2K input does not contain the section {}".format(path)
        logger.warning(message)
        return
    section.accessed = True
def get_keyword_default(self, path):
keyword, section = self.get_keyword_and_section(path)
......@@ -110,8 +133,13 @@ class CP2KInput(object):
def get_parameter_and_section(self, path):
    """Look up a section and its section parameter.

    Args:
        path: Slash-separated path of the section.

    Returns:
        A tuple (parameter, section). Both are None when the section is
        missing; parameter is None when the section has no section
        parameter.
    """
    section = self.get_section(path)
    if section is None:
        return (None, None)
    # section_parameter is None for sections without a section parameter,
    # so it can be returned as is.
    return (section.section_parameter, section)
def get_parameter(self, path):
parameter, section = self.get_parameter_and_section(path)
......@@ -123,18 +151,68 @@ class CP2KInput(object):
#===============================================================================
class InputObject(object):
    """Base class for all kind of data elements in the CP2K input.

    Attributes:
        name: The name given to the constructor.
        value: The value of the element; None until one is assigned.
        default_value: The default value of the element; None until one is
            assigned.
        description: Description text of the element, None by default.
        data_type: Data type identifier of the element, None by default.
        data_dimension: Dimension of the data, None by default.
    """
    __slots__ = ['name', 'value', 'default_value', 'description', 'data_type', 'data_dimension']

    def __init__(self, name):
        self.name = name
        self.value = None
        self.description = None
        self.data_type = None
        self.data_dimension = None
        self.default_value = None
#===============================================================================
class Keyword(InputObject):
"""Information about a keyword in a CP2K calculation.
"""
__slots__ = ['unit', 'value_no_unit', 'default_unit', 'default_name']
def __init__(self, name, default_value, default_unit, default_name):
    """Create a keyword with the given defaults.

    Args:
        name: Name of the keyword, forwarded to the base class.
        default_value: Default value of the keyword.
        default_unit: Default unit of the keyword.
        default_name: Default name of the keyword.

    The unit and the unit-less value start out as None.
    """
    super(Keyword, self).__init__(name)
    self.unit = None
    self.value_no_unit = None
    self.default_name = default_name
    self.default_unit = default_unit
    self.default_value = default_value
def get_formatted_value(self):
    """Return the keyword value converted to its declared data type.

    The raw string in `self.value` is split on whitespace and every item
    is converted according to `self.data_type` ("integer", "real", "word",
    "keyword", "string" or "logical").

    Returns:
        The single converted item when the value consists of exactly one
        item, otherwise a numpy array with the converted items. None is
        returned when no value is set, when the data type is unknown or
        when an item cannot be converted; the two latter cases are logged
        as errors.
    """
    if self.value is None:
        return None
    items = self.value.split()

    # Only check the number of items when a fixed dimension is known.
    # NOTE(review): -1 is treated as "variable length" because the metainfo
    # generation maps a dimension of -1 to the shape "n" -- confirm.
    if self.data_dimension is not None:
        dim = int(self.data_dimension)
        if dim != -1 and len(items) != dim:
            logger.error("The dimensions of the CP2K input parameter {} do not match the specification in the XML file.".format(self.name))

    try:
        if self.data_type == "integer":
            converted = np.array([int(x) for x in items])
        elif self.data_type == "real":
            converted = np.array([float(x) for x in items])
        elif self.data_type in ("word", "keyword", "string"):
            converted = np.array(items)
        elif self.data_type == "logical":
            converted = np.array([x.upper() == "T" for x in items])
        else:
            logger.error("Unknown data type '{}'".format(self.data_type))
            return None
    # int() and float() raise ValueError for malformed text
    except (TypeError, ValueError):
        logger.error("The CP2K input parameter {} could not be converted to the type specified in the XML file.".format(self.name))
        return None

    if len(converted) == 1:
        return converted[0]
    return converted
def get_value(self):
"""If the units of this value can be changed, return a value and the
......@@ -174,18 +252,20 @@ class Keyword(object):
#===============================================================================
class Section(object):
class Section(InputObject):
"""An input section in a CP2K calculation.
"""
__slots__ = ['accessed', 'name', 'keywords', 'default_keyword', 'section_parameter', 'sections']
__slots__ = ['accessed', 'name', 'keywords', 'default_keyword_names', 'default_keyword', 'section_parameter', 'sections', 'description']
def __init__(self, name):
    """Create an empty section.

    Args:
        name: Name of the section.

    The section starts out as not accessed, without keywords, subsections,
    section parameter or default keyword.
    """
    # Initialise the attributes inherited from the base class (name,
    # value, default_value, description, data_type, data_dimension) so that
    # none of them is left unset.
    super(Section, self).__init__(name)
    self.accessed = False
    # Keywords and subsections are stored as lists under their name
    self.keywords = defaultdict(list)
    self.sections = defaultdict(list)
    self.default_keyword_names = []
    self.default_keyword = None
    self.section_parameter = None
def get_keyword(self, name):
keyword = self.keywords.get(name)
......@@ -207,15 +287,26 @@ class Section(object):
#===============================================================================
class SectionParameters(InputObject):
    """Section parameters in a CP2K calculation.

    Section parameters are the short values that can be added right after a
    section name, e.g. &PRINT ON, where ON is the section parameter.

    Attributes:
        lone_keyword_value: Stored as given to the constructor.
            NOTE(review): presumably the value that applies when the
            section is opened without an explicit parameter -- confirm.
    """
    __slots__ = ['lone_keyword_value']

    def __init__(self, default_value, lone_keyword_value):
        super(SectionParameters, self).__init__("SECTION_PARAMETERS")
        self.default_value = default_value
        self.lone_keyword_value = lone_keyword_value
#===============================================================================
class DefaultKeyword(InputObject):
    """The default keyword of a section in the CP2K input.

    The object is always named "DEFAULT_KEYWORD" and its value starts out
    as an empty string.
    """

    def __init__(self):
        super(DefaultKeyword, self).__init__("DEFAULT_KEYWORD")
        # The value is a string from the start, unlike the None used by the
        # base class.
        self.value = ""
        self.lone_value = None
        self.default_value = None
......@@ -15,21 +15,38 @@ cp2k_input.xml.
import xml.etree.cElementTree as ET
import logging
import json
import cPickle as pickle
from cp2kparser.generic.inputparsing import *
logger = logging
#===============================================================================
def generate_object_tree(xml_file, for_metainfo=False):
    """Build the input object tree from the CP2K input XML description.

    Args:
        xml_file: The XML source, passed as is to ElementTree's parse().
        for_metainfo: Forwarded to recursive_tree_generation; when True the
            descriptions found in the XML are stored in the tree as well.

    Returns:
        The object returned by recursive_tree_generation for the root
        element of the XML document.
    """
    root = ET.parse(xml_file).getroot()

    # NOTE: All top level sections are currently kept. Sections that are
    # not interesting (candidates: ATOM, DEBUG, EXT_RESTART, FARMING,
    # OPTIMIZE_BASIS, OPTIMIZE_INPUT, SWARM, TEST) could be removed from
    # the root element here before the tree is generated.

    # Recursively generate the tree
    return recursive_tree_generation(root, for_metainfo)
#===============================================================================
def recursive_tree_generation(xml_element):
def recursive_tree_generation(xml_element, for_metainfo=False, name_stack=[]):
# Make new section object for the root
section_name_element = xml_element.find("NAME")
......@@ -39,6 +56,17 @@ def recursive_tree_generation(xml_element):
section_name = "CP2K_INPUT"
section = Section(section_name)
# Ignore sections that control the print settings
ignored = ["EACH", "PRINT"]
if section_name in ignored:
return
if for_metainfo:
# Descriptions
description = xml_element.find("DESCRIPTION")
if description is not None:
section.description = description.text
# Section parameters
parameter = xml_element.find("SECTION_PARAMETERS")
if parameter:
......@@ -53,9 +81,51 @@ def recursive_tree_generation(xml_element):
parameter_object = SectionParameters(sp_default_value, sp_lone_value)
section.section_parameter = parameter_object
# Data type
data_type = parameter.find("DATA_TYPE")
if data_type is not None:
data_type_kind = data_type.get("kind")
parameter_object.data_type = data_type_kind
# Data dimension
data_dim = data_type.find("N_VAR")
if data_dim is not None:
parameter_object.data_dimension = data_dim.text
if for_metainfo:
# Description
section_param_description = parameter.find("DESCRIPTION")
if section_param_description is not None:
parameter_object.description = section_param_description.text
# Default keyword
default_keyword_element = xml_element.find("DEFAULT_KEYWORD")
if default_keyword_element is not None:
default_keyword_object = DefaultKeyword()
# Data type
data_type = default_keyword_element.find("DATA_TYPE")
if data_type is not None:
data_type_kind = data_type.get("kind")
default_keyword_object.data_type = data_type_kind
# Data dimension
data_dim = data_type.find("N_VAR")
if data_dim is not None:
default_keyword_object.data_dimension = data_dim.text
if for_metainfo:
# Description
description = default_keyword_element.find("DESCRIPTION")
if description is not None:
default_keyword_object.description = description.text
section.default_keyword = default_keyword_object
# Keywords
for keyword in xml_element.findall("KEYWORD"):
keyword_names = keyword.findall("NAME")
default_name = None
aliases = []
for name in keyword_names:
......@@ -64,6 +134,14 @@ def recursive_tree_generation(xml_element):
default_name = name.text
else:
aliases.append(name.text)
# Ignore hidden keywords
if default_name.startswith("__"):
continue
# Save the default keyword name
section.default_keyword_names.append(default_name)
default_keyword_element = keyword.find("DEFAULT_VALUE")
default_keyword_value = None
if default_keyword_element is not None:
......@@ -74,24 +152,136 @@ def recursive_tree_generation(xml_element):
if default_unit_element is not None:
default_unit_value = default_unit_element.text
keyword_object = Keyword(default_name, default_keyword_value, default_unit_value)
keyword_object = Keyword(default_name, default_keyword_value, default_unit_value, default_name)
section.keywords[default_name].append(keyword_object)
for alias in aliases:
section.keywords[alias].append(keyword_object)
# Data type
data_type = keyword.find("DATA_TYPE")
if data_type is not None:
data_type_kind = data_type.get("kind")
keyword_object.data_type = data_type_kind
# Data dimension
data_dim = data_type.find("N_VAR")
if data_dim is not None:
keyword_object.data_dimension = data_dim.text
if for_metainfo:
# Description
keyword_description = keyword.find("DESCRIPTION")
if keyword_description is not None:
keyword_object.description = keyword_description.text
# Sections
for sub_section_element in xml_element.findall("SECTION"):
sub_section = recursive_tree_generation(sub_section_element)
section.sections[sub_section.name].append(sub_section)
sub_section = recursive_tree_generation(sub_section_element, for_metainfo)
if sub_section is not None:
section.sections[sub_section.name].append(sub_section)
# Return section
return section
#===============================================================================
def generate_input_metainfos(object_tree, file_name="input_metainfo.json"):
    """Write the metainfo definitions of the whole input tree as JSON.

    Args:
        object_tree: Object whose `root_section` attribute is the root
            section of the input tree. The name and the description of the
            root section are overwritten by this function.
        file_name: Path of the JSON file to write. Defaults to
            "input_metainfo.json" in the current working directory.
    """
    parent = Section("dummy")
    root_section = object_tree.root_section
    root_section.name = "CP2K_INPUT"
    root_section.description = "The section containing all information that is explicitly stated in the CP2K input file. The sections that control printing (PRINT, EACH) and the hidden input keywords starting with a double underscore are not included."

    # Collect the definitions of every section and input object
    container = []
    name_stack = []
    generate_metainfo_recursively(root_section, parent, container, name_stack)

    with open(file_name, "w") as f:
        f.write(json.dumps(container, indent=2, separators=(',', ': ')))
#===============================================================================
def generate_metainfo_recursively(obj, parent, container, name_stack):
    """Append the metainfo of `obj` and of everything below it.

    Args:
        obj: A Section or another input object of the tree.
        parent: The object that contains `obj`.
        container: List that receives the generated definitions. The
            definition of a section is appended after the definitions of
            its contents.
        name_stack: Names of the sections enclosing `obj`; the name of a
            section is pushed while its contents are processed.
    """
    if isinstance(obj, Section):
        name_stack.append(obj.name)
        metainfo = generate_section_metainfo_json(obj, parent, name_stack)

        # Only the first object stored under each name is processed
        for subsections in obj.sections.values():
            generate_metainfo_recursively(subsections[0], obj, container, name_stack)

        # The same keyword object is stored under its default name and
        # under every alias: emit each keyword only once.
        handled = set()
        for keywords in obj.keywords.values():
            keyword = keywords[0]
            if id(keyword) in handled:
                continue
            handled.add(id(keyword))
            generate_metainfo_recursively(keyword, obj, container, name_stack)

        if obj.section_parameter is not None:
            generate_metainfo_recursively(obj.section_parameter, obj, container, name_stack)
        if obj.default_keyword is not None:
            generate_metainfo_recursively(obj.default_keyword, obj, container, name_stack)
        name_stack.pop()
    else:
        metainfo = generate_input_object_metainfo_json(obj, parent, name_stack)
    container.append(metainfo)
#===============================================================================
def generate_input_object_metainfo_json(child, parent, name_stack):
    """Build the metainfo definition of a single input object.

    Args:
        child: Object providing `name`, `description`, `data_dimension`
            and `data_type`.
        parent: Not used by this function.
        name_stack: Names of the enclosing sections, outermost first.

    Returns:
        A dictionary with the keys "name", "superNames", "description",
        "shape" and "dtypeStr".
    """
    # Metainfo type character for every data type of the XML description
    dtype_by_kind = {
        "keyword": "C",
        "logical": "C",
        "string": "C",
        "integer": "i",
        "word": "C",
        "real": "f",
    }

    section_name = "x_cp2k_{}".format(".".join(name_stack))
    full_name = "{}.{}".format(section_name, child.name)
    description = "" if child.description is None else child.description

    # A dimension of one is a scalar, -1 is written as the shape "n"
    dimension = int(child.data_dimension)
    if dimension == 1:
        shape = []
    elif dimension == -1:
        shape = ["n"]
    else:
        shape = [dimension]

    dtype = dtype_by_kind[child.data_type]

    return {
        "name": full_name,
        "superNames": [section_name],
        "description": description,
        "shape": shape,
        "dtypeStr": dtype,
    }
#===============================================================================
def generate_section_metainfo_json(child, parent, name_stack):
    """Build the metainfo definition of a section.

    Args:
        child: Object providing the `description` of the section.
        parent: Not used by this function.
        name_stack: Names of the enclosing sections followed by the name of
            the section itself.

    Returns:
        A dictionary with the keys "name", "kindStr", "superNames" and
        "description".
    """
    own_name = "x_cp2k_{}".format(".".join(name_stack))
    # The super name is built from everything but the last name
    super_name = "x_cp2k_{}".format(".".join(name_stack[:-1]))
    description = "" if child.description is None else child.description

    return {
        "name": own_name,
        "kindStr": "type_section",
        "superNames": [super_name],
        "description": description,
    }
#===============================================================================
# Run main function by default
if __name__ == "__main__":

    # xml to pickle. Both files are closed as soon as they are not needed
    # any more.
    with open("../versions/cp2k262/input_data/cp2k_input.xml", 'r') as xml_file:
        object_tree = CP2KInput(generate_object_tree(xml_file))
    file_name = "../versions/cp2k262/input_data/cp2k_input_tree.pickle"
    with open(file_name, "wb") as fh:
        pickle.dump(object_tree, fh, protocol=2)

    # Metainfo generation
    # xml_file = open("../versions/cp2k262/input_data/cp2k_input.xml", 'r')
    # object_tree = CP2KInput(generate_object_tree(xml_file, for_metainfo=True))
    # generate_input_metainfos(object_tree)
......@@ -95,6 +95,13 @@ class CommonMatcher(object):
#===========================================================================
# Section close triggers
def onClose_section_run(self, backend, gIndex, section):
    """Push the values that do not depend on the contents of the calculation.

    Called when section_run is closed. Currently the only value is
    program_basis_set_type, which is always "gaussian".
    """
    basis_set_type = "gaussian"
    backend.addValue("program_basis_set_type", basis_set_type)
def onClose_section_method(self, backend, gIndex, section):
"""When all the functional definitions have been gathered, matches them
with the nomad correspondents and combines into one single string which
......
......@@ -27,15 +27,35 @@ class CP2KInputParser(BasicParser):
some point.
"""
def __init__(self, file_path, parser_context):
    """Set up the parser state; nothing is read from the input file yet.

    Attributes:
        input_tree: The input structure for this version of CP2K. The
            structure itself already exists, this module fills it with the
            data found in the input file.
        input_lines: List of the preprocessed lines of the input, where all
            variables are spelled out and the additional input files have
            been merged in.
        root_section: Initialised to None.
        force_file_name: Initialised to None.
    """
    super(CP2KInputParser, self).__init__(file_path, parser_context)
    self.input_tree = None
    self.input_lines = None
    self.root_section = None
    self.force_file_name = None
def parse(self):
#=======================================================================
# Preprocess to spell out variables and to include stuff from other
# files
self.preprocess_input()
#=======================================================================
# Gather the information from the input file
self.fill_input_tree(self.file_path)
#=======================================================================
# Parse everything in the input to cp2k specific metadata
self.fill_metadata()
#=======================================================================
# Parse the used XC_functionals and their parameters
xc = self.input_tree.get_section("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL")
......@@ -120,8 +140,9 @@ class CP2KInputParser(BasicParser):
#=======================================================================
# Single point force file name
force_file = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME")
if force_file != "__STD_OUT__":
# force_file = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME")
force_file = self.force_file_name
if force_file is not None and force_file != "__STD_OUT__":
force_file_path = self.normalize_cp2k_path(force_file, "xyz")
self.file_service.set_file_id(force_file_path, "force_file_single_point")
......@@ -181,44 +202,102 @@ class CP2KInputParser(BasicParser):
self.setup_version(self.parser_context.version_id)
section_stack = []
self.input_tree.root_section.accessed = True
for line in self.input_lines:
line = line.split('!', 1)[0].strip()
# Skip empty lines
if len(line) == 0:
continue
# Section ends
if line.upper().startswith('&END'):
section_stack.pop()
# Section starts
elif line[0] == '&':
parts = line.split(' ', 1)
name = parts[0][1:].upper()
section_stack.append(name)
# Form the path
path = ""
for index, item in enumerate(section_stack):
if index != 0:
path += '/'
path += item
# Mark the section as accessed.
self.input_tree.set_section_accessed(path)
# Save the section parameters
if len(parts) > 1:
self.input_tree.set_parameter(path, parts[1].strip().upper())