Commit a4f1bfbb authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Added parsing of input file if found, added support for more functionals,...

Added parsing of input file if found, added support for more functionals, corrected the formatting of functionals, restructured the package a bit.
parent 4e3b1201
...@@ -56,6 +56,7 @@ env/ ...@@ -56,6 +56,7 @@ env/
# CP2K files # CP2K files
*.inp *.inp
*.wfn *.wfn
test/unittests/BASIS_SET test/unittests/BASIS_SET
test/unittests/GTH_POTENTIALS test/unittests/GTH_POTENTIALS
This diff is collapsed.
import logging
from collections import defaultdict
logger = logging.getLogger(__name__)


class CP2KInput(object):
    """The contents of a CP2K simulation input, including the default values
    and default units from the version-specific xml file.

    Sections, keywords and section parameters are addressed with
    slash-separated paths that start below the root section, e.g.
    "FORCE_EVAL/DFT/XC/XC_FUNCTIONAL".
    """

    def __init__(self, root_section):
        """
        Args:
            root_section: The root section object of the input tree. It has to
                provide get_subsection(name) and get_keyword(name).
        """
        self.root_section = root_section

    @staticmethod
    def decode_cp2k_unit(unit):
        """Given a CP2K unit name, decode it as Pint unit definition.

        Declared as a static method because it is called through the class
        (CP2KInput.decode_cp2k_unit(...)) without an instance.

        Returns:
            The Pint unit name as a string, or None if the CP2K unit is not
            known (an error is logged in that case).
        """
        unit_map = {
            # Length
            "bohr": "bohr",
            "m": "meter",
            "pm": "picometer",
            "nm": "nanometer",
            "angstrom": "angstrom",
            # Angle
            "rad": "radian",
            "deg": "degree",
            # Energy
            "Ry": "rydberg",
        }
        pint_unit = unit_map.get(unit)
        if pint_unit:
            return pint_unit
        logger.error("Unknown CP2K unit definition '{}'.".format(unit))
        return None

    @staticmethod
    def _split_path(path):
        """Split 'A/B/NAME' into ('A/B', 'NAME').

        For a path without any '/' the section part is None, which callers
        interpret as "directly under the root section".
        """
        section_path, separator, name = path.rpartition("/")
        if not separator:
            return None, name
        return section_path, name

    def set_parameter(self, path, value):
        """Set the value of the section parameter of the section at 'path'.

        Logs an error and does nothing if the section does not exist or has
        no section parameter.
        """
        parameter, section = self.get_parameter_and_section(path)
        if parameter is None:
            logger.error("Could not set the section parameter at path '{}'.".format(path))
            return
        parameter.value = value

    def set_keyword(self, path, value):
        """Set the value of the keyword at 'path'.

        If the section exists but does not define the keyword, the line is
        appended to the section's default keyword as "<name> <value>\\n".
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword is not None:
            keyword.value = value
        elif section is not None:
            name = self._split_path(path)[1]
            section.default_keyword += name + " " + value + "\n"

    def get_section(self, path):
        """Return the section at 'path', or None if any part of the path
        cannot be resolved (an error is logged in that case).
        """
        section = self.root_section
        for part in path.split("/"):
            section = section.get_subsection(part)
            if section is None:
                logger.error("Error in getting section at path '{}'.".format(path))
                return None
        return section

    def get_keyword_and_section(self, path):
        """Resolve 'path' into a (keyword, section) tuple.

        Returns:
            (keyword, section) if both exist, (None, section) if only the
            section exists and (None, None) if the section does not exist.
        """
        section_path, name = self._split_path(path)
        if section_path is None:
            section = self.root_section
        else:
            section = self.get_section(section_path)
        # The section has to be checked before it is queried for the keyword.
        if section is None:
            return (None, None)
        return (section.get_keyword(name), section)

    def get_keyword(self, path):
        """Returns the value of the keyword that is specified by the given
        path.

        If the keyword has no value set, returns the default value defined in
        the XML, but only when the enclosing section has been marked as
        accessed. Returns None otherwise.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword is None:
            return None
        if keyword.value is not None:
            return keyword.get_value()
        if section.accessed:
            return keyword.default_value
        return None

    def get_default_keyword(self, path):
        """Return the default keyword contents of the section at 'path', or
        None if the section does not exist.
        """
        section = self.get_section(path)
        if section is None:
            return None
        return section.default_keyword

    def set_section_accessed(self, path):
        """Mark the section at 'path' as accessed. Does nothing if the section
        does not exist (get_section has already logged the error).
        """
        section = self.get_section(path)
        if section is not None:
            section.accessed = True

    def get_keyword_default(self, path):
        """Return the default value of the keyword at 'path', or None if the
        keyword does not exist.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword is None:
            return None
        return keyword.default_value

    def get_default_unit(self, path):
        """Return the default unit of the keyword at 'path', or None if the
        keyword does not exist.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword is None:
            return None
        return keyword.default_unit

    def get_unit(self, path):
        """Return the unit of the keyword at 'path' as given by the keyword's
        get_unit(), or None if the keyword does not exist.
        """
        keyword, section = self.get_keyword_and_section(path)
        if keyword is None:
            return None
        return keyword.get_unit()

    def get_parameter_and_section(self, path):
        """Resolve 'path' into a (section parameter, section) tuple.

        Returns (None, None) if the section does not exist.
        """
        section = self.get_section(path)
        if section is None:
            return (None, None)
        return (section.section_parameter, section)

    def get_parameter(self, path):
        """Return the section parameter value of the section at 'path'.

        If no (truthy) value has been set and the section has been marked as
        accessed, the lone value of the parameter is returned. Returns None
        otherwise.
        """
        parameter, section = self.get_parameter_and_section(path)
        if parameter is None:
            return None
        if parameter.value:
            return parameter.value
        if section.accessed:
            return parameter.lone_value
        return None
class Keyword(object):
    """Information about a keyword in a CP2K calculation.

    The value is stored as the raw string from the input. For keywords that
    have a default unit, the raw string may start with a unit definition in
    square brackets; the unit and the bare value are then decoded lazily on
    first access.
    """
    __slots__ = ['value', 'unit', 'value_no_unit', 'default_name', 'default_value', 'default_unit']

    def __init__(self, default_name, default_value, default_unit_value):
        self.value = None
        self.unit = None
        self.value_no_unit = None
        self.default_name = default_name
        self.default_value = default_value
        self.default_unit = default_unit_value

    def get_value(self):
        """Return the value of this keyword.

        If the units of this value can be changed (i.e. the keyword has a
        default unit), the value is returned without the unit definition. Use
        get_unit() to get the unit separately.
        """
        if self.default_unit:
            # Decode only once; afterwards the cached result is reused.
            if not self.value_no_unit:
                self.decode_cp2k_unit_and_value()
            return self.value_no_unit
        return self.value

    def get_unit(self):
        """Return the decoded unit of this keyword, or None (after logging an
        error) if the keyword does not have a unit.
        """
        if self.default_unit:
            if not self.unit:
                self.decode_cp2k_unit_and_value()
            return self.unit
        logger.error("The keyword '{}' does not have a unit.".format(self.default_name))
        return None

    def decode_cp2k_unit_and_value(self):
        """Split the raw value into a unit and a bare value.

        A value of the form "[unit] rest" uses the explicitly given unit.
        Otherwise the default unit (if any) is used and the whole value is
        kept as is. The results are stored in self.unit and
        self.value_no_unit.
        """
        value = self.value
        # An unset or empty value has no parts; it must not crash the decode.
        parts = value.split(None, 1) if value else []
        first = parts[0] if parts else ""
        if first.startswith('[') and first.endswith(']'):
            # The unit written in the input overrides the default unit.
            self.unit = CP2KInput.decode_cp2k_unit(first[1:-1])
            self.value_no_unit = parts[1] if len(parts) > 1 else None
        elif self.default_unit:
            logger.debug("No special unit definition found, returning default unit.")
            self.unit = CP2KInput.decode_cp2k_unit(self.default_unit)
            self.value_no_unit = value
        else:
            logger.debug("The value has no unit, returning bare value.")
            self.value_no_unit = value
class Section(object):
    """An input section in a CP2K calculation.

    Keywords and subsections are stored in dictionaries that map a name to a
    list of objects, because the same name may be registered more than once.
    """
    __slots__ = ['accessed', 'name', 'keywords', 'default_keyword', 'section_parameter', 'sections']

    def __init__(self, name):
        self.accessed = False
        self.name = name
        self.keywords = defaultdict(list)
        self.default_keyword = ""
        self.section_parameter = None
        self.sections = defaultdict(list)

    def get_keyword(self, name):
        """Return the keyword registered under 'name'.

        Returns None and logs an error unless exactly one keyword is
        registered under the name.
        """
        # dict.get() is used on purpose: it does not create an empty entry in
        # the defaultdict for names that are missing.
        candidates = self.keywords.get(name)
        if candidates and len(candidates) == 1:
            return candidates[0]
        logger.error("The keyword '{}' in '{}' does not exist or has too many entries.".format(name, self.name))
        return None

    def get_subsection(self, name):
        """Return the subsection registered under 'name'.

        Returns None and logs an error if the subsection does not exist or if
        there is more than one subsection with the name.
        """
        candidates = self.sections.get(name)
        if not candidates:
            logger.error("The subsection '{}' in '{}' does not exist.".format(name, self.name))
            return None
        if len(candidates) == 1:
            return candidates[0]
        logger.error("The subsection '{}' in '{}' has too many entries.".format(name, self.name))
        return None
class SectionParameters(object):
    """Section parameters in a CP2K calculation.

    Section parameters are the short values that can be added right after a
    section name, e.g. &PRINT ON, where ON is the section parameter.
    """
    __slots__ = ['value', 'default_value', 'lone_value']

    def __init__(self, default_value, lone_value):
        # No value is set until one is assigned explicitly.
        self.value = None
        self.default_value = default_value
        self.lone_value = lone_value
import os
import re
import logging
from cp2kparser.generic.baseclasses import ParserInterface
from cp2kparser.versions.versionsetup import get_main_parser
logger = logging.getLogger(__name__)
class CP2KParser(ParserInterface):
    """This class handles the initial setup before any parsing can happen. It
    determines which version of CP2K was used to generate the output and then
    sets up a correct implementation.

    After the implementation has been setup, it is available as
    self.main_parser.
    """
    def __init__(self, main_file, metainfo_to_keep=None, backend=None, default_units=None, metainfo_units=None):
        super(CP2KParser, self).__init__(main_file, metainfo_to_keep, backend, default_units, metainfo_units)

    def setup_version(self):
        """Setups the version by looking at the output file and the version
        specified in it.
        """
        # Search for the version specification and initialize a correct
        # main parser for this version. Only the first n_lines lines of the
        # file are inspected.
        regex = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
        n_lines = 30
        version_id = None
        with open(self.parser_context.main_file, 'r') as outputfile:
            # Iterating the file (instead of calling next() a fixed number of
            # times) also works for files shorter than n_lines.
            for i_line, line in enumerate(outputfile):
                if i_line >= n_lines:
                    break
                result = regex.match(line)
                if result:
                    version_id = result.group(1).replace('.', '')
                    break
        if version_id is None:
            logger.error("Could not find a version specification from the given main file.")

        # Setup the root folder to the fileservice that is used to access files
        # NOTE(review): dirpath is not used in the code visible here - confirm
        # whether the root folder setup call is missing.
        dirpath, filename = os.path.split(self.parser_context.main_file)
        self.parser_context.file_service.set_file_id(filename, "output")

        # Setup the correct main parser based on the version id. If no match
        # for the version is found, use the main parser for CP2K 2.6.2
        # NOTE(review): version_id is None when no version line was found;
        # presumably get_main_parser implements the fallback - confirm.
        self.main_parser = get_main_parser(version_id)(self.parser_context.main_file, self.parser_context)

    def get_metainfo_filename(self):
        """Return the name of the metainfo file used by this parser."""
        return "cp2k.nomadmetainfo.json"

    def get_parser_info(self):
        """Return the name and the version of this parser as a dictionary."""
        return {'name': 'cp2k-parser', 'version': '1.0'}
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Provides functions for creating a python object representing a CP2K input
structure.

Creates preparsed versions of the cp2k_input.xml files and pickles them
(python version of serialization). The pickle files can then be easily reused
without doing the xml parsing again.

The actual calculation input contents can later be added to this object. Then
the object can be queried for the results, or the default values defined by the
cp2k_input.xml.
"""
import xml.etree.cElementTree as ET
import logging
import cPickle as pickle
from cp2kparser.generic.inputparsing import *
logger = logging
def generate_object_tree(xml_file):
    """Parse the given XML file and return the root Section of the object
    tree that is generated from it.

    Args:
        xml_file: A filename or a file object that is passed to ET.parse().
    """
    xml_tree = ET.parse(xml_file)
    # Start the recursion explicitly from the root element of the document.
    return recursive_tree_generation(xml_tree.getroot())
def _find_text(xml_element, tag):
    """Return the text of the first child called 'tag', or None if there is no
    such child.
    """
    child = xml_element.find(tag)
    if child is None:
        return None
    return child.text


def recursive_tree_generation(xml_element):
    """Recursively build a Section object from the given XML element.

    The section parameters, the keywords (registered under their default name
    and under every alias) and the subsections are read from the direct
    children of the element.

    Returns:
        The Section object corresponding to 'xml_element'.
    """
    # Make new section object for the root. The root element does not have a
    # NAME child, so a fixed name is used for it.
    section_name = _find_text(xml_element, "NAME")
    if section_name is None:
        section_name = "CP2K_INPUT"
    section = Section(section_name)

    # Section parameters. The element has to be compared against None: the
    # truth value of an Element depends on whether it has children.
    parameter = xml_element.find("SECTION_PARAMETERS")
    if parameter is not None:
        sp_default_value = _find_text(parameter, "DEFAULT_VALUE")
        sp_lone_value = _find_text(parameter, "LONE_KEYWORD_VALUE")
        section.section_parameter = SectionParameters(sp_default_value, sp_lone_value)

    # Keywords
    for keyword in xml_element.findall("KEYWORD"):
        default_name = None
        aliases = []
        for name in keyword.findall("NAME"):
            if name.get("type") == "default":
                default_name = name.text
            else:
                aliases.append(name.text)

        default_keyword_value = _find_text(keyword, "DEFAULT_VALUE")
        default_unit_value = _find_text(keyword, "DEFAULT_UNIT")
        keyword_object = Keyword(default_name, default_keyword_value, default_unit_value)

        # The same object is shared by the default name and the aliases, so a
        # value set through any of the names is visible through all of them.
        if default_name is not None:
            section.keywords[default_name].append(keyword_object)
        for alias in aliases:
            section.keywords[alias].append(keyword_object)

    # Sections
    for sub_section_element in xml_element.findall("SECTION"):
        sub_section = recursive_tree_generation(sub_section_element)
        section.sections[sub_section.name].append(sub_section)

    # Return section
    return section
# Run main function by default
if __name__ == "__main__":
    # Generate the object tree from the input XML and store it as a pickle
    # file. Both files are handled with context managers so that they are
    # closed even if the parsing or the pickling fails.
    with open("../versions/cp2k262/input_data/cp2k_input.xml", 'r') as xml_file:
        object_tree = CP2KInput(generate_object_tree(xml_file))
    file_name = "../versions/cp2k262/input_data/cp2k_input_tree.pickle"
    with open(file_name, "wb") as fh:
        pickle.dump(object_tree, fh, protocol=2)
<HTML><BODY><HEAD><TITLE>The cp2k units list</TITLE>
<H1>CP2K Available Units of Measurement</H1>
If the default unit of a keyword is explicitly undefined, all possible units of measurement can be used to define a proper value.<BR><DL>
Possible units of measurement for Energies. The [energy] entry acts like a dummy flag (assumes the unit of measurement of energy is in internal units), useful for dimensional analysis.<BR><DL>
Possible units of measurement for Lengths. The [length] entry acts like a dummy flag (assumes the unit of measurement of length is in internal units), useful for dimensional analysis.<BR><DL>
Possible units of measurement for Temperature. The [temperature] entry acts like a dummy flag (assumes the unit of measurement of temperature is in internal units), useful for dimensional analysis.<BR><DL>
Possible units of measurement for Pressure. The [pressure] entry acts like a dummy flag (assumes the unit of measurement of pressure is in internal units), useful for dimensional analysis.<BR><DL>
Possible units of measurement for Angles. The [angle] entry acts like a dummy flag (assumes the unit of measurement of angle is in internal units), useful for dimensional analysis.<BR><DL>
Possible units of measurement for Time. The [time] entry acts like a dummy flag (assumes the unit of measurement of time is in internal units), useful for dimensional analysis.<BR><DL>
Possible units of measurement for Masses. The [mass] entry acts like a dummy flag (assumes the unit of measurement of mass is in internal units), useful for dimensional analysis.<BR><DL>
Possible units of measurement for potentials. The [potential] entry acts like a dummy flag (assumes the unit of measurement of potential is in internal units), useful for dimensional analysis.<BR><DL>
Possible units of measurement for forces. The [force] entry acts like a dummy flag (assumes the unit of measurement of force is in internal units), useful for dimensional analysis.<BR><DL>
import os
import logging
import cPickle as pickle
from cp2kparser.generic.baseclasses import BasicParser
from cp2kparser.generic.inputparsing import *
logger = logging.getLogger(__name__)
class CP2KInputParser(BasicParser):
"""Used to parse out a CP2K input file.
When given a filepath to a CP2K input file, this class attempts to parse
its contents.
def __init__(self, file_path, parser_context):
super(CP2KInputParser, self).__init__(file_path, parser_context)
self.root_section = None
self.input_tree = None
def parse(self):
# Gather the information from the input file
# Parse the used XC_functionals and their parameters
xc = self.input_tree.get_section("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL")
if xc is not None:
xc_list = []
class XCFunctional(object):
def __init__(self, name, weight=1, parameters=None): = name
self.weight = weight
self.parameters = parameters
# First see if a functional has been specified in the section parameter
section_parameter = xc.section_parameter.value
if section_parameter is not None:
if section_parameter == "BLYP":
elif section_parameter == "LDA" or section_parameter == "PADE":
elif section_parameter == "PBE":
elif section_parameter == "OLYP":
elif section_parameter == "HCTH120":
elif section_parameter == "PBE0":
elif section_parameter == "B3LYP":
logger.warning("Unknown XC functional given in XC_FUNCTIONAL section parameter.")
# Otherwise one has to look at the individual functional settings
# Sort the functionals alphabetically by name
xc_list.sort(key=lambda x:
xc_summary = ""
# For every defined functional, stream the information to the
# backend and construct the summary string
for i, functional in enumerate(xc_list):
gId = self.backend.openSection("section_XC_functionals")
self.backend.addValue("XC_functional_weight", functional.weight)
if functional.parameters is not None:
self.backend.closeSection("section_XC_functionals", gId)
if i != 0:
xc_summary += "+"
xc_summary += "{}*{}".format(functional.weight,
if functional.parameters is not None:
xc_summary += ":{}".format()
# Stream summary
if xc_summary is not "":
self.backend.addValue("XC_functional", xc_summary)
def fill_input_tree(self, file_path):
"""Parses a CP2K input file into an object tree.
Return an object tree representation of the input augmented with the
default values and lone keyword values from the cp2k_input.xml file
which is version specific. Keyword aliases are also mapped to the same
data.
The cp2k input is largely case-insensitive. In the input tree, we want
only one standard way to name things, so all section names and section
parameters will be transformed into upper case.
To query the returned tree use the following functions:
: A string containing the contents of a CP2K input file. The
input file can be stored as string as it isn't that big.