import os
import re
import logging
try:
    import cPickle as pickle  # Python 2: C-accelerated implementation
except ImportError:
    import pickle  # Python 3: the accelerated implementation is built in
import numpy as np
from nomadcore.baseclasses import BasicParser
from cp2kparser.generic.inputparsing import *
logger = logging.getLogger("nomad")


# Maps the shortcut names accepted as the XC_FUNCTIONAL section parameter to
# the names of the functionals that the shortcut stands for.
_XC_SHORTCUTS = {
    "BLYP": ("GGA_X_B88", "GGA_C_LYP"),
    "LDA": ("LDA_XC_TETER93",),
    "PADE": ("LDA_XC_TETER93",),
    "PBE": ("GGA_X_PBE", "GGA_C_PBE"),
    "OLYP": ("GGA_X_OPTX", "GGA_C_LYP"),
    "HCTH120": ("GGA_XC_HCTH_120",),
    "PBE0": ("HYB_GGA_XC_PBEH",),
    "B3LYP": ("HYB_GGA_XC_B3LYP",),
}

# Maps the values of FORCE_EVAL/STRESS_TENSOR to the strings that are pushed
# to the backend. Values that are not listed here (e.g. "NONE") are not
# reported at all.
_STRESS_TENSOR_METHODS = {
    "NUMERICAL": "Numerical",
    "ANALYTICAL": "Analytical",
    "DIAGONAL_ANALYTICAL": "Diagonal analytical",
    "DIAGONAL_NUMERICAL": "Diagonal numerical",
}


class _XCFunctional(object):
    """Simple record for one exchange-correlation functional."""

    def __init__(self, name, weight=1, parameters=None):
        self.name = name
        self.weight = weight
        self.parameters = parameters


#===============================================================================
class CP2KInputParser(BasicParser):
    """Used to parse out a CP2K input file.

    CP2K offers a complete structure for the input in an XML file, which can
    be printed with the command cp2k --xml. This XML file has been preparsed
    into a native python object ('CP2KInput' class found in
    generic.inputparsing) and stored in a python pickle file. It e.g. contains
    all the default values that are often needed as they are used if the user
    hasn't specified a setting in the input. This XML file is used to get the
    default values because it is rather cumbersome to hard code them in the
    parser itself, especially if there will be lots of them. Hard coded values
    will also be more error prone, and would have to be checked for each
    parser version.

    CP2K input supports including other input files and also supports
    variables. Both are resolved by preprocess_input() before the actual
    parsing: @INCLUDE directives found in the main input file are replaced by
    the contents of the referenced file (includes inside included files are
    not followed) and variables defined with @SET are substituted.
    """
    def __init__(self, file_path, parser_context):
        """
        Attributes:
            input_tree: The input structure for this version of CP2K. The
                structure is already present, in this module it will be filled
                with data found from the input file.
            input_lines: List of preprocessed lines in the input. Here all the
                variables have been stated explicitly and the additional input
                files have been merged.
            force_file_name: The value of FORCE_EVAL/PRINT/FORCES/FILENAME if
                it was found in the input, otherwise None.
        """
        super(CP2KInputParser, self).__init__(file_path, parser_context)
        self.input_tree = None
        self.input_lines = None
        self.force_file_name = None

    def parse(self):
        """Parses the input file and pushes the results to the backend.
        """
        #=======================================================================
        # Preprocess to spell out variables and to include stuff from other
        # files
        self.preprocess_input()

        #=======================================================================
        # Gather the information from the input file
        self.fill_input_tree(self.file_path)

        #=======================================================================
        # Parse everything in the input to cp2k specific metadata
        self.fill_metadata()

        #=======================================================================
        # Parse the pieces of the input that map to common metadata
        self._parse_xc_functionals()
        self._parse_periodicity()
        self._parse_force_file()
        self._parse_stress_tensor_method()

    def _parse_xc_functionals(self):
        """Pushes the used XC functionals and their summary to the backend.
        """
        xc = self.input_tree.get_section("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL")
        if xc is None:
            return

        # First see if a functional has been specified in the section
        # parameter.
        # TODO: If no section parameter is given, one has to look at the
        # individual functional settings. This is not done yet.
        xc_list = []
        section_parameter = xc.section_parameter.value
        if section_parameter is not None:
            names = _XC_SHORTCUTS.get(section_parameter)
            if names is None:
                logger.warning("Unknown XC functional given in XC_FUNCTIONAL section parameter.")
            else:
                xc_list.extend(_XCFunctional(name) for name in names)

        # Sort the functionals alphabetically by name
        xc_list.sort(key=lambda x: x.name)

        # For every defined functional, stream the information to the
        # backend and collect its part of the summary string
        summary_parts = []
        for functional in xc_list:
            gId = self.backend.openSection("section_XC_functionals")
            self.backend.addValue("XC_functional_name", functional.name)
            self.backend.addValue("XC_functional_weight", functional.weight)
            self.backend.closeSection("section_XC_functionals", gId)

            part = "{}*{}".format(functional.weight, functional.name)
            if functional.parameters is not None:
                part += ":{}".format(functional.parameters)
            summary_parts.append(part)

        # Stream summary
        xc_summary = "+".join(summary_parts)
        if xc_summary:
            self.backend.addValue("XC_functional", xc_summary)

    def _parse_periodicity(self):
        """Pushes the periodicity of the cell in x, y and z to the backend.
        """
        periodicity = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/CELL/PERIODIC")
        if periodicity is None:
            logger.warning("Could not determine cell periodicity from FORCE_EVAL/SUBSYS/CELL/PERIODIC")
            return

        # A direction is periodic if its letter is present in the value
        periodicity = periodicity.upper()
        periodicity_list = ("X" in periodicity, "Y" in periodicity, "Z" in periodicity)
        self.backend.addArrayValues("configuration_periodic_dimensions", np.asarray(periodicity_list))

    def _parse_force_file(self):
        """Registers the single point force file to the file service.
        """
        # The file name is not read from the input tree but has been stored
        # separately by fill_input_tree().
        force_file = self.force_file_name
        if force_file is not None and force_file != "__STD_OUT__":
            force_file_path = self.normalize_cp2k_path(force_file, "xyz")
            self.file_service.set_file_id(force_file_path, "force_file_single_point")

    def _parse_stress_tensor_method(self):
        """Pushes the stress tensor calculation method to the backend.
        """
        stress_tensor_method = self.input_tree.get_keyword("FORCE_EVAL/STRESS_TENSOR")
        stress_tensor_method = _STRESS_TENSOR_METHODS.get(stress_tensor_method)
        if stress_tensor_method is not None:
            self.backend.addValue("stress_tensor_method", stress_tensor_method)

    def normalize_cp2k_path(self, path, extension, name=""):
        """The paths in CP2K input can be given in many ways. This function
        tries to normalize these forms into a valid path.

        Args:
            path: The path as given in the CP2K input.
            extension: The file extension without the leading dot.
            name: Optional middle name. If given, it is added to the file
                name with a leading dash.

        Returns:
            The normalized path as a string.
        """
        if name:
            name = "-" + name
        project_name = self.input_tree.get_keyword("GLOBAL/PROJECT_NAME")
        if path.startswith("="):
            # A leading '=' means that the rest is used exactly as typed
            normalized_path = path[1:]
        # NOTE(review): the dot in the pattern is not escaped, so this branch
        # is taken for any path whose second character is '/', not only for
        # paths starting with './' - confirm that this is intended.
        elif re.match(r"./", path):
            normalized_path = "{}{}-1_0.{}".format(path, name, extension)
        else:
            normalized_path = "{}-{}{}-1_0.{}".format(project_name, path, name, extension)
        return normalized_path

    def fill_input_tree(self, file_path):
        """Fills the input tree with the contents of the preprocessed input.

        The version specific input tree, which already contains the default
        values from the cp2k_input.xml file, is first loaded with
        setup_version(). After that the lines in self.input_lines (created by
        preprocess_input()) are gone through one by one: the encountered
        sections are marked as accessed, and the section parameters and
        keywords are stored into the tree.

        The cp2k input is largely case-insensitive. In the input tree, we
        want only one standard way to name things, so all section names,
        section parameters and keyword names will be transformed into upper
        case.

        The value of FORCE_EVAL/PRINT/FORCES/FILENAME is additionally stored
        in self.force_file_name.

        Args:
            file_path: Not used. The contents are read from self.input_lines.
        """
        self.setup_version(self.parser_context.version_id)
        section_stack = []
        # Path of the currently open section. It has to be kept in sync with
        # the stack both when a section is opened and when one is closed.
        path = ""
        self.input_tree.root_section.accessed = True

        for line in self.input_lines:
            # Remove comments
            line = line.split('!', 1)[0].strip()

            # Skip empty lines
            if not line:
                continue
            upper_line = line.upper()

            # Section ends
            if upper_line.startswith('&END'):
                if section_stack:
                    section_stack.pop()
                else:
                    logger.warning("Found a section end without a matching section start in the CP2K input file. Ignoring it.")
                path = "/".join(section_stack)

            # Section starts
            elif line[0] == '&':
                parts = line.split(None, 1)
                name = parts[0][1:].upper()
                section_stack.append(name)
                path = "/".join(section_stack)

                # Mark the section as accessed.
                self.input_tree.set_section_accessed(path)

                # Save the section parameters
                if len(parts) > 1:
                    self.input_tree.set_parameter(path, parts[1].strip().upper())

            # Ignore variables and includes that might still be here for some
            # reason
            elif upper_line.startswith('@'):
                continue

            # Contents (keywords, default keywords)
            else:
                if not section_stack:
                    logger.warning("Ignoring the line '{}' that is outside of all sections in the CP2K input file.".format(line))
                    continue
                split = line.split(None, 1)
                keyword_name = split[0].upper()
                # A keyword may be given without a value.
                # NOTE(review): an empty string is passed on in that case -
                # confirm that set_keyword() handles it as intended.
                keyword_value = split[1] if len(split) > 1 else ""
                self.input_tree.set_keyword(path + "/" + keyword_name, keyword_value)

                # Here we store some exceptional print settings that are
                # important to the parsing. These don't exist in the input
                # tree because they take much space and are not really
                # important otherwise.
                if path == "FORCE_EVAL/PRINT/FORCES" and keyword_name == "FILENAME":
                    self.force_file_name = keyword_value

    def fill_metadata(self):
        """Goes through the input data and pushes everything to the
        backend.
        """
        name_stack = []
        self.fill_metadata_recursively(self.input_tree.root_section, name_stack)

    def fill_metadata_recursively(self, section, name_stack):
        """Recursively goes through the input sections and pushes everything
        to the backend.

        Sections that have not been marked as accessed are skipped together
        with all of their subsections.

        Args:
            section: The section to push to the backend.
            name_stack: List of the names of the parent sections. The name of
                the given section is added to it for the duration of the
                call.
        """
        if not section.accessed:
            return

        name_stack.append(section.name)
        path = "x_cp2k_{}".format(".".join(name_stack))
        gid = self.backend.openSection(path)

        # Keywords
        for default_name in section.default_keyword_names:
            keywords = section.keywords.get(default_name)
            for keyword in keywords:
                if keyword.value is not None:
                    name = "{}.{}".format(path, keyword.default_name)
                    formatted_value = keyword.get_formatted_value()
                    self.add_formatted_value_to_backend(name, formatted_value)

        # Section parameter
        section_parameter = section.section_parameter
        if section_parameter is not None:
            name = "{}.SECTION_PARAMETERS".format(path)
            formatted_value = section_parameter.get_formatted_value()
            self.add_formatted_value_to_backend(name, formatted_value)

        # Default keyword
        default_keyword = section.default_keyword
        if default_keyword is not None:
            name = "{}.DEFAULT_KEYWORD".format(path)
            formatted_value = default_keyword.get_formatted_value()
            self.add_formatted_value_to_backend(name, formatted_value)

        # Subsections
        for name, subsections in section.sections.items():
            for subsection in subsections:
                self.fill_metadata_recursively(subsection, name_stack)

        self.backend.closeSection(path, gid)
        name_stack.pop()

    def add_formatted_value_to_backend(self, name, formatted_value):
        """Pushes the given value to the backend with the given name. Numpy
        arrays are added as array values and everything else as plain values.
        Nothing is done if the value is None.
        """
        if formatted_value is not None:
            if isinstance(formatted_value, np.ndarray):
                self.backend.addArrayValues(name, formatted_value)
            else:
                self.backend.addValue(name, formatted_value)

    def setup_version(self, version_number):
        """Loads the preparsed input structure into self.input_tree.

        The pickle file which contains preparsed data from the cp2k_input.xml
        is meant to be version specific.

        NOTE(review): the version number is currently not used, the same
        pickle file is loaded for every version - confirm if version specific
        files should be looked up here.

        Args:
            version_number: The version of CP2K. Currently not used.
        """
        pickle_path = os.path.join(os.path.dirname(__file__), "input_data", "cp2k_input_tree.pickle")
        # The pickle file is a data file located next to this module. Pickle
        # must never be used to load files from untrusted sources.
        with open(pickle_path, 'rb') as input_tree_pickle_file:
            self.input_tree = pickle.load(input_tree_pickle_file)

    def _read_stripped_lines(self, file_path):
        """Returns the lines of the given file without the surrounding
        whitespace.
        """
        with open(file_path, "r") as f:
            return [line.strip() for line in f]

    def preprocess_input(self):
        """Preprocess the input file. Concatenate .inc files into the main
        input file and explicitly state all variables.

        The result is stored as a list of lines in self.input_lines.
        """
        # Read the input file into memory. It shouldn't be that big so we can
        # do this easily
        input_lines = self._read_stripped_lines(self.file_path)

        # Merge include files to input. The merged input is built into a new
        # list so that no index bookkeeping is needed when an include
        # directive is replaced by multiple lines.
        basedir = os.path.dirname(self.file_path)
        extended_input = []
        for line in input_lines:
            if not line.startswith(("@INCLUDE", "@include")):
                extended_input.append(line)
                continue
            split = line.split(None, 1)
            if len(split) < 2:
                logger.warning("Found an include directive without a file path in the CP2K input file. Ignoring it.")
                continue
            filepath = os.path.abspath(os.path.join(basedir, split[1]))
            if not os.path.isfile(filepath):
                logger.warning("Could not find the include file '{}' stated in the CP2K input file. Continuing without it.".format(filepath))
                continue

            # Replace the directive with the content of the include file
            extended_input.extend(self._read_stripped_lines(filepath))

        # Gather the variable definitions
        variables = {}
        input_set_removed = []
        for line in extended_input:
            if line.startswith(("@SET", "@set")):
                components = line.split(None, 2)
                if len(components) < 3:
                    logger.warning("Found an incomplete variable definition '{}' in the CP2K input file. Ignoring it.".format(line))
                    continue
                name = components[1]
                value = components[2]
                variables[name] = value
                logger.debug("Variable '{}' found with value '{}'".format(name, value))
            else:
                input_set_removed.append(line)

        # Place the variables. CP2K refers to variables with ${NAME} or
        # $NAME. The forms @{NAME} and @NAME are also accepted, because they
        # have been accepted by this parser before.
        variable_pattern = r"[@$]\{(\w+)\}|[@$](\w+)"
        compiled = re.compile(variable_pattern)
        # Preprocessor directives that must not be mistaken for variables.
        # The directives are case-insensitive.
        reserved = ("include", "set", "if", "endif")

        def substitute(match):
            """Returns the value of the matched variable, or the matched text
            itself if it is a directive or the variable has not been set.
            """
            name = match.group(1) or match.group(2)
            if name.lower() in reserved:
                return match.group(0)
            value = variables.get(name)
            if value is None:
                logger.error("Value for variable '{}' not set.".format(name))
                return match.group(0)
            return value

        self.input_lines = [compiled.sub(substitute, line) for line in input_set_removed]