#! /usr/bin/env python
# -*- coding: utf-8 -*-
from collections import defaultdict
from cp2kparser.generics.nomadlogging import *


#===============================================================================
class CP2KInputEngine(object):
    """Used to parse out a CP2K input file.

    Given access to the contents of a CP2K input file, this class attempts to
    parse out its structure into an accessible object tree. Because the input
    file has such a clearly defined structure (unlike the output file of
    CP2K), it is better to use a dedicated parser instead of regular
    expressions.
    """

    def __init__(self, cp2k_parser):
        """
        Args:
            cp2k_parser: Instance of a CP2KParser or its subclass. Allows
                access to e.g. unified file reading methods.
        """
        self.cp2k_parser = cp2k_parser
        # Populated by parse_input(); None until then.
        self.root_section = None

    def parse_input(self):
        """Parses the CP2K input and stores the result in self.root_section.

        The contents are read through
        self.cp2k_parser.get_file_contents("input"). Everything after a '!' on
        a line is treated as a comment and dropped. Lines starting with '&'
        open a section (the rest of the line becomes the section parameter),
        lines starting with '&END' close the current section, and every other
        non-empty line is stored as a keyword: the first word is the keyword
        name and the remainder of the line is its value (an empty string when
        the keyword has no value).
        """
        # The input file should be quite small, so just get the entire contents
        inp = self.cp2k_parser.get_file_contents("input")
        root_section = InputSection('CP2K_INPUT')
        section_stack = [root_section]

        for line in inp.split('\n'):
            line = line.split('!', 1)[0].strip()
            if not line:
                continue

            if line.upper().startswith('&END'):
                # Never pop the root section: an unmatched '&END' would
                # otherwise leave the stack empty and break the lines after it.
                if len(section_stack) > 1:
                    section_stack.pop()
                else:
                    print_warning("Unmatched '&END' found in the input file.")
            elif line[0] == '&':
                # Split on any whitespace run so that tab-separated section
                # parameters are recognized as well.
                parts = line.split(None, 1)
                name = parts[0][1:]
                params = parts[1] if len(parts) > 1 else None
                section = InputSection(name=name, params=params)
                section_stack[-1].subsections[name.upper()].append(section)
                section_stack.append(section)
            else:
                # Keywords without a value (flags) are stored with an empty
                # string instead of raising an IndexError.
                parts = line.split(None, 1)
                keyword_name = parts[0]
                keyword_value = parts[1] if len(parts) > 1 else ''
                section_stack[-1].keywords[keyword_name].append(keyword_value)

        self.root_section = root_section

    def get_subsection(self, path, index=0):
        """Returns the subsection found at the given slash-separated path.

        Delegates to InputSection.get_subsection() of the root section.

        Args:
            path: String indicating the path to the subsection, e.g.
                "FORCE_EVAL/PRINT/FORCES".
            index: Index used for the first path component if it is repeated.

        Returns:
            The InputSection object if found, otherwise None.
        """
        return self.root_section.get_subsection(path, index)

    def get_keyword(self, path, index=0):
        """Returns the value of the keyword found at the given path.

        Args:
            path: Slash-separated path where the last component is the keyword
                name and the preceding components are the enclosing sections.
                A path without a slash refers to a keyword located directly in
                the root section.
            index: Forwarded to the section lookup (see get_subsection). The
                first occurrence of the keyword within the section is returned.

        Returns:
            The keyword value, or None if the section or keyword is not found.
        """
        section_path, separator, keyword = path.rpartition('/')
        if separator:
            section = self.root_section.get_subsection(section_path, index)
        else:
            section = self.root_section
        if section is not None:
            return section.get_keyword(keyword)
        return None

    def get_parameter(self, path, index=0):
        """Returns the parameter of the section found at the given path.

        Args:
            path: Slash-separated path to the section.
            index: Forwarded to the section lookup (see get_subsection).

        Returns:
            The section parameter, or None if the section is not found or has
            no parameter.
        """
        section = self.root_section.get_subsection(path, index)
        if section is not None:
            return section.get_parameter()
        return None


#===============================================================================
class InputSection(object):
    """Represents a section in a CP2K input file"""

    def __init__(self, name, params=None):
        """
        Args:
            name: Name of the section. Stored in upper case.
            params: Optional section parameter, i.e. the text following the
                section name on the line that opens the section.
        """
        self.name = name.upper()
        self.params = params
        # Keyword name -> list of values, one entry per occurrence.
        self.keywords = defaultdict(list)
        # Upper-cased subsection name -> list of InputSection objects.
        self.subsections = defaultdict(list)

    def write(self):
        """Outputs the contents of this section as a list of lines.

        The enclosing '&NAME' / '&END NAME' lines of this section itself are
        not included; they are emitted by the parent when it writes its
        subsections.

        Returns:
            List of strings. Keywords are written as 'NAME VALUE', nested
            section contents are indented by one space.
        """
        output = []
        # items() instead of iteritems() so that this runs on Python 2 and 3.
        for name, k_list in self.keywords.items():
            for value in k_list:
                # Include the keyword name; the value alone is not a valid
                # input line.
                if value:
                    output.append('%s %s' % (name, value))
                else:
                    output.append(name)
        for name, s_list in self.subsections.items():
            for s in s_list:
                if s.params:
                    output.append('&%s %s' % (s.name, s.params))
                else:
                    output.append('&%s' % s.name)
                for l in s.write():
                    output.append(' %s' % l)
                output.append('&END %s' % s.name)
        return output

    def get_subsection(self, path, index=0):
        """Finds a subsection specified by a string where subsections are
        separated by a slash.

        The lookup is case-insensitive because the path is upper-cased before
        use. If the first path component matches several subsections, the one
        specified by the given index (default 0) is used. The remaining path
        components are always resolved with the first occurrence.

        Example:
            get_subsection("FORCE_EVAL/PRINT/FORCES")

        Args:
            path: String indicating the path to the subsection
            index: In case of repeating subsections, return the one specified
                by this index.

        Returns:
            The InputSection object if found, otherwise None (also when the
            index is out of range).
        """
        parts = path.upper().split('/', 1)
        # .get() avoids creating an empty entry in the defaultdict.
        candidates = self.subsections.get(parts[0])
        if not candidates:
            print_debug("Subsection '{}' not found.".format(parts[0]))
            return None
        elif len(candidates) > 1:
            print_warning("Multiple subsections with the same name found with name '{}' If no index is given, the first occurence in the input file is returned.".format(parts[0]))

        try:
            subsection = candidates[index]
        except IndexError:
            print_error("Invalid subsection index given.")
            return None

        if len(parts) == 1:
            return subsection
        return subsection.get_subsection(parts[1])

    def get_keyword(self, keyword, index=0):
        """Finds a keyword specified by a string.

        If multiple keywords are found with the same name, the one specified
        by the given index (default 0) is returned. The name is matched
        exactly as it was written in the input file.

        Args:
            keyword: String indicating the name of the keyword. The name is
                the first word in the line.
            index: In case of repeating keywords, return the one specified by
                this index.

        Returns:
            The keyword value (everything else than the first word on the
            line), or None if the keyword is not found or the index is out of
            range.
        """
        # .get() avoids creating an empty entry in the defaultdict.
        candidates = self.keywords.get(keyword)
        if not candidates:
            print_debug("No keywords with name '{}' found in subsection '{}'".format(keyword, self.name))
            return None
        elif len(candidates) > 1:
            print_warning("Multiple keywords with the same name found with name '{}' If no index is given, the first occurence in the input file is returned.".format(keyword))

        try:
            return candidates[index]
        except IndexError:
            print_error("Invalid keyword index given.")
            return None

    def get_parameter(self):
        """Returns the section parameter, or None if no parameter was set."""
        if self.params is None:
            print_debug("The section '{}' has no parameters set".format(self.name))
        return self.params