Commit 7bccc7dc authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Started implementing the reading of various atomic coordinate files.

parent a8950983
...@@ -50,10 +50,12 @@ the performance of an engine but if the function calls remain the same no other ...@@ -50,10 +50,12 @@ the performance of an engine but if the function calls remain the same no other
code has to be changed. code has to be changed.
Currently implemented engines that could be reused (not tested properly yet): Currently implemented engines that could be reused (not tested properly yet):
- AtomsEngine: For reading various atomic coordinate files. Currently uses ASE
to read the files.
- RegexEngine: For parsing text files with regular expressions. Uses the re2 - RegexEngine: For parsing text files with regular expressions. Uses the re2
library if available (falls back to default python regex implementation if library if available (falls back to default python regex implementation if
re2 not found). re2 not found).
- XYZEngine: For parsing XYZ files and files with similar structure. Has a very - CSVEngine: For parsing CSV-like files. Has a very
flexible nature as you can specify comments, column delimiters, column flexible nature as you can specify comments, column delimiters, column
indices and the patterns used to separate different configurations. indices and the patterns used to separate different configurations.
- XMLEngine: For parsing XML files using XPath syntax. - XMLEngine: For parsing XML files using XPath syntax.
......
#! /usr/bin/env python
import os import os
import logging import logging
from cp2kparser.implementation.autoparser import get_parser from cp2kparser.implementation.autoparser import get_parser
......
import ase.io
#===============================================================================
class AtomsEngine(object):
"""Used to parse various different atomic coordinate files.
Initially use ASE for all file types, if needed add new types or make
own implementations.
"""
def parse_atoms(self, contents, index=None, format=None):
atoms = ase.io.read(contents, index=index, format=format)
return atoms
def parse_number(self, contents, format=None):
atoms = ase.io.read(contents, index=0, format=format)
n_atoms = atoms.get_number_of_atoms()
return n_atoms
def parse_coordinates(self, contents, index, format=None):
atoms = ase.io.read(contents, index=index, format=format)
coordinates = atoms.get_positions()
return coordinates
...@@ -43,11 +43,15 @@ class CP2KInputEngine(object): ...@@ -43,11 +43,15 @@ class CP2KInputEngine(object):
for line in inp.split('\n'): for line in inp.split('\n'):
line = line.split('!', 1)[0].strip() line = line.split('!', 1)[0].strip()
# Skip empty lines
if len(line) == 0: if len(line) == 0:
continue continue
# Section ends
if line.upper().startswith('&END'): if line.upper().startswith('&END'):
section_stack.pop() section_stack.pop()
# Section starts
elif line[0] == '&': elif line[0] == '&':
parts = line.split(' ', 1) parts = line.split(' ', 1)
name = parts[0][1:] name = parts[0][1:]
...@@ -59,11 +63,11 @@ class CP2KInputEngine(object): ...@@ -59,11 +63,11 @@ class CP2KInputEngine(object):
if index != 0: if index != 0:
path += '/' path += '/'
path += item path += item
# print path
# Save the section parameters # Save the section parameters
if len(parts) > 1: if len(parts) > 1:
self.input_tree.set_parameter(path, parts[1].strip()) self.input_tree.set_parameter(path, parts[1].strip())
# Contents (keywords, default keywords)
else: else:
split = line.split(' ', 1) split = line.split(' ', 1)
keyword_name = split[0] keyword_name = split[0]
......
...@@ -59,12 +59,19 @@ class Section(object): ...@@ -59,12 +59,19 @@ class Section(object):
return None return None
def get_keyword(self, path): def get_keyword(self, path):
"""Returns the keyword that is specified by the given path.
If the keyword has no value set, returns the default value defined in
the XML.
"""
keyword = self.get_keyword_object(path) keyword = self.get_keyword_object(path)
if keyword: if keyword:
return keyword.value if keyword.value is not None:
return keyword.value
else:
return keyword.default_value
def get_default_keyword(self, path): def get_default_keyword(self, path):
return self.get_section(path) return self.get_section(path).default_keyword
def set_keyword(self, path, value): def set_keyword(self, path, value):
keyword = self.get_keyword_object(path) keyword = self.get_keyword_object(path)
...@@ -76,7 +83,7 @@ class Section(object): ...@@ -76,7 +83,7 @@ class Section(object):
keyword = split_path[1] keyword = split_path[1]
section_path = split_path[0] section_path = split_path[0]
section = self.get_section(section_path) section = self.get_section(section_path)
section.default_keyword += '\n' + keyword section.default_keyword += keyword + " " + value + "\n"
def get_keyword_default(self, path): def get_keyword_default(self, path):
keyword = self.get_keyword_object(path) keyword = self.get_keyword_object(path)
......
# from cp2kparser.generics.nomadlogging import *
import numpy as np import numpy as np
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -18,8 +17,8 @@ else: ...@@ -18,8 +17,8 @@ else:
#=============================================================================== #===============================================================================
class XYZEngine(object): class CSVEngine(object):
"""Used to parse out XYZ content and other content with similar structure. """Used to parse out freeform CSV-like content.
Currently only can parse floating point information. Currently only can parse floating point information.
Reads the given file or string line by line, ignoring commented sections. Reads the given file or string line by line, ignoring commented sections.
......
...@@ -31,25 +31,26 @@ class LogFormatter(logging.Formatter): ...@@ -31,25 +31,26 @@ class LogFormatter(logging.Formatter):
if level == "INFO" or level == "DEBUG": if level == "INFO" or level == "DEBUG":
return make_titled_message("{}:{}".format(level, module), message) return make_titled_message("{}:{}".format(level, module), message)
else: else:
return "\n " + make_title("WARNING", width=64) + "\n" + make_message(message, width=64, spaces=8) + "\n" return "\n " + make_title(level, width=64) + "\n" + make_message(message, width=64, spaces=8) + "\n"
#=============================================================================== #===============================================================================
def make_titled_message(title, message, width=80, spaces=0): def make_titled_message(title, message, width=80):
"""Styles a message to be printed into console. """Styles a message to be printed into console.
""" """
wrapper = textwrap.TextWrapper(width=width-6) wrapper = textwrap.TextWrapper(width=width-5)
lines = wrapper.wrap(message) lines = wrapper.wrap(message)
styled_message = "" styled_message = ""
first = True first = True
for line in lines: for line in lines:
if first: if first:
new_line = spaces*" " + " >> {}: ".format(title) + line + (width-6-len(line))*" " + " " new_line = " >> {}: ".format(title) + line
styled_message += new_line styled_message += new_line
first = False first = False
else: else:
new_line = spaces*" " + " " + line + (width-6-len(line))*" " + " " new_line = 5*" " + line
styled_message += "\n" + new_line styled_message += "\n" + new_line
return styled_message return styled_message
......
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import json import json
import os import os
import time import time
...@@ -7,6 +5,7 @@ from abc import ABCMeta, abstractmethod ...@@ -7,6 +5,7 @@ from abc import ABCMeta, abstractmethod
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
from cp2kparser import ureg from cp2kparser import ureg
from enum import Enum
#=============================================================================== #===============================================================================
...@@ -111,8 +110,8 @@ class NomadParser(object): ...@@ -111,8 +110,8 @@ class NomadParser(object):
it to SI units and return the value as json. it to SI units and return the value as json.
""" """
# Start timing # Start timing
logger.debug(74*'-') logger.info("===========================================================================")
logger.info("Getting quantity '{}'".format(name)) logger.info("GETTING QUANTITY '{}'".format(name))
start = time.clock() start = time.clock()
#Check availability #Check availability
...@@ -140,7 +139,8 @@ class NomadParser(object): ...@@ -140,7 +139,8 @@ class NomadParser(object):
stop = time.clock() stop = time.clock()
logger.info("Elapsed time: {} ms".format((stop-start)*1000)) logger.info("Elapsed time: {} ms".format((stop-start)*1000))
# logger.info("Result: {}".format(result)) logger.info("Result: {}".format(result))
logger.info("")
return result return result
...@@ -175,15 +175,15 @@ class NomadParser(object): ...@@ -175,15 +175,15 @@ class NomadParser(object):
Supports single floating point numbers, arbitrary dimensional numpy Supports single floating point numbers, arbitrary dimensional numpy
arrays and regular lists/tuples of floating point numbers. arrays and regular lists/tuples of floating point numbers.
""" """
kind = result.kind return_type = result.return_type
value = result.value * result.unit value = result.value * result.unit
# Energy to joule # Energy to joule
if kind == Result.energy: if return_type == Result.energy:
converted = value.to(ureg.joule) converted = value.to(ureg.joule)
# Force to newton # Force to newton
if kind == Result.force: if return_type == Result.force:
converted = value.to(ureg.newton) converted = value.to(ureg.newton)
return converted return converted
...@@ -226,14 +226,31 @@ class NomadParser(object): ...@@ -226,14 +226,31 @@ class NomadParser(object):
#=============================================================================== #===============================================================================
class Result(object): class ResultCode(Enum):
""" Encapsulates a parsing result. """Enumeration for indicating the result status.
"""
fail = 0
success = 1
#===============================================================================
class ResultKind(Enum):
"""Enumeration for indicating the type of returned value.
""" """
energy = "energy" energy = "energy"
force = "force" force = "force"
text = "text" text = "text"
number = "number"
#===============================================================================
class Result(object):
""" Encapsulates a parsing result.
"""
def __init__(self, value=None, kind=None, unit=None): def __init__(self, value=None, kind=None, unit=None, code=ResultCode.success):
self.value = value self.value = value
self.unit = unit self.unit = unit
self.kind = kind self.kind = kind
self.code = code
self.error_message = ""
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os import os
import json import json
from cp2kparser.implementation.parser import CP2KParser from cp2kparser.implementation.parser import CP2KParser
......
import os import os
import re import re
from cp2kparser.generics.nomadparser import NomadParser, Result from cp2kparser.generics.nomadparser import NomadParser, Result, ResultKind, ResultCode
from cp2kparser.implementation.regexs import * from cp2kparser.implementation.regexs import *
from cp2kparser.engines.regexengine import RegexEngine from cp2kparser.engines.regexengine import RegexEngine
from cp2kparser.engines.xyzengine import XYZEngine from cp2kparser.engines.csvengine import CSVEngine
from cp2kparser.engines.cp2kinputengine import CP2KInputEngine from cp2kparser.engines.cp2kinputengine import CP2KInputEngine
from cp2kparser.engines.xmlengine import XMLEngine from cp2kparser.engines.xmlengine import XMLEngine
from cp2kparser.engines.atomsengine import AtomsEngine
import numpy as np import numpy as np
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -27,10 +28,11 @@ class CP2KParser(NomadParser): ...@@ -27,10 +28,11 @@ class CP2KParser(NomadParser):
self.version_number = None self.version_number = None
# Engines are created here # Engines are created here
self.xyzengine = XYZEngine(self) self.csvengine = CSVEngine(self)
self.regexengine = RegexEngine(self) self.regexengine = RegexEngine(self)
self.xmlengine = XMLEngine(self) self.xmlengine = XMLEngine(self)
self.inputengine = CP2KInputEngine() self.inputengine = CP2KInputEngine()
self.atomsengine = AtomsEngine()
self.input_tree = None self.input_tree = None
self.regexs = None self.regexs = None
...@@ -111,7 +113,7 @@ class CP2KParser(NomadParser): ...@@ -111,7 +113,7 @@ class CP2KParser(NomadParser):
def determine_file_ids(self): def determine_file_ids(self):
"""Inherited from NomadParser. """Inherited from NomadParser.
""" """
# Check from input what the other files are called # Determine the presence of force file
force_path = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME") force_path = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME")
project_name = self.input_tree.get_keyword("GLOBAL/PROJECT_NAME") project_name = self.input_tree.get_keyword("GLOBAL/PROJECT_NAME")
if force_path is not None and force_path != "__STD_OUT__": if force_path is not None and force_path != "__STD_OUT__":
...@@ -128,23 +130,70 @@ class CP2KParser(NomadParser): ...@@ -128,23 +130,70 @@ class CP2KParser(NomadParser):
force_path = "{}-{}-1_0.xyz".format(project_name, force_path) force_path = "{}-{}-1_0.xyz".format(project_name, force_path)
force_path = os.path.basename(force_path) force_path = os.path.basename(force_path)
# Check against the given files # Check against the given files
file_path = self.search_file(force_path)
self.file_ids["forces"] = file_path
self.get_file_handle("forces")
# Determine the presence of an initial coordinate file
init_coord_file = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_NAME")
if init_coord_file is not None:
# Check against the given files
file_path = self.search_file(init_coord_file)
self.file_ids["initial_coordinates"] = file_path
self.get_file_handle("initial_coordinates")
# for file_path in self.resolvable:
# tail = os.path.basename(file_path)
# if force_path is not None and tail == force_path:
# self.file_ids["forces"] = file_path
# self.get_file_handle("forces")
def search_file(self, path):
"""Searches the list of given files for a file that is defined in the
CP2K input file.
First compares the basenames, and if multiple matches found descends
the path until only one or zero matches found.
"""
matches = {}
for file_path in self.resolvable: for file_path in self.resolvable:
tail = os.path.basename(file_path) available_parts = self.split_path(file_path)
if force_path is not None and tail == force_path: searched_parts = self.split_path(path)
self.file_ids["forces"] = file_path for i_part, part in enumerate(searched_parts):
self.get_file_handle("forces") if part == available_parts[i_part]:
matches[file_path] = i_part
# def open_files(self):
# """Open the file handles and keep them open until program finishes. n = len(matches)
# """ if n == 1:
# for file_id, file_path in self.file_ids.iteritems(): return matches.keys()[0]
# try: elif n == 0:
# file_handle = open(file_path, 'r') logger.error("Could not find file '{}' specified in CP2K input.".format(path))
# except (OSError, IOError): return
# logger.error("Could not open file: '{}'".format(file_path)) else:
# else: sorted_list = [(k, v) in sorted(mydict.items(), key=lambda (k, v): v[1])]
# self.file_handles[file_id] = file_handle # sorted_list = sorted(mathes, key=lambda k: matches[k])
if (sorted_list[0][1] == sorted_list[1][1]):
logger.error("When searching for file '{}', multiple matches were found. Could not determine which file to use based on their path.")
else:
return sorted_list[0][0]
def split_path(self, path):
folders = []
while 1:
path, folder = os.path.split(path)
if folder != "":
folders.append(folder)
else:
if path != "":
folders.append(path)
break
# folders.reverse()
return folders
def get_quantity_unformatted(self, name): def get_quantity_unformatted(self, name):
"""Inherited from NomadParser. The timing and caching is already """Inherited from NomadParser. The timing and caching is already
...@@ -180,13 +229,14 @@ class CP2KImplementation(object): ...@@ -180,13 +229,14 @@ class CP2KImplementation(object):
self.parser = parser self.parser = parser
self.regexs = parser.regexs self.regexs = parser.regexs
self.regexengine = parser.regexengine self.regexengine = parser.regexengine
self.xyzengine = parser.xyzengine self.csvengine = parser.csvengine
self.atomsengine = parser.atomsengine
self.input_tree = parser.input_tree self.input_tree = parser.input_tree
def _Q_energy_total(self): def _Q_energy_total(self):
"""Return the total energy from the bottom of the input file""" """Return the total energy from the bottom of the input file"""
result = Result() result = Result()
result.kind = Result.energy result.return_type = ResultKind.energy
result.unit = ureg.hartree result.unit = ureg.hartree
result.value = float(self.regexengine.parse(self.regexs.energy_total, self.parser.get_file_handle("output"))) result.value = float(self.regexengine.parse(self.regexs.energy_total, self.parser.get_file_handle("output")))
return result return result
...@@ -202,7 +252,7 @@ class CP2KImplementation(object): ...@@ -202,7 +252,7 @@ class CP2KImplementation(object):
belong to the list defined in NoMaD wiki. belong to the list defined in NoMaD wiki.
""" """
result = Result() result = Result()
result.kind = Result.text result.return_type = ResultKind.text
# First try to look at the shortcut # First try to look at the shortcut
xc_shortcut = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL") xc_shortcut = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL")
...@@ -266,7 +316,7 @@ class CP2KImplementation(object): ...@@ -266,7 +316,7 @@ class CP2KImplementation(object):
Supports forces printed in the output file or in a single .xyz file. Supports forces printed in the output file or in a single .xyz file.
""" """
result = Result() result = Result()
result.kind = Result.force result.return_type = ResultKind.force
result.unit = ureg.force_au result.unit = ureg.force_au
# Determine if a separate force file is used or are the forces printed # Determine if a separate force file is used or are the forces printed
...@@ -278,18 +328,20 @@ class CP2KImplementation(object): ...@@ -278,18 +328,20 @@ class CP2KImplementation(object):
# Look for the forces either in output or in separate file # Look for the forces either in output or in separate file
if not separate_file: if not separate_file:
logger.debug("Looking for forcesnature in output file.") logger.debug("Looking for forces in output file.")
forces = self.regexengine.parse(self.regexs.particle_forces, self.parser.get_file_handle("output")) forces = self.regexengine.parse(self.regexs.particle_forces, self.parser.get_file_handle("output"))
if forces is None: if forces is None:
logger.warning("No forces could be found in the output file.") msg = "No force configurations were found when searching the output file."
result.value = forces logger.warning(msg)
result.error_message = msg
result.code = ResultCode.fail
return result return result
# Insert force configuration into the array # Insert force configuration into the array
i_conf = 0 i_conf = 0
force_array = None force_array = None
for force_conf in forces: for force_conf in forces:
i_force_array = self.xyzengine.parse(force_conf, columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=None) i_force_array = self.csvengine.parse(force_conf, columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=None)
i_force_array = i_force_array[0] i_force_array = i_force_array[0]
# Initialize the numpy array if not done yet # Initialize the numpy array if not done yet
...@@ -305,11 +357,51 @@ class CP2KImplementation(object): ...@@ -305,11 +357,51 @@ class CP2KImplementation(object):
return result return result
else: else:
logger.debug("Looking for forces in separate force file.") logger.debug("Looking for forces in separate force file.")
forces = self.xyzengine.parse(self.parser.get_file_handle("forces"), columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=r"\ ATOMIC FORCES in \[a\.u\.\]") forces = self.csvengine.parse(self.parser.get_file_handle("forces"), columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=r"\ ATOMIC FORCES in \[a\.u\.\]")
if forces is None: if forces is None:
logger.warning("No forces could be found in the XYZ file.") msg = "No force configurations were found when searching an external XYZ force file."
logger.warning(msg)
result.error_message = msg
result.code = ResultCode.fail
return result
else:
result.value = forces
return result
def _Q_particle_number(self):
"""Return the number of particles in the system.
CP2K output doesn't automatically print the number of atoms. For this
reason this function has to look at the initial configuration and
calculate the number from it. The initial configuration is something
that must be present for all calculations.
"""
result = Result()