Commit 7bccc7dc authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Started implementing the reading of various atomic coordinate files.

parent a8950983
......@@ -50,10 +50,12 @@ the performance of an engine but if the function calls remain the same no other
code has to be changed.
Currently implemented engines that could be reused (not tested properly yet):
- AtomsEngine: For reading various atomic coordinate files. Currently uses ASE
to read the files.
- RegexEngine: For parsing text files with regular expressions. Uses the re2
library if available (falls back to default python regex implementation if
re2 not found).
- XYZEngine: For parsing XYZ files and files with similar structure. Has a very
- CSVEngine: For parsing CSV-like files. Has a very
flexible nature as you can specify comments, column delimiters, column
indices and the patterns used to separate different configurations.
- XMLEngine: For parsing XML files using XPath syntax.
......
#! /usr/bin/env python
import os
import logging
from cp2kparser.implementation.autoparser import get_parser
......
import ase.io
#===============================================================================
class AtomsEngine(object):
"""Used to parse various different atomic coordinate files.
Initially use ASE for all file types, if needed add new types or make
own implementations.
"""
def parse_atoms(self, contents, index=None, format=None):
atoms = ase.io.read(contents, index=index, format=format)
return atoms
def parse_number(self, contents, format=None):
atoms = ase.io.read(contents, index=0, format=format)
n_atoms = atoms.get_number_of_atoms()
return n_atoms
def parse_coordinates(self, contents, index, format=None):
atoms = ase.io.read(contents, index=index, format=format)
coordinates = atoms.get_positions()
return coordinates
......@@ -43,11 +43,15 @@ class CP2KInputEngine(object):
for line in inp.split('\n'):
line = line.split('!', 1)[0].strip()
# Skip empty lines
if len(line) == 0:
continue
# Section ends
if line.upper().startswith('&END'):
section_stack.pop()
# Section starts
elif line[0] == '&':
parts = line.split(' ', 1)
name = parts[0][1:]
......@@ -59,11 +63,11 @@ class CP2KInputEngine(object):
if index != 0:
path += '/'
path += item
# print path
# Save the section parameters
if len(parts) > 1:
self.input_tree.set_parameter(path, parts[1].strip())
# Contents (keywords, default keywords)
else:
split = line.split(' ', 1)
keyword_name = split[0]
......
......@@ -59,12 +59,19 @@ class Section(object):
return None
def get_keyword(self, path):
"""Returns the keyword that is specified by the given path.
If the keyword has no value set, returns the default value defined in
the XML.
"""
keyword = self.get_keyword_object(path)
if keyword:
return keyword.value
if keyword.value is not None:
return keyword.value
else:
return keyword.default_value
def get_default_keyword(self, path):
return self.get_section(path)
return self.get_section(path).default_keyword
def set_keyword(self, path, value):
keyword = self.get_keyword_object(path)
......@@ -76,7 +83,7 @@ class Section(object):
keyword = split_path[1]
section_path = split_path[0]
section = self.get_section(section_path)
section.default_keyword += '\n' + keyword
section.default_keyword += keyword + " " + value + "\n"
def get_keyword_default(self, path):
keyword = self.get_keyword_object(path)
......
# from cp2kparser.generics.nomadlogging import *
import numpy as np
import logging
logger = logging.getLogger(__name__)
......@@ -18,8 +17,8 @@ else:
#===============================================================================
class XYZEngine(object):
"""Used to parse out XYZ content and other content with similar structure.
class CSVEngine(object):
"""Used to parse out freeform CSV-like content.
Currently only can parse floating point information.
Reads the given file or string line by line, ignoring commented sections.
......
......@@ -31,25 +31,26 @@ class LogFormatter(logging.Formatter):
if level == "INFO" or level == "DEBUG":
return make_titled_message("{}:{}".format(level, module), message)
else:
return "\n " + make_title("WARNING", width=64) + "\n" + make_message(message, width=64, spaces=8) + "\n"
return "\n " + make_title(level, width=64) + "\n" + make_message(message, width=64, spaces=8) + "\n"
#===============================================================================
def make_titled_message(title, message, width=80, spaces=0):
def make_titled_message(title, message, width=80):
"""Styles a message to be printed into console.
"""
wrapper = textwrap.TextWrapper(width=width-6)
wrapper = textwrap.TextWrapper(width=width-5)
lines = wrapper.wrap(message)
styled_message = ""
first = True
for line in lines:
if first:
new_line = spaces*" " + " >> {}: ".format(title) + line + (width-6-len(line))*" " + " "
new_line = " >> {}: ".format(title) + line
styled_message += new_line
first = False
else:
new_line = spaces*" " + " " + line + (width-6-len(line))*" " + " "
new_line = 5*" " + line
styled_message += "\n" + new_line
return styled_message
......
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import json
import os
import time
......@@ -7,6 +5,7 @@ from abc import ABCMeta, abstractmethod
import logging
logger = logging.getLogger(__name__)
from cp2kparser import ureg
from enum import Enum
#===============================================================================
......@@ -111,8 +110,8 @@ class NomadParser(object):
it to SI units and return the value as json.
"""
# Start timing
logger.debug(74*'-')
logger.info("Getting quantity '{}'".format(name))
logger.info("===========================================================================")
logger.info("GETTING QUANTITY '{}'".format(name))
start = time.clock()
#Check availability
......@@ -140,7 +139,8 @@ class NomadParser(object):
stop = time.clock()
logger.info("Elapsed time: {} ms".format((stop-start)*1000))
# logger.info("Result: {}".format(result))
logger.info("Result: {}".format(result))
logger.info("")
return result
......@@ -175,15 +175,15 @@ class NomadParser(object):
Supports single floating point numbers, arbitrary dimensional numpy
arrays and regular lists/tuples of floating point numbers.
"""
kind = result.kind
return_type = result.return_type
value = result.value * result.unit
# Energy to joule
if kind == Result.energy:
if return_type == Result.energy:
converted = value.to(ureg.joule)
# Force to newton
if kind == Result.force:
if return_type == Result.force:
converted = value.to(ureg.newton)
return converted
......@@ -226,14 +226,31 @@ class NomadParser(object):
#===============================================================================
class Result(object):
""" Encapsulates a parsing result.
class ResultCode(Enum):
"""Enumeration for indicating the result status.
"""
fail = 0
success = 1
#===============================================================================
class ResultKind(Enum):
"""Enumeration for indicating the type of returned value.
"""
energy = "energy"
force = "force"
text = "text"
number = "number"
#===============================================================================
class Result(object):
""" Encapsulates a parsing result.
"""
def __init__(self, value=None, kind=None, unit=None):
def __init__(self, value=None, kind=None, unit=None, code=ResultCode.success):
self.value = value
self.unit = unit
self.kind = kind
self.code = code
self.error_message = ""
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import os
import json
from cp2kparser.implementation.parser import CP2KParser
......
import os
import re
from cp2kparser.generics.nomadparser import NomadParser, Result
from cp2kparser.generics.nomadparser import NomadParser, Result, ResultKind, ResultCode
from cp2kparser.implementation.regexs import *
from cp2kparser.engines.regexengine import RegexEngine
from cp2kparser.engines.xyzengine import XYZEngine
from cp2kparser.engines.csvengine import CSVEngine
from cp2kparser.engines.cp2kinputengine import CP2KInputEngine
from cp2kparser.engines.xmlengine import XMLEngine
from cp2kparser.engines.atomsengine import AtomsEngine
import numpy as np
import logging
logger = logging.getLogger(__name__)
......@@ -27,10 +28,11 @@ class CP2KParser(NomadParser):
self.version_number = None
# Engines are created here
self.xyzengine = XYZEngine(self)
self.csvengine = CSVEngine(self)
self.regexengine = RegexEngine(self)
self.xmlengine = XMLEngine(self)
self.inputengine = CP2KInputEngine()
self.atomsengine = AtomsEngine()
self.input_tree = None
self.regexs = None
......@@ -111,7 +113,7 @@ class CP2KParser(NomadParser):
def determine_file_ids(self):
"""Inherited from NomadParser.
"""
# Check from input what the other files are called
# Determine the presence of force file
force_path = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME")
project_name = self.input_tree.get_keyword("GLOBAL/PROJECT_NAME")
if force_path is not None and force_path != "__STD_OUT__":
......@@ -128,23 +130,70 @@ class CP2KParser(NomadParser):
force_path = "{}-{}-1_0.xyz".format(project_name, force_path)
force_path = os.path.basename(force_path)
# Check against the given files
# Check against the given files
file_path = self.search_file(force_path)
self.file_ids["forces"] = file_path
self.get_file_handle("forces")
# Determine the presence of an initial coordinate file
init_coord_file = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_NAME")
if init_coord_file is not None:
# Check against the given files
file_path = self.search_file(init_coord_file)
self.file_ids["initial_coordinates"] = file_path
self.get_file_handle("initial_coordinates")
# for file_path in self.resolvable:
# tail = os.path.basename(file_path)
# if force_path is not None and tail == force_path:
# self.file_ids["forces"] = file_path
# self.get_file_handle("forces")
def search_file(self, path):
"""Searches the list of given files for a file that is defined in the
CP2K input file.
First compares the basenames, and if multiple matches found descends
the path until only one or zero matches found.
"""
matches = {}
for file_path in self.resolvable:
tail = os.path.basename(file_path)
if force_path is not None and tail == force_path:
self.file_ids["forces"] = file_path
self.get_file_handle("forces")
# def open_files(self):
# """Open the file handles and keep them open until program finishes.
# """
# for file_id, file_path in self.file_ids.iteritems():
# try:
# file_handle = open(file_path, 'r')
# except (OSError, IOError):
# logger.error("Could not open file: '{}'".format(file_path))
# else:
# self.file_handles[file_id] = file_handle
available_parts = self.split_path(file_path)
searched_parts = self.split_path(path)
for i_part, part in enumerate(searched_parts):
if part == available_parts[i_part]:
matches[file_path] = i_part
n = len(matches)
if n == 1:
return matches.keys()[0]
elif n == 0:
logger.error("Could not find file '{}' specified in CP2K input.".format(path))
return
else:
sorted_list = [(k, v) in sorted(mydict.items(), key=lambda (k, v): v[1])]
# sorted_list = sorted(mathes, key=lambda k: matches[k])
if (sorted_list[0][1] == sorted_list[1][1]):
logger.error("When searching for file '{}', multiple matches were found. Could not determine which file to use based on their path.")
else:
return sorted_list[0][0]
def split_path(self, path):
folders = []
while 1:
path, folder = os.path.split(path)
if folder != "":
folders.append(folder)
else:
if path != "":
folders.append(path)
break
# folders.reverse()
return folders
def get_quantity_unformatted(self, name):
"""Inherited from NomadParser. The timing and caching is already
......@@ -180,13 +229,14 @@ class CP2KImplementation(object):
self.parser = parser
self.regexs = parser.regexs
self.regexengine = parser.regexengine
self.xyzengine = parser.xyzengine
self.csvengine = parser.csvengine
self.atomsengine = parser.atomsengine
self.input_tree = parser.input_tree
def _Q_energy_total(self):
"""Return the total energy from the bottom of the input file"""
result = Result()
result.kind = Result.energy
result.return_type = ResultKind.energy
result.unit = ureg.hartree
result.value = float(self.regexengine.parse(self.regexs.energy_total, self.parser.get_file_handle("output")))
return result
......@@ -202,7 +252,7 @@ class CP2KImplementation(object):
belong to the list defined in NoMaD wiki.
"""
result = Result()
result.kind = Result.text
result.return_type = ResultKind.text
# First try to look at the shortcut
xc_shortcut = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL")
......@@ -266,7 +316,7 @@ class CP2KImplementation(object):
Supports forces printed in the output file or in a single .xyz file.
"""
result = Result()
result.kind = Result.force
result.return_type = ResultKind.force
result.unit = ureg.force_au
# Determine if a separate force file is used or are the forces printed
......@@ -278,18 +328,20 @@ class CP2KImplementation(object):
# Look for the forces either in output or in separate file
if not separate_file:
logger.debug("Looking for forcesnature in output file.")
logger.debug("Looking for forces in output file.")
forces = self.regexengine.parse(self.regexs.particle_forces, self.parser.get_file_handle("output"))
if forces is None:
logger.warning("No forces could be found in the output file.")
result.value = forces
msg = "No force configurations were found when searching the output file."
logger.warning(msg)
result.error_message = msg
result.code = ResultCode.fail
return result
# Insert force configuration into the array
i_conf = 0
force_array = None
for force_conf in forces:
i_force_array = self.xyzengine.parse(force_conf, columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=None)
i_force_array = self.csvengine.parse(force_conf, columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=None)
i_force_array = i_force_array[0]
# Initialize the numpy array if not done yet
......@@ -305,11 +357,51 @@ class CP2KImplementation(object):
return result
else:
logger.debug("Looking for forces in separate force file.")
forces = self.xyzengine.parse(self.parser.get_file_handle("forces"), columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=r"\ ATOMIC FORCES in \[a\.u\.\]")
forces = self.csvengine.parse(self.parser.get_file_handle("forces"), columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=r"\ ATOMIC FORCES in \[a\.u\.\]")
if forces is None:
logger.warning("No forces could be found in the XYZ file.")
msg = "No force configurations were found when searching an external XYZ force file."
logger.warning(msg)
result.error_message = msg
result.code = ResultCode.fail
return result
else:
result.value = forces
return result
def _Q_particle_number(self):
"""Return the number of particles in the system.
CP2K output doesn't automatically print the number of atoms. For this
reason this function has to look at the initial configuration and
calculate the number from it. The initial configuration is something
that must be present for all calculations.
"""
result = Result()
result.return_type = ResultKind.number
# Check where the coordinates are specified
coord_format = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_FORMAT")
# See if the coordinates are provided in the input file
if coord_format == "OFF":
logger.debug("Using coordinates from the input file.")
coords = self.input_tree.get_default_keyword("FORCE_EVAL/SUBSYS/COORD")
coords.strip()
n_particles = coords.count("\n")
result.value = n_particles
return result
elif coord_format == "CP2K":
msg = "Unsupported coordinate file format: '{}'".format(coord_format)
logger.warning(msg)
result.error_message = msg
result.code = ResultCode.fail
return result
# External file
init_coord_file = self.parser.get_file_handle("initial_coordinates")
n_particles = self.atomsengine.parse_number(init_coord_file, format="xyz")
result.value = n_particles
result.value = forces
return result
......
&FORCE_EVAL
METHOD Quickstep
&DFT
BASIS_SET_FILE_NAME ../../../data/BASIS_SET
POTENTIAL_FILE_NAME ../../../data/POTENTIAL
UKS TRUE
&MGRID
CUTOFF 50
&END MGRID
&QS
EPS_DEFAULT 1.0E-6
&END QS
&SCF
EPS_SCF 1.0E-4
SCF_GUESS ATOMIC
&END SCF
&XC
&XC_FUNCTIONAL Pade
&END XC_FUNCTIONAL
&END XC
&END DFT
&SUBSYS
&CELL
ABC 6.0 6.0 6.0
&END CELL
&COORD
Na 0.000000 0.000000 -0.065587
&END COORD
&KIND Na
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q1
&END KIND
&KIND Cl
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q7
&END KIND
&END SUBSYS
&PRINT
&FORCES ON
&END FORCES
&END PRINT
&END FORCE_EVAL
&GLOBAL
PROJECT_NAME NaCl
RUN_TYPE ENERGY_FORCE
PRINT_LEVEL LOW
&END GLOBAL
DBCSR| Multiplication driver SMM
DBCSR| Multrec recursion limit 512
DBCSR| Multiplication stack size 1000
DBCSR| Multiplication size stacks 3
DBCSR| Use subcommunicators T
DBCSR| Use MPI combined types F
DBCSR| Use MPI memory allocation T
DBCSR| Use Communication thread T
DBCSR| Communication thread load 87
**** **** ****** ** PROGRAM STARTED AT 2015-11-23 08:29:45.491
***** ** *** *** ** PROGRAM STARTED ON lauri-Lenovo-Z50-70
** **** ****** PROGRAM STARTED BY lauri
***** ** ** ** ** PROGRAM PROCESS ID 5542
**** ** ******* ** PROGRAM STARTED IN /home/lauri/Dropbox/nomad-dev/gitlab/
parser-cp2k/cp2kparser/tests/cp2k_2.6
.2/particle_number/inputfile/1
CP2K| version string: CP2K version 2.6.2
CP2K| source code revision number: svn:15893
CP2K| is freely available from http://www.cp2k.org/
CP2K| Program compiled at ke 4.11.2015 08.48.42 +0200
CP2K| Program compiled on lauri-Lenovo-Z50-70
CP2K| Program compiled for Linux-x86-64-gfortran_basic
CP2K| Input file name NaCl.inp
GLOBAL| Force Environment number 1
GLOBAL| Basis set file name ../../../data/BASIS_SET
GLOBAL| Geminal file name BASIS_GEMINAL
GLOBAL| Potential file name ../../../data/POTENTIAL
GLOBAL| MM Potential file name MM_POTENTIAL
GLOBAL| Coordinate file name __STD_INPUT__
GLOBAL| Method name CP2K
GLOBAL| Project name NaCl
GLOBAL| Preferred FFT library FFTW3
GLOBAL| Preferred diagonalization lib. SL
GLOBAL| Run type ENERGY_FORCE
GLOBAL| All-to-all communication in single precision F
GLOBAL| FFTs using library dependent lengths F
GLOBAL| Global print level LOW
GLOBAL| Total number of message passing processes 1
GLOBAL| Number of threads for this process 1
GLOBAL| This output is from process 0
MEMORY| system memory details [Kb]
MEMORY| rank 0 min max average
MEMORY| MemTotal 8070396 8070396 8070396 8070396
MEMORY| MemFree 2954712 2954712 2954712 2954712
MEMORY| Buffers 286596 286596 286596 286596
MEMORY| Cached 2935616 2935616 2935616 2935616
MEMORY| Slab 201228 201228 201228 201228
MEMORY| SReclaimable 158268 158268 158268 158268
MEMORY| MemLikelyFree 6335192 6335192 6335192 6335192
GENERATE| Preliminary Number of Bonds generated: 0
GENERATE| Achieved consistency in connectivity generation.
*******************************************************************************
*******************************************************************************
** **
** ##### ## ## **
** ## ## ## ## ## **
** ## ## ## ###### **
** ## ## ## ## ## ##### ## ## #### ## ##### ##### **
** ## ## ## ## ## ## ## ## ## ## ## ## ## ## **
** ## ## ## ## ## ## ## #### ### ## ###### ###### **
** ## ### ## ## ## ## ## ## ## ## ## ## **
** ####### ##### ## ##### ## ## #### ## ##### ## **
** ## ## **
** **
** ... make the atoms dance **