Commit e9e9b3c8 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Added some tests for forces, small additions here and there.

parent 671ce31b
# CP2K
The NoMaD parser for CP2K. Under development.
The NoMaD parser for CP2K. Under development. Will be modified to conform to
the common parser structure when it is available.
## QuickStart
- Clone repository
- Run setup by running the setup.py script:
$ python setup.py install --user
- Run tests (TODO)
- Parsing can be currently tested by simply running the script "parse.py" in a folder
## Structure
Currently the python package is divided into three subpackages:
......
......@@ -25,7 +25,8 @@ class Regex(object):
regex_string: The regular expression as a string. Supports also the
more verbose form
(https://docs.python.org/2/library/re.html#re.VERBOSE)
(https://docs.python.org/2/library/re.html#re.VERBOSE). Currently
supports only one capturing group.
index: Index for the wanted match. Can be a single integer number (also
negative indices supported) or if the special value "all" is provided,
......@@ -78,7 +79,7 @@ class Regex(object):
return self.compiled_regex.search(string)
def findall(self, string):
return self.compiled_regex.search(string)
return self.compiled_regex.findall(string)
def finditer(self, string):
return self.compiled_regex.finditer(string)
......@@ -113,8 +114,8 @@ class RegexEngine(object):
if result:
return result
# Couldn't find the quantity from any of the specified files
print_error("Could not find a result for {}.".format(regex.regex_string))
# Couldn't find the quantity
print_debug("Could not find a result for {}.".format(regex.regex_string))
def recursive_extraction(self, regex, data):
"""Goes through the extractor tree recursively until the final
......@@ -139,7 +140,7 @@ class RegexEngine(object):
result = self.regex_search_string(data, regex)
if not result:
print_error("There was an issue in regex '{}' with index '{}' .".format(regex.regex_string, regex.index))
print_debug("There was an issue in regex '{}' with index '{}' .".format(regex.regex_string, regex.index))
return None
# See if the tree continues
......@@ -172,7 +173,7 @@ class RegexEngine(object):
print_debug("Doing full string search for all results.")
result = regex.findall(contents)
if not result:
print_error("No matches.")
print_debug("No matches.")
elif index >= 0:
print_debug("Doing full string search with specified index.")
iter = regex.finditer(contents)
......@@ -182,9 +183,9 @@ class RegexEngine(object):
match = iter.next()
except StopIteration:
if i == 0:
print_error("No results.")
print_debug("No results.")
else:
print_error("Invalid regex index.")
print_debug("Invalid regex index.")
break
if i == index:
result = match.groups()[0]
......@@ -192,12 +193,12 @@ class RegexEngine(object):
elif index < 0:
matches = regex.findall(contents)
if not matches:
print_error("No matches.")
print_debug("No matches.")
else:
try:
result = matches[index]
except IndexError:
print_error("Invalid regex index.")
print_debug("Invalid regex index.")
return result
......@@ -226,15 +227,26 @@ class RegexEngine(object):
# If all results wanted, just get all results from all blocks
if index == "all":
print_debug("Searchin for all matches.")
results = []
for block in generator:
results += regex.findall(block)
matches = regex.findall(block)
if matches:
print_debug("Found match within block.")
if isinstance(matches, list):
print_debug("Found multiple matches in a block")
for match in matches:
results.append(match)
else:
print_debug("Found single match in a block")
results.append(matches.groups()[0])
return results
# If index given, search until the correct index found
i_result = 0
counter = 0
for block in generator:
print_debug("Searchin for a specific index.")
counter += 1
if from_beginning:
result = regex.match(block)
......@@ -246,13 +258,20 @@ class RegexEngine(object):
return result.groups()[0]
else:
results = regex.findall(block)
n_results = len(results)
if results:
if isinstance(results, list):
n_results = len(results)
else:
n_results = 1
print_debug("Found results within block.")
if index + 1 > i_result + n_results:
i_result += n_results
else:
return results[i_result + (n_results-1) - index]
if n_results == 1:
return results.groups()[0]
else:
return results[i_result + (n_results-1) - index]
def reverse_block_generator(self, fh, separator, buf_size=1000000):
"""A generator that returns chunks of a file piece-by-piece in reverse
......@@ -262,6 +281,11 @@ class RegexEngine(object):
offset = 0
fh.seek(0, os.SEEK_END)
total_size = remaining_size = fh.tell()
# Compile the separator with an added end of string character.
end_match = separator.pattern + r'$'
compiled_end_match = re.compile(end_match)
while remaining_size > 0:
offset = min(total_size, offset + buf_size)
fh.seek(-offset, os.SEEK_END)
......@@ -274,13 +298,13 @@ class RegexEngine(object):
# we'll save it and append it to the last line of the next buffer
# we read
if segment is not None:
# if the previous chunk starts right from the beginning of line
# do not concact the segment to the last line of new chunk
# instead, yield the segment first
if not buffer.endswith(separator):
lines[-1] += segment
else:
# If this chunk ends with the separator, do not concatenate the
# segment to the last line of the new chunk; yield the segment
# on its own instead.
if compiled_end_match.find(buffer):
yield segment
else:
lines[-1] += segment
segment = lines[0]
for index in range(len(lines) - 1, 0, -1):
if len(lines[index]):
......@@ -301,21 +325,18 @@ class RegexEngine(object):
offset += buf_size
buffer = fh.read(min(remaining_size, buf_size))
remaining_size -= buf_size
lines = separator.split(buffer)
# lines = buffer.split(separator)
# the first line of the buffer is probably not a complete line so
# we'll save it and append it to the last line of the next buffer
# we read
parts = separator.split(buffer)
# The last part of the buffer must be appended to the next chunk's first part.
if segment is not None:
# if the previous chunk starts right from the beginning of line
# do not concact the segment to the last line of new chunk
# instead, yield the segment first
if not buffer.startswith(separator):
lines[0] = segment + lines[0]
else:
# If this chunk starts with the separator, do not concatenate the
# segment to the first line of the new chunk; yield the segment
# on its own instead.
if separator.match(buffer):
yield segment
segment = lines[-1]
for index in range(0, len(lines) - 1, 1):
if len(lines[index]):
yield lines[index]
else:
parts[0] = segment + parts[0]
segment = parts[-1]
for index in range(0, len(parts) - 1, 1):
if len(parts[index]):
yield parts[index]
yield segment
......@@ -124,12 +124,12 @@ class NomadParser(object):
result = self.results.get(name)
if not result:
# Ask the engine for the quantity
result = self.parse_quantity(name)
result = self.get_unformatted_quantity(name)
self.results[name] = result
if result is None:
print_debug("The quantity '{}' could not be succesfully parsed.".format(name))
else:
print_debug("Using cached result.")
if result is None:
print_debug("The quantity '{}' is not present or could not be succesfully parsed.".format(name))
# Do the conversion to SI units based on the given units
......@@ -151,7 +151,7 @@ class NomadParser(object):
pass
@abstractmethod
def parse_quantity(self, name):
def get_unformatted_quantity(self, name):
"""Parse a quantity from the given files. Should return a tuple
containing the result object (numeric results preferably as numpy
arrays) and the unit of the result (None if no unit is needed)
......
......@@ -38,3 +38,11 @@ def get_parser(path):
}
parser = CP2KParser(json.dumps(json_input))
return parser
#===============================================================================
if __name__ == '__main__':
print __file__
path = os.path.dirname(os.path.realpath(os.path.dirname(__file__)))
parser = get_parser(path)
parser.parse_all()
......@@ -9,6 +9,7 @@ from cp2kparser.implementation.regexs import *
from cp2kparser.engines.regexengine import RegexEngine
from cp2kparser.engines.xyzengine import XYZEngine
from cp2kparser.engines.cp2kinputengine import CP2KInputEngine
import numpy as np
#===============================================================================
......@@ -56,9 +57,10 @@ class CP2KParser(NomadParser):
# Search for a version specific implementation
class_name = "CP2K{}Implementation".format(version_number)
self.implementation = globals().get(class_name)(self)
if self.implementation:
class_object = globals().get(class_name)
if class_object:
print_debug("Using version specific implementation '{}'.".format(class_name))
self.implementation = class_object(self)
else:
print_debug("Using default implementation.")
self.implementation = globals()["CP2KImplementation"](self)
......@@ -118,13 +120,23 @@ class CP2KParser(NomadParser):
else:
self.file_handles[file_id] = file_handle
def parse_quantity(self, name):
def get_unformatted_quantity(self, name):
"""Inherited from NomadParser. The timing and caching is already
implemented in the superclass.
"""
# Ask the implementation for the quantity
result = getattr(self.implementation, name)()
return result
function = getattr(self.implementation, "_Q_" + name)
if function:
return function()
else:
print_error("The function for quantity '{}' is not defined".format(name))
def parse_all(self):
"""Parse all supported quantities."""
implementation_methods = [method for method in dir(self.implementation) if callable(getattr(self.implementation, method))]
for method in implementation_methods:
if method.startswith("_Q_"):
getattr(self.implementation, method)()
def check_quantity_availability(self, name):
"""Inherited from NomadParser.
......@@ -140,14 +152,11 @@ class CP2KImplementation(object):
This class provides the basic implementations. For version-specific
updates and additions, please make a new class that inherits from this one.
"""
# The nomad quantities that this implementation supports
supported_quantities = [
"energy_total",
"XC_functional",
"particle_forces",
]
The functions that return certain quantities are tagged with the prefix '_Q_'
so that the quantities that have at least some level of support can be
determined automatically. With the tag they can also be looped through.
"""
def __init__(self, parser):
self.parser = parser
......@@ -156,11 +165,11 @@ class CP2KImplementation(object):
self.inputengine = parser.inputengine
self.xyzengine = parser.xyzengine
def energy_total(self):
def _Q_energy_total(self):
"""Return the total energy from the bottom of the input file"""
return self.regexengine.parse(self.regexs.energy_total, self.parser.get_file_handle("output"))
def XC_functional(self):
def _Q_XC_functional(self):
"""Returns the type of the XC functional.
Can currently only determine version if they are declared as parameters
......@@ -224,14 +233,14 @@ class CP2KImplementation(object):
# Return an alphabetically sorted and joined list of the xc components
return "_".join(sorted(xc_components))
def particle_forces(self):
def _Q_particle_forces(self):
"""Return all the forces for every step found.
"""
# Determine whether a separate force file is used or whether the forces
# are printed in the output file.
separate_file = True
filename = self.inputengine.get_subsection("FORCE_EVAL/PRINT/FORCES").get_keyword("FILENAME")
filename = self.inputengine.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME")
if not filename or filename == "__STD_OUT__":
separate_file = False
......@@ -239,8 +248,26 @@ class CP2KImplementation(object):
if not separate_file:
print_debug("Looking for forces in output file.")
forces = self.regexengine.parse(self.regexs.particle_forces, self.parser.get_file_handle("output"))
forces = unicode("\n".join(forces))
forces = self.xyzengine.parse_string(forces, (-3, -2, -1), ("#", "ATOMIC", "SUM"))
if forces is None:
return None
# Insert force configuration into the array
i_conf = 0
force_array = None
for force_conf in forces:
unicode_force_conf = unicode(force_conf)
i_force_array = self.xyzengine.parse_string(unicode_force_conf, (-3, -2, -1), ("#", "ATOMIC", "SUM"))
# Initialize the numpy array if not done yet
n_particles = i_force_array.shape[0]
n_dim = i_force_array.shape[1]
n_confs = len(forces)
force_array = np.empty((n_particles, n_dim, n_confs))
force_array[:, :, i_conf] = i_force_array
i_conf += 1
return force_array
else:
print_debug("Looking for forces in separate force file.")
forces = self.xyzengine.parse_file(self.parser.get_file_handle("forces"), (-3, -2, -1), ("#", "ATOMIC", "SUM"))
......
......@@ -30,7 +30,7 @@ class CP2KRegexs(object):
''',
separator=r" ATOMIC FORCES in",
direction="down",
index=0,
index="all",
from_beginning=False)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
# Step Nr. Time[fs] Kin.[a.u.] Temp[K] Pot.[a.u.] Cons Qty[a.u.] UsedTime[s]
0 0.000000 0.002850134 300.000000000 -13.678862034 -13.675536878 0.000000000
3
i = 0, time = 0.000, E = -13.6788620342
O 0.0000000000 0.0000000000 -0.0655870000
H 0.0000000000 -0.7571360000 0.5205450000
H 0.0000000000 0.7571360000 0.5205450000
&FORCE_EVAL
METHOD Quickstep
&DFT
BASIS_SET_FILE_NAME ../../../data/BASIS_SET
POTENTIAL_FILE_NAME ../../../data/POTENTIAL
&MGRID
CUTOFF 50
&END MGRID
&QS
EPS_DEFAULT 1.0E-6
&END QS
&SCF
EPS_SCF 1.0E-4
SCF_GUESS ATOMIC
&END SCF
&XC
&XC_FUNCTIONAL Pade
&END XC_FUNCTIONAL
&END XC
&END DFT
&SUBSYS
&CELL
ABC 6.0 6.0 6.0
&END CELL
&COORD
O 0.000000 0.000000 -0.065587
H 0.000000 -0.757136 0.520545
H 0.000000 0.757136 0.520545
&END COORD
&KIND H
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q6
&END KIND
&END SUBSYS
&END FORCE_EVAL
&GLOBAL
PROJECT H2O-2
RUN_TYPE MD
PRINT_LEVEL LOW
&END GLOBAL
&MOTION
&MD
ENSEMBLE NVT
STEPS 0
TIMESTEP 0.1
TEMPERATURE 300.0
&THERMOSTAT
&NOSE
LENGTH 3
YOSHIDA 3
TIMECON 100.0
MTS 2
&END NOSE
&END
&END MD
&END MOTION
This diff is collapsed.
# Step Nr. Time[fs] Kin.[a.u.] Temp[K] Pot.[a.u.] Cons Qty[a.u.] UsedTime[s]
0 0.000000 0.002850134 300.000000000 -13.678862034 -13.675536878 0.000000000
3
i = 0, time = 0.000, E = -13.6788620342
O 0.0000000000 0.0000000000 -0.0655870000
H 0.0000000000 -0.7571360000 0.5205450000
H 0.0000000000 0.7571360000 0.5205450000
&FORCE_EVAL
METHOD Quickstep
&DFT
BASIS_SET_FILE_NAME ../../../data/BASIS_SET
POTENTIAL_FILE_NAME ../../../data/POTENTIAL
&MGRID
CUTOFF 50
&END MGRID
&QS
EPS_DEFAULT 1.0E-6
&END QS
&SCF
EPS_SCF 1.0E-4
SCF_GUESS ATOMIC
&END SCF
&XC
&XC_FUNCTIONAL Pade
&END XC_FUNCTIONAL
&END XC
&END DFT
&SUBSYS
&CELL
ABC 6.0 6.0 6.0
&END CELL
&COORD
O 0.000000 0.000000 -0.065587
H 0.000000 -0.757136 0.520545
H 0.000000 0.757136 0.520545
&END COORD
&KIND H
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q6
&END KIND
&END SUBSYS
&PRINT
&FORCES
&EACH
MD 1
&END EACH
&END FORCES
&END PRINT
&END FORCE_EVAL
&GLOBAL
PROJECT H2O-2
RUN_TYPE MD
PRINT_LEVEL LOW
&END GLOBAL
&MOTION
&MD
ENSEMBLE NVT
STEPS 0
TIMESTEP 0.1
TEMPERATURE 300.0
&THERMOSTAT
&NOSE
LENGTH 3
YOSHIDA 3
TIMECON 100.0
MTS 2
&END NOSE
&END
&END MD
&END MOTION
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment