Commit ad2dbd63 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Simplified parsing structure.

parent 7936bfae
...@@ -3,18 +3,6 @@ from nomadcore.caching_backend import CachingLevel ...@@ -3,18 +3,6 @@ from nomadcore.caching_backend import CachingLevel
import numpy as np import numpy as np
#===============================================================================
class MetaInfo(object):
def __init__(self, name, description="", dtypeStr="c", shape=[], dependencies=[]):
self.name = name
self.description = description
self.dtypeStr = dtypeStr
self.shape = shape
if not isinstance(dependencies, list):
dependencies = [dependencies]
self.dependencies = dependencies
#=============================================================================== #===============================================================================
class CP2KOutputParser262(object): class CP2KOutputParser262(object):
"""The object that goes through the CP2K output file and parses everything """The object that goes through the CP2K output file and parses everything
...@@ -33,124 +21,111 @@ class CP2KOutputParser262(object): ...@@ -33,124 +21,111 @@ class CP2KOutputParser262(object):
# Define the output parsing tree for this version # Define the output parsing tree for this version
self.outputstructure = SM( self.outputstructure = SM(
startReStr="", startReStr="",
sections=['section_run'],
subMatchers=[ subMatchers=[
# SM(
# sections=['cp2k_section_dbcsr'],
# startReStr=r" DBCSR\| Multiplication driver",
# ),
# SM(
# sections=['cp2k_section_startinformation'],
# startReStr=r"[\*\s]+PROGRAM STARTED AT\s+(?P<cp2k_run_start_date>\d{4}-\d{2}-\d{2}) (?P<cp2k_run_start_time>\d{2}:\d{2}:\d{2}.\d{3})",
# forwardMatch=True,
# subMatchers=[
# SM(
# startReStr=r"[\*\s]+PROGRAM STARTED AT\s+(?P<cp2k_run_start_date>\d{4}-\d{2}-\d{2}) (?P<cp2k_run_start_time>\d{2}:\d{2}:\d{2}.\d{3})",
# ),
# SM(
# startReStr=r" CP2K\| version string:\s+(?P<program_version>[\w\d\W\s]+)",
# sections=['program_info'],
# ),
# SM(
# startReStr=r" CP2K\| source code revision number:\s+svn:(?P<cp2k_svn_revision>\d+)",
# )
# ]
# ),
SM( SM(
startReStr=r" DBCSR\| Multiplication driver", sections=["cp2k_section_cell"],
endReStr="[.\*]+PROGRAM STOPPED IN", startReStr=" CELL\|",
required=True, forwardMatch=True,
sections=['section_run'],
subMatchers=[ subMatchers=[
SM( SM(
startReStr=r"[\*\s]+PROGRAM STARTED AT\s+(?P<cp2k_run_start_date>\d{4}-\d{2}-\d{2}) (?P<cp2k_run_start_time>\d{2}:\d{2}:\d{2}.\d{3})", startReStr=" CELL\| Vector a \[angstrom\]:\s+(?P<cp2k_cell_vector_a>[\d\.]+\s+[\d\.]+\s+[\d\.]+)+"
), ),
SM( SM(
startReStr=r" CP2K\| version string:\s+(?P<program_version>[\w\d\W\s]+)", startReStr=" CELL\| Vector b \[angstrom\]:\s+(?P<cp2k_cell_vector_b>[\d\.]+\s+[\d\.]+\s+[\d\.]+)+"
), ),
SM( SM(
startReStr=r" CP2K\| source code revision number:\s+svn:(?P<cp2k_svn_revision>\d+)", startReStr=" CELL\| Vector c \[angstrom\]:\s+(?P<cp2k_cell_vector_c>[\d\.]+\s+[\d\.]+\s+[\d\.]+)+"
),
]
),
SM(
sections=["cp2k_section_functional"],
startReStr=" FUNCTIONAL\|",
forwardMatch=True,
otherMetaInfo=["XC_functional"],
subMatchers=[
SM(
repeats=True,
startReStr=" FUNCTIONAL\| (?P<cp2k_functional_name>[\w\d\W]+):"
)
]
),
SM(
sections=["cp2k_section_total_numbers"],
startReStr=" TOTAL NUMBERS AND MAXIMUM NUMBERS",
subMatchers=[
SM(
startReStr="\s+- Atoms:\s+(?P<number_of_atoms>\d+)",
sections=["section_system_description"]
), ),
# System Description
SM( SM(
startReStr="", startReStr="\s+- Shell sets:\s+(?P<cp2k_shell_sets>\d+)"
sections=["cp2k_system_description"], )
otherMetaInfo=["number_of_atoms"], ]
dependencies={"atom_number": ["cp2k_atom_number"]}, ),
SM(
sections=["cp2k_section_md"],
startReStr=" MD| Molecular Dynamics Protocol",
forwardMatch=True,
subMatchers=[
SM(
repeats=True,
startReStr=" ENERGY\| Total FORCE_EVAL",
sections=["cp2k_section_md_step"],
subMatchers=[ subMatchers=[
SM( SM(
startReStr=" CELL\|", startReStr=" ATOMIC FORCES in \[a\.u\.\]",
forwardMatch=True, sections=["cp2k_section_md_forces"],
sections=["cp2k_section_cell"],
subMatchers=[
SM(
startReStr=" CELL\| Vector a \[angstrom\]:\s+(?P<cp2k_cell_vector_a>[\d\.]+\s+[\d\.]+\s+[\d\.]+)+"
),
SM(
startReStr=" CELL\| Vector b \[angstrom\]:\s+(?P<cp2k_cell_vector_b>[\d\.]+\s+[\d\.]+\s+[\d\.]+)+"
),
SM(
startReStr=" CELL\| Vector c \[angstrom\]:\s+(?P<cp2k_cell_vector_c>[\d\.]+\s+[\d\.]+\s+[\d\.]+)+"
),
]
),
SM(
startReStr=" FUNCTIONAL\|",
forwardMatch=True,
sections=["section_method", "cp2k_section_functionals"],
otherMetaInfo=["XC_functional"],
subMatchers=[ subMatchers=[
SM( SM(
startReStr="\s+\d+\s+\d+\s+[\w\W\d]+\s+(?P<cp2k_md_force_atom_string>{0}\s+{0}\s+{0})".format(self.f_regex),
sections=["cp2k_section_md_force_atom"],
repeats=True, repeats=True,
startReStr=" FUNCTIONAL\| (?P<cp2k_functional_name>[\w\d\W]+):"
) )
] ]
), ),
# SM(
# startReStr=" MODULE QUICKSTEP: ATOMIC COORDINATES IN angstrom",
# subMatchers=[
# SM(
# repeats=True,
# startReStr="\s+\d+\s+\d+\s+(?P<cp2k_atom_label>\w+)\s+\d+\s+{}\s+{}\s+{}".format(self.f_regex)
# )
# ]
# )
SM( SM(
startReStr=" TOTAL NUMBERS AND MAXIMUM NUMBERS", startReStr=" STEP NUMBER\s+=\s+(?P<cp2k_md_step_number>\d+)"
sections=["cp2k_section_numbers"], ),
subMatchers=[ SM(
SM( startReStr=" TIME \[fs\]\s+=\s+(?P<cp2k_md_step_time>\d+\.\d+)"
startReStr="\s+- Atoms:\s+(?P<cp2k_atom_number>\d+)" ),
), SM(
SM( startReStr=" TEMPERATURE \[K\]\s+=\s+(?P<cp2k_md_temperature_instantaneous>{0})\s+(?P<cp2k_md_temperature_average>{0})".format(self.f_regex)
startReStr="\s+- Shell sets:\s+(?P<cp2k_shell_sets>\d+)" ),
)
]
)
]
),
# Molecular Dynamics
SM(
startReStr=" MD| Molecular Dynamics Protocol",
forwardMatch=True,
sections=["cp2k_section_md"],
subMatchers=[
SM( SM(
repeats=True, startReStr=" i =",
startReStr=" ENERGY\| Total FORCE_EVAL", sections=["cp2k_section_md_coordinates"],
sections=["cp2k_section_md_step"], otherMetaInfo=["cp2k_md_coordinates"],
dependencies={"cp2k_md_coordinates": ["cp2k_md_coordinate_atom_string"]},
subMatchers=[ subMatchers=[
SM( SM(
startReStr=" ATOMIC FORCES in \[a\.u\.\]", startReStr=" \w+\s+(?P<cp2k_md_coordinate_atom_string>{0}\s+{0}\s+{0})".format(self.f_regex),
sections=["cp2k_section_md_forces"], endReStr="\n",
subMatchers=[ sections=["cp2k_section_md_coordinate_atom"],
SM( repeats=True,
startReStr="\s+\d+\s+\d+\s+[\w\W\d]+\s+(?P<cp2k_md_force_atom_string>{0}\s+{0}\s+{0})".format(self.f_regex),
sections=["cp2k_section_md_force_atom"],
repeats=True,
)
]
),
SM(
startReStr=" STEP NUMBER\s+=\s+(?P<cp2k_md_step_number>\d+)"
),
SM(
startReStr=" TIME \[fs\]\s+=\s+(?P<cp2k_md_step_time>\d+\.\d+)"
),
SM(
startReStr=" TEMPERATURE \[K\]\s+=\s+(?P<cp2k_md_temperature_instantaneous>{0})\s+(?P<cp2k_md_temperature_average>{0})".format(self.f_regex)
),
SM(
startReStr=" i =",
sections=["cp2k_section_md_coordinates"],
otherMetaInfo=["cp2k_md_coordinates"],
dependencies={"cp2k_md_coordinates": ["cp2k_md_coordinate_atom_string"]},
subMatchers=[
SM(
startReStr=" \w+\s+(?P<cp2k_md_coordinate_atom_string>{0}\s+{0}\s+{0})".format(self.f_regex),
endReStr="\n",
sections=["cp2k_section_md_coordinate_atom"],
repeats=True,
)
]
) )
] ]
) )
...@@ -160,14 +135,13 @@ class CP2KOutputParser262(object): ...@@ -160,14 +135,13 @@ class CP2KOutputParser262(object):
) )
] ]
) )
#=======================================================================
# The cache settings # The cache settings
self.cachingLevelForMetaName = { self.cachingLevelForMetaName = {
'cp2k_cell_vector_a': CachingLevel.Cache, 'cp2k_cell_vector_a': CachingLevel.Cache,
'cp2k_cell_vector_b': CachingLevel.Cache, 'cp2k_cell_vector_b': CachingLevel.Cache,
'cp2k_cell_vector_c': CachingLevel.Cache, 'cp2k_cell_vector_c': CachingLevel.Cache,
'cp2k_section_cell': CachingLevel.Cache, 'cp2k_section_cell': CachingLevel.Cache,
'cp2k_system_description': CachingLevel.Cache,
'cp2k_section_atom_position': CachingLevel.Cache,
'cp2k_functional_name': CachingLevel.Cache, 'cp2k_functional_name': CachingLevel.Cache,
'cp2k_section_functionals': CachingLevel.Cache, 'cp2k_section_functionals': CachingLevel.Cache,
'cp2k_section_numbers': CachingLevel.Cache, 'cp2k_section_numbers': CachingLevel.Cache,
...@@ -185,49 +159,36 @@ class CP2KOutputParser262(object): ...@@ -185,49 +159,36 @@ class CP2KOutputParser262(object):
'cp2k_md_force_atom_float': CachingLevel.Cache, 'cp2k_md_force_atom_float': CachingLevel.Cache,
} }
# The trigger functions #===========================================================================
def onClose_cp2k_system_description(self, backend, gIndex, section): # The functions that trigger when sections are closed
def onClose_cp2k_section_cell(self, backend, gIndex, section):
"""When the cell definition finishes, gather the results to a 3x3 """When the cell definition finishes, gather the results to a 3x3
matrix. Or if not present, ask the cp2kparser for help. matrix. Or if not present, ask the cp2kparser for help.
""" """
# Open the common system description section # Open the common system description section
backend.openSection("section_system_description") gIndex = backend.openSection("section_system_description")
# Get the cell information
cell = section["cp2k_section_cell"]
if cell:
cell = cell[0]
# Get the cell vector strings # Get the cell vector strings
a = cell["cp2k_cell_vector_a"][0] a = section["cp2k_cell_vector_a"][0]
b = cell["cp2k_cell_vector_b"][0] b = section["cp2k_cell_vector_b"][0]
c = cell["cp2k_cell_vector_c"][0] c = section["cp2k_cell_vector_c"][0]
# Extract the components and put into numpy array # Extract the components and put into numpy array
a_comp = a.split() a_comp = a.split()
b_comp = b.split() b_comp = b.split()
c_comp = c.split() c_comp = c.split()
cell = np.zeros((3, 3)) cell = np.zeros((3, 3))
cell[0, :] = a_comp cell[0, :] = a_comp
cell[1, :] = b_comp cell[1, :] = b_comp
cell[2, :] = c_comp cell[2, :] = c_comp
backend.addArrayValues("simulation_cell", cell, unit="angstrom") backend.addArrayValues("simulation_cell", cell, unit="angstrom")
# Get the number of atoms
numbers = section["cp2k_section_numbers"]
if numbers:
numbers = numbers[0]
n_atoms = numbers["cp2k_atom_number"]
if n_atoms:
n_atoms = n_atoms[0]
backend.addValue("number_of_atoms", n_atoms)
# Close the common system description section # Close the common system description section
backend.closeSection("section_system_description", 0) backend.closeSection("section_system_description", gIndex)
def onClose_cp2k_section_functionals(self, backend, gIndex, section): def onClose_cp2k_section_functional(self, backend, gIndex, section):
"""When all the functional definitions have been gathered, matches them """When all the functional definitions have been gathered, matches them
with the nomad correspondents and combines into one single string which with the nomad correspondents and combines into one single string which
is put into the backend. is put into the backend.
...@@ -253,12 +214,14 @@ class CP2KOutputParser262(object): ...@@ -253,12 +214,14 @@ class CP2KOutputParser262(object):
functionals = "_".join(sorted(functionals)) functionals = "_".join(sorted(functionals))
# Push the functional string into the backend # Push the functional string into the backend
gIndex = backend.openSection("section_method")
backend.addValue('XC_functional', functionals) backend.addValue('XC_functional', functionals)
backend.closeSection("section_method", gIndex)
def onClose_cp2k_section_atom_position(self, backend, gIndex, section): # def onClose_cp2k_section_atom_position(self, backend, gIndex, section):
"""Get the initial atomic positions from cp2kparser. # """Get the initial atomic positions from cp2kparser.
""" # """
pass # pass
# positions, unit = self.cp2kparser.get_initial_atom_positions_and_unit() # positions, unit = self.cp2kparser.get_initial_atom_positions_and_unit()
# backend.addArrayValues("atom_position", positions) # backend.addArrayValues("atom_position", positions)
...@@ -295,3 +258,6 @@ class CP2KOutputParser262(object): ...@@ -295,3 +258,6 @@ class CP2KOutputParser262(object):
forces = section["cp2k_md_force_atom_float"] forces = section["cp2k_md_force_atom_float"]
forces = np.array(forces) forces = np.array(forces)
backend.addArrayValues("cp2k_md_forces", forces, unit="force_au") backend.addArrayValues("cp2k_md_forces", forces, unit="force_au")
#===========================================================================
# adHoc functions that are used to do custom parsing.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment