Commit 4961e40b authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Fixed issue with restarted calculations, fixed bugs in reading MD files with...

Fixed issue with restarted calculations, fixed bugs in reading MD files with non-unity print frequencies.
parent d361b1ad
......@@ -32,12 +32,15 @@ class CP2KParser(ParserInterface):
# this information.
regex_version = re.compile(r" CP2K\| version string:\s+CP2K version ([\d\.]+)")
regex_run_type = re.compile(r"\s+GLOBAL\| Run type\s+(.+)")
n_lines = 50
n_lines = 100
version_id = None
run_type = None
with open(self.parser_context.main_file, 'r') as outputfile:
for i_line in range(n_lines):
line = next(outputfile)
try:
line = next(outputfile)
except StopIteration:
break
result_version = regex_version.match(line)
result_run_type = regex_run_type.match(line)
if result_version:
......
......@@ -127,11 +127,28 @@ class CP2KCommonParser(CommonParser):
],
),
SM( " CELL\|",
adHoc=self.adHoc_x_cp2k_section_cell(),
adHoc=self.adHoc_x_cp2k_section_cell,
),
]
)
# SimpleMatcher for the restart information
def restart(self):
return SM( re.escape(" * RESTART INFORMATION *"),
sections=["x_cp2k_section_restart_information"],
subMatchers=[
SM( re.escape(" *******************************************************************************")),
SM( re.escape(" * *")),
SM( re.escape(" * RESTART FILE NAME: (?P<x_cp2k_restart_file_name>{})\s+*".format(self.regexs.word))),
SM( re.escape(" * *")),
SM( re.escape(" * RESTARTED QUANTITIES: *")),
SM( re.escape(" * (?P<x_cp2k_restarted_quantity>{})\s+*".format(self.regexs.word)),
repeats=True,
),
SM( re.escape(" *******************************************************************************"))
]
)
# SimpleMatcher for the footer that is common to all run types
def footer(self):
return SM( " - DBCSR STATISTICS -",
......@@ -168,32 +185,32 @@ class CP2KCommonParser(CommonParser):
]
),
SM( r" \*\*\* SCF run converged in\s+(\d+) steps \*\*\*",
adHoc=self.adHoc_single_point_converged()
adHoc=self.adHoc_single_point_converged
),
SM( r" \*\*\* SCF run NOT converged \*\*\*",
adHoc=self.adHoc_single_point_not_converged()
adHoc=self.adHoc_single_point_not_converged
),
SM( r" Electronic kinetic energy:\s+(?P<x_cp2k_electronic_kinetic_energy__hartree>{})".format(self.regexs.float)),
SM( r" **************************** NUMERICAL STRESS ********************************".replace("*", "\*"),
# endReStr=" **************************** NUMERICAL STRESS END *****************************".replace("*", "\*"),
adHoc=self.adHoc_stress_calculation(),
adHoc=self.adHoc_stress_calculation,
),
SM( r" ENERGY\| Total FORCE_EVAL \( \w+ \) energy \(a\.u\.\):\s+(?P<x_cp2k_energy_total__hartree>{0})".format(self.regexs.float),
),
SM( r" ATOMIC FORCES in \[a\.u\.\]"),
SM( r" # Atom Kind Element X Y Z",
adHoc=self.adHoc_atom_forces(),
adHoc=self.adHoc_atom_forces,
),
SM( r" (?:NUMERICAL )?STRESS TENSOR \[GPa\]",
sections=["x_cp2k_section_stress_tensor"],
subMatchers=[
SM( r"\s+X\s+Y\s+Z",
adHoc=self.adHoc_stress_tensor(),
adHoc=self.adHoc_stress_tensor,
),
SM( " 1/3 Trace\(stress tensor\):\s+(?P<x_cp2k_stress_tensor_one_third_of_trace__GPa>{})".format(self.regexs.float)),
SM( " Det\(stress tensor\)\s+:\s+(?P<x_cp2k_stress_tensor_determinant__GPa3>{})".format(self.regexs.float)),
SM( " EIGENVECTORS AND EIGENVALUES OF THE STRESS TENSOR",
adHoc=self.adHoc_stress_tensor_eigenpairs()),
adHoc=self.adHoc_stress_tensor_eigenpairs),
]
)
]
......@@ -201,7 +218,7 @@ class CP2KCommonParser(CommonParser):
# SimpleMatcher the stuff that is done to initialize a quickstep calculation
def quickstep_header(self):
return SM( " *******************************************************************************".replace("*", "\*"),
return SM( re.escape(" ** ... make the atoms dance **"),
forwardMatch=True,
sections=["x_cp2k_section_quickstep_settings"],
subMatchers=[
......@@ -216,7 +233,7 @@ class CP2KCommonParser(CommonParser):
],
),
SM( " DFT\+U\|",
adHoc=self.adHoc_dft_plus_u(),
adHoc=self.adHoc_dft_plus_u,
),
SM( " QS\|",
forwardMatch=True,
......@@ -301,10 +318,10 @@ class CP2KCommonParser(CommonParser):
]
),
SM( " MP2\|",
adHoc=self.adHoc_mp2()
adHoc=self.adHoc_mp2
),
SM( " RI-RPA\|",
adHoc=self.adHoc_rpa()
adHoc=self.adHoc_rpa
),
]
)
......@@ -501,113 +518,96 @@ class CP2KCommonParser(CommonParser):
#===========================================================================
# adHoc functions
def adHoc_x_cp2k_section_cell(self):
def adHoc_x_cp2k_section_cell(self, parser):
"""Used to extract the cell information.
"""
def wrapper(parser):
# Read the lines containing the cell vectors
a_line = parser.fIn.readline()
b_line = parser.fIn.readline()
c_line = parser.fIn.readline()
# Define the regex that extracts the components and apply it to the lines
regex_string = r" CELL\| Vector \w \[angstrom\]:\s+({0})\s+({0})\s+({0})".format(self.regexs.float)
regex_compiled = re.compile(regex_string)
a_result = regex_compiled.match(a_line)
b_result = regex_compiled.match(b_line)
c_result = regex_compiled.match(c_line)
# Convert the string results into a 3x3 numpy array
cell = np.zeros((3, 3))
cell[0, :] = [float(x) for x in a_result.groups()]
cell[1, :] = [float(x) for x in b_result.groups()]
cell[2, :] = [float(x) for x in c_result.groups()]
# Push the results to cache
self.cache_service["simulation_cell"] = cell
return wrapper
def adHoc_atom_forces(self):
# Read the lines containing the cell vectors
a_line = parser.fIn.readline()
b_line = parser.fIn.readline()
c_line = parser.fIn.readline()
# Define the regex that extracts the components and apply it to the lines
regex_string = r" CELL\| Vector \w \[angstrom\]:\s+({0})\s+({0})\s+({0})".format(self.regexs.float)
regex_compiled = re.compile(regex_string)
a_result = regex_compiled.match(a_line)
b_result = regex_compiled.match(b_line)
c_result = regex_compiled.match(c_line)
# Convert the string results into a 3x3 numpy array
cell = np.zeros((3, 3))
cell[0, :] = [float(x) for x in a_result.groups()]
cell[1, :] = [float(x) for x in b_result.groups()]
cell[2, :] = [float(x) for x in c_result.groups()]
# Push the results to cache
self.cache_service["simulation_cell"] = cell
def adHoc_atom_forces(self, parser):
"""Used to extract the final atomic forces printed at the end of a
calculation.
"""
def wrapper(parser):
end_str = " SUM OF ATOMIC FORCES"
end = False
force_array = []
# Loop through coordinates until the sum of forces is read
while not end:
line = parser.fIn.readline()
if line.startswith(end_str):
end = True
else:
forces = line.split()[-3:]
forces = [float(x) for x in forces]
force_array.append(forces)
force_array = np.array(force_array)
# If anything found, push the results to the correct section
if len(force_array) != 0:
# self.cache_service["atom_forces"] = force_array
self.backend.addArrayValues("x_cp2k_atom_forces", force_array, unit="forceAu")
end_str = " SUM OF ATOMIC FORCES"
end = False
force_array = []
# Loop through coordinates until the sum of forces is read
while not end:
line = parser.fIn.readline()
if line.startswith(end_str):
end = True
else:
forces = line.split()[-3:]
forces = [float(x) for x in forces]
force_array.append(forces)
force_array = np.array(force_array)
return wrapper
# If anything found, push the results to the correct section
if len(force_array) != 0:
# self.cache_service["atom_forces"] = force_array
self.backend.addArrayValues("x_cp2k_atom_forces", force_array, unit="forceAu")
def adHoc_stress_tensor(self):
def adHoc_stress_tensor(self, parser):
"""Used to extract the stress tensor printed at the end of a
calculation.
"""
def wrapper(parser):
row1 = [float(x) for x in parser.fIn.readline().split()[-3:]]
row2 = [float(x) for x in parser.fIn.readline().split()[-3:]]
row3 = [float(x) for x in parser.fIn.readline().split()[-3:]]
stress_array = np.array([row1, row2, row3])
parser.backend.addArrayValues("x_cp2k_stress_tensor", stress_array, unit="GPa")
return wrapper
row1 = [float(x) for x in parser.fIn.readline().split()[-3:]]
row2 = [float(x) for x in parser.fIn.readline().split()[-3:]]
row3 = [float(x) for x in parser.fIn.readline().split()[-3:]]
stress_array = np.array([row1, row2, row3])
parser.backend.addArrayValues("x_cp2k_stress_tensor", stress_array, unit="GPa")
def adHoc_stress_calculation(self):
def adHoc_stress_calculation(self, parser):
"""Used to skip over the stress tensor calculation details.
"""
def wrapper(parser):
end_line = " **************************** NUMERICAL STRESS END *****************************\n"
finished = False
while not finished:
line = parser.fIn.readline()
if line == end_line:
finished = True
return wrapper
def adHoc_stress_tensor_eigenpairs(self):
end_line = " **************************** NUMERICAL STRESS END *****************************\n"
finished = False
while not finished:
line = parser.fIn.readline()
if line == end_line:
finished = True
def adHoc_stress_tensor_eigenpairs(self, parser):
"""Parses the stress tensor eigenpairs.
"""
def wrapper(parser):
parser.fIn.readline()
eigenvalues = np.array([float(x) for x in parser.fIn.readline().split()])
parser.fIn.readline()
row1 = [float(x) for x in parser.fIn.readline().split()]
row2 = [float(x) for x in parser.fIn.readline().split()]
row3 = [float(x) for x in parser.fIn.readline().split()]
eigenvectors = np.array([row1, row2, row3])
parser.backend.addArrayValues("x_cp2k_stress_tensor_eigenvalues", eigenvalues, unit="GPa")
parser.backend.addArrayValues("x_cp2k_stress_tensor_eigenvectors", eigenvectors)
return wrapper
def adHoc_single_point_converged(self):
parser.fIn.readline()
eigenvalues = np.array([float(x) for x in parser.fIn.readline().split()])
parser.fIn.readline()
row1 = [float(x) for x in parser.fIn.readline().split()]
row2 = [float(x) for x in parser.fIn.readline().split()]
row3 = [float(x) for x in parser.fIn.readline().split()]
eigenvectors = np.array([row1, row2, row3])
parser.backend.addArrayValues("x_cp2k_stress_tensor_eigenvalues", eigenvalues, unit="GPa")
parser.backend.addArrayValues("x_cp2k_stress_tensor_eigenvectors", eigenvectors)
def adHoc_single_point_converged(self, parser):
"""Called when the SCF cycle of a single point calculation has converged.
"""
def wrapper(parser):
parser.backend.addValue("x_cp2k_quickstep_converged", True)
return wrapper
parser.backend.addValue("x_cp2k_quickstep_converged", True)
def adHoc_single_point_not_converged(self):
def adHoc_single_point_not_converged(self, parser):
"""Called when the SCF cycle of a single point calculation did not converge.
"""
def wrapper(parser):
parser.backend.addValue("x_cp2k_quickstep_converged", False)
return wrapper
parser.backend.addValue("x_cp2k_quickstep_converged", False)
def adHoc_x_cp2k_section_quickstep_atom_information(self):
"""Used to extract the initial atomic coordinates and names in the
......@@ -680,17 +680,16 @@ class CP2KCommonParser(CommonParser):
return wrapper
def adHoc_dft_plus_u(self):
def wrapper(parser):
self.test_electronic_structure_method = "DFT+U"
return wrapper
def adHoc_dft_plus_u(self, parser):
self.test_electronic_structure_method = "DFT+U"
def adHoc_mp2(self):
def wrapper(parser):
self.test_electronic_structure_method = "MP2"
return wrapper
def adHoc_mp2(self, parser):
self.test_electronic_structure_method = "MP2"
def adHoc_rpa(self):
def wrapper(parser):
self.test_electronic_structure_method = "RPA"
def adHoc_rpa(self, parser):
self.test_electronic_structure_method = "RPA"
def adHoc_print(self, msg):
def wrapper(parser, groups):
print(msg)
return wrapper
......@@ -70,20 +70,16 @@ class CP2KInputParser(AbstractBaseParser):
def parse(self, filepath):
#=======================================================================
# Preprocess to spell out variables and to include stuff from other
# files
self.preprocess_input(filepath)
#=======================================================================
# Gather the information from the input file
self.fill_input_tree(filepath)
#=======================================================================
# Parse everything in the input to cp2k specific metadata
self.fill_metadata()
#=======================================================================
# Parse the used XC_functionals and their parameters
xc = self.input_tree.get_section("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL")
if xc is not None:
......
......@@ -103,7 +103,7 @@ class CP2KMDParser(MainHierarchicalParser):
subMatchers=[
self.cm.quickstep_calculation(),
SM( " ENSEMBLE TYPE ="),
SM( " STEP NUMBER ="),
SM( " STEP NUMBER =\s+(?P<x_cp2k_md_step_number>{})".format(self.regexs.int)),
SM( " TIME \[fs\] =\s+(?P<x_cp2k_md_time__fs>{})".format(self.regexs.float)),
SM( " CONSERVED QUANTITY \[hartree\] =\s+(?P<x_cp2k_md_conserved_quantity__hartree>{})".format(self.regexs.float)),
SM( " CPU TIME \[s\] =\s+(?P<x_cp2k_md_cpu_time_instantaneous>{})\s+(?P<x_cp2k_md_cpu_time_average>{})".format(self.regexs.float, self.regexs.float)),
......@@ -135,6 +135,7 @@ class CP2KMDParser(MainHierarchicalParser):
forwardMatch=True,
sections=["section_method"],
subMatchers=[
self.cm.restart(),
self.cm.header(),
self.cm.quickstep_header(),
],
......@@ -260,7 +261,7 @@ class CP2KMDParser(MainHierarchicalParser):
if add_last_vel_setting == "NUMERIC" or add_last_vel_setting == "SYMBOLIC":
add_last_vel = True
last_step = self.n_steps - 1
last_step = self.n_steps
md_steps = section["x_cp2k_section_md_step"]
frame_sequence_potential_energy = []
......@@ -269,10 +270,24 @@ class CP2KMDParser(MainHierarchicalParser):
frame_sequence_kinetic_energy = []
frame_sequence_conserved_quantity = []
frame_sequence_pressure = []
ener_frames = []
single_conf_gids = []
i_md_step = 0
# Check that is the calculation starting from first frame. If not, this
# is a restarted calculation. In this case it is practically impossible
# to know from which frame number we should start reading the
# configurations, because the print settings in the previous runs may
# have been different from now, and the step numbers are hard to align.
# In this case we just parse what is found in this output file
start_step_number = md_steps[1]["x_cp2k_md_step_number"][0]
if start_step_number != 1:
self.traj_iterator = None
self.cell_iterator = None
self.vel_iterator = None
self.energy_iterator = None
for i_step in range(self.n_steps + 1):
sectionGID = backend.openSection("section_single_configuration_calculation")
......@@ -290,7 +305,7 @@ class CP2KMDParser(MainHierarchicalParser):
# Trajectory
if freqs["trajectory"][1] and self.traj_iterator is not None:
if (i_step + 1) % freqs["trajectory"][0] == 0 or (i_step == last_step and add_last_traj):
if (i_step) % freqs["trajectory"][0] == 0 or (i_step == last_step and add_last_traj):
try:
pos = next(self.traj_iterator)
except StopIteration:
......@@ -300,7 +315,7 @@ class CP2KMDParser(MainHierarchicalParser):
# Velocities
if freqs["velocities"][1] and self.vel_iterator is not None:
if (i_step + 1) % freqs["velocities"][0] == 0 or (i_step == last_step and add_last_vel):
if (i_step) % freqs["velocities"][0] == 0 or (i_step == last_step and add_last_vel):
try:
vel = next(self.vel_iterator)
except StopIteration:
......@@ -310,7 +325,7 @@ class CP2KMDParser(MainHierarchicalParser):
# Energy file
if self.energy_iterator is not None:
if (i_step + 1) % freqs["energies"][0] == 0:
if (i_step) % freqs["energies"][0] == 0:
line = next(self.energy_iterator)
time = line[1]
......@@ -326,20 +341,30 @@ class CP2KMDParser(MainHierarchicalParser):
frame_sequence_potential_energy.append(potential_energy)
frame_sequence_conserved_quantity.append(conserved_quantity)
ener_frames.append(i_step)
backend.addValue("energy_total", conserved_quantity)
backend.addValue("time_calculation", wall_time)
# Cell file
if self.cell_iterator is not None:
if (i_step + 1) % freqs["cell"][0] == 0:
if (i_step) % freqs["cell"][0] == 0:
line = next(self.cell_iterator)
cell = np.reshape(line, (3, 3))
self.backend.addArrayValues("simulation_cell", cell, unit="angstrom")
# Output file
if md_steps:
if (i_step + 1) % freqs["output"][0] == 0:
md_step = md_steps[i_md_step]
if (i_step) % freqs["output"][0] == 0:
try:
md_step = md_steps[i_md_step]
except IndexError:
# The process has stopped unexpectedly, but still we
# should report all the steps so far. So we break the
# loop and retain what was parsed.
break
# print(md_step["x_cp2k_md_step_number"])
quickstep = self.md_quicksteps[i_md_step]
if quickstep is not None:
quickstep.add_latest_value("x_cp2k_atom_forces", "atom_forces")
......@@ -358,23 +383,47 @@ class CP2KMDParser(MainHierarchicalParser):
backend.closeSection("section_single_configuration_calculation", sectionGID)
# Add the summaries to frame sequence
frame_sequence_potential_energy = convert_unit(np.array(frame_sequence_potential_energy), "hartree")
frame_sequence_kinetic_energy = convert_unit(np.array(frame_sequence_kinetic_energy), "hartree")
frame_sequence_conserved_quantity = convert_unit(np.array(frame_sequence_conserved_quantity), "hartree")
frame_sequence_time = np.array(frame_sequence_time)
frame_sequence_temperature = np.array(frame_sequence_temperature)
frame_sequence_pressure = np.array(frame_sequence_pressure)
backend.addArrayValues("frame_sequence_potential_energy", frame_sequence_potential_energy)
backend.addArrayValues("frame_sequence_kinetic_energy", frame_sequence_kinetic_energy)
backend.addArrayValues("frame_sequence_conserved_quantity", frame_sequence_conserved_quantity)
backend.addArrayValues("frame_sequence_temperature", frame_sequence_temperature)
backend.addArrayValues("frame_sequence_time", frame_sequence_time, unit="fs")
if frame_sequence_pressure.size != 0:
if len(frame_sequence_potential_energy) != 0:
frame_sequence_potential_energy = convert_unit(np.array(frame_sequence_potential_energy), "hartree")
backend.addArrayValues("frame_sequence_potential_energy", frame_sequence_potential_energy)
backend.addArrayValues("frame_sequence_potential_energy_frames", np.array(ener_frames))
mean_pot = frame_sequence_potential_energy.mean()
std_pot = frame_sequence_potential_energy.std()
backend.addArrayValues("frame_sequence_potential_energy_stats", np.array([mean_pot, std_pot]))
if len(frame_sequence_kinetic_energy) != 0:
frame_sequence_kinetic_energy = convert_unit(np.array(frame_sequence_kinetic_energy), "hartree")
backend.addArrayValues("frame_sequence_kinetic_energy", frame_sequence_kinetic_energy)
backend.addArrayValues("frame_sequence_kinetic_energy_frames", np.array(ener_frames))
mean_kin = frame_sequence_kinetic_energy.mean()
std_kin = frame_sequence_kinetic_energy.std()
backend.addArrayValues("frame_sequence_kinetic_energy_stats", np.array([mean_kin, std_kin]))
if len(frame_sequence_conserved_quantity) != 0:
frame_sequence_conserved_quantity = convert_unit(np.array(frame_sequence_conserved_quantity), "hartree")
backend.addArrayValues("frame_sequence_conserved_quantity", frame_sequence_conserved_quantity)
backend.addArrayValues("frame_sequence_conserved_quantity_frames", np.array(ener_frames))
mean_cons = frame_sequence_conserved_quantity.mean()
std_cons = frame_sequence_conserved_quantity.std()
backend.addArrayValues("frame_sequence_conserved_quantity_stats", np.array([mean_cons, std_cons]))
if len(frame_sequence_time) != 0:
frame_sequence_time = np.array(frame_sequence_time)
backend.addArrayValues("frame_sequence_time", frame_sequence_time, unit="fs")
if len(frame_sequence_temperature) != 0:
frame_sequence_temperature = np.array(frame_sequence_temperature)
backend.addArrayValues("frame_sequence_temperature", frame_sequence_temperature)
backend.addArrayValues("frame_sequence_temperature_frames", np.array(ener_frames))
mean_temp = frame_sequence_temperature.mean()
std_temp = frame_sequence_temperature.std()
backend.addArrayValues("frame_sequence_temperature_stats", np.array([mean_temp, std_temp]))
if len(frame_sequence_pressure) != 0:
frame_sequence_pressure = np.array(frame_sequence_pressure)
backend.addArrayValues("frame_sequence_pressure", frame_sequence_pressure)
mean_pressure = frame_sequence_pressure.mean()
std_pressure = frame_sequence_pressure.std()
backend.addArrayValues("frame_sequence_pressure_stats", np.array([mean_pressure, std_pressure]))
# Number of frames
backend.addValue("number_of_frames_in_sequence", self.n_steps + 1)
# Number of frames. We open a SCC for each frame, even if there is no
# information for it.
backend.addValue("number_of_frames_in_sequence", len(single_conf_gids))
# Reference to sampling method
backend.addValue("frame_sequence_to_sampling_ref", 0)
......@@ -382,32 +431,6 @@ class CP2KMDParser(MainHierarchicalParser):
# References to local frames
backend.addArrayValues("frame_sequence_local_frames_ref", np.array(single_conf_gids))
# Temperature stats
mean_temp = frame_sequence_temperature.mean()
std_temp = frame_sequence_temperature.std()
backend.addArrayValues("frame_sequence_temperature_stats", np.array([mean_temp, std_temp]))
# Potential energy stats
mean_pot = frame_sequence_potential_energy.mean()
std_pot = frame_sequence_potential_energy.std()
backend.addArrayValues("frame_sequence_potential_energy_stats", np.array([mean_pot, std_pot]))
# Kinetic energy stats
mean_kin = frame_sequence_kinetic_energy.mean()
std_kin = frame_sequence_kinetic_energy.std()
backend.addArrayValues("frame_sequence_kinetic_energy_stats", np.array([mean_kin, std_kin]))
# Conserved quantity stats
mean_cons = frame_sequence_conserved_quantity.mean()
std_cons = frame_sequence_conserved_quantity.std()
backend.addArrayValues("frame_sequence_conserved_quantity_stats", np.array([mean_cons, std_cons]))
# Pressure stats
if frame_sequence_pressure.size != 0:
mean_pressure = frame_sequence_pressure.mean()
std_pressure = frame_sequence_pressure.std()
backend.addArrayValues("frame_sequence_pressure_stats", np.array([mean_pressure, std_pressure]))
#===========================================================================
# adHoc functions
def adHoc_save_md_quickstep(self):
......
*******************************************************************************
* RESTART INFORMATION *
*******************************************************************************
* *
* RESTART FILE NAME: ./md_trajectory-restart-1.restart *
* *
* RESTARTED QUANTITIES: *
* CELL *
* COORDINATES *
* RANDOM NUMBER GENERATOR *
* VELOCITIES *
* MD COUNTERS *
* MD AVERAGES *
* BAROSTAT *
* THERMOSTAT OF BAROSTAT *
* PARTICLE THERMOSTAT *
* REAL TIME PROPAGATION *
* PINT BEAD POSITIONS *
* PINT BEAD VELOCITIES *
* PINT NOSE THERMOSTAT