Commit c3e50b91 authored by Lauri Himanen's avatar Lauri Himanen
Browse files

Support for multiple trajectory formats.

parent bb65685b
import ase.io
import ase.io.formats
import mdtraj as md
import mdtraj.formats
import numpy as np
import logging
logger = logging.getLogger("nomad")
......@@ -7,13 +11,29 @@ logger = logging.getLogger("nomad")
def iread(filename, index=None, file_format=None):
"""
"""
# Test if ASE can open the file
ase_failed = False
if file_format is None:
file_format = ase.io.formats.filetype(filename)
try:
generator = ase.io.iread(filename, index, file_format)
io = ase.io.formats.get_ioformat(file_format)
except ValueError:
logger.error("ASE could not read the file '{}'.".format(filename))
raise
ase_failed = True
else:
# Return the positions in a numpy array instead of an ASE Atoms object
generator = ase.io.iread(filename, index, file_format)
for atoms in generator:
pos = atoms.positions
yield pos
# Return the positions in a numpy array instead of the atoms object
for atoms in generator:
pos = atoms.positions
yield pos
if ase_failed:
if file_format == "dcd":
with mdtraj.formats.DCDTrajectoryFile(filename, mode="r") as f:
empty = False
while not empty:
(xyz, cell_len, cell_angle) = f.read(1)
if len(xyz) == 0:
empty = True
else:
pos = xyz[0]
yield pos
import numpy as np
import logging
logger = logging.getLogger(__name__)
from io import StringIO
try:
import re2 as re
except ImportError:
import re
logger.warning((
"re2 package not found. Using re package instead. "
"If you wan't to use re2 please see the following links:"
" https://github.com/google/re2"
" https://pypi.python.org/pypi/re2/"
))
else:
re.set_fallback_notification(re.FALLBACK_WARNING)
#===============================================================================
def iread(filepath, columns, delimiter=r"\s+", comments=r"#", start=None, end=None):
"""Used to iterate a CSV-like file. If a separator is provided the file
is iterated one configuration at a time. Only keeps one configuration
of the file in memory. If no separator is given, the whole file will be
handled.
The contents are separated into configurations whenever the separator
regex is encountered on a line.
Args:
filepath: Path to the CSV like file to be processed.
columns: List of integers indicating the columns of interest in the CSV file.
start: A regex that is used to indicate the start of a new configuration.
end: A regex that is used to indicate the end of a configuration.
comments: A regex that is used identify comments in the file that are ignored.
"""
def split_line(line):
"""Chop off comments, strip, and split at delimiter.
"""
if line.isspace():
return None
if comments:
line = compiled_comments.split(line, maxsplit=1)[0]
line = line.strip('\r\n ')
if line:
return compiled_delimiter.split(line)
else:
return []
def is_end(line):
"""Check if the given line matches the separator pattern.
Separators are used to split a file into multiple configurations.
"""
if end:
return compiled_end.search(line)
return False
def is_start(line):
"""Check if the given line matches the separator pattern.
Separators are used to split a file into multiple configurations.
"""
if start:
return compiled_start.search(line)
return False
# Precompile the different regexs before looping
compiled_delimiter = re.compile(delimiter)
if comments:
comments = (re.escape(comment) for comment in comments)
compiled_comments = re.compile('|'.join(comments))
if end:
compiled_end = re.compile(end)
if start:
compiled_start = re.compile(start)
# Columns as list
if columns is not None:
columns = list(columns)
# Start iterating
configuration = []
started = False
with open(filepath, "r") as f:
for line in f: # This actually reads line by line and only keeps the current line in memory
# If a start regex is provided, use it to detect the start of a configuration
if is_start(line):
started = True
continue
# If separator encountered, yield the stored configuration
if is_end(line):
started = False
if configuration:
yield np.array(configuration)
configuration = []
elif start is not None and started:
# Ignore comments, separate by delimiter
vals = split_line(line)
line_forces = []
if vals:
for column in columns:
try:
value = vals[column]
except IndexError:
logger.warning("The given index '{}' could not be found on the line '{}'. The given delimiter or index could be wrong.".format(column, line))
return
try:
value = float(value)
except ValueError:
logger.warning("Could not cast value '{}' to float. Currently only floating point values are accepted".format(value))
return
else:
line_forces.append(value)
configuration.append(line_forces)
# The last configuration is yielded even if separator is not present at
# the end of file or is not given at all
if configuration:
yield np.array(configuration)
......@@ -77,7 +77,15 @@ class CommonMatcher(object):
),
SM( r" CP2K\| Input file name\s+(?P<x_cp2k_input_filename>.+$)",
sections=['x_cp2k_section_filenames'],
otherMetaInfo=['section_XC_functionals', 'XC_functional_name', 'XC_functional_weight', 'XC_functional', 'configuration_periodic_dimensions', "stress_tensor_method"],
otherMetaInfo=[
"section_XC_functionals",
'XC_functional_name',
'XC_functional_weight',
'XC_functional',
'configuration_periodic_dimensions',
"stress_tensor_method",
"atom_positions",
],
subMatchers=[
SM( r" GLOBAL\| Basis set file name\s+(?P<x_cp2k_basis_set_filename>.+$)"),
SM( r" GLOBAL\| Geminal file name\s+(?P<x_cp2k_geminal_filename>.+$)"),
......@@ -423,15 +431,3 @@ class CommonMatcher(object):
for attr, callback in extractOnCloseTriggers(self).items():
onClose[attr] = [callback]
return onClose
#===========================================================================
def onclosecatcher(func):
"""Used to catch exceptions in onClose
"""
def dec():
try:
func()
except Exception, e:
print 'Decorator handled exception %s' % e
raise e
return dec
from nomadcore.simple_parser import SimpleMatcher as SM
from nomadcore.baseclasses import MainHierarchicalParser
from commonmatcher import CommonMatcher
from cp2kparser.generic.configurationreading import iread
import cp2kparser.generic.configurationreading
import cp2kparser.generic.csvparsing
from nomadcore.caching_backend import CachingLevel
import logging
import ase.io
......@@ -22,8 +23,8 @@ class CP2KGeoOptParser(MainHierarchicalParser):
#=======================================================================
# Cached values
self.cache_service.add_cache_object("number_of_frames_in_sequence", 0, single=True, update=True)
self.cache_service.add_cache_object("frame_sequence_potential_energy", [], single=True, update=True)
self.cache_service.add_cache_object("number_of_frames_in_sequence", 0)
self.cache_service.add_cache_object("frame_sequence_potential_energy", [])
#=======================================================================
# Cache levels
......@@ -50,19 +51,28 @@ class CP2KGeoOptParser(MainHierarchicalParser):
subMatchers=[
SM( " -------- Informations at step",
sections=["x_cp2k_section_geometry_optimization_information"],
otherMetaInfo=[
"atom_positions",
],
subMatchers=[
SM( " Optimization Method =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
SM( " Total Energy =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f)),
SM( " Total Energy =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
otherMetaInfo=["frame_sequence_potential_energy"]
),
SM( " Real energy change =\s+(?P<x_cp2k_optimization_energy_change__hartree>{})".format(self.cm.regex_f)),
SM( " Decrease in energy =\s+(?P<x_cp2k_optimization_energy_decrease>{})".format(self.cm.regex_word)),
SM( " Used time =\s+(?P<x_cp2k_optimization_used_time>{})".format(self.cm.regex_f)),
SM( " Max. step size =\s+(?P<x_cp2k_optimization_max_step_size__bohr>{})".format(self.cm.regex_f)),
SM( " Conv. limit for step size =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.cm.regex_f)),
SM( " Conv. limit for step size =\s+(?P<x_cp2k_optimization_step_size_convergence_limit__bohr>{})".format(self.cm.regex_f),
otherMetaInfo=["geometry_optimization_geometry_change"]
),
SM( " Convergence in step size =\s+(?P<x_cp2k_optimization_step_size_convergence>{})".format(self.cm.regex_word)),
SM( " RMS step size =\s+(?P<x_cp2k_optimization_rms_step_size__bohr>{})".format(self.cm.regex_f)),
SM( " Convergence in RMS step =\s+(?P<x_cp2k_optimization_rms_step_size_convergence>{})".format(self.cm.regex_word)),
SM( " Max. gradient =\s+(?P<x_cp2k_optimization_max_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
SM( " Conv. limit for gradients =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.cm.regex_f)),
SM( " Conv. limit for gradients =\s+(?P<x_cp2k_optimization_gradient_convergence_limit__bohr_1hartree>{})".format(self.cm.regex_f),
otherMetaInfo=["geometry_optimization_threshold_force"]
),
SM( " Conv. for gradients =\s+(?P<x_cp2k_optimization_max_gradient_convergence>{})".format(self.cm.regex_word)),
SM( " RMS gradient =\s+(?P<x_cp2k_optimization_rms_gradient__bohr_1hartree>{})".format(self.cm.regex_f)),
SM( " Conv. in RMS gradients =\s+(?P<x_cp2k_optimization_rms_gradient_convergence>{})".format(self.cm.regex_word)),
......@@ -72,7 +82,9 @@ class CP2KGeoOptParser(MainHierarchicalParser):
]
),
SM( " *** GEOMETRY OPTIMIZATION COMPLETED ***".replace("*", "\*"),
adHoc=self.adHoc_geo_opt_converged())
adHoc=self.adHoc_geo_opt_converged(),
otherMetaInfo=["geometry_optimization_converged"]
),
],
)
......@@ -120,12 +132,17 @@ class CP2KGeoOptParser(MainHierarchicalParser):
def onClose_section_method(self, backend, gIndex, section):
traj_file = self.file_service.get_file_by_id("trajectory")
if traj_file is not None:
try:
self.traj_iterator = iread(traj_file)
except ValueError:
# The format was not supported by ase
pass
traj_format = self.cache_service["trajectory_format"]
if traj_format is not None and traj_file is not None:
# Use special parsing for CP2K pdb files because they don't follow the proper syntax
if traj_format == "PDB":
self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
else:
try:
self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
except ValueError:
pass
#===========================================================================
# adHoc functions
......@@ -159,14 +176,20 @@ class CP2KGeoOptParser(MainHierarchicalParser):
# Get the next position from the trajectory file
if self.traj_iterator is not None:
pos = next(self.traj_iterator)
self.cache_service["atom_positions"] = pos
# pos = next(self.traj_iterator)
# self.cache_service["atom_positions"] = pos
try:
pos = next(self.traj_iterator)
except StopIteration:
logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
else:
self.cache_service["atom_positions"] = pos
return wrapper
def adHoc_setup_traj_file(self):
def wrapper(parser):
print "HERE"
pass
return wrapper
def debug(self):
......
......@@ -41,10 +41,12 @@ class CP2KInputParser(BasicParser):
self.input_lines = None
self.force_file_name = None
self.trajectory_file_name = ""
self.trajectory_format = "XMOL"
#=======================================================================
# Cached values
self.cache_service.add_cache_object("configuration_periodic_dimensions", single=False, update=False)
self.cache_service.add_cache_object("trajectory_format")
def parse(self):
......@@ -178,7 +180,10 @@ class CP2KInputParser(BasicParser):
# Path is relative, project name added
else:
project_name = self.input_tree.get_keyword("GLOBAL/PROJECT_NAME")
normalized_path = "{}-{}".format(project_name, path)
if path:
normalized_path = "{}-{}".format(project_name, path)
else:
normalized_path = project_name
return normalized_path
def setup_force_file_name(self):
......@@ -194,9 +199,21 @@ class CP2KInputParser(BasicParser):
def setup_trajectory_file_name(self):
"""Setup the trajectory file path.
"""
extension = "xyz"
traj_format = self.trajectory_format.upper()
self.cache_service["trajectory_format"] = traj_format
extension_map = {
"XYZ": "xyz",
"XMOL": "xyz",
"ATOMIC": "xyz",
"PDB": "pdb",
"DCD": "dcd",
}
extension = extension_map.get(traj_format)
if extension is None:
logger.error("Unknown file format '{}' for CP2K trajectory file ".format(traj_format))
return
normalized_path = self.normalize_x_cp2k_path(self.trajectory_file_name)
final_path = "{}pos-1.{}".format(normalized_path, extension)
final_path = "{}-pos-1.{}".format(normalized_path, extension)
self.file_service.set_file_id(final_path, "trajectory")
def fill_input_tree(self, file_path):
......@@ -280,6 +297,9 @@ class CP2KInputParser(BasicParser):
if path == "MOTION/PRINT/TRAJECTORY":
if keyword_name == "FILENAME":
self.trajectory_file_name = keyword_value
if path == "MOTION/PRINT/TRAJECTORY":
if keyword_name == "FORMAT":
self.trajectory_format = keyword_value
def fill_metadata(self):
"""Goes through the input data and pushes everything to the
......
......@@ -41,6 +41,7 @@ object Cp2kParser extends SimpleExternalParserGenerator(
"parser-cp2k/cp2kparser/generic/__init__.py",
"parser-cp2k/cp2kparser/generic/inputparsing.py",
"parser-cp2k/cp2kparser/generic/configurationreading.py",
"parser-cp2k/cp2kparser/generic/csvparsing.py",
"parser-cp2k/cp2kparser/versions/__init__.py",
"parser-cp2k/cp2kparser/versions/versionsetup.py",
"parser-cp2k/cp2kparser/versions/cp2k262/__init__.py",
......
&GLOBAL
PROJECT H2O
RUN_TYPE GEO_OPT
PRINT_LEVEL MEDIUM
&END GLOBAL
&FORCE_EVAL
METHOD QS
&SUBSYS
&CELL
ABC 12.4138 12.4138 12.4138
&END CELL
&COORD
O 12.235322 1.376642 10.869880
H 12.415139 2.233125 11.257611
H 11.922476 1.573799 9.986994
&END COORD
&KIND H
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q6
&END KIND
&END SUBSYS
&DFT
BASIS_SET_FILE_NAME ../../../BASIS_SET
POTENTIAL_FILE_NAME ../../../GTH_POTENTIALS
&QS
EPS_DEFAULT 1.0E-7
&END QS
&MGRID
CUTOFF 100
NGRIDS 3
REL_CUTOFF 20
&END MGRID
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-05
MAX_SCF 200
&DIAGONALIZATION T
ALGORITHM STANDARD
&END DIAGONALIZATION
&MIXING T
ALPHA 0.5
METHOD PULAY_MIXING
NPULAY 5
&END MIXING
&PRINT
&RESTART OFF
&END RESTART
&END PRINT
&END SCF
&XC
&XC_FUNCTIONAL PADE
&END XC_FUNCTIONAL
&END XC
&END DFT
&END FORCE_EVAL
&MOTION
&GEO_OPT
TYPE MINIMIZATION
MAX_DR 1.0E-03
MAX_FORCE 1.0E-03
RMS_DR 1.0E-03
RMS_FORCE 1.0E-03
MAX_ITER 200
OPTIMIZER CG
&CG
MAX_STEEP_STEPS 0
RESTART_LIMIT 9.0E-01
&END CG
&END GEO_OPT
&CONSTRAINT
&FIXED_ATOMS
COMPONENTS_TO_FIX XYZ
LIST 1
&END FIXED_ATOMS
&END CONSTRAINT
&PRINT
&TRAJECTORY
FILENAME geometry
FORMAT DCD
&END TRAJECTORY
&END PRINT
&END MOTION
TITLE PDB file created by CP2K version 2.6.2 (revision svn:15893)
AUTHOR lauri@lauri-Lenovo-Z50-70 2016-05-30 14:36:15
REMARK Step 1, E = -17.1547973121
CRYST1 12.414 12.414 12.414 90.00 90.00 90.00
ATOM 1 O 12.235 1.377 10.870 0.00 0.00 O
ATOM 2 H 12.418 2.236 11.262 0.00 0.00 H
ATOM 3 H 11.927 1.572 10.012 0.00 0.00 H
END
REMARK Step 2, E = -17.1952478380
CRYST1 12.414 12.414 12.414 90.00 90.00 90.00
ATOM 1 O 12.235 1.377 10.870 0.00 0.00 O
ATOM 2 H 12.461 2.232 11.333 0.00 0.00 H
ATOM 3 H 11.999 1.578 10.038 0.00 0.00 H
END
REMARK Step 3, E = -17.2105811202
CRYST1 12.414 12.414 12.414 90.00 90.00 90.00
ATOM 1 O 12.235 1.377 10.870 0.00 0.00 O
ATOM 2 H 12.489 2.231 11.336 0.00 0.00 H
ATOM 3 H 11.995 1.576 10.001 0.00 0.00 H
END
REMARK Step 4, E = -17.2111555704
CRYST1 12.414 12.414 12.414 90.00 90.00 90.00
ATOM 1 O 12.235 1.377 10.870 0.00 0.00 O
ATOM 2 H 12.496 2.231 11.336 0.00 0.00 H
ATOM 3 H 11.997 1.575 10.005 0.00 0.00 H
END
REMARK Step 5, E = -17.2111711743
CRYST1 12.414 12.414 12.414 90.00 90.00 90.00
ATOM 1 O 12.235 1.377 10.870 0.00 0.00 O
ATOM 2 H 12.496 2.231 11.335 0.00 0.00 H
ATOM 3 H 11.998 1.575 10.006 0.00 0.00 H
END
REMARK Step 6, E = -17.2111712050
CRYST1 12.414 12.414 12.414 90.00 90.00 90.00
ATOM 1 O 12.235 1.377 10.870 0.00 0.00 O
ATOM 2 H 12.496 2.231 11.335 0.00 0.00 H
ATOM 3 H 11.998 1.575 10.006 0.00 0.00 H
END
REMARK Step 7, E = -17.2111764458
CRYST1 12.414 12.414 12.414 90.00 90.00 90.00
ATOM 1 O 12.235 1.377 10.870 0.00 0.00 O
ATOM 2 H 12.496 2.231 11.335 0.00 0.00 H
ATOM 3 H 11.998 1.575 10.006 0.00 0.00 H
END
&GLOBAL
PROJECT H2O
RUN_TYPE GEO_OPT
PRINT_LEVEL MEDIUM
&END GLOBAL
&FORCE_EVAL
METHOD QS
&SUBSYS
&CELL
ABC 12.4138 12.4138 12.4138
&END CELL
&COORD
O 12.235322 1.376642 10.869880
H 12.415139 2.233125 11.257611
H 11.922476 1.573799 9.986994
&END COORD
&KIND H
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q6
&END KIND
&END SUBSYS
&DFT
BASIS_SET_FILE_NAME ../../../BASIS_SET
POTENTIAL_FILE_NAME ../../../GTH_POTENTIALS
&QS
EPS_DEFAULT 1.0E-7
&END QS
&MGRID
CUTOFF 100
NGRIDS 3
REL_CUTOFF 20
&END MGRID
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-05
MAX_SCF 200
&DIAGONALIZATION T
ALGORITHM STANDARD
&END DIAGONALIZATION
&MIXING T
ALPHA 0.5
METHOD PULAY_MIXING
NPULAY 5
&END MIXING
&PRINT
&RESTART OFF
&END RESTART
&END PRINT
&END SCF
&XC
&XC_FUNCTIONAL PADE
&END XC_FUNCTIONAL
&END XC
&END DFT
&END FORCE_EVAL
&MOTION
&GEO_OPT
TYPE MINIMIZATION
MAX_DR 1.0E-03
MAX_FORCE 1.0E-03
RMS_DR 1.0E-03
RMS_FORCE 1.0E-03
MAX_ITER 200
OPTIMIZER CG
&CG
MAX_STEEP_STEPS 0
RESTART_LIMIT 9.0E-01
&END CG
&END GEO_OPT
&CONSTRAINT
&FIXED_ATOMS
COMPONENTS_TO_FIX XYZ
LIST 1
&END FIXED_ATOMS
&END CONSTRAINT
&PRINT
&TRAJECTORY
FILENAME geometry
FORMAT PDB
&END TRAJECTORY
&END PRINT
&END MOTION