Commit f4e2a529 authored by Lauri Himanen's avatar Lauri Himanen

Better reading of trajectory files, more detailed geo_opt parsing.

parent c3e50b91
......@@ -8,32 +8,79 @@ logger = logging.getLogger("nomad")
#===============================================================================
def iread(filename, index=None, file_format=None):
"""
def iread(filename, file_format=None):
"""Generator function that is used to read an atomic configuration file (MD
trajectory, geometry optimization, static snapshot) from a file one frame
at a time. Only the xyz positions are returned from the file, and no unit
conversion is done, so you have to be careful with units.
By using a generator pattern we can avoid loading the entire trajectory
file into memory. This function will instead load a chunk of the file into
memory (with MDTraj you can decide the chunk size, with ASE it seems to
always be one frame), and serve individual files from that chunk. Once the
frames in one chunk are iterated, the chunk will be garbage collected and
memory is freed.
Args:
filename: String for the file path.
file_format: String for the file format. If not given the format is
automatically detected from the extension.
Yields:
numpy array containing the atomic positions in one frame.
"""
# Test if ASE can open the file
ase_failed = False
# If file format is not explicitly stated, determine the format from the
# filename
if file_format is None:
file_format = ase.io.formats.filetype(filename)
try:
io = ase.io.formats.get_ioformat(file_format)
except ValueError:
ase_failed = True
else:
# Return the positions in a numpy array instead of an ASE Atoms object
generator = ase.io.iread(filename, index, file_format)
for atoms in generator:
pos = atoms.positions
yield pos
if ase_failed:
if file_format == "dcd":
with mdtraj.formats.DCDTrajectoryFile(filename, mode="r") as f:
file_format = filename.split(".")[-1]
# Try to open the file with MDTraj first. With a brief inspection it seems
# that MDTraj is better performance wise, because it can iteratively load a
# "chunk" of frames, and still serve the individual frames one by one. ASE
# on the other hand will iteratively read frames one by one (unnecessary
# IO).
mdtraj_chunk = 100 # How many frames MDTraj will load at once
mdtraj_failed = False
# Must use the low level MDTraj API to open files without topology.
class_format_map = {
"dcd": mdtraj.formats.DCDTrajectoryFile,
"xyz": mdtraj.formats.XYZTrajectoryFile,
"pdb": mdtraj.formats.PDBTrajectoryFile,
}
traj_class = class_format_map.get(file_format)
if traj_class is not None:
try:
with traj_class(filename, mode="r") as f:
empty = False
while not empty:
(xyz, cell_len, cell_angle) = f.read(1)
if len(xyz) == 0:
data = f.read(mdtraj_chunk)
if isinstance(data, tuple):
positions = data[0]
else:
positions = data
if len(positions) == 0:
empty = True
else:
pos = xyz[0]
yield pos
for pos in positions:
yield pos
except IOError:
logger.warning("MDTraj could not read the file '{}' with format '{}'. The contents might be malformed or wrong format used.".format(filename, file_format))
return
else:
mdtraj_failed = True
# If MDTraj didn't support the format, try ASE instead
if mdtraj_failed:
try:
io = ase.io.formats.get_ioformat(file_format)
except ValueError:
logger.error("MDTraj could not read the file '{}' with format '{}'. The contents might be malformed or wrong format used.".format(filename, file_format))
return
else:
# Return the positions in a numpy array instead of an ASE Atoms object
generator = ase.io.iread(filename, index, file_format)
for atoms in generator:
pos = atoms.positions
yield pos
......@@ -41,20 +41,70 @@ class CP2KGeoOptParser(MainHierarchicalParser):
sections=["section_frame_sequence"],
subMatchers=[
SM( " *** CONJUGATE GRADIENTS ***".replace("*", "\*"),
adHoc=self.adHoc_conjugate_gradient()
adHoc=self.adHoc_conjugate_gradient(),
otherMetaInfo=["geometry_optimization_method"]
),
SM( " -------- Informations at step",
SM( " *** BFGS ***".replace("*", "\*"),
adHoc=self.adHoc_bfgs(),
otherMetaInfo=["geometry_optimization_method"]
),
SM( " *** L-BFGS ***".replace("*", "\*"),
adHoc=self.adHoc_bfgs(),
otherMetaInfo=["geometry_optimization_method"]
),
SM( "",
forwardMatch=True,
sections=["section_single_configuration_calculation", "section_system", "x_cp2k_section_geometry_optimization_information"],
subMatchers=[
self.cm.scf(),
SM( " -------- Informations at step"),
SM( " Optimization Method =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
SM( " Total Energy =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
otherMetaInfo=["frame_sequence_potential_energy"]
),
],
adHoc=self.adHoc_step()
),
SM( " OPTIMIZATION STEP:",
name="geooptstep",
repeats=True,
sections=["section_single_configuration_calculation", "section_system"],
subMatchers=[
SM( " -------- Informations at step",
SM( "",
forwardMatch=True,
sections=["x_cp2k_section_geometry_optimization_information"],
otherMetaInfo=[
"atom_positions",
],
subMatchers=[
SM( "",
forwardMatch=True,
endReStr=" *** MNBRACK - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.cm.regex_i),
subMatchers=[
SM(" SCF WAVEFUNCTION OPTIMIZATION",
forwardMatch=True,
adHoc=self.debug(),
repeats=True,
subMatchers=[
self.cm.scf(),
]
)
]
),
SM( "",
forwardMatch=True,
endReStr=" *** BRENT - NUMBER OF ENERGY EVALUATIONS :\s+{}\s+***".replace("*", "\*").format(self.cm.regex_i),
subMatchers=[
SM(" SCF WAVEFUNCTION OPTIMIZATION",
forwardMatch=True,
repeats=True,
subMatchers=[
self.cm.scf(),
]
)
]
),
SM( " -------- Informations at step"),
SM( " Optimization Method =\s+(?P<x_cp2k_optimization_method>{})".format(self.cm.regex_word)),
SM( " Total Energy =\s+(?P<x_cp2k_optimization_energy__hartree>{})".format(self.cm.regex_f),
otherMetaInfo=["frame_sequence_potential_energy"]
......@@ -128,6 +178,7 @@ class CP2KGeoOptParser(MainHierarchicalParser):
def onClose_x_cp2k_section_geometry_optimization_information(self, backend, gIndex, section):
energy = section["x_cp2k_optimization_energy"][0]
# backend.addValue("energy_total", energy)
self.cache_service["frame_sequence_potential_energy"].append(energy)
def onClose_section_method(self, backend, gIndex, section):
......@@ -167,11 +218,19 @@ class CP2KGeoOptParser(MainHierarchicalParser):
parser.backend.addValue("geometry_optimization_method", "conjugate_gradient")
return wrapper
def adHoc_bfgs(self):
"""Called when conjugate gradient method is used.
"""
def wrapper(parser):
parser.backend.addValue("geometry_optimization_method", "bfgs")
return wrapper
def adHoc_step(self):
"""Called when all the step information has been retrieved from the
output file. Here further information is gathered from external files.
"""
def wrapper(parser):
# print "STEP"
self.cache_service["number_of_frames_in_sequence"] += 1
# Get the next position from the trajectory file
......
3
i = 1, E = -17.1705629399
O 12.2353220000 1.3766420000 10.8698800000
H 12.4743227317 2.2119518827 11.3409419924
H 11.8467551292 1.6742807006 10.1768554175
3
i = 2, E = -17.0909183389
O 12.2353220000 1.3766420000 10.8698800000
H 12.5924764029 2.1950848767 11.4203237558
H 11.7106741967 1.8499552576 10.4268554175
3
i = 3, E = -17.1274193985
O 12.2353220000 1.3766420000 10.8698800000
H 12.4482369359 2.2133669873 11.4616621265
H 11.4606741967 1.9798782219 10.6015506891
3
i = 4, E = -17.0937574332
O 12.2353220000 1.3766420000 10.8698800000
H 12.6982369359 2.1737334024 11.5767847876
H 11.4198139800 2.1307697166 10.8047560026
3
i = 5, E = -17.1485876409
O 12.2353220000 1.3766420000 10.8698800000
H 12.7821563642 2.3635027310 11.3776797418
H 11.3780224043 1.9011505428 10.8454337567
3
i = 6, E = -17.0728891866
O 12.2353220000 1.3766420000 10.8698800000
H 13.0321563642 2.3428044459 11.2972367708
H 11.2904428918 1.9829881537 10.7743888772
3
i = 7, E = -17.1312811704
O 12.2353220000 1.3766420000 10.8698800000
H 12.9582874729 2.5343526200 11.1646738017
H 11.3058153038 1.7329881537 10.8147761255
3
i = 8, E = -17.0055707525
O 12.2353220000 1.3766420000 10.8698800000
H 12.9207611285 2.7843526200 10.9746118417
H 11.4072946921 1.6483418095 10.9109449995
3
i = 9, E = -17.1089089394
O 12.2353220000 1.3766420000 10.8698800000
H 12.9582858279 2.5838862429 11.2030953006
H 11.3473488118 1.8983418095 10.9347187512
3
i = 10, E = -17.1025059018
O 12.2353220000 1.3766420000 10.8698800000
H 12.9699276332 2.3483995802 11.2136883558
H 11.4380272805 2.1483418095 10.8559349080
3
i = 11, E = -17.1128269349
O 12.2353220000 1.3766420000 10.8698800000
H 12.9926315944 2.1556445148 11.0624257355
H 11.6014797282 2.1684177603 10.6059349080
3
i = 12, E = -17.1548669608
O 12.2353220000 1.3766420000 10.8698800000
H 12.9968678537 1.9069043824 10.9126656618
H 11.8299661906 2.3529292090 10.3559349080
3
i = 13, E = -17.0737712857
O 12.2353220000 1.3766420000 10.8698800000
H 13.0928866303 1.8216627543 11.0297183438
H 11.8136905384 2.4578130397 10.1059349080
3
i = 14, E = -17.0425443160
O 12.2353220000 1.3766420000 10.8698800000
H 13.0198282444 1.6427948509 10.7797183438
H 12.0542780638 2.5548762324 10.0491402893
3
i = 15, E = -17.0930810779
O 12.2353220000 1.3766420000 10.8698800000
H 13.0873393282 1.7575690454 10.9512839088
H 11.8647019856 2.4233839307 10.0785305868
3
i = 16, E = -17.1270239811
O 12.2353220000 1.3766420000 10.8698800000
H 13.1824208739 1.7605910589 11.1198682216
H 11.6609540604 2.1733839307 10.0490593748
3
i = 17, E = -17.0430494567
O 12.2353220000 1.3766420000 10.8698800000
H 13.2302285914 1.5413130629 11.0883188482
H 11.7428130253 2.1560119230 9.7990593748
3
i = 18, E = -17.1147923888
O 12.2353220000 1.3766420000 10.8698800000
H 13.1631583265 1.6851992487 11.1048195707
H 11.7222180532 2.2051992102 9.9946856320
3
i = 19, E = -17.1347647505
O 12.2353220000 1.3766420000 10.8698800000
H 13.0918810798 1.8898708559 11.1594639000
H 11.6793556123 2.2820754874 10.2446856320
3
i = 20, E = -17.0803403792
O 12.2353220000 1.3766420000 10.8698800000
H 13.1054244873 1.9555454696 11.2504193799
H 11.5830780535 2.1560395571 10.2835196303
3
i = 21, E = -17.1281843148
O 12.2353220000 1.3766420000 10.8698800000
H 13.1221790003 1.8961898032 11.2603400171
H 11.5637907910 2.1133434743 10.3053702247
3
i = 22, E = -17.1367413893
O 12.2353220000 1.3766420000 10.8698800000
H 13.1937474287 1.6461898032 11.3077992474
H 11.4752653369 1.9245680148 10.4064797301
3
i = 23, E = -17.1162841940
O 12.2353220000 1.3766420000 10.8698800000
H 13.1943642566 1.5255837838 11.3565576970
H 11.4640642743 1.8156570137 10.4163898248
3
i = 24, E = -17.1671682289
O 12.2353220000 1.3766420000 10.8698800000
H 13.1360287682 1.6114240815 11.3419374858
H 11.5205356492 1.8675502081 10.3956558266
3
i = 25, E = -17.1789658272
O 12.2353220000 1.3766420000 10.8698800000
H 13.1532433353 1.6281130429 11.3217464498
H 11.5039563140 1.8808284007 10.3465110420
3
i = 26, E = -17.1815493293
O 12.2353220000 1.3766420000 10.8698800000
H 13.1535616059 1.6182495677 11.3355554918
H 11.5023491407 1.8800190460 10.3622412361
3
i = 27, E = -17.1832499791
O 12.2353220000 1.3766420000 10.8698800000
H 13.1519788472 1.6097861330 11.3435795587
H 11.5005777426 1.8759871209 10.3554242371
3
i = 28, E = -17.1851259622
O 12.2353220000 1.3766420000 10.8698800000
H 13.1489462026 1.5996124325 11.3447827394
H 11.4952699712 1.8740983282 10.3513030680
3
i = 29, E = -17.1888477638
O 12.2353220000 1.3766420000 10.8698800000
H 13.1462676213 1.5945603861 11.3446730252
H 11.4926394043 1.8875490857 10.3400885851
3
i = 30, E = -17.1923610739
O 12.2353220000 1.3766420000 10.8698800000
H 13.1454342809 1.5681704207 11.3423884475
H 11.4919075787 1.8914956200 10.3416594178
3
i = 31, E = -17.1964010617
O 12.2353220000 1.3766420000 10.8698800000
H 13.1466243711 1.5759100355 11.3341850017
H 11.5032973089 1.9093577891 10.3454090213
3
i = 32, E = -17.1974875181
O 12.2353220000 1.3766420000 10.8698800000
H 13.1512060243 1.5754905689 11.3322062324
H 11.5118929878 1.9011719164 10.3489806303
3
i = 33, E = -17.1975225393
O 12.2353220000 1.3766420000 10.8698800000
H 13.1509977713 1.5742251610 11.3327568225
H 11.5109348168 1.9025069451 10.3496947848
3
i = 34, E = -17.1975226623
O 12.2353220000 1.3766420000 10.8698800000
H 13.1509282837 1.5743356843 11.3327493058
H 11.5109172053 1.9024589399 10.3496816980
3
i = 35, E = -17.1975185135
O 12.2353220000 1.3766420000 10.8698800000
H 13.1509282837 1.5743356843 11.3327493058
H 11.5109172053 1.9024589399 10.3496816980
&GLOBAL
PROJECT H2O
RUN_TYPE GEO_OPT
PRINT_LEVEL MEDIUM
&END GLOBAL
&FORCE_EVAL
METHOD QS
&SUBSYS
&CELL
ABC 12.4138 12.4138 12.4138
&END CELL
&COORD
O 12.235322 1.376642 10.869880
H 12.415139 2.233125 11.257611
H 11.922476 1.573799 9.986994
&END COORD
&KIND H
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q6
&END KIND
&END SUBSYS
&DFT
BASIS_SET_FILE_NAME ../../BASIS_SET
POTENTIAL_FILE_NAME ../../GTH_POTENTIALS
&QS
EPS_DEFAULT 1.0E-7
&END QS
&MGRID
CUTOFF 100
NGRIDS 4
REL_CUTOFF 30
&END MGRID
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-05
MAX_SCF 200
&DIAGONALIZATION T
ALGORITHM STANDARD
&END DIAGONALIZATION
&MIXING T
ALPHA 0.5
METHOD PULAY_MIXING
NPULAY 5
&END MIXING
&PRINT
&RESTART OFF
&END RESTART
&END PRINT
&END SCF
&XC
&XC_FUNCTIONAL PADE
&END XC_FUNCTIONAL
&END XC
&END DFT
&END FORCE_EVAL
&MOTION
&GEO_OPT
TYPE MINIMIZATION
MAX_DR 1.0E-03
MAX_FORCE 1.0E-03
RMS_DR 1.0E-03
RMS_FORCE 1.0E-03
MAX_ITER 200
OPTIMIZER BFGS
&CG
MAX_STEEP_STEPS 0
RESTART_LIMIT 9.0E-01
&END CG
&END GEO_OPT
&CONSTRAINT
&FIXED_ATOMS
COMPONENTS_TO_FIX XYZ
LIST 1
&END FIXED_ATOMS
&END CONSTRAINT
&END MOTION
This diff is collapsed.
......@@ -5,6 +5,7 @@
&END GLOBAL
&FORCE_EVAL
METHOD QS
STRESS_TENSOR ANALYTICAL
&SUBSYS
&CELL
ABC 12.4138 12.4138 12.4138
......@@ -56,6 +57,12 @@
&END XC_FUNCTIONAL
&END XC
&END DFT
&PRINT
&FORCES ON
&END FORCES
&STRESS_TENSOR ON
&END STRESS_TENSOR
&END PRINT
&END FORCE_EVAL
&MOTION
&GEO_OPT
......
3
i = 1, E = -17.1532053605
O 12.2353220000 1.3766420000 10.8698800000
H 12.4170953422 2.2356209749 11.2608285328
H 11.9262206100 1.5726378862 10.0066645029
3
i = 2, E = -17.1700441887
O 12.2353220000 1.3766420000 10.8698800000
H 12.4451328212 2.2362525025 11.3064769492
H 11.9727113526 1.5740641827 10.0541792754
3
i = 3, E = -17.1716505833
O 12.2353220000 1.3766420000 10.8698800000
H 12.4505397042 2.2362590293 11.3152488146
H 11.9816114739 1.5743953808 10.0624752591
3
i = 4, E = -17.1756781817
O 12.2353220000 1.3766420000 10.8698800000
H 12.4670738864 2.2347914626 11.3233267637
H 11.9858588967 1.5759447348 10.0700677105
3
i = 5, E = -17.1797106759
O 12.2353220000 1.3766420000 10.8698800000
H 12.4911432948 2.2336446197 11.3267047559
H 11.9873800783 1.5767805844 10.0734304778
3
i = 6, E = -17.1923031809
O 12.2353220000 1.3766420000 10.8698800000
H 12.4986756721 2.2241606104 11.3041979903
H 11.9584313252 1.5799993771 9.9925956493
3
i = 7, E = -17.2035816903
O 12.2353220000 1.3766420000 10.8698800000
H 12.4970970374 2.2326621003 11.3221014911
H 11.9795692003 1.5742496197 10.0251857516
3
i = 8, E = -17.2086865355
O 12.2353220000 1.3766420000 10.8698800000
H 12.4942554478 2.2343926541 11.3301376765
H 11.9921253053 1.5724004464 10.0144075211
3
i = 9, E = -17.2097588432
O 12.2353220000 1.3766420000 10.8698800000
H 12.4967718017 2.2303386111 11.3356550607
H 11.9984001112 1.5748723473 10.0057063100
3
i = 10, E = -17.2097738854
O 12.2353220000 1.3766420000 10.8698800000
H 12.4956193211 2.2307094778 11.3353829598
H 11.9974953282 1.5748181892 10.0061670961
3
i = 11, E = -17.2097743147
O 12.2353220000 1.3766420000 10.8698800000
H 12.4958126530 2.2307307497 11.3354474167
H 11.9975707151 1.5748080129 10.0062997584
3
i = 12, E = -17.2097882159
O 12.2353220000 1.3766420000 10.8698800000
H 12.4958126530 2.2307307497 11.3354474167
H 11.9975707151 1.5748080129 10.0062997584
&GLOBAL
PROJECT H2O
RUN_TYPE GEO_OPT
PRINT_LEVEL MEDIUM
&END GLOBAL
&FORCE_EVAL
METHOD QS
&SUBSYS
&CELL
ABC 12.4138 12.4138 12.4138
&END CELL
&COORD
O 12.235322 1.376642 10.869880
H 12.415139 2.233125 11.257611
H 11.922476 1.573799 9.986994
&END COORD
&KIND H
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q1
&END KIND
&KIND O
BASIS_SET DZVP-GTH-PADE
POTENTIAL GTH-PADE-q6
&END KIND
&END SUBSYS
&DFT
BASIS_SET_FILE_NAME ../../BASIS_SET
POTENTIAL_FILE_NAME ../../GTH_POTENTIALS
&QS
EPS_DEFAULT 1.0E-7
&END QS
&MGRID
CUTOFF 100
NGRIDS 4
REL_CUTOFF 30
&END MGRID
&SCF
SCF_GUESS ATOMIC
EPS_SCF 1.0E-05
MAX_SCF 200
&DIAGONALIZATION T
ALGORITHM STANDARD
&END DIAGONALIZATION
&MIXING T
ALPHA 0.5
METHOD PULAY_MIXING
NPULAY 5
&END MIXING
&PRINT
&RESTART OFF
&END RESTART
&END PRINT
&END SCF
&XC
&XC_FUNCTIONAL PADE
&END XC_FUNCTIONAL
&END XC
&END DFT
&END FORCE_EVAL
&MOTION
&GEO_OPT
TYPE MINIMIZATION
MAX_DR 1.0E-03
MAX_FORCE 1.0E-03
RMS_DR 1.0E-03