From 576c7685835ea05261d87fe117b7b41443e80e35 Mon Sep 17 00:00:00 2001
From: Ask Hjorth Larsen <asklarsen@gmail.com>
Date: Mon, 10 Oct 2016 15:44:35 +0200
Subject: [PATCH] parse MD variables and tons of interatomic potential tables

---
 parser/parser-gulp/main.py | 151 ++++++++++++++++++++++++++++++-------
 1 file changed, 125 insertions(+), 26 deletions(-)

diff --git a/parser/parser-gulp/main.py b/parser/parser-gulp/main.py
index cdf3162..9f0ee54 100644
--- a/parser/parser-gulp/main.py
+++ b/parser/parser-gulp/main.py
@@ -225,6 +225,11 @@ class GulpContext(object):
         data = self.data
 
         ctable = data.pop('gulp_coordinate_table', None)
+        if ctable is None:
+            return  # This is probably an MD simulation or something.
+            # I am sure we will get to suffer because of this relatively arbitrary
+            # return, but what can you do
+
         symbols = ctable[:, 0]
         gulp_labels = ctable[:, 1]
         positions = ctable[:, 2:5].astype(float)
@@ -411,6 +416,8 @@ def get_input_system_sm():
            name='input-conf',
            sections=['section_system'],
            subMatchers=[
+               SM(r'\s*Formula\s*=\s*(?P<x_gulp_formula>\S+)',
+                  name='formula'),
                SM(r'\s*Dimensionality\s*=\s*(?P<x_gulp_pbc>\d+)',
                   name='pbc'),
                SM(r'\s*Symmetry\s*:',
@@ -537,6 +544,9 @@ def get_header_sm():
            name='header',
            endReStr=r'\s*Job Started',
            subMatchers=[
+               SM(r'\*\s+.*?\s*\*$',
+                  name='Julian and friends',
+                  repeats=True),
                SM(r'\*\*+'),
                SM(r'\*\s*Version\s*=\s*(?P<program_version>\S+)',
                   name='version'),
@@ -548,7 +558,54 @@ def get_header_sm():
                          repeats=True,
                          sections=['x_gulp_section_main_keyword'],
                          name='mainkw')
-                  ])
+                  ]),
+               SM(r'\*\s*(?P<x_gulp_title>.*?)\s*\*$',
+                  name='title')
+           ])
+    return m
+
+
+def get_gulp_potential_species_pattern(nspecies):
+    """Return a regex capturing `nspecies` consecutive species columns.
+
+    Pair number i is captured by the named groups
+    x_gulp_forcefield_species_<i> and x_gulp_forcefield_speciestype_<i>.
+    """
+    tokens = []
+    for i in range(1, nspecies + 1):
+        tokens.append(r'(?P<x_gulp_forcefield_species_%d>\w+)\s*' % i)
+        tokens.append(r'(?P<x_gulp_forcefield_speciestype_%d>\S+)\s*' % i)
+    return ''.join(tokens)
+
+
+def get_forcefield_table_sm(header, columnheaderpattern, tablepattern, name):
+    """Return the matcher for one potential table of the GULP output.
+
+    header is the regex of the table title, columnheaderpattern that of
+    the column header line, and tablepattern that of one table row
+    between the two dashed rules.  name is given to the title matcher.
+    Rows are matched repeatedly under x_gulp_section_forcefield.
+    """
+    m = SM(header,
+           name=name,
+           subMatchers=[
+               SM(columnheaderpattern,
+                  # Example of a two-body table in the GULP output:
+                  #Atom Types Potential A B C D Cutoffs(Ang)
+                  # 1 2 Min Max
+                  # -------------------------------------------------------------------------------
+                  #O s La s Buckingham 0.570E+04 0.299 38.9 0.00 0.000 24.000
+                  subMatchers=[
+                      SM(r'----------+', name='potentials',
+                         endReStr=r'----------+',
+                         subMatchers=[
+                             SM(tablepattern,
+                                name='table',
+                                sections=['x_gulp_section_forcefield'],
+                                repeats=True
+                             ),
+                         ]),
+                  ]),
            ])
     return m
 
@@ -567,33 +624,74 @@ def get_general_input_sm():
                                         r'\s*(\S+)\s*(\S+)\s*\d+\s*\S+\s*(\S+)',
                                         r'-------+')
                   ]),
-               SM(r'\s*General interatomic potentials :',
-                  name='potentials'),
-               SM(r'\s*Atom\s*Types\s*Potential\s*A*\s*B\s*C\s*D',
-                  #Atom Types Potential A B C D Cutoffs(Ang)
-                  # 1 2 Min Max
-                  # -------------------------------------------------------------------------------
-                  #O s La s Buckingham 0.570E+04 0.299 38.9 0.00 0.000 24.000
+               get_forcefield_table_sm(r'\s*(General interatomic|Intramolecular) potentials :',
+                                       r'\s*Atom\s*Types\s*Potential\s*A*\s*B\s*C\s*D',
+                                       get_gulp_potential_species_pattern(2) +
+                                       # The SRGlue potential is badly written in the table and
+                                       # unparseable.  Probably a bug.  So just ignore it.
+                                       r'(SRGlue|'
+                                       r'(?P<x_gulp_forcefield_potential_name>\b.{1,14}?)\s*'
+                                       r'(?P<x_gulp_forcefield_parameter_a>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_parameter_b>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_parameter_c>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_parameter_d>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_cutoff_min>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_cutoff_max>\S+)$)',
+                                       name='interatomic'),
+               get_forcefield_table_sm(r'\s*(Intermolecular) potentials :',  # Basically a copy of the one just above
+                                       r'\s*Atom\s*Types\s*Potential\s*A*\s*B\s*C\s*D',
+                                       get_gulp_potential_species_pattern(2) +
+                                       r'(SRGlue|(?P<x_gulp_forcefield_potential_name>\b.{1,14}?)\s*'
+                                       r'(?P<x_gulp_forcefield_parameter_a>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_parameter_b>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_parameter_c>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_parameter_d>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_cutoff_min>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_cutoff_max>\S+))$',
+                                       name='intermolecular'),
+               get_forcefield_table_sm(r'\s*General Three-body potentials :',
+                                       r'\s*Atom\s*Atom\s*Atom\s*Force Constants\s*Theta',
+                                       get_gulp_potential_species_pattern(3) +
+                                       r'(?P<x_gulp_forcefield_threebody_1>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_threebody_2>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_threebody_3>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_threebody_theta>\S+)\s*',
+                                       name='3-body'),
+               get_forcefield_table_sm(r'\s*General Four-body potentials :',
+                                       r'\s*Atom Types\s*Force cst\s*Sign\s*Phase\s*Phi0',
+                                       get_gulp_potential_species_pattern(4) +
+                                       r'(?P<x_gulp_forcefield_fourbody_force_constant>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_fourbody_sign>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_fourbody_phase>\S+)\s*'
+                                       r'(?P<x_gulp_forcefield_fourbody_phi0>\S+)\s*',
+                                       name='4-body'),
+           ])
+    return m
+
+
+def get_md_sm():
+    """Return the matcher for the molecular dynamics part of the output.
+
+    Below 'Molecular dynamics production :', every '** Time : ... ps :'
+    line is matched repeatedly, with section_system and
+    section_single_configuration_calculation as its sections.
+    """
+    m = SM(r'\*\s*Molecular Dynamics',
+           name='md',
+           subMatchers=[
+               SM(r'\s*Molecular dynamics production :',
+                  name='mdstep',
                   subMatchers=[
-                      SM(r'----------+', name='potentials',
-                         endReStr=r'----------+',
+                      SM(r'\s*\*\*\s*Time :\s*(?P<x_gulp_md_time__ps>\S+)\s*ps :',
+                         sections=['section_system', 'section_single_configuration_calculation'],
+                         repeats=True,
                          subMatchers=[
-                             SM(r'(?P<x_gulp_forcefield_species_1>\w+)\s*'
-                                r'(?P<x_gulp_forcefield_speciestype_1>\S+)\s*'
-                                r'(?P<x_gulp_forcefield_species_2>\S+)\s*'
-                                r'(?P<x_gulp_forcefield_speciestype_2>\S+)\s*'
-                                # The SRGlue potential is badly written in the table and unparseable.
-                                # Probably a bug.  So just ignore it.
-                                r'(SRGlue|(?P<x_gulp_forcefield_potential_name>\b.{1,14}?)\s*'
-                                r'(?P<x_gulp_forcefield_parameter_a>\S+)\s*'
-                                r'(?P<x_gulp_forcefield_parameter_b>\S+)\s*'
-                                r'(?P<x_gulp_forcefield_parameter_c>\S+)\s*'
-                                r'(?P<x_gulp_forcefield_parameter_d>\S+)\s*\S+\s*\S+$)',
-                                name='forcefield',
-                                sections=['x_gulp_section_forcefield'],
-                                repeats=True
-                             ),
-                         ]),
+                             SM(r'\s*Kinetic energy\s*\(eV\)\s*=\s*(?P<x_gulp_md_kinetic_energy__eV>\S+)'),
+                             SM(r'\s*Potential energy\s*\(eV\)\s*=\s*(?P<x_gulp_md_potential_energy__eV>\S+)'),
+                             SM(r'\s*Total energy\s*\(eV\)\s*=\s*(?P<x_gulp_md_total_energy__eV>\S+)'),
+                             SM(r'\s*Temperature\s*\(K\)\s*=\s*(?P<x_gulp_md_temperature__K>\S+)'),
+                             SM(r'\s*Pressure\s*\(GPa\)\s*=\s*(?P<x_gulp_md_pressure__GPa>\S+)'),
+                         ])
                   ])
            ])
     return m
@@ -610,6 +708,7 @@ infoFileDescription = SM(
         get_general_input_sm(),
         get_output_config_sm(),
         get_optimise_sm(),  # note British spelling
+        get_md_sm(),
         SM(r'x^',
            name='impossible')  # 'Parse' the whole file
     ])
-- 
GitLab