parser_gaussian.py 84.8 KB
Newer Older
1 2 3 4
from __future__ import division
from builtins import str
from builtins import range
from builtins import object
5
from functools import reduce
6
import setup_paths
7
from nomadcore.simple_parser import mainFunction, SimpleMatcher as SM
8
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
9
from nomadcore.caching_backend import CachingLevel
10
from nomadcore.unit_conversion.unit_conversion import convert_unit
11 12
import os, sys, json, logging
import numpy as np
13
import ase
14 15 16 17 18 19 20
import re

############################################################
# This is the parser for the output file of Gaussian.
############################################################

logger = logging.getLogger("nomad.GaussianParser")
21

Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
22
# description of the output
23 24 25
mainFileDescription = SM(
    name = 'root',
    weak = True,
26
    forwardMatch = True, 
27 28 29
    startReStr = "",
    subMatchers = [
        SM(name = 'newRun',
30
           startReStr = r"\s*Cite this work as:",
31 32 33
           repeats = True,
           required = True,
           forwardMatch = True,
34 35
           fixedStartValues={ 'program_name': 'Gaussian', 'program_basis_set_type': 'gaussians' },
           sections   = ['section_run'],
36 37
           subMatchers = [
               SM(name = 'header',
38 39
                  startReStr = r"\s*Cite this work as:",
                  forwardMatch = True,
40 41 42 43
                  subMatchers = [
                      SM(r"\s*Cite this work as:"),
                      SM(r"\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,"),
                      SM(r"\s\*\*\*\*\*\*\*\*\*\*\*\**"),
44
                      SM(r"\s*Gaussian\s*(?P<program_version>[0-9]+):\s*(?P<x_gaussian_program_implementation>[A-Za-z0-9-.]+)\s*(?P<x_gaussian_program_release_date>[0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)"),
45
                      SM(r"\s*(?P<x_gaussian_program_execution_date>[0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)"),
46
                      ]
47
             ),
48 49 50 51 52
               SM(name = 'globalparams',
                  startReStr = r"\s*%\w*=",
                  subFlags = SM.SubFlags.Unordered,
                  forwardMatch = True,
                  subMatchers = [
53 54 55
                      SM(r"\s*%[Cc]hk=(?P<x_gaussian_chk_file>[A-Za-z0-9.]*)"),
                      SM(r"\s*%[Mm]em=(?P<x_gaussian_memory>[A-Za-z0-9.]*)"),
                      SM(r"\s*%[Nn][Pp]roc=(?P<x_gaussian_number_of_processors>[A-Za-z0-9.]*)")
56
                      ]
57 58 59 60 61 62 63
             ),
               SM (name = 'SectionMethod',
               sections = ['section_method'],
                   startReStr = r"\s*#",
                   forwardMatch = True,
                   subMatchers = [
                       SM(r"\s*(?P<x_gaussian_settings>([a-zA-Z0-9-/=(),#*+:]*\s*)+)"),
64
                       SM(r"\s*(?P<x_gaussian_settings>([a-zA-Z0-9-/=(),#*+:]*\s*)+)"),
65
                       ]
66
             ),
67
               SM(name = 'charge_multiplicity_cell_masses',
68
               sections = ['section_system'],
Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
69
		  startReStr = r"\s*Charge =",
70
                  endReStr = r"\s*Leave Link  101\s*",
71
                  subFlags = SM.SubFlags.Unordered,
Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
72 73
                  forwardMatch = True,
                  subMatchers = [
74
                      SM(r"\s*Charge =\s*(?P<x_gaussian_total_charge>[-+0-9]*) Multiplicity =\s*(?P<x_gaussian_spin_target_multiplicity>[0-9]*)"),
75
                      SM(r"\s*(Tv|Tv\s*[0]|TV|TV\s*[0])\s*(?P<x_gaussian_geometry_lattice_vector_x>[0-9.]*)\s+(?P<x_gaussian_geometry_lattice_vector_y>[0-9.]*)\s+(?P<x_gaussian_geometry_lattice_vector_z>[0-9.]*)", repeats = True),
76 77 78 79 80 81 82 83 84 85 86 87
                      SM(r"\s*AtmWgt=\s+(?P<x_gaussian_atomic_masses>[0-9.]+(\s+[0-9.]+)(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?)", repeats = True)
                      ]
             ),
            SM (name = 'SingleConfigurationCalculationWithSystemDescription',
                startReStr = "\s*Standard orientation:",
                repeats = False,
                forwardMatch = True,
                subMatchers = [
                SM (name = 'SingleConfigurationCalculation',
                  startReStr = "\s*Standard orientation:",
                  repeats = True,
                  forwardMatch = True,
88
                  sections = ['section_single_configuration_calculation'],
89 90 91
                  subMatchers = [
                  SM(name = 'geometry',
                   sections  = ['x_gaussian_section_geometry'],
92
                   startReStr = r"\s*Standard orientation:",
93
                   endReStr = r"\s*Rotational constants",
94
                      subMatchers = [
95
                      SM(r"\s+[0-9]+\s+(?P<x_gaussian_atomic_number>[0-9]+)\s+[0-9]+\s+(?P<x_gaussian_atom_x_coord__angstrom>[-+0-9EeDd.]+)\s+(?P<x_gaussian_atom_y_coord__angstrom>[-+0-9EeDd.]+)\s+(?P<x_gaussian_atom_z_coord__angstrom>[-+0-9EeDd.]+)",repeats = True),
96 97
                      SM(r"\s*Rotational constants")
                    ]
98 99 100 101 102 103 104 105 106 107
                ),
                    SM(name = 'SectionHybridCoeffs',
                    sections = ['x_gaussian_section_hybrid_coeffs'],
                    startReStr = r"\s*IExCor=",
                    forwardMatch = True,
                    subMatchers = [
                     SM(r"\s*IExCor=\s*[0-9]+\s*DFT=[A-Z]\s*Ex\+Corr=[A-Z0-9]+\s*ExCW=[0-9]\s*ScaHFX=\s*(?P<hybrid_xc_coeff1>[0-9.]+)"),
                     SM(r"\s*IExCor=\s*[0-9]+\s*DFT=[A-Z]\s*Ex\=[A-Z0-9]+\s*Corr=[a-zA-Z0-9]+\s*ExCW=[0-9]\s*ScaHFX=\s*(?P<hybrid_xc_coeff1>[0-9.]+)"),
                     SM(r"\s*ScaDFX=\s*(?P<hybrid_xc_coeff2>[0-9.]+\s*[0-9.]+\s*[0-9.]+\s*[0-9.]+)")
                    ]
108 109
                ),
                    SM(name = 'TotalEnergyScfGaussian',
110
                    sections  = ['section_scf_iteration'],
111 112
                    startReStr = r"\s*Requested convergence on RMS",
                    forwardMatch = False,
113
                    repeats = True,
114
                    subMatchers = [
115 116 117
                     SM(r"\s*Cycle\s+[0-9]+|\s*Initial guess <Sx>="),
                     SM(r"\s*E=\s*(?P<energy_total_scf_iteration__hartree>[-+0-9.]+)\s*Delta-E=\s*(?P<x_gaussian_delta_energy_total_scf_iteration__hartree>[-+0-9.]+)"),
                     SM(r"\s*(?P<x_gaussian_single_configuration_calculation_converged>SCF Done):\s*E\((?P<x_gaussian_hf_detect>[A-Z0-9]+)\)\s*=\s*(?P<x_gaussian_energy_scf__hartree>[-+0-9.]+)"),
118 119
                     SM(r"\s*NFock=\s*[0-9]+\s*Conv=(?P<x_gaussian_energy_error__hartree>[-+0-9EeDd.]+)\s*"),
                     SM(r"\s*KE=\s*(?P<x_gaussian_electronic_kinetic_energy__hartree>[-+0-9EeDd.]+)\s*"),
120
                     SM(r"\s*Annihilation of the first spin contaminant"),
121
                     SM(r"\s*[A-Z][*][*][0-9]\s*before annihilation\s*(?P<spin_S2>[0-9.]+),\s*after\s*(?P<x_gaussian_after_annihilation_spin_S2>[0-9.]+)"),
122
                     SM(r"\s*[()A-Z0-9]+\s*=\s*[-+0-9D.]+\s*[()A-Z0-9]+\s*=\s*(?P<x_gaussian_perturbation_energy__hartree>[-+0-9D.]+)"),
123
                    ]
124
                ),
125 126 127 128 129
                    SM(name = 'PerturbationEnergies',
                    sections = ['x_gaussian_section_moller_plesset'],
                    startReStr = r"\s*E2 =\s*",
                    forwardMatch = True,
                    subMatchers = [
130 131 132 133 134 135
                     SM(r"\s*E2 =\s*(?P<x_gaussian_mp2_correction_energy__hartree>[-+0-9EeDd.]+)\s*EUMP2 =\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                     SM(r"\s*E3=\s*(?P<x_gaussian_mp3_correction_energy__hartree>[-+0-9EeDd.]+)\s*EUMP3=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)\s*"),
                     SM(r"\s*E4\(DQ\)=\s*(?P<x_gaussian_mp4dq_correction_energy__hartree>[-+0-9EeDd.]+)\s*UMP4\(DQ\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)\s*"),
                     SM(r"\s*E4\(SDQ\)=\s*(?P<x_gaussian_mp4sdq_correction_energy__hartree>[-+0-9EeDd.]+)\s*UMP4\(SDQ\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                     SM(r"\s*E4\(SDTQ\)=\s*(?P<x_gaussian_mp4sdtq_correction_energy__hartree>[-+0-9EeDd.]+)\s*UMP4\(SDTQ\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                     SM(r"\s*DEMP5 =\s*(?P<x_gaussian_mp5_correction_energy__hartree>[-+0-9EeDd.]+)\s*MP5 =\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
136 137 138 139 140 141 142 143
                     ]
                ),
                    SM(name = 'CoupledClusterEnergies',
                    sections = ['x_gaussian_section_coupled_cluster'],
                    startReStr = r"\s*CCSD\(T\)\s*",
                    endReStr = r"\s*Population analysis using the SCF density",
                    forwardMatch = True,
                    subMatchers = [
144 145
                     SM(r"\s*DE\(Corr\)=\s*(?P<x_gaussian_ccsd_correction_energy__hartree>[-+0-9EeDd.]+)\s*E\(CORR\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
                     SM(r"\s*CCSD\(T\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
146 147 148 149 150 151 152 153
                     ]
                ),
                    SM(name = 'QuadraticCIEnergies',
                    sections = ['x_gaussian_section_quadratic_ci'],
                    startReStr = r"\s*Quadratic Configuration Interaction\s*",
                    endReStr = r"\s*Population analysis using the SCF density",
                    forwardMatch = True,
                    subMatchers = [
154 155 156 157
                     SM(r"\s*DE\(Z\)=\s*(?P<x_gaussian_qcisd_correction_energy__hartree>[-+0-9EeDd.]+)\s*E\(Z\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
                     SM(r"\s*DE\(Corr\)=\s*(?P<x_gaussian_qcisd_correction_energy__hartree>[-+0-9EeDd.]+)\s*E\(CORR\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
                     SM(r"\s*QCISD\(T\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                     SM(r"\s*DE5\s*=\s*(?P<x_gaussian_qcisdtq_correction_energy__hartree>[-+0-9EeDd.]+)\s*QCISD\(TQ\)\s*=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
158 159 160 161 162 163 164 165
                     ]
                ),
                    SM(name = 'CIEnergies',
                    sections = ['x_gaussian_section_ci'],
                    startReStr = r"\s*Configuration Interaction\s*",
                    endReStr = r"\s*Population analysis using the SCF density",
                    forwardMatch = True,
                    subMatchers = [
166
                     SM(r"\s*DE\(CI\)=\s*(?P<x_gaussian_ci_correction_energy__hartree>[-+0-9EeDd.]+)\s*E\(CI\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
167 168 169 170 171 172 173 174 175 176
                     ]
                ),
                    SM(name = 'SemiempiricalEnergies',
                    sections = ['x_gaussian_section_semiempirical'],
                    startReStr = r"\s*[-A-Z0-9]+\s*calculation of energy[a-zA-Z,. ]+\s*",
                    endReStr = r"\s*Population analysis using the SCF density",
                    forwardMatch = True,
                    subMatchers = [
                     SM(r"\s*(?P<x_gaussian_semiempirical_method>[-A-Z0-9]+\s*calculation of energy[a-zA-Z,. ]+)"),
                     SM(r"\s*It=\s*[0-9]+\s*PL=\s*[-+0-9EeDd.]+\s*DiagD=[A-Z]\s*ESCF=\s*(?P<x_gaussian_semiempirical_energy>[-+0-9.]+)\s*", repeats = True),
177
                     SM(r"\s*Energy=\s*(?P<energy_total>[-+0-9EeDd.]+)"),
178 179 180 181 182 183 184 185 186
                     ]
                ),
                    SM(name = 'MolecularMechanicsEnergies',
                    sections = ['x_gaussian_section_molmech'],
                    startReStr = r"\s*[-A-Z0-9]+\s*calculation of energy[a-zA-Z,. ]+\s*",
                    forwardMatch = False,
                    repeats = True,
                    subMatchers = [
                     SM(r"\s*(?P<x_gaussian_molmech_method>[a-zA-Z0-9]+\s*calculation of energy[a-z,. ]+)"),
187
                     SM(r"\s*Energy=\s*(?P<energy_total>[-+0-9EeDd.]+)\s*NIter=\s*[0-9.]"),
188 189 190 191 192 193 194 195 196 197
                     ]
                ),
                  SM(name = 'ExcitedStates',
                   sections  = ['x_gaussian_section_excited_initial'],
                   startReStr = r"\s*Excitation energies and oscillator strengths",
                   forwardMatch = False,
                   repeats = True,
                   subMatchers = [
                    SM(name = 'ExcitedStates',
                    sections = ['x_gaussian_section_excited'],
198 199
                    startReStr = r"\s*Excited State",
                    forwardMatch = False,
200 201 202 203 204 205 206 207 208 209 210 211
                    repeats = True,
                    subMatchers = [
                     SM(r"\s*Excited State\s*(?P<x_gaussian_excited_state_number>[0-9]+):\s*[-+0-9A-Za-z.\?]+\s*(?P<x_gaussian_excited_energy__eV>[0-9.]+)\s*eV\s*[0-9.]+\s*nm\s*f=(?P<x_gaussian_excited_oscstrength>[0-9.]+)\s*<[A-Z][*][*][0-9]>=(?P<x_gaussian_excited_spin_squared>[0-9.]+)"),
                     SM(r"\s*(?P<x_gaussian_excited_transition>[0-9A-Z]+\s*->\s*[0-9A-Z]+\s*[-+0-9.]+)", repeats = True),
                     SM(r"\s*This state for optimization|\r?\n"),
                     ]
                    )
                   ]
               ),  
                  SM(name = 'CASSCFStates',
                   sections = ['x_gaussian_section_casscf'],
                   startReStr = r"\s*EIGENVALUES AND\s*",
212
                   forwardMatch = True,
213
                   repeats = False,
214
                   subMatchers = [
215 216 217 218
                    SM(r"\s*EIGENVALUES AND\s*"),
                    SM(r"\s*\(\s*[0-9]+\)\s*EIGENVALUE\s*(?P<x_gaussian_casscf_energy__hartree>[-+0-9.]+)", repeats = True),
                   ]
               ),
219
                  SM(name = 'Geometry_optimization',
220
                  sections  = ['x_gaussian_section_geometry_optimization_info'],
221
                  startReStr = r"\s*Optimization completed.",
222
                  forwardMatch = True,
223
                  subMatchers = [
224
                  SM(r"\s*(?P<x_gaussian_geometry_optimization_converged>Optimization completed)"),
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
                  SM(r"\s*(?P<x_gaussian_geometry_optimization_converged>Optimization stopped)"),
                  SM(r"\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[-+0-9EeDd.]+\s+[-+0-9EeDd.]+\s+[-+0-9EeDd.]+",repeats = True),
                  SM(r"\s*Distance matrix|\s*Rotational constants|\s*Stoichiometry")
                    ]
               ),
                SM(name = 'Orbital symmetries',
                sections = ['x_gaussian_section_orbital_symmetries'],
                startReStr = r"\s+Population analysis",
                subFlags = SM.SubFlags.Sequenced,
                subMatchers = [
                      SM(r"\s*Orbital symmetries"), 
                      SM(r"\s*Alpha Orbitals"),
                      SM(r"\s*Occupied\s+(?P<x_gaussian_alpha_occ_symmetry_values>\((.+)\))?"),
                      SM(r"\s+(?P<x_gaussian_alpha_occ_symmetry_values>\((.+)\)?)", repeats = True),
                      SM(r"\s*Virtual\s+(?P<x_gaussian_alpha_vir_symmetry_values>\((.+)\))?"),
                      SM(r"\s+(?P<x_gaussian_alpha_vir_symmetry_values>\((.+)\)?)", repeats = True),
                      SM(r"\s*Beta Orbitals"),
                      SM(r"\s*Occupied\s+(?P<x_gaussian_beta_occ_symmetry_values>\((.+)\))?"),
                      SM(r"\s+(?P<x_gaussian_beta_occ_symmetry_values>\((.+)\)?)", repeats = True),
                      SM(r"\s*Virtual\s+(?P<x_gaussian_beta_vir_symmetry_values>\((.+)\))?"),
                      SM(r"\s+(?P<x_gaussian_beta_vir_symmetry_values>\((.+)\)?)", repeats = True),
                      ]
             ),
                SM(name = 'Electronicstatesymmetry',
                sections = ['x_gaussian_section_symmetry'],
                startReStr = r"\s*The electronic state is",
                forwardMatch = True,
                subMatchers = [
253
                      SM(r"\s*The electronic state is\s*(?P<x_gaussian_elstate_symmetry>[A-Z0-9-']+)[.]")
254 255 256
                      ]
             ),
                SM(name = 'Eigenvalues',
257
                sections = ['section_eigenvalues'],
258 259 260 261 262 263 264 265 266 267 268
                startReStr = r"\s*Alpha  occ. eigenvalues --",
                forwardMatch = True,
                subFlags = SM.SubFlags.Sequenced,
                subMatchers = [
                      SM(r"\s*Alpha  occ. eigenvalues --\s+(?P<x_gaussian_alpha_occ_eigenvalues_values>(.+)?)", repeats = True), 
                      SM(r"\s*Alpha virt. eigenvalues --\s+(?P<x_gaussian_alpha_vir_eigenvalues_values>(.+)?)", repeats = True),
                      SM(r"\s*Beta  occ. eigenvalues --\s+(?P<x_gaussian_beta_occ_eigenvalues_values>(.+)?)", repeats = True),
                      SM(r"\s*Beta virt. eigenvalues --\s+(?P<x_gaussian_beta_vir_eigenvalues_values>(.+)?)", repeats = True),
                      SM(r"\s*- Condensed to atoms (all electrons)"),
                      ]
             ),
269 270 271 272 273 274 275 276 277 278
                   SM(name = 'ForcesGaussian',
                   sections  = ['x_gaussian_section_atom_forces'],
                   startReStr = "\s*Center\s+Atomic\s+Forces ",
                   forwardMatch = True,
                   subMatchers = [
                    SM(r"\s*Center\s+Atomic\s+Forces "),
                    SM(r"\s+[0-9]+\s+[0-9]+\s+(?P<x_gaussian_atom_x_force__hartree_bohr_1>[-+0-9EeDd.]+)\s+(?P<x_gaussian_atom_y_force__hartree_bohr_1>[-+0-9EeDd.]+)\s+(?P<x_gaussian_atom_z_force__hartree_bohr_1>[-+0-9EeDd.]+)",repeats = True),
                    SM(r"\s*Cartesian Forces:\s+")
                    ]
                ),
279 280
                SM(name = 'Multipoles',
                  sections = ['x_gaussian_section_molecular_multipoles'],
281 282
                  startReStr = r"\s*Electronic spatial extent",
                  forwardMatch = False,
283
                  subMatchers = [
284
                      SM(r"\s*Charge=(?P<charge>\s*[-0-9.]+)"),
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
                      SM(r"\s*Dipole moment "), 
                      SM(r"\s+\w+=\s+(?P<dipole_moment_x>[-+0-9EeDd.]+)\s+\w+=\s+(?P<dipole_moment_y>[-+0-9EeDd.]+)\s+\w+=\s+(?P<dipole_moment_z>[-+0-9EeDd.]+)"),
                      SM(r"\s*Quadrupole moment"), 
                      SM(r"\s+\w+=\s+(?P<quadrupole_moment_xx>[0-9-.]+)\s+\w+=\s+(?P<quadrupole_moment_yy>[0-9-.]+)\s+\w+=\s+(?P<quadrupole_moment_zz>[0-9-.]+)"), 
                      SM(r"\s+\w+=\s+(?P<quadrupole_moment_xy>[0-9-.]+)\s+\w+=\s+(?P<quadrupole_moment_xz>[0-9-.]+)\s+\w+=\s+(?P<quadrupole_moment_yz>[0-9-.]+)"),
                      SM(r"\s*Traceless Quadrupole moment"),
                      SM(r"\s+\w+=\s+[0-9-.]+\s+\w+=\s+[0-9-.]+\s+\w+=\s+[0-9-.]+"),
                      SM(r"\s+\w+=\s+[0-9-.]+\s+\w+=\s+[0-9-.]+\s+\w+=\s+[0-9-.]+"),
                      SM(r"\s*Octapole moment"),
                      SM(r"\s+\w+=\s+(?P<octapole_moment_xxx>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_yyy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_zzz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_xyy>[-+0-9EeDd.]+)"),
                      SM(r"\s+\w+=\s+(?P<octapole_moment_xxy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_xxz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_xzz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_yzz>[-+0-9EeDd.]+)"),
                      SM(r"\s+\w+=\s+(?P<octapole_moment_yyz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_xyz>[-+0-9EeDd.]+)"),
                      SM(r"\s*Hexadecapole moment"),
                      SM(r"\s+\w+=\s+(?P<hexadecapole_moment_xxxx>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyyy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_zzzz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_xxxy>[-+0-9EeDd.]+)"),
                      SM(r"\s+\w+=\s+(?P<hexadecapole_moment_xxxz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyyx>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyyz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_zzzx>[-+0-9EeDd.]+)"),
                      SM(r"\s+\w+=\s+(?P<hexadecapole_moment_zzzy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_xxyy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_xxzz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyzz>[-+0-9EeDd.]+)"),
                      SM(r"\s+\w+=\s+(?P<hexadecapole_moment_xxyz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyxz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_zzxy>[-+0-9EeDd.]+)")
                      ]
             ),    
304
                SM (name = 'Frequencies',
305
                     sections = ['x_gaussian_section_frequencies'],
306
                     startReStr = r"\s*Frequencies --\s+(?:(?:[-]?[0-9]+\.\d*)\s*(?:[-]?[-0-9]+\.\d*)?\s*(?:[-]?[-0-9]+\.\d*)?)",
307 308
                     endReStr = r"\s*- Thermochemistry -",
                     forwardMatch = True,
309
                     repeats = False,
310
                     subMatchers = [
311 312 313 314 315 316 317 318 319 320 321 322 323
                       SM (name = 'Frequencies',
                         startReStr = r"\s*Frequencies --\s+(?:(?:[-]?[0-9]+\.\d*)\s*(?:[-]?[-0-9]+\.\d*)?\s*(?:[-]?[-0-9]+\.\d*)?)",
                         forwardMatch = True,
                         repeats = True,
                         subFlags = SM.SubFlags.Unordered,
                         subMatchers = [
                           SM(r"\s*Frequencies --\s+(?P<x_gaussian_frequency_values>([-]?[0-9]+\.\d*)\s*([-]?[-0-9]+\.\d*)?\s*([-]?[-0-9]+\.\d*)?)", repeats = True),
                           SM(r"\s*Red. masses --\s+(?P<x_gaussian_reduced_masses>(.+))", repeats = True),
                           SM(r"\s*[0-9]+\s*[0-9]+\s*(?P<x_gaussian_normal_modes>([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+))", repeats = True),
                           SM(r"\s*[0-9]+\s*([0-9]+)?\s*([0-9]+)?"),
                         ])
                     ]
                ),
324 325 326
                SM(name = 'Thermochemistry',
                sections = ['x_gaussian_section_thermochem'],
                startReStr = r"\s*Temperature",
327 328
                forwardMatch = True,
                subMatchers = [
329 330
                      SM(r"\s*Temperature\s*(?P<x_gaussian_temperature>[0-9.]+)\s*Kelvin.\s*Pressure\s*(?P<x_gaussian_pressure__atmosphere>[0-9.]+)\s*Atm."),
                      SM(r"\s*Principal axes and moments of inertia in atomic units:"),
331
                      SM(r"\s*Eigenvalues --\s*(?P<x_gaussian_moment_of_inertia_X__amu_angstrom_angstrom>(\d+\.\d{5}))\s*?(?P<x_gaussian_moment_of_inertia_Y__amu_angstrom_angstrom>(\d+\.\d{5}))\s*?(?P<x_gaussian_moment_of_inertia_Z__amu_angstrom_angstrom>(\d+\.\d{5}))"),
332 333 334 335 336 337 338 339 340 341 342 343 344 345
                      SM(r"\s*Zero-point correction=\s*(?P<x_gaussian_zero_point_energy__hartree>[0-9.]+)"),
                      SM(r"\s*Thermal correction to Energy=\s*(?P<x_gaussian_thermal_correction_energy__hartree>[0-9.]+)"),
                      SM(r"\s*Thermal correction to Enthalpy=\s*(?P<x_gaussian_thermal_correction_enthalpy__hartree>[0-9.]+)"),
                      SM(r"\s*Thermal correction to Gibbs Free Energy=\s*(?P<x_gaussian_thermal_correction_free_energy__hartree>[0-9.]+)"), 
                      ]
             ),       
                SM(name = 'Forceconstantmatrix',
                sections = ['x_gaussian_section_force_constant_matrix'],
                startReStr = r"\s*Force constants in Cartesian coordinates",
                forwardMatch = True,
                subMatchers = [
                      SM(r"\s*Force constants in Cartesian coordinates"),
                      SM(r"\s*[0-9]+\s*(?P<x_gaussian_force_constants>(\-?\d+\.\d*[-+D0-9]+)\s*(\-?\d+\.\d*[-+D0-9]+)?\s*(\-?\d+\.\d*[-+D0-9]+)?\s*(\-?\d+\.\d*[-+D0-9]+)?\s*(\-?\d+\.\d*[-+D0-9]+)?)", repeats = True),
                      SM(r"\s*Force constants in internal coordinates")
346
                      ]
347 348 349
             ),
                SM(name = 'CompositeModelEnergies',
                sections = ['x_gaussian_section_models'],
350
                startReStr = r"\s*Temperature=\s*",
351 352 353
                forwardMatch = False,
                repeats = True,
                subMatchers = [
354 355 356 357 358 359 360 361 362 363 364 365 366 367
                 SM(r"\s*G1\(0 K\)=\s*[-+0-9.]+\s*G1 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*G2\(0 K\)=\s*[-+0-9.]+\s*G2 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*G2MP2\(0 K\)=\s*[-+0-9.]+\s*G2MP2 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*G3\(0 K\)=\s*[-+0-9.]+\s*G3 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*G3MP2\(0 K\)=\s*[-+0-9.]+\s*G3MP2 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*G4\(0 K\)=\s*[-+0-9.]+\s*G4 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*G4MP2\(0 K\)=\s*[-+0-9.]+\s*G4MP2 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*CBS-4 \(0 K\)=\s*[-+0-9.]+\s*CBS-4 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*CBS-q \(0 K\)=\s*[-+0-9.]+\s*CBS-q Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*CBS-Q \(0 K\)=\s*[-+0-9.]+\s*CBS-Q Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*CBS-QB3 \(0 K\)=\s*[-+0-9.]+\s*CBS-QB3 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*W1U  \(0 K\)=\s*[-+0-9.]+\s*W1U   Electronic Energy\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*W1RO  \(0 K\)=\s*[-+0-9.]+\s*W1RO  Electronic Energy\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                 SM(r"\s*W1BD  \(0 K\)=\s*[-+0-9.]+\s*W1BD  Electronic Energy\s*(?P<energy_total__hartree>[-+0-9.]+)"),
368
                       ]
369 370 371 372 373 374 375 376
             ),
                SM(name = 'run times',
                  sections = ['x_gaussian_section_times'],
                  startReStr = r"\s*Job cpu time:",
                  forwardMatch = True,
                  subMatchers = [
                      SM(r"\s*Job cpu time:\s*(?P<x_gaussian_program_cpu_time>\s*[0-9]+\s*[a-z]+\s*[0-9]+\s*[a-z]+\s*[0-9]+\s*[a-z]+\s*[0-9.]+\s*[a-z]+)"),
                      SM(r"\s*Normal termination of Gaussian\s*[0-9]+\s* at \s*(?P<x_gaussian_program_termination_date>[A-Za-z]+\s*[A-Za-z]+\s*[0-9]+\s*[0-9:]+\s*[0-9]+)"),
377
                      ]
378 379 380 381 382 383
             )
          ])
        ])
      ])
    ])

384 385 386 387 388 389 390 391 392
# loading metadata from nomad-meta-info/meta_info/nomad_meta_info/gaussian.nomadmetainfo.json
metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../../../../nomad-meta-info/meta_info/nomad_meta_info/gaussian.nomadmetainfo.json"))
metaInfoEnv, warnings = loadJsonFile(filePath = metaInfoPath, dependencyLoader = None, extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS, uri = None)

parserInfo = {
  "name": "parser_gaussian",
  "version": "1.0"
}

393
class GaussianParserContext(object):
394 395 396 397 398 399 400 401 402 403 404 405
      """Context for parsing Gaussian output file.

        This class keeps tracks of several Gaussian settings to adjust the parsing to them.
        The onClose_ functions allow processing and writing of cached values after a section is closed.
        They take the following arguments:
        backend: Class that takes care of writing and caching of metadata.
        gIndex: Index of the section that is closed.
        section: The cached values and sections that were found in the section that is closed.
      """
      def __init__(self):
        # dictionary of energy values, which are tracked between SCF iterations and written after convergence
        self.totalEnergyList = {
406 407 408
                                'x_gaussian_hf_detect': None,
                                'x_gaussian_energy_scf': None,
                                'x_gaussian_perturbation_energy': None,
409 410 411
                                'x_gaussian_electronic_kinetic_energy': None,
                                'x_gaussian_energy_electrostatic': None,
                                'x_gaussian_energy_error': None,
412
                               }
413

414 415 416 417 418 419
      def initialize_values(self):
        """Initializes the values of certain variables.

        This allows a consistent setting and resetting of the variables,
        when the parsing starts and when a section_run closes.
        """
420 421
        self.secMethodIndex = None
        self.secSystemDescriptionIndex = None
422 423
        # start with -1 since zeroth iteration is the initialization
        self.scfIterNr = -1
424
        self.singleConfCalcs = []
425
        self.scfConvergence = False
426
        self.geoConvergence = False
427
        self.scfenergyconverged = 0.0
428 429 430
        self.scfkineticenergyconverged = 0.0
        self.scfelectrostaticenergy = 0.0
        self.periodicCalc = False
431 432

      def startedParsing(self, path, parser):
433
        self.parser = parser
434 435
        # save metadata
        self.metaInfoEnv = self.parser.parserBuilder.metaInfoEnv
436 437
        # allows to reset values if the same superContext is used to parse different files
        self.initialize_values()
438

439 440 441 442 443 444
      def onClose_section_run(self, backend, gIndex, section):
          """Trigger called when section_run is closed.

          Write convergence of geometry optimization.
          Variables are reset to ensure clean start for new run.
          """
Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
445 446
          global sampling_method
          sampling_method = ""
447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467
          # write geometry optimization convergence
          gIndexTmp = backend.openSection('section_frame_sequence')
          backend.addValue('geometry_optimization_converged', self.geoConvergence)
          backend.closeSection('section_frame_sequence', gIndexTmp)
          # frame sequence
          if self.geoConvergence:
              sampling_method = "geometry_optimization"
          elif len(self.singleConfCalcs) > 1:
              pass # to do
          else:
              return
          samplingGIndex = backend.openSection("section_sampling_method")
          backend.addValue("sampling_method", sampling_method)
          backend.closeSection("section_sampling_method", samplingGIndex)
          frameSequenceGIndex = backend.openSection("section_frame_sequence")
          backend.addValue("frame_sequence_to_sampling_ref", samplingGIndex)
          backend.addArrayValues("frame_sequence_local_frames_ref", np.asarray(self.singleConfCalcs))
          backend.closeSection("section_frame_sequence", frameSequenceGIndex)
          # reset all variables
          self.initialize_values()

468 469 470
      def onClose_x_gaussian_section_geometry(self, backend, gIndex, section):
        xCoord = section["x_gaussian_atom_x_coord"]
        yCoord = section["x_gaussian_atom_y_coord"]
471 472
        zCoord = section["x_gaussian_atom_z_coord"]
        numbers = section["x_gaussian_atomic_number"]
473
        atom_coords = np.zeros((len(xCoord),3), dtype=float)
474
        atom_numbers = np.zeros(len(xCoord), dtype=int)
475
        atomic_symbols = np.empty((len(xCoord)), dtype=object)
476
        for i in range(len(xCoord)):
477 478 479 480 481 482
           atom_coords[i,0] = xCoord[i]
           atom_coords[i,1] = yCoord[i]
           atom_coords[i,2] = zCoord[i]
        for i in range(len(xCoord)):
          atom_numbers[i] = numbers[i]
          atomic_symbols[i] = ase.data.chemical_symbols[atom_numbers[i]]
483 484 485
        gIndexTmp = backend.openSection("section_system")
        backend.addArrayValues("atom_labels", atomic_symbols)
        backend.addArrayValues("atom_positions", atom_coords)
486
        backend.addValue("x_gaussian_number_of_atoms",len(atomic_symbols))
487
        backend.closeSection("section_system", gIndexTmp)
488 489 490 491 492 493 494 495 496 497

      def onClose_x_gaussian_section_atom_forces(self, backend, gIndex, section):
        xForce = section["x_gaussian_atom_x_force"]
        yForce = section["x_gaussian_atom_y_force"]
        zForce = section["x_gaussian_atom_z_force"]
        atom_forces = np.zeros((len(xForce),3), dtype=float)
        for i in range(len(xForce)):
           atom_forces[i,0] = xForce[i]
           atom_forces[i,1] = yForce[i]
           atom_forces[i,2] = zForce[i]
498
        backend.addArrayValues("atom_forces_raw", atom_forces)
499

500 501
      def onOpen_section_single_configuration_calculation(self, backend, gIndex, section):
          self.singleConfCalcs.append(gIndex)
502

503
      def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
504 505 506 507 508
        """Trigger called when section_single_configuration_calculation is closed.
         Write number of SCF iterations and convergence.
         Check for convergence of geometry optimization.
        """
        # write SCF convergence and reset
509
        backend.addValue('single_configuration_calculation_converged', self.scfConvergence)
510 511 512
        self.scfConvergence = False
        # start with -1 since zeroth iteration is the initialization
        self.scfIterNr = -1
513 514 515 516 517 518 519 520 521 522 523
        # write the references to section_method and section_system
        backend.addValue('single_configuration_to_calculation_method_ref', self.secMethodIndex)
        backend.addValue('single_configuration_calculation_to_system_ref', self.secSystemDescriptionIndex)

      def onClose_x_gaussian_section_geometry_optimization_info(self, backend, gIndex, section):
        # check for geometry optimization convergence
        if section['x_gaussian_geometry_optimization_converged'] is not None:
           if section['x_gaussian_geometry_optimization_converged'] == ['Optimization completed']:
              self.geoConvergence = True
           elif section['x_gaussian_geometry_optimization_converged'] == ['Optimization stopped']:
              self.geoConvergence = False
524

525
      def onClose_section_scf_iteration(self, backend, gIndex, section):
526 527 528 529
        # count number of SCF iterations
        self.scfIterNr += 1
        # check for SCF convergence
        if section['x_gaussian_single_configuration_calculation_converged'] is not None:
530
           self.scfConvergence = True
531 532 533 534 535 536 537 538 539 540 541 542 543 544
           if section['x_gaussian_energy_scf']:
               self.scfenergyconverged = float(str(section['x_gaussian_energy_scf']).replace("[","").replace("]","").replace("D","E"))
               self.scfcharacter = section['x_gaussian_hf_detect']
               if (self.scfcharacter != ['RHF'] and self.scfcharacter != ['ROHF'] and self.scfcharacter != ['UHF']):
                  self.energytotal = self.scfenergyconverged
                  backend.addValue('energy_total', self.energytotal)
               else:
                  pass
               if section['x_gaussian_electronic_kinetic_energy']:
                  self.scfkineticenergyconverged = float(str(section['x_gaussian_electronic_kinetic_energy']).replace("[","").replace("]","").replace("D","E"))
                  self.scfelectrostaticenergy = self.scfenergyconverged - self.scfkineticenergyconverged
                  backend.addValue('x_gaussian_energy_electrostatic', self.scfelectrostaticenergy)

      def onClose_section_eigenvalues(self, backend, gIndex, section):
545 546
          eigenenergies = str(section["x_gaussian_alpha_occ_eigenvalues_values"])
          eigenen1 = []
547
          energy = [float(f) for f in eigenenergies[1:].replace("'","").replace(",","").replace("]","").replace("one","").replace(" ."," 0.").replace(" -."," -0.").replace("\\n","").replace("-"," -").split()]
548
          eigenen1 = np.append(eigenen1, energy)
549 550 551 552 553
          if(section["x_gaussian_beta_occ_eigenvalues_values"]):
             occoccupationsalp = np.ones(len(eigenen1), dtype=float)
          else:
             occoccupationsalp = 2.0 * np.ones(len(eigenen1), dtype=float)

554 555
          eigenenergies = str(section["x_gaussian_alpha_vir_eigenvalues_values"])
          eigenen2 = []
556
          energy = [float(f) for f in eigenenergies[1:].replace("'","").replace(",","").replace("]","").replace("one","").replace(" ."," 0.").replace(" -."," -0.").replace("\\n","").replace("-"," -").split()]
557 558
          eigenen2 = np.append(eigenen2, energy)
          viroccupationsalp = np.zeros(len(eigenen2), dtype=float)
559 560 561 562 563 564 565 566 567 568 569
          eigenenconalp = np.zeros(len(eigenen1) + len(eigenen2))
          eigenenconalp = np.concatenate((eigenen1,eigenen2), axis=0)
          eigenenconalp = convert_unit(eigenenconalp, "hartree", "J")
          occupconalp = np.concatenate((occoccupationsalp, viroccupationsalp), axis=0)
          eigenenconalpnew = np.reshape(eigenenconalp,(1, 1, len(eigenenconalp)))
          occupconalpnew = np.reshape(occupconalp,(1, 1, len(occupconalp)))
          if(section["x_gaussian_beta_occ_eigenvalues_values"]):
             pass
          else:
             backend.addArrayValues("eigenvalues_values", eigenenconalpnew)
             backend.addArrayValues("eigenvalues_occupation", occupconalpnew)
570

571 572 573
          if(section["x_gaussian_beta_occ_eigenvalues_values"]):
             eigenenergies = str(section["x_gaussian_beta_occ_eigenvalues_values"])
             eigenen1 = []
574
             energy = [float(f) for f in eigenenergies[1:].replace("'","").replace(",","").replace("]","").replace("one","").replace(" ."," 0.").replace(" -."," -0.").replace("\\n","").replace("-"," -").split()]
575 576 577 578
             eigenen1 = np.append(eigenen1, energy)
             occoccupationsbet = np.ones(len(eigenen1), dtype=float)
             eigenenergies = str(section["x_gaussian_beta_vir_eigenvalues_values"])
             eigenen2 = []
579
             energy = [float(f) for f in eigenenergies[1:].replace("'","").replace(",","").replace("]","").replace("one","").replace(" ."," 0.").replace(" -."," -0.").replace("\\n","").replace("-"," -").split()]
580 581
             eigenen2 = np.append(eigenen2, energy)
             viroccupationsbet = np.zeros(len(eigenen2), dtype=float)
582 583 584 585 586 587 588 589 590 591
             eigenenconbet = np.zeros(len(eigenen1) + len(eigenen2))
             eigenenconbet = np.concatenate((eigenen1,eigenen2), axis=0)
             eigenenconbet = convert_unit(eigenenconbet, "hartree", "J")
             occupconbet = np.concatenate((occoccupationsbet, viroccupationsbet), axis=0)
             eigenenall = np.concatenate((eigenenconalp,eigenenconbet), axis=0)
             occupall = np.concatenate((occupconalp,occupconbet), axis=0)
             eigenenall = np.reshape(eigenenall,(2, 1, len(eigenenconalp)))
             occupall = np.reshape(occupall,(2, 1, len(occupconalp)))
             backend.addArrayValues("eigenvalues_values", eigenenall)
             backend.addArrayValues("eigenvalues_occupation", occupall)
592 593 594 595

      def onClose_x_gaussian_section_orbital_symmetries(self, backend, gIndex, section):
          symoccalpha = str(section["x_gaussian_alpha_occ_symmetry_values"])
          symviralpha = str(section["x_gaussian_alpha_vir_symmetry_values"])
596 597 598 599
          if(section["x_gaussian_beta_occ_symmetry_values"]):
             symoccbeta = str(section["x_gaussian_beta_occ_symmetry_values"])
             symvirbeta = str(section["x_gaussian_beta_vir_symmetry_values"])

600
          symmetry = [str(f) for f in symoccalpha[1:].replace(",","").replace("(","").replace(")","").replace("]","").replace("'A","A").replace("\\'","'").replace("A''","A'").replace("'E","E").replace("G'","G").replace("\"A'\"","A'").split()]
601 602
          sym1 = []
          sym1 = np.append(sym1, symmetry)  
603
          symmetry = [str(f) for f in symviralpha[1:].replace(",","").replace("(","").replace(")","").replace("]","").replace("'A","A").replace("\\'","'").replace("A''","A'").replace("\"A'\"","A'").replace("'E","E").replace("G'","G").split()]
604 605 606 607
          sym2 = []
          sym2 = np.append(sym2, symmetry)
          symmetrycon = np.concatenate((sym1, sym2), axis=0)
          backend.addArrayValues("x_gaussian_alpha_symmetries", symmetrycon) 
608 609

          if(section["x_gaussian_beta_occ_symmetry_values"]):
610
             symmetry = [str(f) for f in symoccbeta[1:].replace(",","").replace("(","").replace(")","").replace("]","").replace("'A","A").replace("\\'","'").replace("A''","A'").replace("\"A'\"","A'").replace("'E","E").replace("G'","G").split()]
611 612
             sym1 = []
             sym1 = np.append(sym1, symmetry)
613
             symmetry = [str(f) for f in symvirbeta[1:].replace(",","").replace("(","").replace(")","").replace("]","").replace("'A","A").replace("\\'","'").replace("A''","A'").replace("\"A'\"","A'").replace("'E","E").replace("G'","G").split()]
614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630
             sym2 = []
             sym2 = np.append(sym2, symmetry)
             symmetrycon = np.concatenate((sym1, sym2), axis=0)
             backend.addArrayValues("x_gaussian_beta_symmetries", symmetrycon)

      def onClose_x_gaussian_section_molecular_multipoles(self, backend, gIndex, section):
          if(section["quadrupole_moment_xx"]):
             x_gaussian_number_of_lm_molecular_multipoles = 35
          else:
             x_gaussian_number_of_lm_molecular_multipoles = 4

          x_gaussian_molecular_multipole_m_kind = 'polynomial'

          char = str(section["charge"])
          cha = str([char])
          charge = [float(f) for f in cha[1:].replace("-."," -0.").replace("'."," 0.").replace("'","").replace("[","").replace("]","").replace(",","").replace('"','').split()]

Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
631 632 633 634 635 636 637
          if(section["dipole_moment_x"]):
            dipx = section["dipole_moment_x"]
            dipy = section["dipole_moment_y"]
            dipz = section["dipole_moment_z"]
            dip = str([dipx, dipy, dipz])
            dipoles = [float(f) for f in dip[1:].replace("-."," -0.").replace("'."," 0.").replace("'","").replace("[","").replace("]","").replace(",","").split()]
            dipoles = convert_unit(dipoles, "debye", "coulomb * meter")
638

Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
639 640 641 642 643 644 645 646 647 648
          if(section["quadrupole_moment_xx"]):
            quadxx = section["quadrupole_moment_xx"]
            quadxy = section["quadrupole_moment_xy"]
            quadyy = section["quadrupole_moment_yy"]
            quadxz = section["quadrupole_moment_xz"]
            quadyz = section["quadrupole_moment_yz"]
            quadzz = section["quadrupole_moment_zz"]
            quad = str([quadxx, quadxy, quadyy, quadxz, quadyz, quadzz])
            quadrupoles = [float(f) for f in quad[1:].replace("-."," -0.").replace("'."," 0.").replace("'","").replace("[","").replace("]","").replace(",","").split()] 
            quadrupoles = convert_unit(quadrupoles, "debye * angstrom", "coulomb * meter**2")
649 650

          if(section["octapole_moment_xxx"]):
Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
651 652 653 654 655 656 657 658 659 660 661 662 663
            octaxxx = section["octapole_moment_xxx"]
            octayyy = section["octapole_moment_yyy"]
            octazzz = section["octapole_moment_zzz"]
            octaxyy = section["octapole_moment_xyy"]
            octaxxy = section["octapole_moment_xxy"]
            octaxxz = section["octapole_moment_xxz"]
            octaxzz = section["octapole_moment_xzz"]
            octayzz = section["octapole_moment_yzz"]
            octayyz = section["octapole_moment_yyz"]
            octaxyz = section["octapole_moment_xyz"]
            octa = str([octaxxx, octayyy, octazzz, octaxyy, octaxxy, octaxxz, octaxzz, octayzz, octayyz, octaxyz])
            octapoles = [float(f) for f in octa[1:].replace("-."," -0.").replace("'."," 0.").replace("'","").replace("[","").replace("]","").replace(",","").split()]
            octapoles = convert_unit(octapoles, "debye * angstrom**2", "coulomb * meter**3")
664 665

          if(section["hexadecapole_moment_xxxx"]):
Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
            hexadecaxxxx = section["hexadecapole_moment_xxxx"]
            hexadecayyyy = section["hexadecapole_moment_yyyy"]
            hexadecazzzz = section["hexadecapole_moment_zzzz"]
            hexadecaxxxy = section["hexadecapole_moment_xxxy"]
            hexadecaxxxz = section["hexadecapole_moment_xxxz"]
            hexadecayyyx = section["hexadecapole_moment_yyyx"]
            hexadecayyyz = section["hexadecapole_moment_yyyz"]
            hexadecazzzx = section["hexadecapole_moment_zzzx"]
            hexadecazzzy = section["hexadecapole_moment_zzzy"]
            hexadecaxxyy = section["hexadecapole_moment_xxyy"]
            hexadecaxxzz = section["hexadecapole_moment_xxzz"]
            hexadecayyzz = section["hexadecapole_moment_yyzz"]
            hexadecaxxyz = section["hexadecapole_moment_xxyz"]
            hexadecayyxz = section["hexadecapole_moment_yyxz"]
            hexadecazzxy = section["hexadecapole_moment_zzxy"]
            hexa = str([hexadecaxxxx, hexadecayyyy, hexadecazzzz, hexadecaxxxy, hexadecaxxxz, hexadecayyyx, hexadecayyyz,
            hexadecazzzx, hexadecazzzy, hexadecaxxyy, hexadecaxxzz, hexadecayyzz, hexadecaxxyz, hexadecayyxz, hexadecazzxy])
            hexadecapoles = [float(f) for f in hexa[1:].replace("-."," -0.").replace("'."," 0.").replace("'","").replace("[","").replace("]","").replace(",","").split()]
            hexadecapoles = convert_unit(hexadecapoles, "debye * angstrom**3", "coulomb * meter**4")
685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719

          if(section["quadrupole_moment_xx"]):
             multipoles = np.hstack((charge, dipoles, quadrupoles, octapoles, hexadecapoles))
          else:
             multipoles = np.hstack((charge, dipoles)) 

          x_gaussian_molecular_multipole_values = np.resize(multipoles, (x_gaussian_number_of_lm_molecular_multipoles))

          backend.addArrayValues("x_gaussian_molecular_multipole_values", x_gaussian_molecular_multipole_values)
          backend.addValue("x_gaussian_molecular_multipole_m_kind", x_gaussian_molecular_multipole_m_kind)

      def onClose_x_gaussian_section_frequencies(self, backend, gIndex, section):
          frequencies = str(section["x_gaussian_frequency_values"])
          vibfreqs = []
          freqs = [float(f) for f in frequencies[1:].replace("'","").replace(",","").replace("]","").replace("one","").replace("\\n","").replace(" ."," 0.").replace(" -."," -0.").split()]
          vibfreqs = np.append(vibfreqs, freqs)
          vibfreqs = convert_unit(vibfreqs, "inversecm", "J")
          backend.addArrayValues("x_gaussian_frequencies", vibfreqs)

          masses = str(section["x_gaussian_reduced_masses"])
          vibreducedmasses = []
          reduced = [float(f) for f in masses[1:].replace("'","").replace(",","").replace("]","").replace("one","").replace(" ."," 0.").split()]
          vibreducedmasses = np.append(vibreducedmasses, reduced)
          vibreducedmasses = convert_unit(vibreducedmasses, "amu", "kilogram")
          backend.addArrayValues("x_gaussian_red_masses", vibreducedmasses)

          vibnormalmodes = []
          vibdisps = str(section["x_gaussian_normal_modes"])
          disps = [float(s) for s in vibdisps[1:].replace("'","").replace(",","").replace("]","").replace("one","").replace("\\n","").replace(" ."," 0.").replace(" -."," -0.").split()]
          dispsnew = np.zeros(len(disps), dtype = float)

#  Reorder disps 

          if len(vibfreqs) % 3 == 0:
             k = 0
720
             for p in range(0,len(vibfreqs) // 3):
721
                M = int(len(disps)/len(vibfreqs)) * (p+1) 
722
                for m in range(3):
723
                  for n in range(M - int(len(disps) / len(vibfreqs)),M,3):
724 725 726 727 728 729
                    for l in range(3):
                      dispsnew[k] = disps[3*(n + m) + l]
                      k = k + 1
          elif len(vibfreqs) % 3 != 0:
             k = 0
             for p in range(len(vibfreqs)-1,0,-3):
730
                M = (len(disps) - int(len(disps) / len(vibfreqs))) // p
731
                for m in range(3):
732
                  for n in range(M - int(len(disps) / len(vibfreqs)),M,3):
733 734 735
                    for l in range(3):
                      dispsnew[k] = disps[3*(n + m) + l]
                      k = k + 1
736
             for m in range(int(len(disps) / len(vibfreqs))):
737 738 739 740
                   dispsnew[k] = disps[k]
                   k = k + 1

          vibnormalmodes = np.append(vibnormalmodes, dispsnew)
Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
741 742 743 744
          if len(vibfreqs) != 0:
            natoms = int(len(disps) / len(vibfreqs) / 3)
            vibnormalmodes = np.reshape(vibnormalmodes,(len(vibfreqs),natoms,3))
            backend.addArrayValues("x_gaussian_normal_mode_values", vibnormalmodes)
745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798

      def onClose_x_gaussian_section_force_constant_matrix(self, backend, gIndex, section):

          forcecnstvalues = []
          forceconst = str(section["x_gaussian_force_constants"])
          numbers = [float(s) for s in forceconst[1:].replace("'","").replace(",","").replace("]","").replace("\\n","").replace("D","E").replace(" ."," 0.").replace(" -."," -0.").split()]
          length = len(numbers)
          dim = int(((1 + 8 * length)**0.5 - 1) / 2) 
          cartforceconst = np.zeros([dim, dim])
          forcecnstvalues = np.append(forcecnstvalues, numbers) 
          if dim > 6:
             l = 0
             for i in range(0,5):
                for k in range(0,i+1):
                   l = l + 1
                   cartforceconst[i,k] = forcecnstvalues[l-1]
             for i in range(5,dim):
                for k in range(0,5):
                   l = l + 1
                   cartforceconst[i,k] = forcecnstvalues[l-1]
             for i in range(5,dim-2): 
                for k in range(5,i+1):
                   l = l + 1
                   cartforceconst[i,k] = forcecnstvalues[l-1]
             for i in range(dim-2,dim):
                for k in range(5,dim-2):
                   l = l + 1
                   cartforceconst[i,k] = forcecnstvalues[l-1]
             for i in range(dim-2,dim):
                for k in range(i,dim):
                   l = l + 1
                   cartforceconst[i,k] = forcecnstvalues[l-1]
          elif dim == 6:
             l = 0
             for i in range(0,5):
                for k in range(0,i+1):
                   l = l + 1
                   cartforceconst[i,k] = forcecnstvalues[l-1]
             for i in range(5,dim):
                for k in range(0,5):
                   l = l + 1
                   cartforceconst[i,k] = forcecnstvalues[l-1]
             for i in range(dim,dim):
                for k in range(i,dim):
                   l = l + 1
                   cartforceconst[i,k] = forcecnstvalues[l-1]
 
          for i in range(0,dim):
             for k in range(i+1,dim):
                 cartforceconst[i,k] = cartforceconst[k,i]

          cartforceconst = convert_unit(cartforceconst, "forceAu / bohr", "J / (meter**2)")

          backend.addArrayValues("x_gaussian_force_constant_values", cartforceconst) 
799

800 801 802 803
      def onOpen_section_method(self, backend, gIndex, section):
        # keep track of the latest method section
        self.secMethodIndex = gIndex

804
      def onClose_section_method(self, backend, gIndex, section):
805
       # handling of xc functional
806
       # Dictionary for conversion of xc functional name in Gaussian to metadata format.
807
       # The individual x and c components of the functional are given as dictionaries.
808
       # Possible key of such a dictionary is 'name'.
809
        xcDict = {
810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
              'S':          [{'name': 'LDA_X'}],
              'XA':	    [{'name': 'X_ALPHA'}],
              'VWN':        [{'name': 'LDA_C_VWN'}],
              'VWN3':       [{'name': 'LDA_C_VWN_3'}],
              'LSDA':       [{'name': 'LDA_X'}, {'name': 'LDA_C_VWN'}], 
              'B':          [{'name': 'GGA_X_B88'}],
              'BLYP':       [{'name': 'GGA_C_LYP'}, {'name': 'GGA_X_B88'}],
              'PBEPBE':     [{'name': 'GGA_C_PBE'}, {'name': 'GGA_X_PBE'}],
              'PBEH':       [{'name': 'GGA_X_PBEH'}],
              'WPBEH':      [{'name': 'GGA_X_WPBEH'}],
              'PW91PW91':   [{'name': 'GGA_C_PW91'}, {'name': 'GGA_X_PW91'}],
              'M06L':       [{'name': 'MGGA_C_M06_L'}, {'name': 'MGGA_X_M06_L'}],
              'M11L':       [{'name': 'MGGA_C_M11_L'}, {'name': 'MGGA_X_M11_L'}],
              'SOGGA11':    [{'name': 'GGA_XC_SOGGA11'}],
              'MN12L':      [{'name': 'GGA_XC_MN12L'}],
              'N12':        [{'name': 'GGA_C_N12'}, {'name': 'GGA_X_N12'}],
              'VSXC':       [{'name': 'MGGA_XC_VSXC'}],
              'HCTH93':     [{'name': 'GGA_XC_HCTH_93'}],
              'HCTH147':    [{'name': 'GGA_XC_HCTH_147'}],
              'HCTH407':    [{'name': 'GGA_XC_HCTH_407'}],
              'HCTH':       [{'name': 'GGA_XC_HCTH_407'}],
              'B97D':       [{'name': 'GGA_XC_B97D'}],
              'B97D3':      [{'name': 'GGA_XC_B97D3'}],
              'MPW':        [{'name': 'GGA_X_MPW'}],
              'G96':        [{'name': 'GGA_X_G96'}],
              'O':          [{'name': 'GGA_X_O'}],
              'BRX':        [{'name': 'GGA_X_BRX'}],
              'PKZB':       [{'name': 'GGA_C_PKZB'}, {'name': 'GGA_X_PKZB'}],
              'PL':         [{'name': 'C_PL'}],
              'P86':        [{'name': 'GGA_C_P86'}],
              'B95':        [{'name': 'MGGA_C_B95'}],
              'KCIS':       [{'name': 'GGA_C_KCIS'}],
              'BRC':        [{'name': 'GGA_C_BRC'}],
              'VP86':       [{'name': 'GGA_C_VP86'}],
              'V5LYP':      [{'name': 'GGA_C_V5LYP'}],
              'THCTH':      [{'name': 'MGGA_XC_TAU_HCTH'}],
              'TPSSTPSS':   [{'name': 'MGGA_C_TPSS'}, {'name': 'MGGA_X_TPSS'}],
              'B3LYP':      [{'name': 'HYB_GGA_XC_B3LYP'}], 
              'B3PW91':     [{'name': 'HYB_GGA_XC_B3PW91'}],
              'B3P86':      [{'name': 'HYB_GGA_XC_B3P86'}], 
              'B1B95':      [{'name': 'HYB_GGA_XC_B1B95'}],
              'MPW1PW91':   [{'name': 'HYB_GGA_XC_MPW1PW91'}],
              'MPW1LYP':    [{'name': 'HYB_GGA_XC_MPW1LYP'}],
              'MPW1PBE':    [{'name': 'HYB_GGA_XC_MPW1PBE'}],
              'MPW3PBE':    [{'name': 'HYB_GGA_XC_MPW3PBE'}],
              'B98':        [{'name': 'HYB_GGA_XC_B98'}],
              'B971':       [{'name': 'HYB_GGA_XC_B971'}],
              'B972':       [{'name': 'HYB_GGA_XC_B972'}],
              'O3LYP':      [{'name': 'HYB_GGA_XC_O3LYP'}], 
              'TPSSH':      [{'name': 'HYB_GGA_XC_TPSSh'}],
              'BMK':        [{'name': 'HYB_MGGA_XC_BMK'}],
              'X3LYP':      [{'name': 'HYB_GGA_XC_X3LYP'}],
              'THCTHHYB':   [{'name': 'HYB_MGGA_XC_THCTHHYB'}],
              'BHANDH':     [{'name': 'HYB_GGA_XC_BHANDH'}],
              'BHANDHLYP':  [{'name': 'HYB_GGA_XC_BHANDHLYP'}],
              'APF':        [{'name': 'HYB_GGA_XC_APF'}],
              'APFD':       [{'name': 'HYB_GGA_XC_APFD'}],
              'B97D':       [{'name': 'HYB_GGA_XC_B97D'}],
              'RHF':        [{'name': 'RHF_X'}],
              'UHF':        [{'name': 'UHF_X'}],
              'ROHF':       [{'name': 'ROHF_X'}],
              'OHSE2PBE':   [{'name': 'HYB_GGA_XC_HSE03'}],
              'HSEH1PBE':   [{'name': 'HYB_GGA_XC_HSE06'}],
              'OHSE1PBE':   [{'name': 'HYB_GGA_XC_HSEOLD'}],
              'PBEH1PBE':   [{'name': 'HYB_GGA_XC_PBEH1PBE'}],
875
              'PBE1PBE':    [{'name': 'HYB_GGA_XC_PBE1PBE'}],
876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898
              'M05':        [{'name': 'HYB_MGGA_XC_M05'}],
              'M052X':      [{'name': 'HYB_MGGA_XC_M05_2X'}],
              'M06':        [{'name': 'HYB_MGGA_XC_M06'}],
              'M062X':      [{'name': 'HYB_MGGA_XC_M06_2X'}],
              'M06HF':      [{'name': 'HYB_MGGA_XC_M06_HF'}],
              'M11':        [{'name': 'HYB_MGGA_XC_M11'}],
              'SOGGAX11':   [{'name': 'HYB_MGGA_XC_SOGGA11_X'}],
              'MN12SX':     [{'name': 'HYB_MGGA_XC_MN12_SX'}],
              'N12SX':      [{'name': 'HYB_MGGA_XC_N12_SX'}],
              'LC-WPBE':    [{'name': 'LC-WPBE'}],
              'CAM-B3LYP':  [{'name': 'CAM-B3LYP'}],
              'WB97':       [{'name': 'WB97'}],
              'WB97X':      [{'name': 'WB97X'}],
              'WB97XD':     [{'name': 'WB97XD'}],
              'HISSBPBE':   [{'name': 'HISSBPBE'}],
              'B2PLYP':     [{'name': 'B2PLYP'}],
              'MPW2PLYP':   [{'name': 'MPW2PLYP'}],
              'B2PLYPD':    [{'name': 'B2PLYPD'}],
              'MPW2PLYPD':  [{'name': 'MPW2PLYPD'}],
              'B97D3':      [{'name': 'B97D3'}], 
              'B2PLYPD3':   [{'name': 'B2PLYPD3'}],
              'MPW2PLYPD3': [{'name': 'MPW2PLYPD3'}],
              'LC-':        [{'name': 'LONG-RANGE CORRECTED'}],
899 900 901 902 903 904 905 906 907
             }

        methodDict = {
              'AMBER':     [{'name': 'Amber'}],
              'DREIDING':  [{'name': 'Dreiding'}],
              'UFF':       [{'name': 'UFF'}],
              'AM1':       [{'name': 'AM1'}],
              'PM3':       [{'name': 'PM3'}],
              'PM3MM':     [{'name': 'PM3MM'}],
908
              'PM3D3':     [{'name': 'PM3D3'}],
909 910 911 912 913 914 915
              'PM6':       [{'name': 'PM6'}],
              'PDDG':      [{'name': 'PDDG'}],
              'CNDO':      [{'name': 'CNDO'}],
              'INDO':      [{'name': 'INDO'}],
              'MINDO':     [{'name': 'MINDO'}],
              'MINDO3':    [{'name': 'MINDO3'}],
              'ZINDO':     [{'name': 'ZINDO'}],
916 917
              'HUCKEL':    [{'name': 'HUCKEL'}],
              'EXTENDEDHUCKEL':    [{'name': 'HUCKEL'}],
918
              'ONIOM':     [{'name': 'ONIOM'}],
919
              'HF':        [{'name': 'HF'}],
920 921 922 923 924 925 926 927 928
              'RHF':       [{'name': 'RHF'}],
              'UHF':       [{'name': 'UHF'}],
              'ROHF':      [{'name': 'ROHF'}],
              'GVB':       [{'name': 'GVB'}],
              'DFT':       [{'name': 'DFT'}],
              'CID':       [{'name': 'CID'}],
              'CISD':      [{'name': 'CISD'}],
              'CIS':       [{'name': 'CIS'}],
              'BD':        [{'name': 'BD'}],
929
              'BD(T)':     [{'name': 'BD(T)'}],
930 931 932 933
              'CCD':       [{'name': 'CCD'}],
              'CCSD':      [{'name': 'CCSD'}],
              'EOMCCSD':   [{'name': 'EOMCCSD'}],
              'QCISD':     [{'name': 'QCISD'}],
934 935 936
              'CCSD(T)':   [{'name': 'CCSD(T)'}],
              'QCISD(T)':  [{'name': 'QCISD(T)'}],
              'QCISD(TQ)': [{'name': 'QCISD(TQ)'}],
937 938 939
              'MP2':       [{'name': 'MP2'}],
              'MP3':       [{'name': 'MP3'}],
              'MP4':       [{'name': 'MP4'}],
940 941 942 943 944 945
              'MP4DQ':     [{'name': 'MP4DQ'}],
              'MP4(DQ)':   [{'name': 'MP4DQ'}],
              'MP4SDQ':    [{'name': 'MP4SDQ'}],
              'MP4(SDQ)':  [{'name': 'MP4SDQ'}],
              'MP4SDTQ':   [{'name': 'MP4SDTQ'}],
              'MP4(SDTQ)': [{'name': 'MP4SDTQ'}],
946 947 948 949 950 951 952 953 954 955 956 957
              'MP5':       [{'name': 'MP5'}],
              'CAS':       [{'name': 'CASSCF'}],
              'CASSCF':    [{'name': 'CASSCF'}],
              'G1':        [{'name': 'G1'}],
              'G2':        [{'name': 'G2'}],
              'G2MP2':     [{'name': 'G2MP2'}],
              'G3':        [{'name': 'G3'}],
              'G3MP2':     [{'name': 'G3MP2'}],
              'G3B3':      [{'name': 'G3B3'}],
              'G3MP2B3':   [{'name': 'G3MP2B3'}],
              'G4':        [{'name': 'G4'}],
              'G4MP2':     [{'name': 'G4MP2'}],
958 959
              'CBSEXTRAP':   [{'name': 'CBSExtrapolate'}],
              'CBSEXTRAPOLATE':   [{'name': 'CBSExtrapolate'}],
960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026
              'CBS-4M':    [{'name': 'CBS-4M'}],
              'CBS-4O':    [{'name': 'CBS-4O'}],
              'CBS-QB3':   [{'name': 'CBS-QB3'}],
              'CBS-QB3O':  [{'name': 'CBS-QB3O'}],
              'CBS-APNO':  [{'name': 'CBS-APNO'}],
              'W1U':       [{'name': 'W1U'}],
              'W1BD':      [{'name': 'W1BD'}],
              'W1RO':      [{'name': 'W1RO'}],
             }

        basissetDict = {
              'STO-3G':      [{'name': 'STO-3G'}],
              '3-21G':       [{'name': '3-21G'}],
              '6-21G':       [{'name': '6-21G'}],
              '4-31G':       [{'name': '4-31G'}],
              '6-31G':       [{'name': '6-31G'}],
              '6-311G':      [{'name': '6-311G'}],
              'D95V':        [{'name': 'D95V'}],
              'D95':         [{'name': 'D95'}],
              'CC-PVDZ':     [{'name': 'cc-pVDZ'}],
              'CC-PVTZ':     [{'name': 'cc-pVTZ'}],
              'CC-PVQZ':     [{'name': 'cc-pVQZ'}],
              'CC-PV5Z':     [{'name': 'cc-pV5Z'}],
              'CC-PV6Z':     [{'name': 'cc-pV6Z'}],
              'SV':          [{'name': 'SV'}],
              'SVP':         [{'name': 'SVP'}],
              'TZV':         [{'name': 'TZV'}],
              'TZVP':        [{'name': 'TZVP'}],              
              'DEF2SV':      [{'name': 'Def2SV'}],
              'DEF2SVP':     [{'name': 'Def2SVP'}],
              'DEF2SVPP':    [{'name': 'Def2SVPP'}],
              'DEF2TZV':     [{'name': 'Def2TZV'}],
              'DEF2TZVP':    [{'name': 'Def2TZVP'}],
              'DEF2TZVPP':   [{'name': 'Def2TZVPP'}],
              'DEF2QZV':     [{'name': 'Def2QZV'}],
              'DEF2QZVP':    [{'name': 'Def2QZVP'}],
              'DEF2QZVPP':   [{'name': 'Def2QZVPP'}],
              'QZVP':        [{'name': 'QZVP'}],
              'MIDIX':       [{'name': 'MidiX'}],
              'EPR-II':      [{'name': 'EPR-II'}],
              'EPR-III':     [{'name': 'EPR-III'}],
              'UGBS':        [{'name': 'UGBS'}],     
              'MTSMALL':     [{'name': 'MTSmall'}],
              'DGDZVP':      [{'name': 'DGDZVP'}],
              'DGDZVP2':     [{'name': 'DGDZVP2'}],
              'DGTZVP':      [{'name': 'DGTZVP'}],
              'CBSB3':       [{'name': 'CBSB3'}],  
              'CBSB7':       [{'name': 'CBSB7'}],
              'SHC':         [{'name': 'SHC'}],
              'SEC':         [{'name': 'SHC'}],
              'CEP-4G':      [{'name': 'CEP-4G'}],
              'CEP-31G':     [{'name': 'CEP-31G'}],
              'CEP-121G':    [{'name': 'CEP-121G'}],
              'LANL1':       [{'name': 'LANL1'}],
              'LANL2':       [{'name': 'LANL2'}],
              'SDD':         [{'name': 'SDD'}],
              'OLDSDD':      [{'name': 'OldSDD'}], 
              'SDDALL':      [{'name': 'SDDAll'}],
              'GEN':         [{'name': 'General'}],
              'CHKBAS':      [{'name': 'CHKBAS'}],
              'EXTRABASIS':  [{'name': 'ExtraBasis'}],
              'DGA1':        [{'name': 'DGA1'}],
              'DGA2':        [{'name': 'DGA2'}],
              'SVPFIT':      [{'name': 'SVPFit'}],
              'TZVPFIT':     [{'name': 'TZVPFit'}],
              'W06':         [{'name': 'W06'}],
              'CHF':         [{'name': 'CHF'}],
1027 1028
              'FIT':         [{'name': 'FIT'}],
              'AUTO':        [{'name': 'AUTO'}],
1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043
             }

        global xc, method, basisset, xcWrite, methodWrite, basissetWrite, methodreal, basissetreal, exc, corr, exccorr, methodprefix
        xc = None
        method = None
        basisset = None
        xcWrite = False
        methodWrite = False
        basissetWrite = False
        methodreal = None
        basissetreal = None 
        methodprefix = None
        exc = None
        corr = None
        exccorr = None
1044

1045
        settings = section["x_gaussian_settings"]
1046 1047 1048
        settings1 = str(settings[0]).strip()
        settings2 = str(settings[1]).strip()
        settings = [settings1, settings2]
1049 1050 1051
        settings = [''.join(map(str,settings))]
        settings = str(settings)
        settings = re.sub('[-]{2,}', '', settings)
Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
1052
        backend.addValue("x_gaussian_settings_corrected", settings)
1053

Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
1054
        method1 = settings.replace("['#p ","").replace("['#P ","").replace("['#","")
1055 1056 1057 1058
        method1 = method1.upper()

        if 'ONIOM' not in method1: 
          if settings.find("/") >= 0:
Rosendo Valero Montero's avatar
Rosendo Valero Montero committed
1059
               method1 = settings.split('/')[0].replace("['#p ","").replace("['#P ","").replace("['#","")
1060 1061 1062
               method1 = method1.upper()
               for x in method1.split():
                  method2 = str(x)
1063
                  if method2 != 'RHF' and method2 != 'UHF' and method2 != 'ROHF' and method2 != 'UFF':
1064 1065 1066 1067 1068 1069
                     if (method2[0] == 'R' and method2[0:2] != 'RO') or method2[0] == 'U':
                        methodprefix = method2[0] 
                        method2 = method2[1:]
                     elif method2[0:2] == 'RO':
                        methodprefix = method2[0:2]
                        method2 = method2[2:]
1070
                  if method2[0:2] == 'SV' or method2[0] == 'B' or method2[0] == 'O':
1071
                     if method2[1] != '2' and method2[1] != '3':
1072 1073 1074 1075 1076 1077
                       if method2[0] in xcDict.keys() and method2[1:] in xcDict.keys():
                         exc = method2[0]
                         corr = method2[1:]
                         excfunc = xcDict[exc][0]['name']
                         corrfunc = xcDict[corr][0]['name']
                         xc = str(excfunc) + "_" + str(corrfunc)
1078
                  if method2[0:3] == 'BRX' or method2[0:3] == 'G96':
1079 1080 1081 1082 1083 1084
                    exc = method2[0:3]
                    corr = method2[3:]
                    if exc in xcDict.keys() and corr in xcDict.keys():
                      excfunc = xcDict[exc][0]['name']
                      corrfunc = xcDict[corr][0]['name']
                      xc = str(excfunc) + "_" + str(corrfunc)
1085
                  if method2[0:5] == 'WPBEH':
1086 1087 1088 1089 1090 1091
                    exc = method2[0:5]
                    corr = method2[6:]
                    if exc in xcDict.keys() and corr in xcDict.keys():
                      excfunc = xcDict[exc][0]['name']
                      corrfunc = xcDict[corr][0]['name']
                      xc = str(excfunc) + "_" + str(corrfunc)
1092 1093 1094 1095 1096 1097 1098
                  if method2[0:3] == 'LC-':
                     exccorr = method2[3:]
                     if exccorr in xcDict.keys():
                        xc = 'LC-' + xcDict.get([exccorr][-1])
                  if method2 in xcDict.keys():
                     xc = method2
                     xcWrite= True
1099
                     methodWrite = True 
1100 1101 1102 1103 1104 1105 1106 1107
                     method = 'DFT'
                  if method2 in methodDict.keys():
                     method = method2
                     methodWrite = True
                     methodreal = method2
                  else:
                     for n in range(2,9):
                        if method2[0:n] in methodDict.keys():
1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
                          method = method2[0:n]
                          methodWrite = True
                          methodreal = method2
                        if method2[0:n] in xcDict.keys():
                          xc = method2[0:n]
                          xcWrite = True
                          methodWrite = True
                          method = 'DFT'
                        if method2[0:9] == 'CBSEXTRAP':
                          method = method2[0:9]
                          methodWrite = True
                          methodreal = method2
1120
               rest = settings.split('/')[1].replace("'","").replace("]","")
1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146
               rest = rest.upper() 
               for x in rest.split():
                  if x in basissetDict.keys():
                     basisset = x
                     basissetWrite = True
                     basissetreal = x
                  if 'D95' in x:
                     method2 = x
                     basisset = method2[0:3]
                     basissetWrite = True
                     basissetreal = method2
                  if 'AUG-' in x:
                     method2 = x
                     basisset = method2[4:]
                     basissetWrite = True
                     basissetreal = method2
                  if 'UGBS' in x:
                     method2 = x
                     basisset = method2[0:4]
                     basissetWrite = True
                     basissetreal = method2
                  if 'CBSB7' in x:
                     method2 = x
                     basisset = method2[0:5]
                     basissetWrite = True
                     basissetreal = method2
1147 1148 1149 1150 1151 1152 1153 1154 1155 1156
                  if 'LANL1' in x:
                     method2 = x
                     basisset = method2[0:5]
                     basissetWrite = True
                     basissetreal = method2
                  if 'LANL2' in x:
                     method2 = x
                     basisset = method2[0:5]
                     basissetWrite = True
                     basissetreal = method2
1157 1158 1159 1160 1161 1162 1163 1164 1165 1166
                  if '6-31' in x:
                     method2 = x
                     if '6-311' in x:
                        basisset = '6-311G'
                        basissetWrite = True
                        basissetreal = '6-311' + method2[5:]
                     else:
                        basisset = '6-31G'
                        basissetWrite = True
                        basissetreal = '6-31' + method2[4:]
1167 1168 1169 1170 1171 1172 1173 1174 1175
                  slashes = settings.count('/')
                  if slashes > 1:
                    rest2 = settings.split()[1]
                    rest2 = rest2.upper()
                    for z in rest2.split('/'):
                       if z in basissetDict.keys():
                         basisset = z
                    basissetWrite = True
                    basissetreal = rest2.split('/')[1] + '/' + basisset
1176 1177 1178 1179 1180
          else:
               method1 = settings.split()
               for x in method1: 
                  method2 = str(x)
                  method2 = method2.upper() 
1181
                  if method2 != 'RHF' and method2 != 'UHF' and method2 != 'ROHF' and method2 != 'UFF':