Commit f68330c5 authored by Daniel Speckhard

Fixed section partitioning error with section_system.

parent 018ff68a
Metadata-Version: 1.0
Name: gaussianparser
Version: 0.1
Summary: NOMAD parser implementation for Gaussian.
Home-page: UNKNOWN
Author: UNKNOWN
Author-email: UNKNOWN
License: APACHE 2.0
Description: UNKNOWN
Platform: UNKNOWN
README.md
setup.py
./gaussianparser/__init__.py
./gaussianparser/parser_gaussian.py
./gaussianparser/setup_paths.py
./gaussianparser.egg-info/PKG-INFO
./gaussianparser.egg-info/SOURCES.txt
./gaussianparser.egg-info/dependency_links.txt
./gaussianparser.egg-info/requires.txt
./gaussianparser.egg-info/top_level.txt
\ No newline at end of file
from gaussianparser.parser_gaussian import GaussianParser
# Copyright 2015-2018 Rosendo Valero, Fawzi Mohamed, Ankit Kariryaa
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from builtins import str
from builtins import range
from builtins import object
from functools import reduce
from nomadcore.simple_parser import mainFunction, SimpleMatcher as SM
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
from nomadcore.caching_backend import CachingLevel
from nomadcore.unit_conversion.unit_conversion import convert_unit
import os, sys, json, logging
import numpy as np
import ase
import re
############################################################
# This is the parser for the output file of Gaussian.
############################################################
# Module-level logger, registered under the name "nomad.GaussianParser".
logger = logging.getLogger("nomad.GaussianParser")
# description of the output
# Tree of SimpleMatcher (SM) objects describing the Gaussian output file.
# Every regular expression is written as a raw string so that sequences such
# as "\s" reach the regex engine unchanged and do not trigger Python's
# invalid-escape-sequence warning.
mainFileDescription = SM(
    name = 'root',
    weak = True,
    forwardMatch = True,
    startReStr = "",
    subMatchers = [
        # One 'newRun' per "Cite this work as:" banner; opens section_run and section_system.
        SM(name = 'newRun',
            startReStr = r"\s*Cite this work as:",
            repeats = True,
            required = True,
            forwardMatch = True,
            fixedStartValues={ 'program_name': 'Gaussian', 'program_basis_set_type': 'gaussians' },
            sections = ['section_run', 'section_system'],
            subMatchers = [
                # Program banner: version, implementation, release and execution dates.
                SM(name = 'header',
                    startReStr = r"\s*Cite this work as:",
                    forwardMatch = True,
                    subMatchers = [
                        SM(r"\s*Cite this work as:"),
                        SM(r"\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,"),
                        SM(r"\s\*\*\*\*\*\*\*\*\*\*\*\**"),
                        SM(r"\s*Gaussian\s*(?P<program_version>[0-9]+):\s*(?P<x_gaussian_program_implementation>[A-Za-z0-9-.]+)\s*(?P<x_gaussian_program_release_date>[0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)"),
                        SM(r"\s*(?P<x_gaussian_program_execution_date>[0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)"),
                    ]
                ),
                # Link-0 lines (%chk, %mem, %nproc), accepted in any order.
                SM(name = 'globalparams',
                    startReStr = r"\s*%\w*=",
                    subFlags = SM.SubFlags.Unordered,
                    forwardMatch = True,
                    subMatchers = [
                        SM(r"\s*%[Cc]hk=(?P<x_gaussian_chk_file>[A-Za-z0-9.]*)"),
                        SM(r"\s*%[Mm]em=(?P<x_gaussian_memory>[A-Za-z0-9.]*)"),
                        SM(r"\s*%[Nn][Pp]roc=(?P<x_gaussian_number_of_processors>[A-Za-z0-9.]*)")
                    ]
                ),
                # Lines starting with '#'; the same pattern is listed twice, so up to
                # two consecutive lines are captured into x_gaussian_settings.
                SM (name = 'SectionMethod',
                    sections = ['section_method'],
                    startReStr = r"\s*#",
                    forwardMatch = True,
                    subMatchers = [
                        SM(r"\s*(?P<x_gaussian_settings>([a-zA-Z0-9-/=(),#*+:]*\s*)+)"),
                        SM(r"\s*(?P<x_gaussian_settings>([a-zA-Z0-9-/=(),#*+:]*\s*)+)"),
                    ]
                ),
                # Charge, multiplicity, lattice vectors (Tv lines) and atomic masses.
                SM(name = 'charge_multiplicity_cell_masses',
                    startReStr = r"\s*Charge =",
                    endReStr = r"\s*Leave Link 101\s*",
                    subFlags = SM.SubFlags.Unordered,
                    forwardMatch = True,
                    subMatchers = [
                        SM(r"\s*Charge =\s*(?P<x_gaussian_total_charge>[-+0-9]*) Multiplicity =\s*(?P<x_gaussian_spin_target_multiplicity>[0-9]*)"),
                        SM(r"\s*(Tv|Tv\s*[0]|TV|TV\s*[0])\s*(?P<x_gaussian_geometry_lattice_vector_x>[0-9.]+)\s+(?P<x_gaussian_geometry_lattice_vector_y>[0-9.]+)\s+(?P<x_gaussian_geometry_lattice_vector_z>[0-9.]+)", repeats = True),
                        SM(r"\s*AtmWgt=\s+(?P<x_gaussian_atomic_masses>[0-9.]+(\s+[0-9.]+)(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?(\s+[0-9.]+)?)", repeats = True)
                    ]
                ),
                # Non-repeating wrapper around the repeating per-geometry calculations.
                SM (name = 'SingleConfigurationCalculationWithSystemDescription',
                    startReStr = r"\s*Standard orientation:",
                    repeats = False,
                    forwardMatch = True,
                    subMatchers = [
                        # One section_single_configuration_calculation per "Standard orientation:" block.
                        SM (name = 'SingleConfigurationCalculation',
                            startReStr = r"\s*Standard orientation:",
                            repeats = True,
                            forwardMatch = True,
                            sections = ['section_single_configuration_calculation'],
                            subMatchers = [
                                # Atomic numbers and Cartesian coordinates (angstrom).
                                SM(name = 'geometry',
                                    sections = ['x_gaussian_section_geometry'],
                                    startReStr = r"\s*Standard orientation:",
                                    endReStr = r"\s*Rotational constants",
                                    subMatchers = [
                                        SM(r"\s+[0-9]+\s+(?P<x_gaussian_atomic_number>[0-9]+)\s+[0-9]*\s+(?P<x_gaussian_atom_x_coord__angstrom>[-+0-9EeDd.]+)\s+(?P<x_gaussian_atom_y_coord__angstrom>[-+0-9EeDd.]+)\s+(?P<x_gaussian_atom_z_coord__angstrom>[-+0-9EeDd.]+)",repeats = True),
                                        SM(r"\s*Rotational constants")
                                    ]
                                ),
                                # Hybrid functional coefficients from the IExCor / ScaDFX lines.
                                SM(name = 'SectionHybridCoeffs',
                                    sections = ['x_gaussian_section_hybrid_coeffs'],
                                    startReStr = r"\s*IExCor=",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*IExCor=\s*[0-9-]+\s*DFT=[A-Z]\s*Ex\+Corr=[a-zA-Z0-9]+\s*ExCW=[0-9]\s*ScaHFX=\s*(?P<hybrid_xc_coeff1>[0-9.]+)"),
                                        SM(r"\s*IExCor=\s*[0-9-]+\s*DFT=[A-Z]\s*Ex\=[a-zA-Z0-9+]+\s*Corr=[ a-zA-Z0-9]+\s*?ExCW=[0-9]\s*ScaHFX=\s*(?P<hybrid_xc_coeff1>[0-9.]+)"),
                                        SM(r"\s*IExCor=\s*[0-9-]+\s*DFT=[A-Z]\s*Ex\=[a-zA-Z0-9+]+\s*Corr=[ a-zA-Z0-9]+\s*ScaHFX=\s*(?P<hybrid_xc_coeff1>[0-9.]+)"),
                                        SM(r"\s*ScaDFX=\s*(?P<hybrid_xc_coeff2>[0-9.]+\s*[0-9.]+\s*[0-9.]+\s*[0-9.]+)")
                                    ]
                                ),
                                # SCF cycle energies and the final "SCF Done" summary.
                                SM(name = 'TotalEnergyScfGaussian',
                                    sections = ['section_scf_iteration'],
                                    startReStr = r"\s*Requested convergence on RMS",
                                    forwardMatch = False,
                                    repeats = True,
                                    subMatchers = [
                                        SM(r"\s*Cycle\s+[0-9]+|\s*Initial guess <Sx>="),
                                        SM(r"\s*E=\s*(?P<energy_total_scf_iteration__hartree>[-+0-9.]+)\s*Delta-E=\s*(?P<x_gaussian_delta_energy_total_scf_iteration__hartree>[-+0-9.]+)"),
                                        SM(r"\s*(?P<x_gaussian_single_configuration_calculation_converged>SCF Done):\s*E\((?P<x_gaussian_hf_detect>[A-Z0-9]+)\)\s*=\s*(?P<x_gaussian_energy_scf__hartree>[-+0-9.]+)"),
                                        SM(r"\s*NFock=\s*[0-9]+\s*Conv=(?P<x_gaussian_energy_error__hartree>[-+0-9EeDd.]+)\s*"),
                                        SM(r"\s*KE=\s*(?P<x_gaussian_electronic_kinetic_energy__hartree>[-+0-9EeDd.]+)\s*"),
                                        SM(r"\s*Annihilation of the first spin contaminant"),
                                        SM(r"\s*[A-Z][*][*][0-9]\s*before annihilation\s*(?P<spin_S2>[0-9.]+),\s*after\s*(?P<x_gaussian_after_annihilation_spin_S2>[0-9.]+)"),
                                        SM(r"\s*[()A-Z0-9]+\s*=\s*[-+0-9D.]+\s*[()A-Z0-9]+\s*=\s*(?P<x_gaussian_perturbation_energy__hartree>[-+0-9D.]+)"),
                                    ]
                                ),
                                # Moller-Plesset corrections (E2 ... MP5) and the matching total energies.
                                SM(name = 'PerturbationEnergies',
                                    sections = ['x_gaussian_section_moller_plesset'],
                                    startReStr = r"\s*E2 =\s*",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*E2 =\s*(?P<x_gaussian_mp2_correction_energy__hartree>[-+0-9EeDd.]+)\s*EUMP2 =\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                                        SM(r"\s*E3=\s*(?P<x_gaussian_mp3_correction_energy__hartree>[-+0-9EeDd.]+)\s*EUMP3=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)\s*"),
                                        SM(r"\s*E4\(DQ\)=\s*(?P<x_gaussian_mp4dq_correction_energy__hartree>[-+0-9EeDd.]+)\s*UMP4\(DQ\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)\s*"),
                                        SM(r"\s*E4\(SDQ\)=\s*(?P<x_gaussian_mp4sdq_correction_energy__hartree>[-+0-9EeDd.]+)\s*UMP4\(SDQ\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                                        SM(r"\s*E4\(SDTQ\)=\s*(?P<x_gaussian_mp4sdtq_correction_energy__hartree>[-+0-9EeDd.]+)\s*UMP4\(SDTQ\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                                        SM(r"\s*DEMP5 =\s*(?P<x_gaussian_mp5_correction_energy__hartree>[-+0-9EeDd.]+)\s*MP5 =\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                                    ]
                                ),
                                # Coupled-cluster energies.
                                SM(name = 'CoupledClusterEnergies',
                                    sections = ['x_gaussian_section_coupled_cluster'],
                                    startReStr = r"\s*CCSD\(T\)\s*",
                                    endReStr = r"\s*Population analysis using the SCF density",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*DE\(Corr\)=\s*(?P<x_gaussian_ccsd_correction_energy__hartree>[-+0-9EeDd.]+)\s*E\(CORR\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
                                        SM(r"\s*CCSD\(T\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                                    ]
                                ),
                                # Quadratic CI energies.
                                SM(name = 'QuadraticCIEnergies',
                                    sections = ['x_gaussian_section_quadratic_ci'],
                                    startReStr = r"\s*Quadratic Configuration Interaction\s*",
                                    endReStr = r"\s*Population analysis using the SCF density",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*DE\(Z\)=\s*(?P<x_gaussian_qcisd_correction_energy__hartree>[-+0-9EeDd.]+)\s*E\(Z\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
                                        SM(r"\s*DE\(Corr\)=\s*(?P<x_gaussian_qcisd_correction_energy__hartree>[-+0-9EeDd.]+)\s*E\(CORR\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
                                        SM(r"\s*QCISD\(T\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)"),
                                        SM(r"\s*DE5\s*=\s*(?P<x_gaussian_qcisdtq_correction_energy__hartree>[-+0-9EeDd.]+)\s*QCISD\(TQ\)\s*=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
                                    ]
                                ),
                                # Configuration-interaction energies.
                                SM(name = 'CIEnergies',
                                    sections = ['x_gaussian_section_ci'],
                                    startReStr = r"\s*Configuration Interaction\s*",
                                    endReStr = r"\s*Population analysis using the SCF density",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*DE\(CI\)=\s*(?P<x_gaussian_ci_correction_energy__hartree>[-+0-9EeDd.]+)\s*E\(CI\)=\s*(?P<energy_total__hartree>[-+0-9EeDd.]+)", repeats = True),
                                    ]
                                ),
                                # Semiempirical method name and energies.
                                SM(name = 'SemiempiricalEnergies',
                                    sections = ['x_gaussian_section_semiempirical'],
                                    startReStr = r"\s*[-A-Z0-9]+\s*calculation of energy[a-zA-Z,. ]+\s*",
                                    endReStr = r"\s*Population analysis using the SCF density",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*(?P<x_gaussian_semiempirical_method>[-A-Z0-9]+\s*calculation of energy[a-zA-Z,. ]+)"),
                                        SM(r"\s*It=\s*[0-9]+\s*PL=\s*[-+0-9EeDd.]+\s*DiagD=[A-Z]\s*ESCF=\s*(?P<x_gaussian_semiempirical_energy>[-+0-9.]+)\s*", repeats = True),
                                        SM(r"\s*Energy=\s*(?P<energy_total>[-+0-9EeDd.]+)"),
                                    ]
                                ),
                                # Molecular-mechanics method name and energy.
                                SM(name = 'MolecularMechanicsEnergies',
                                    sections = ['x_gaussian_section_molmech'],
                                    startReStr = r"\s*[-A-Z0-9]+\s*calculation of energy[a-zA-Z,. ]+\s*",
                                    forwardMatch = False,
                                    repeats = True,
                                    subMatchers = [
                                        SM(r"\s*(?P<x_gaussian_molmech_method>[a-zA-Z0-9]+\s*calculation of energy[a-z,. ]+)"),
                                        SM(r"\s*Energy=\s*(?P<energy_total>[-+0-9EeDd.]+)\s*NIter=\s*[0-9.]"),
                                    ]
                                ),
                                # Excited states: number, energy (eV), oscillator strength, <S**2>, transitions.
                                SM(name = 'ExcitedStates',
                                    sections = ['x_gaussian_section_excited_initial'],
                                    startReStr = r"\s*Excitation energies and oscillator strengths",
                                    forwardMatch = False,
                                    repeats = True,
                                    subMatchers = [
                                        SM(name = 'ExcitedStates',
                                            sections = ['x_gaussian_section_excited'],
                                            startReStr = r"\s*Excited State",
                                            forwardMatch = False,
                                            repeats = True,
                                            subMatchers = [
                                                SM(r"\s*Excited State\s*(?P<x_gaussian_excited_state_number>[0-9]+):\s*[-+0-9A-Za-z.\?]+\s*(?P<x_gaussian_excited_energy__eV>[0-9.]+)\s*eV\s*[0-9.]+\s*nm\s*f=(?P<x_gaussian_excited_oscstrength>[0-9.]+)\s*<[A-Z][*][*][0-9]>=(?P<x_gaussian_excited_spin_squared>[0-9.]+)"),
                                                SM(r"\s*(?P<x_gaussian_excited_transition>[0-9A-Z]+\s*->\s*[0-9A-Z]+\s*[-+0-9.]+)", repeats = True),
                                                SM(r"\s*This state for optimization|\r?\n"),
                                            ]
                                        )
                                    ]
                                ),
                                # CASSCF eigenvalues.
                                SM(name = 'CASSCFStates',
                                    sections = ['x_gaussian_section_casscf'],
                                    startReStr = r"\s*EIGENVALUES AND\s*",
                                    forwardMatch = True,
                                    repeats = False,
                                    subMatchers = [
                                        SM(r"\s*EIGENVALUES AND\s*"),
                                        SM(r"\s*\(\s*[0-9]+\)\s*EIGENVALUE\s*(?P<x_gaussian_casscf_energy__hartree>[-+0-9.]+)", repeats = True),
                                    ]
                                ),
                                # Geometry optimization status ("completed" / "stopped").
                                SM(name = 'Geometry_optimization',
                                    sections = ['x_gaussian_section_geometry_optimization_info'],
                                    startReStr = r"\s*Optimization completed.",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*(?P<x_gaussian_geometry_optimization_converged>Optimization completed)"),
                                        SM(r"\s*(?P<x_gaussian_geometry_optimization_converged>Optimization stopped)"),
                                        SM(r"\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+[-+0-9EeDd.]+\s+[-+0-9EeDd.]+\s+[-+0-9EeDd.]+",repeats = True),
                                        SM(r"\s*Distance matrix|\s*Rotational constants|\s*Stoichiometry")
                                    ]
                                ),
                                # Orbital symmetry labels, alpha then beta, occupied then virtual.
                                SM(name = 'Orbital symmetries',
                                    sections = ['x_gaussian_section_orbital_symmetries'],
                                    startReStr = r"\s+Population analysis",
                                    subFlags = SM.SubFlags.Sequenced,
                                    subMatchers = [
                                        SM(r"\s*Orbital symmetries"),
                                        SM(r"\s*Alpha Orbitals"),
                                        SM(r"\s*Occupied\s+(?P<x_gaussian_alpha_occ_symmetry_values>\((.+)\))?"),
                                        SM(r"\s+(?P<x_gaussian_alpha_occ_symmetry_values>\((.+)\)?)", repeats = True),
                                        SM(r"\s*Virtual\s+(?P<x_gaussian_alpha_vir_symmetry_values>\((.+)\))?"),
                                        SM(r"\s+(?P<x_gaussian_alpha_vir_symmetry_values>\((.+)\)?)", repeats = True),
                                        SM(r"\s*Beta Orbitals"),
                                        SM(r"\s*Occupied\s+(?P<x_gaussian_beta_occ_symmetry_values>\((.+)\))?"),
                                        SM(r"\s+(?P<x_gaussian_beta_occ_symmetry_values>\((.+)\)?)", repeats = True),
                                        SM(r"\s*Virtual\s+(?P<x_gaussian_beta_vir_symmetry_values>\((.+)\))?"),
                                        SM(r"\s+(?P<x_gaussian_beta_vir_symmetry_values>\((.+)\)?)", repeats = True),
                                    ]
                                ),
                                # Symmetry label of the electronic state.
                                SM(name = 'Electronicstatesymmetry',
                                    sections = ['x_gaussian_section_symmetry'],
                                    startReStr = r"\s*The electronic state is",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*The electronic state is\s*(?P<x_gaussian_elstate_symmetry>[A-Z0-9-']+)[.]")
                                    ]
                                ),
                                # Orbital eigenvalue lines (alpha/beta, occupied/virtual).
                                SM(name = 'Eigenvalues',
                                    sections = ['section_eigenvalues'],
                                    startReStr = r"\s*Alpha occ. eigenvalues --",
                                    forwardMatch = True,
                                    subFlags = SM.SubFlags.Sequenced,
                                    subMatchers = [
                                        SM(r"\s*Alpha occ. eigenvalues --\s+(?P<x_gaussian_alpha_occ_eigenvalues_values>-?[^\s.-]+\s+|(\-?\d*\.\d*)\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?)", repeats = True),
                                        SM(r"\s*Alpha virt. eigenvalues --\s+(?P<x_gaussian_alpha_vir_eigenvalues_values>-?[^\s.-]+\s+|(\-?\d*\.\d*)\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?)", repeats = True),
                                        SM(r"\s*Beta occ. eigenvalues --\s+(?P<x_gaussian_beta_occ_eigenvalues_values>-?[^\s.-]+\s+|(\-?\d*\.\d*)\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?)", repeats = True),
                                        SM(r"\s*Beta virt. eigenvalues --\s+(?P<x_gaussian_beta_vir_eigenvalues_values>-?[^\s.-]+\s+|(\-?\d*\.\d*)\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?\s+(\-?\d*\.\d*)?)", repeats = True),
                                        # NOTE(review): the parentheses below are unescaped, so they form a
                                        # regex group and a literal "(all electrons)" cannot match. Left as-is; confirm intent.
                                        SM(r"\s*- Condensed to atoms (all electrons)"),
                                    ]
                                ),
                                # Per-atom Cartesian forces (hartree/bohr).
                                SM(name = 'ForcesGaussian',
                                    sections = ['x_gaussian_section_atom_forces'],
                                    startReStr = r"\s*Center\s+Atomic\s+Forces ",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*Center\s+Atomic\s+Forces "),
                                        SM(r"\s+[0-9]+\s+[0-9]+\s+(?P<x_gaussian_atom_x_force__hartree_bohr_1>[-+0-9EeDd.]+)\s+(?P<x_gaussian_atom_y_force__hartree_bohr_1>[-+0-9EeDd.]+)\s+(?P<x_gaussian_atom_z_force__hartree_bohr_1>[-+0-9EeDd.]+)",repeats = True),
                                        SM(r"\s*Cartesian Forces:\s+")
                                    ]
                                ),
                                # Charge and molecular multipole moments up to hexadecapole.
                                SM(name = 'Multipoles',
                                    sections = ['x_gaussian_section_molecular_multipoles'],
                                    startReStr = r"\s*Electronic spatial extent",
                                    forwardMatch = False,
                                    subMatchers = [
                                        SM(r"\s*Charge=(?P<charge>\s*[-0-9.]+)"),
                                        SM(r"\s*Dipole moment "),
                                        SM(r"\s+\w+=\s+(?P<dipole_moment_x>[-+0-9EeDd.]+)\s+\w+=\s+(?P<dipole_moment_y>[-+0-9EeDd.]+)\s+\w+=\s+(?P<dipole_moment_z>[-+0-9EeDd.]+)"),
                                        SM(r"\s*Quadrupole moment"),
                                        SM(r"\s+\w+=\s+(?P<quadrupole_moment_xx>[0-9-.]+)\s+\w+=\s+(?P<quadrupole_moment_yy>[0-9-.]+)\s+\w+=\s+(?P<quadrupole_moment_zz>[0-9-.]+)"),
                                        SM(r"\s+\w+=\s+(?P<quadrupole_moment_xy>[0-9-.]+)\s+\w+=\s+(?P<quadrupole_moment_xz>[0-9-.]+)\s+\w+=\s+(?P<quadrupole_moment_yz>[0-9-.]+)"),
                                        SM(r"\s*Traceless Quadrupole moment"),
                                        SM(r"\s+\w+=\s+[0-9-.]+\s+\w+=\s+[0-9-.]+\s+\w+=\s+[0-9-.]+"),
                                        SM(r"\s+\w+=\s+[0-9-.]+\s+\w+=\s+[0-9-.]+\s+\w+=\s+[0-9-.]+"),
                                        SM(r"\s*Octapole moment"),
                                        SM(r"\s+\w+=\s+(?P<octapole_moment_xxx>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_yyy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_zzz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_xyy>[-+0-9EeDd.]+)"),
                                        SM(r"\s+\w+=\s+(?P<octapole_moment_xxy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_xxz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_xzz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_yzz>[-+0-9EeDd.]+)"),
                                        SM(r"\s+\w+=\s+(?P<octapole_moment_yyz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<octapole_moment_xyz>[-+0-9EeDd.]+)"),
                                        SM(r"\s*Hexadecapole moment"),
                                        SM(r"\s+\w+=\s+(?P<hexadecapole_moment_xxxx>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyyy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_zzzz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_xxxy>[-+0-9EeDd.]+)"),
                                        SM(r"\s+\w+=\s+(?P<hexadecapole_moment_xxxz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyyx>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyyz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_zzzx>[-+0-9EeDd.]+)"),
                                        SM(r"\s+\w+=\s+(?P<hexadecapole_moment_zzzy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_xxyy>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_xxzz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyzz>[-+0-9EeDd.]+)"),
                                        SM(r"\s+\w+=\s+(?P<hexadecapole_moment_xxyz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_yyxz>[-+0-9EeDd.]+)\s+\w+=\s+(?P<hexadecapole_moment_zzxy>[-+0-9EeDd.]+)")
                                    ]
                                ),
                                # Vibrational frequencies, reduced masses and normal modes.
                                SM (name = 'Frequencies',
                                    sections = ['x_gaussian_section_frequencies'],
                                    startReStr = r"\s*Frequencies --\s+(?:(?:[-]?[0-9]+\.\d*)\s*(?:[-]?[-0-9]+\.\d*)?\s*(?:[-]?[-0-9]+\.\d*)?)",
                                    endReStr = r"\s*- Thermochemistry -",
                                    forwardMatch = True,
                                    repeats = False,
                                    subMatchers = [
                                        SM (name = 'Frequencies',
                                            startReStr = r"\s*Frequencies --\s+(?:(?:[-]?[0-9]+\.\d*)\s*(?:[-]?[-0-9]+\.\d*)?\s*(?:[-]?[-0-9]+\.\d*)?)",
                                            forwardMatch = True,
                                            repeats = True,
                                            subFlags = SM.SubFlags.Unordered,
                                            subMatchers = [
                                                SM(r"\s*Frequencies --\s+(?P<x_gaussian_frequency_values>([-]?[0-9]+\.\d*)\s*([-]?[-0-9]+\.\d*)?\s*([-]?[-0-9]+\.\d*)?)", repeats = True),
                                                SM(r"\s*Red. masses --\s+(?P<x_gaussian_reduced_masses>(.+))", repeats = True),
                                                SM(r"\s*[0-9]+\s*[0-9]+\s*(?P<x_gaussian_normal_modes>([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+)\s*([-0-9.]+))", repeats = True),
                                                SM(r"\s*[0-9]+\s*([0-9]+)?\s*([0-9]+)?"),
                                            ])
                                    ]
                                ),
                                # Thermochemistry: temperature, pressure, moments of inertia, thermal corrections.
                                SM(name = 'Thermochemistry',
                                    sections = ['x_gaussian_section_thermochem'],
                                    startReStr = r"\s*Temperature",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*Temperature\s*(?P<x_gaussian_temperature>[0-9.]+)\s*Kelvin.\s*Pressure\s*(?P<x_gaussian_pressure__atmosphere>[0-9.]+)\s*Atm."),
                                        SM(r"\s*Principal axes and moments of inertia in atomic units:"),
                                        SM(r"\s*Eigenvalues --\s*(?P<x_gaussian_moment_of_inertia_X__amu_angstrom_angstrom>(\d+\.\d{5}))\s*?(?P<x_gaussian_moment_of_inertia_Y__amu_angstrom_angstrom>(\d+\.\d{5}))\s*?(?P<x_gaussian_moment_of_inertia_Z__amu_angstrom_angstrom>(\d+\.\d{5}))"),
                                        SM(r"\s*Zero-point correction=\s*(?P<x_gaussian_zero_point_energy__hartree>[0-9.]+)"),
                                        SM(r"\s*Thermal correction to Energy=\s*(?P<x_gaussian_thermal_correction_energy__hartree>[0-9.]+)"),
                                        SM(r"\s*Thermal correction to Enthalpy=\s*(?P<x_gaussian_thermal_correction_enthalpy__hartree>[0-9.]+)"),
                                        SM(r"\s*Thermal correction to Gibbs Free Energy=\s*(?P<x_gaussian_thermal_correction_free_energy__hartree>[0-9.]+)"),
                                    ]
                                ),
                                # Cartesian force-constant matrix rows.
                                SM(name = 'Forceconstantmatrix',
                                    sections = ['x_gaussian_section_force_constant_matrix'],
                                    startReStr = r"\s*Force constants in Cartesian coordinates",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*Force constants in Cartesian coordinates"),
                                        SM(r"\s*[0-9]+\s*(?P<x_gaussian_force_constants>(-?\d*\.\d*D?\+?\-?\d+)|(\-?\d*\.\d*[-+DE0-9]+)\s*(\-?\d*\.\d*[-+DE0-9]+)?\s*(\-?\d*\.\d*[-+DE0-9]+)?\s*(\-?\d*\.\d*[-+DE0-9]+)?\s*(\-?\d*\.\d*[-+DE0-9]+)?)", repeats = True),
                                        SM(r"\s*Force constants in internal coordinates")
                                    ]
                                ),
                                # Total energies of composite models (G1-G4, CBS-x, W1x).
                                SM(name = 'CompositeModelEnergies',
                                    sections = ['x_gaussian_section_models'],
                                    startReStr = r"\s*Temperature=\s*",
                                    forwardMatch = False,
                                    repeats = True,
                                    subMatchers = [
                                        SM(r"\s*G1\(0 K\)=\s*[-+0-9.]+\s*G1 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*G2\(0 K\)=\s*[-+0-9.]+\s*G2 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*G2MP2\(0 K\)=\s*[-+0-9.]+\s*G2MP2 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*G3\(0 K\)=\s*[-+0-9.]+\s*G3 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*G3MP2\(0 K\)=\s*[-+0-9.]+\s*G3MP2 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*G4\(0 K\)=\s*[-+0-9.]+\s*G4 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*G4MP2\(0 K\)=\s*[-+0-9.]+\s*G4MP2 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*CBS-4 \(0 K\)=\s*[-+0-9.]+\s*CBS-4 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*CBS-q \(0 K\)=\s*[-+0-9.]+\s*CBS-q Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*CBS-Q \(0 K\)=\s*[-+0-9.]+\s*CBS-Q Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*CBS-QB3 \(0 K\)=\s*[-+0-9.]+\s*CBS-QB3 Energy=\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*W1U \(0 K\)=\s*[-+0-9.]+\s*W1U Electronic Energy\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*W1RO \(0 K\)=\s*[-+0-9.]+\s*W1RO Electronic Energy\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                        SM(r"\s*W1BD \(0 K\)=\s*[-+0-9.]+\s*W1BD Electronic Energy\s*(?P<energy_total__hartree>[-+0-9.]+)"),
                                    ]
                                ),
                                # CPU time and normal-termination date.
                                SM(name = 'run times',
                                    sections = ['x_gaussian_section_times'],
                                    startReStr = r"\s*Job cpu time:",
                                    forwardMatch = True,
                                    subMatchers = [
                                        SM(r"\s*Job cpu time:\s*(?P<x_gaussian_program_cpu_time>\s*[0-9]+\s*[a-z]+\s*[0-9]+\s*[a-z]+\s*[0-9]+\s*[a-z]+\s*[0-9.]+\s*[a-z]+)"),
                                        SM(r"\s*Normal termination of Gaussian\s*[0-9]+\s* at \s*(?P<x_gaussian_program_termination_date>[A-Za-z]+\s*[A-Za-z]+\s*[0-9]+\s*[0-9:]+\s*[0-9]+)"),
                                    ]
                                )
                            ])
                    ])
            ])
    ])
import nomad_meta_info
# Path of "gaussian.nomadmetainfo.json", resolved relative to the directory
# that contains the nomad_meta_info package.
metaInfoPath = os.path.normpath(
    os.path.join(
        os.path.dirname(os.path.abspath(nomad_meta_info.__file__)),
        "gaussian.nomadmetainfo.json",
    )
)
# Load the meta-info definitions; loadJsonFile returns the environment and a
# second value that is bound to the module-level name `warnings`.
metaInfoEnv, warnings = loadJsonFile(
    filePath = metaInfoPath,
    dependencyLoader = None,
    extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS,
    uri = None,
)
# Name and version reported for this parser.
parserInfo = dict(name="parser_gaussian", version="1.0")
class GaussianParserContext(object):
"""Context for parsing Gaussian output file.
This class keeps tracks of several Gaussian settings to adjust the parsing to them.
The onClose_ functions allow processing and writing of cached values after a section is closed.
They take the following arguments:
backend: Class that takes care of writing and caching of metadata.
gIndex: Index of the section that is closed.
section: The cached values and sections that were found in the section that is closed.
"""
def __init__(self):
    """Create the context with its table of tracked energy quantities.

    Every entry of ``totalEnergyList`` starts out as ``None``.
    """
    # energy-related quantities meant to be tracked between SCF iterations
    # and written after convergence
    tracked_quantities = (
        'x_gaussian_hf_detect',
        'x_gaussian_energy_scf',
        'x_gaussian_perturbation_energy',
        'x_gaussian_electronic_kinetic_energy',
        'x_gaussian_energy_electrostatic',
        'x_gaussian_energy_error',
    )
    self.totalEnergyList = dict.fromkeys(tracked_quantities)
def initialize_values(self):
    """Set the per-run bookkeeping attributes to their start values.

    Called when parsing starts and again when a section_run closes, so that
    every run begins from the same state.
    """
    # section indices and the list of single configuration calculations
    self.secMethodIndex = None
    self.secSystemDescriptionIndex = 0
    self.singleConfCalcs = []
    # start with -1 since zeroth iteration is the initialization
    self.scfIterNr = -1
    # boolean flags
    self.scfConvergence = self.geoConvergence = self.periodicCalc = False
    # energies of the last converged SCF
    self.scfenergyconverged = 0.0
    self.scfkineticenergyconverged = 0.0
    self.scfelectrostaticenergy = 0.0
def startedParsing(self, path, parser):
    """Store the parser and its meta-info environment, then reset the state.

    Args:
        path: Not used by this method.
        parser: Parser object; its ``parserBuilder.metaInfoEnv`` is kept
            on the context as ``metaInfoEnv``.
    """
    self.parser = parser
    # save metadata
    self.metaInfoEnv = parser.parserBuilder.metaInfoEnv
    # allows to reset values if the same superContext is used to parse different files
    self.initialize_values()
def onClose_section_run(self, backend, gIndex, section):
    """Trigger called when section_run is closed.

    Writes the geometry-optimization convergence flag into a
    section_frame_sequence. If the optimization converged, or more than one
    single configuration calculation was collected, it also writes a
    section_sampling_method and a second section_frame_sequence that
    references it together with the collected calculations.

    The per-run variables are reset on every path. Previously the reset was
    skipped when neither condition held, so state leaked into the next run.

    Args:
        backend: Object used to open/close sections and add values.
        gIndex: Index of the section_run being closed (not used here).
        section: Cached values of the closed section (not used here).
    """
    # NOTE: kept as a module-level global because the original code
    # published the value that way.
    global sampling_method
    sampling_method = ""
    # write geometry optimization convergence
    gIndexTmp = backend.openSection('section_frame_sequence')
    backend.addValue('geometry_optimization_converged', self.geoConvergence)
    backend.closeSection('section_frame_sequence', gIndexTmp)
    # frame sequence
    if self.geoConvergence:
        sampling_method = "geometry_optimization"
    # with several calculations but no converged optimization the sampling
    # method stays "" (marked "to do" in the original code)
    if self.geoConvergence or len(self.singleConfCalcs) > 1:
        samplingGIndex = backend.openSection("section_sampling_method")
        backend.addValue("sampling_method", sampling_method)
        backend.closeSection("section_sampling_method", samplingGIndex)
        frameSequenceGIndex = backend.openSection("section_frame_sequence")
        backend.addValue("frame_sequence_to_sampling_ref", samplingGIndex)
        backend.addArrayValues("frame_sequence_local_frames_ref", np.asarray(self.singleConfCalcs))
        backend.closeSection("section_frame_sequence", frameSequenceGIndex)
    # reset all variables, also when no sampling information was written
    self.initialize_values()
def onClose_x_gaussian_section_geometry(self, backend, gIndex, section):
    """Write atom labels, positions and atom count into section_system.

    The first geometry of a run is written to the section with index 0 and
    ``secSystemDescriptionIndex`` is then set to 1. Every later geometry
    opens a new section_system, writes into it and closes it again.

    Args:
        backend: Object used to open/close sections and add values.
        gIndex: Index of the closed x_gaussian_section_geometry (not used).
        section: Cached values; the atomic numbers and x/y/z coordinates
            are read from it.
    """
    xs = section["x_gaussian_atom_x_coord"]
    ys = section["x_gaussian_atom_y_coord"]
    zs = section["x_gaussian_atom_z_coord"]
    charges = section["x_gaussian_atomic_number"]
    n_atoms = len(xs)

    positions = np.zeros((n_atoms, 3), dtype=float)
    atomic_numbers = np.zeros(n_atoms, dtype=int)
    labels = np.empty(n_atoms, dtype=object)
    for row, (x, y, z, number) in enumerate(zip(xs, ys, zs, charges)):
        positions[row, 0] = x
        positions[row, 1] = y
        positions[row, 2] = z
        atomic_numbers[row] = number
        # element symbol looked up by atomic number
        labels[row] = ase.data.chemical_symbols[atomic_numbers[row]]

    # a positive index means an earlier geometry was already written
    if self.secSystemDescriptionIndex > 0:
        self.secSystemDescriptionIndex = backend.openSection("section_system")

    target = self.secSystemDescriptionIndex
    backend.addArrayValues("atom_labels", labels, target)
    backend.addArrayValues("atom_positions", positions, target)
    backend.addValue("x_gaussian_number_of_atoms", len(labels), target)

    if self.secSystemDescriptionIndex > 0:
        backend.closeSection("section_system", self.secSystemDescriptionIndex)
    else:
        # NOTE(review): 1 acts as an "already written" marker here, yet the same
        # attribute is later used as the system reference -- confirm intent.
        self.secSystemDescriptionIndex = 1
def onClose_x_gaussian_section_atom_forces(self, backend, gIndex, section):
    """Combine the x/y/z force components into one array and store it.

    Args:
        backend: Object receiving the "atom_forces_raw" array.
        gIndex: Index of the closed section (not used).
        section: Cached values; the three force component lists are read
            from it.
    """
    fx = section["x_gaussian_atom_x_force"]
    fy = section["x_gaussian_atom_y_force"]
    fz = section["x_gaussian_atom_z_force"]
    # one row per atom, one column per Cartesian component
    forces = np.zeros((len(fx), 3), dtype=float)
    for row, components in enumerate(zip(fx, fy, fz)):
        forces[row, 0], forces[row, 1], forces[row, 2] = components
    backend.addArrayValues("atom_forces_raw", forces)
def onOpen_section_single_configuration_calculation(self, backend, gIndex, section):
    """Remember the index of each opened section_single_configuration_calculation."""
    self.singleConfCalcs.extend([gIndex])
def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
    """Trigger called when section_single_configuration_calculation is closed.

    Writes the SCF convergence flag and the references to the method and
    system sections, and resets the SCF convergence flag and iteration
    counter for the next calculation.

    Args:
        backend: Object receiving the values.
        gIndex: Index of the closed section (not used).
        section: Cached values of the closed section (not used).
    """
    # write SCF convergence
    backend.addValue('single_configuration_calculation_converged', self.scfConvergence)
    # reset; the counter starts at -1 since zeroth iteration is the initialization
    self.scfConvergence, self.scfIterNr = False, -1
    # write the references to section_method and section_system
    references = (
        ('single_configuration_to_calculation_method_ref', self.secMethodIndex),
        ('single_configuration_calculation_to_system_ref', self.secSystemDescriptionIndex),
    )
    for ref_name, ref_index in references:
        backend.addValue(ref_name, ref_index)
def onClose_x_gaussian_section_geometry_optimization_info(self, backend, gIndex, section):
    """Update ``geoConvergence`` from the matched optimization status.

    ``['Optimization completed']`` sets the flag to True and
    ``['Optimization stopped']`` sets it to False; anything else (including
    no match at all) leaves it untouched.

    Args:
        backend: Not used.
        gIndex: Index of the closed section (not used).
        section: Cached values; the optimization status is read from it.
    """
    status = section['x_gaussian_geometry_optimization_converged']
    if status is None:
        return
    # check for geometry optimization convergence
    if status == ['Optimization completed']:
        self.geoConvergence = True
    elif status == ['Optimization stopped']:
        self.geoConvergence = False
def onClose_section_scf_iteration(self, backend, gIndex, section):
    """Trigger called when section_scf_iteration is closed.

    Counts the iteration. When the "SCF Done" marker was matched, it marks
    the SCF as converged and stores the converged SCF energy. That energy is
    written as ``energy_total`` unless the detected method label is RHF,
    ROHF or UHF. If a kinetic energy was matched, the difference between
    SCF and kinetic energy is written as ``x_gaussian_energy_electrostatic``.

    The energies are taken from the last cached value directly rather than
    from the printed form of the whole list, which raised ``ValueError`` as
    soon as the list held more than one value.

    Args:
        backend: Object receiving the values.
        gIndex: Index of the closed section (not used).
        section: Cached values of the closed section.
    """
    def last_as_float(values):
        # Convert the most recent cached value; string values may carry a
        # 'D' exponent marker, which float() does not understand.
        value = values[-1]
        if isinstance(value, str):
            value = value.replace("D", "E")
        return float(value)

    # count number of SCF iterations
    self.scfIterNr += 1
    # check for SCF convergence
    if section['x_gaussian_single_configuration_calculation_converged'] is None:
        return
    self.scfConvergence = True

    scf_energy = section['x_gaussian_energy_scf']
    if scf_energy:
        self.scfenergyconverged = last_as_float(scf_energy)
        self.scfcharacter = section['x_gaussian_hf_detect']
        if self.scfcharacter not in (['RHF'], ['ROHF'], ['UHF']):
            self.energytotal = self.scfenergyconverged
            backend.addValue('energy_total', self.energytotal)

    kinetic_energy = section['x_gaussian_electronic_kinetic_energy']
    if kinetic_energy:
        self.scfkineticenergyconverged = last_as_float(kinetic_energy)
        self.scfelectrostaticenergy = self.scfenergyconverged - self.scfkineticenergyconverged
        backend.addValue('x_gaussian_energy_electrostatic', self.scfelectrostaticenergy)
def onClose_section_eigenvalues(self, backend, gIndex, section):
    """Write orbital eigenvalues and occupations when section_eigenvalues closes.

    Without beta eigenvalues a single spin channel is written with shape
    ``(1, 1, n)`` and occupation 2.0 for the occupied orbitals. With beta
    eigenvalues both channels are written with shape ``(2, 1, n)`` and
    occupation 1.0 for occupied orbitals. Virtual orbitals always get
    occupation 0.0. Energies are converted from hartree to joule.

    Each spin channel is zero-padded on its own to the longer channel
    length. Previously alpha and beta were concatenated before padding, so
    channels of different length ended up shifted into each other.

    Args:
        backend: Object receiving "eigenvalues_values" and
            "eigenvalues_occupation".
        gIndex: Index of the closed section (not used).
        section: Cached values; the four eigenvalue text lists are read
            from it.
    """
    def parse_energies(key):
        # Turn the printed list of matched text lines under *key* into a float array.
        text = str(section[key])
        if '*' in text:
            # an asterisk in the matched text is replaced by a single 0.0
            # (presumably an overflowed print field -- TODO confirm)
            return np.array([0.0])
        # strip the list punctuation, drop the remainder of "None", restore a
        # leading zero before bare decimal points and split fused negative numbers
        cleaned = text[1:].replace("'","").replace(",","").replace("]","").replace("one","").replace(" ."," 0.").replace(" -."," -0.").replace("\\n","").replace("-"," -")
        return np.array([float(token) for token in cleaned.split()], dtype=float)

    def spin_channel(occ_key, vir_key, occupied_occupation):
        # Return (energies in joule, occupations) for one spin channel, occupied orbitals first.
        occupied = parse_energies(occ_key)
        virtual = parse_energies(vir_key)
        energies = convert_unit(np.concatenate((occupied, virtual), axis=0), "hartree", "J")
        occupations = np.concatenate(
            (occupied_occupation * np.ones(len(occupied), dtype=float),
             np.zeros(len(virtual), dtype=float)),
            axis=0)
        return energies, occupations

    has_beta = bool(section["x_gaussian_beta_occ_eigenvalues_values"])

    alpha_energies, alpha_occupations = spin_channel(
        "x_gaussian_alpha_occ_eigenvalues_values",
        "x_gaussian_alpha_vir_eigenvalues_values",
        1.0 if has_beta else 2.0)

    if not has_beta:
        backend.addArrayValues("eigenvalues_values", np.reshape(alpha_energies, (1, 1, len(alpha_energies))))
        backend.addArrayValues("eigenvalues_occupation", np.reshape(alpha_occupations, (1, 1, len(alpha_occupations))))
        return

    beta_energies, beta_occupations = spin_channel(
        "x_gaussian_beta_occ_eigenvalues_values",
        "x_gaussian_beta_vir_eigenvalues_values",
        1.0)

    # pad each spin channel separately so that row 0 is alpha and row 1 is beta
    width = max(len(alpha_energies), len(beta_energies))
    eigenenall = np.zeros((2, 1, width))
    occupall = np.zeros((2, 1, width))
    channels = ((alpha_energies, alpha_occupations), (beta_energies, beta_occupations))
    for spin, (energies, occupations) in enumerate(channels):
        eigenenall[spin, 0, :len(energies)] = energies
        occupall[spin, 0, :len(occupations)] = occupations
    backend.addArrayValues("eigenvalues_values", eigenenall)
    backend.addArrayValues("eigenvalues_occupation", occupall)
def onClose_x_gaussian_section_orbital_symmetries(self, backend, gIndex, section):
    """Write the orbital symmetry labels of the alpha (and beta) orbitals.

    Every matched text line holds parenthesised labels such as
    ``(A1) (B2)``. The labels are taken from inside each pair of
    parentheses, occupied orbitals first, then virtual ones. Beta labels
    are only written when beta occupied labels were matched.

    Labels used to be recovered by chained string replacements on the
    printed Python list, which left list quotes attached to some of them
    (e.g. ``B2'`` instead of ``B2``).

    Args:
        backend: Object receiving "x_gaussian_alpha_symmetries" and
            "x_gaussian_beta_symmetries".
        gIndex: Index of the closed section (not used).
        section: Cached values; the four symmetry text lists are read
            from it.
    """
    def labels_of(key):
        # Collect the text inside every "(...)" of all lines stored under *key*.
        lines = section[key] or []
        return [label for line in lines for label in re.findall(r"\(([^()]*)\)", str(line))]

    alpha_labels = (labels_of("x_gaussian_alpha_occ_symmetry_values")
                    + labels_of("x_gaussian_alpha_vir_symmetry_values"))
    backend.addArrayValues("x_gaussian_alpha_symmetries", np.array(alpha_labels))

    if section["x_gaussian_beta_occ_symmetry_values"]:
        beta_labels = (labels_of("x_gaussian_beta_occ_symmetry_values")
                       + labels_of("x_gaussian_beta_vir_symmetry_values"))
        backend.addArrayValues("x_gaussian_beta_symmetries", np.array(beta_labels))
def onClose_x_gaussian_section_molecular_multipoles(self, backend, gIndex, section):
if(section["quadrupole_moment_xx"]):
x_gaussian_number_of_lm_molecular_multipoles = 35
else:
x_gaussian_number_of_lm_molecular_multipoles = 4
x_gaussian_molecular_multipole_m_kind = 'polynomial'
char = str(section["charge"])
cha = str([char])
charge = [float(f) for f in cha[1:].replace("-."," -0.").replace("'."," 0.").replace("'","").replace("[","").replace("]","").replace(",","").replace('"','').split()]
if(section["d