# parser_gaussian.py
import setup_paths
2
from nomadcore.simple_parser import mainFunction, SimpleMatcher as SM
3
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
4 5 6
from nomadcore.caching_backend import CachingLevel
import os, sys, json, logging
import numpy as np
7 8

# Description of the input: a tree of SimpleMatcher (SM) objects, each
# identified by the regular expression that starts it.
mainFileDescription = SM(
    name='root',
    weak=True,
    startReStr="",
    subMatchers=[
        # One 'newRun' matcher per "Entering Link 1" line; it lists
        # section_run and section_method as its sections.
        SM(
            name='newRun',
            startReStr=r"\s*Entering Link 1 ",
            repeats=True,
            required=True,
            forwardMatch=True,
            fixedStartValues={'program_basis_set_type': 'gaussians'},
            sections=['section_run', 'section_method'],
            subMatchers=[
                # Banner lines; the last pattern captures program_name
                # and program_version.
                SM(
                    name='header',
                    startReStr=r"\s*Entering Link 1 ",
                    subMatchers=[
                        SM(r"\s*Cite this work as:"),
                        SM(r"\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,"),
                        SM(r"\s\*\*\*\*\*\*\*\*\*\*\*\**"),
                        SM(r"\s*(?P<program_name>Gaussian)\s*(?P<program_version>[0-9]*:\s.*)")
                    ]
                ),
                # "%key=value" lines: checkpoint file, memory and
                # processor count, declared with the Unordered sub-flag.
                SM(
                    name='globalparams',
                    startReStr=r"\s*%\w*=",
                    subFlags=SM.SubFlags.Unordered,
                    forwardMatch=True,
                    subMatchers=[
                        SM(r"\s*%[Cc]hk=(?P<gaussian_chk_file>[A-Za-z0-9.]*)"),
                        SM(r"\s*%[Mm]em=(?P<gaussian_memory>[A-Za-z0-9.]*)"),
                        SM(r"\s*%[Nn][Pp]roc=(?P<gaussian_number_of_processors>[A-Za-z0-9.]*)")
                    ]
                ),
                # Block introduced by "Symbolic Z-matrix:": total charge,
                # multiplicity and the atom labels.
                SM(
                    name='charge_multiplicity',
                    sections=['section_system_description', 'gaussian_section_labels'],
                    startReStr=r"\s*Symbolic Z-matrix:",
                    subMatchers=[
                        SM(r"\s*Charge =\s*(?P<total_charge>[-+0-9]+) Multiplicity =\s*(?P<target_multiplicity>[0-9]+)"),
                        SM(r"\s*(?P<gaussian_atom_label>\D{1,2}?(?=\s*[0-9,-]))", repeats=True),
                        SM(r"\s*(?P<gaussian_atom_label>\w{1,2}(?=\s))", repeats=True),
                        SM(r"\s*Variables:|\s*NAtoms=|\s*Z-MATRIX"),
                        SM(r"\s*"),
                    ]
                ),
                # Orientation tables: one repeated row pattern capturing
                # the x, y and z coordinate of each atom.
                SM(
                    name='geometry',
                    sections=['section_system_description', 'gaussian_section_geometry'],
                    startReStr=r"\s*Z-Matrix orientation:|\s*Input orientation:|\s*Standard orientation:",
                    subMatchers=[
                        SM(r"\s+[0-9]+\s+[0-9]+\s+[0-9]+\s+(?P<gaussian_atom_x_coord__angstrom>[-+0-9EeDd.]+)\s+(?P<gaussian_atom_y_coord__angstrom>[-+0-9EeDd.]+)\s+(?P<gaussian_atom_z_coord__angstrom>[-+0-9EeDd.]+)", repeats=True)
                    ]
                ),
                SM(r"\s*Symmetry|\s*Stoichiometry")
            ]
        )
    ]
)

# Load the metadata from
# nomad-meta-info/meta_info/nomad_meta_info/gaussian.nomadmetainfo.json;
# the path is resolved relative to the directory containing this file.
metaInfoPath = os.path.normpath(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "../../../../nomad-meta-info/meta_info/nomad_meta_info/gaussian.nomadmetainfo.json",
    )
)
metaInfoEnv, warnings = loadJsonFile(
    filePath=metaInfoPath,
    dependencyLoader=None,
    extraArgsHandling=InfoKindEl.ADD_EXTRA_ARGS,
    uri=None,
)

# Name and version of this parser, passed on to mainFunction.
parserInfo = {
    "name": "parser_gaussian",
    "version": "1.0",
}

class GaussianParserContext(object):
    """Main place to keep the parser status, open ancillary files, ...

    The onClose_* methods read values cached in a section and forward
    them to the backend under a different name.
    """

    def __init__(self):
        pass

    def startedParsing(self, path, parser):
        """Keep a reference to the parser object.

        Args:
            path: accepted but not used by this method.
            parser: stored as ``self.parser``.
        """
        self.parser = parser

    def onClose_gaussian_section_labels(self, backend, gIndex, section):
        """Forward the cached atom labels to the backend as "atom_label".

        Args:
            backend: object providing ``addValue(name, value)``.
            gIndex: accepted but not used by this method.
            section: mapping-like object indexed by meta name.
        """
        labels = section["gaussian_atom_label"]
        backend.addValue("atom_label", labels)

    def onClose_gaussian_section_geometry(self, backend, gIndex, section):
        """Combine the cached x/y/z coordinates into one (n, 3) float array.

        The array is handed to the backend as "atom_position"; row i holds
        the i-th entries of the x, y and z lists.

        Args:
            backend: object providing ``addArrayValues(name, array)``.
            gIndex: accepted but not used by this method.
            section: mapping-like object indexed by meta name.

        Raises:
            ValueError: if the three coordinate lists differ in length.
        """
        xCoord = section["gaussian_atom_x_coord"]
        yCoord = section["gaussian_atom_y_coord"]
        zCoord = section["gaussian_atom_z_coord"]
        # One column per coordinate list; astype(float) keeps the float
        # dtype even when the lists are empty or hold non-float numbers.
        atom_positions = np.column_stack((xCoord, yCoord, zCoord)).astype(float)
        backend.addArrayValues("atom_position", atom_positions)


# Which values to cache or forward (mapping meta name -> CachingLevel).
cachingLevelForMetaName = {
    # Values read back in the onClose_* handlers of GaussianParserContext.
    "gaussian_atom_x_coord": CachingLevel.Cache,
    "gaussian_atom_y_coord": CachingLevel.Cache,
    "gaussian_atom_z_coord": CachingLevel.Cache,
    "gaussian_atom_label": CachingLevel.Cache,
    # Helper sections that trigger those handlers.
    # NOTE(review): presumably Ignore keeps them out of the output - confirm.
    "gaussian_section_geometry": CachingLevel.Ignore,
    "gaussian_section_labels": CachingLevel.Ignore,
}

if __name__ == "__main__":
    # Script entry point: pass the matcher tree, metadata, parser info,
    # caching levels and a fresh context object to mainFunction.
    mainFunction(
        mainFileDescription,
        metaInfoEnv,
        parserInfo,
        cachingLevelForMetaName=cachingLevelForMetaName,
        superContext=GaussianParserContext(),
    )