Commit faabb262 authored by Ask Hjorth Larsen
Clean up unused bits. Move common functionality into new util module. Write different parser log outputs to different files. Convert metadata to correct datatypes and get exact metadata names right (case-sensitive).
parent 3c5f244a
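# --- octopus_info_parser.py (filename inferred from the
#     'from octopus_info_parser import parse_infofile' statement in the main parser below) ---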
@@ -7,94 +7,30 @@ from nomadcore.unit_conversion.unit_conversion \
import register_userdefined_quantity
import os, sys, json
OCT_ENERGY_UNIT_NAME = 'usrOctEnergyUnit'
f_num = r'[-+]?\d*\.\d*'
e_num = r'[-+\d.EeDd]*'
i_num = r'[-+\d]*'
def numpattern(id, unit=None, pattern=f_num):
if unit is None:
pat = r'(?P<%(id)s>%(pattern)s)'
else:
pat = r'(?P<%(id)s__%(unit)s>%(pattern)s)'
return pat % dict(id=id, unit=unit, pattern=pattern)
from util import OCT_ENERGY_UNIT_NAME, f_num, i_num, numpattern, integer
# Match lines like: " Total = -7.05183\n"
def oct_energy_sm(octname, nomadname):
pattern = (r'\s*%(octname)s\s*=\s*%(pattern)s'
% dict(octname=octname,
pattern=numpattern(nomadname,
unit='hartree'))) # XXXXXXXXXXXX
unit=OCT_ENERGY_UNIT_NAME)))
#print 'oct energy sm', pattern
return SM(pattern,
name=octname)
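# Illustrative sketch (not part of the commit): oct_energy_sm('Total', 'energy_total')
# builds a pattern roughly like
#   \s*Total\s*=\s*(?P<energy_total__usrOctEnergyUnit>[-+]?(\d*\.\d+|\d+\.\d*))
# The '__usrOctEnergyUnit' suffix on the group name tells the SimpleMatcher
# machinery to convert the captured number using the user-defined energy unit
# registered via register_userdefined_quantity.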
def adhoc_register_octopus_energy_unit(parser):
#print 'GRRRRRRRRRRRRRRRRRRR'
line = parser.fIn.readline()
unit = line.rsplit('[', 2)[1].split(']', 2)[0]
if unit == 'H':
oct_energy_unit = 'hartree'
else:
assert unit == 'eV'
oct_energy_unit = 'eV'
register_userdefined_quantity(OCT_ENERGY_UNIT_NAME, oct_energy_unit)
sm_get_oct_energy_unit = SM(r'Eigenvalues\s*\[(H|eV)\]',
forwardMatch=True,
weak=True,
name='define_energy_unit',
adHoc=adhoc_register_octopus_energy_unit,
required=True)
sm_kpoints = SM(r'\**\s*Brillouin zone sampling\s* \**',
name='brillouin zone',
required=False,
sections=['section_eigenvalues'],
subMatchers=[
SM(r'Number of symmetry-reduced k-points\s*=\s*'
r'(?P<number_of_eigenvalues_kpoints>%s)' % i_num,
name='nkpts'),
SM(r'List of k-points:',
name='kpts header'),
SM(r'\s*%(i)s\s*%(kpt)s\s*%(kpt)s\s*%(kpt)s\s*%(f)s'
% dict(i=i_num, f=f_num,
kpt=numpattern('eigenvalues_kpoints')),
name='kpt',
repeats=True),
SM(r'\*+',
name='end')
])
sm_eig_occ = SM(r'\s*#st\s*Spin\s*Eigenvalue\s*Occupation',
name='eig_occ_columns',
sections=['section_eigenvalues'],
required=True,
subMatchers=[ # TODO spin directions
SM(r'\s*\d+\s*..\s*%(eig)s\s*%(occ)s'
% dict(eig=numpattern('eigenvalues_values',
OCT_ENERGY_UNIT_NAME),
occ=numpattern('eigenvalues_occupation')),
required=True,
repeats=True,
name='eig_occ_line'),
SM(r'\s*', name='whitespace')
])
sm_scfconv = SM(r'SCF converged in\s*%s\s*iterations'
% integer('x_octopus_info_scf_converged_iterations'),
sections=['section_run'])
sm_energy = SM(r'Energy \[(H|eV)\]:', required=True, name='energy_header',
subMatchers=[
oct_energy_sm('Total', 'energy_total'),
oct_energy_sm('Free', 'energy_free'),
oct_energy_sm('Ion-ion', 'x_octopus_info_energy_ion_ion'),
oct_energy_sm('Eigenvalues', 'energy_sum_eigenvalues'),
oct_energy_sm('Hartree', 'energy_electrostatic'),
#oct_energy_sm(r'Int\[n.v_xc\]', ''),
oct_energy_sm('Exchange', 'energy_X'),
oct_energy_sm('Correlation', 'energy_C'),
oct_energy_sm('vanderWaals', 'energy_van_der_Waals'),
@@ -103,7 +39,7 @@ sm_energy = SM(r'Energy \[(H|eV)\]:', required=True, name='energy_header',
])
mainFileDescription = SM(
infoFileDescription = SM(
name='root',
weak=True,
startReStr='',
@@ -111,18 +47,12 @@ mainFileDescription = SM(
sections=['section_single_configuration_calculation'],
subFlags=SM.SubFlags.Sequenced,
subMatchers=[
sm_get_oct_energy_unit,
#sm_kpoints,
sm_eig_occ,
sm_scfconv,
sm_energy,
])
# loading metadata from nomad-meta-info/meta_info/nomad_meta_info/octopus.nomadmetainfo.json
metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../../../../nomad-meta-info/meta_info/nomad_meta_info/octopus.nomadmetainfo.json"))
metaInfoEnv, warnings = loadJsonFile(filePath = metaInfoPath, dependencyLoader = None, extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS, uri = None)
parserInfo = {
"name": "parser_octopus",
"name": "info_parser_octopus",
"version": "1.0"
}
@@ -137,19 +67,25 @@ class OctopusParserContext(object):
def onClose_section_single_configuration_calculation(self, backend, gIndex, section):
"""trigger called when section_single_configuration_calculation is closed"""
pass
#backend.addValue("", self.scfIterNr)
logging.getLogger("nomadcore.parsing").info("closing section_single_configuration_calculation gIndex %d %s", gIndex, section.simpleValues)
self.scfIterNr = 0
#logging.getLogger("nomadcore.parsing").info("closing section_single_configuration_calculation gIndex %d %s", gIndex, section.simpleValues)
#self.scfIterNr = 0
def onClose_section_scf_iteration(self, backend, gIndex, section):
"""trigger called when section_scf_iteration is closed"""
logging.getLogger("nomadcore.parsing").info("closing section_scf_iteration bla gIndex %d %s", gIndex, section.simpleValues)
self.scfIterNr += 1
# which values to cache or forward (mapping meta name -> CachingLevel)
cachingLevelForMetaName = {}
if __name__ == "__main__":
mainFunction(mainFileDescription, metaInfoEnv, parserInfo,
cachingLevelForMetaName = cachingLevelForMetaName,
superContext = OctopusParserContext())
pass
#logging.getLogger("nomadcore.parsing").info("closing section_scf_iteration bla gIndex %d %s", gIndex, section.simpleValues)
#self.scfIterNr += 1
def parse_infofile(metaInfoEnv, pew, fname):
with open('info-parser.log', 'w') as fd:
mainFunction(infoFileDescription,
metaInfoEnv,
parserInfo,
outF=fd,
cachingLevelForMetaName={},
superBackend=pew,
superContext=OctopusParserContext(),
mainFile=fname)
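# --- octopus_logfile_parser.py: new module for parsing the Octopus stdout log ---
# (filename inferred from the 'from octopus_logfile_parser import parse_logfile'
#  import in the main parser below; the diff viewer did not preserve the file header)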
from nomadcore.simple_parser import mainFunction, SimpleMatcher as SM
from util import numpattern, i_num, f_num, e_num, word, integer
parserInfo = {
"name": "logfile_parser_octopus",
"version": "1.0"
}
logFileDescription = SM(
name='root',
weak=True,
startReStr='',
fixedStartValues={'program_name': 'octopus'},
sections=['section_run'],
subFlags=SM.SubFlags.Sequenced,
subMatchers=[
SM(r'Version\s*:\s*%s' % word('program_version')),
SM(r'Revision\s*:\s*%s' % integer('x_octopus_log_svn_revision')),
#SM(r'Input: [SmearingFunction = %s]' % word(''))
# Grr. But we have to convert semi_conducting to some other word.
]
)
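# Illustrative sketch of the stdout lines the two matchers above are meant to
# catch (values and exact spacing are made up; real Octopus output may differ):
#   Version                :       4.1.2
#   Revision               :       13730
# word(...) and integer(...) wrap the captured token in a named group so it is
# stored as program_version / x_octopus_log_svn_revision.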
class OctopusLogFileParserContext(object):
def startedParsing(self, name, parser):
pass
def parse_logfile(metaInfoEnv, pew, fname):
with open('logfile-parse.log', 'w') as fd:
mainFunction(logFileDescription,
metaInfoEnv,
parserInfo,
outF=fd,
cachingLevelForMetaName={},
superBackend=pew,
superContext=OctopusLogFileParserContext(),
mainFile=fname)
#for key in metaInfoEnv:
# print('key', key)
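# --- main parser module (presumably parser_octopus.py, matching the parserInfo
#     name above; the diff viewer did not preserve the file header) ---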
@@ -13,6 +13,8 @@ from nomadcore.parser_backend import JsonParseEventsWriterBackend
from nomadcore.unit_conversion.unit_conversion import convert_unit, \
register_userdefined_quantity
from octopus_info_parser import parse_infofile
from octopus_logfile_parser import parse_logfile
"""This is the Octopus parser.
@@ -36,11 +38,28 @@ those if many uploaded calculations contain those formats. I think it
is largely irrelevant.
"""
def normalize_names(names):
return [name.lower() for name in names]
OCT_ENERGY_UNIT_NAME = 'usrOctEnergyUnit'
OCT_LENGTH_UNIT_NAME = 'usrOctLengthUnit'
metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../../../../nomad-meta-info/meta_info/nomad_meta_info/octopus.nomadmetainfo.json"))
metaInfoEnv, warnings = loadJsonFile(filePath = metaInfoPath, dependencyLoader = None, extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS, uri = None)
metaInfoEnv, warnings = loadJsonFile(filePath=metaInfoPath,
dependencyLoader=None,
extraArgsHandling=InfoKindEl.ADD_EXTRA_ARGS,
uri=None)
# Dictionary of all meta info:
metaInfoKinds = metaInfoEnv.infoKinds.copy()
all_metadata_names = list(metaInfoKinds.keys())
normalized2real = dict(zip(normalize_names(all_metadata_names), all_metadata_names))
# We need access to this information because we want/need to dynamically convert
# extracted metadata to its correct type. Thus we need to know the type.
# Also since input is case insensitive, we need to convert normalized (lowercase)
# metadata names to their real names which are normally CamelCase.
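# Minimal sketch of the lookup this enables (metadata name is hypothetical):
#   normalized2real['x_octopus_input_calculationmode']
#   -> 'x_octopus_input_CalculationMode'
# i.e. the lowercased keyword extracted from inp/parser.log is mapped back to
# the exact, case-sensitive name defined in octopus.nomadmetainfo.json.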
parserInfo = {
@@ -52,9 +71,6 @@ def read_parser_log(path):
exec_kwargs = {}
with open(path) as fd:
for line in fd:
# Skip noise:
#if line.endswith('# default\n'):
# continue
# Remove comment:
line = line.split('#', 1)[0].strip()
tokens = line.split('=')
@@ -65,6 +81,10 @@
name = name.strip().lower()
value = value.strip()
if ' ' in name:
# Not a real name
continue
#print(name)
# Octopus questionably writes this one twice
#if name != 'speciesprojectorspherethreshold':
exec_kwargs[name] = value
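# Illustrative exec/parser.log content this loop is written against (format and
# values assumed from the '#'-comment stripping and 'name = value' split above):
#   CalculationMode = gs
#   Spacing = 0.283459        # default
# Comments are removed, the keyword is lowercased, and 'name = value' pairs are
# collected into exec_kwargs; entries whose name contains a space are skipped.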
@@ -82,7 +102,24 @@ def read_input_file(path):
return kwargs
def override_keywords(kwargs, parser_log_kwargs):
def is_octopus_logfile(fname):
fd = open(fname)
for lineno in range(20):
line = fd.next()
if '|0) ~ (0) |' in line: # Eyes from Octopus logo
return True
return False
def find_octopus_logfile(dirname):
allfnames = glob('%s/*' % dirname)
for fname in allfnames:
if os.path.isfile(fname) and is_octopus_logfile(fname):
return fname
return None
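# Note: is_octopus_logfile() looks for a fragment of the ASCII-art Octopus logo
# near the top of a file to decide whether it is the stdout log.  As written it
# uses the Python 2 iterator call fd.next(), never closes the handle, and would
# raise StopIteration on files shorter than 20 lines.  A rough, more defensive
# equivalent (a sketch only, not part of this commit):
#
#   def is_octopus_logfile(fname):
#       with open(fname) as fd:
#           for lineno, line in enumerate(fd):
#               if lineno >= 20:
#                   break
#               if '|0) ~ (0) |' in line:  # fragment of the Octopus logo
#                   return True
#       return False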
def override_keywords(kwargs, parser_log_kwargs, fd):
# Some variables we can get from the input file, but they may
# contain arithmetic and variable assignments which cannot
# just be parsed into a final value. The output of the
@@ -92,7 +129,9 @@ def override_keywords(kwargs, parser_log_kwargs):
# rely on the number! We will take some variables from the
# exec/parser.log but most will just be verbatim from the
# input file whether they can be parsed or not.
exec_override_keywords = set(['radius', 'lsize', 'spacing'])
exec_override_keywords = set(['radius',
#'lsize',
'spacing'])
outkwargs = kwargs.copy()
@@ -112,17 +151,14 @@
print('Keyword %s with value %s overridden by value '
'%s obtained from parser log'
% (name, kwargs[name], parser_log_kwargs[name]))
% (name, kwargs[name], parser_log_kwargs[name]),
file=fd)
outkwargs[name] = parser_log_kwargs[name]
return outkwargs
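# Minimal sketch of the behaviour (values are illustrative): if the input file
# contains arithmetic that cannot be evaluated here, e.g.
#   kwargs            = {'spacing': '0.4*angstrom'}
#   parser_log_kwargs = {'spacing': '0.755891786'}
# then, because 'spacing' is in exec_override_keywords, the already-evaluated
# value from exec/parser.log replaces the unparseable expression, and the
# replacement is logged to fd.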
def normalize_names(names):
return [name.lower() for name in names]
def register_units(kwargs):
def register_units(kwargs, fd):
units = kwargs.get('units', 'atomic').lower()
if units == 'atomic':
length_unit = 'bohr'
@@ -138,19 +174,43 @@
if 'unitsoutput' in kwargs:
raise ValueError('UnitsOutput not supported')
print('Set units: energy=%s, length=%s' % (energy_unit, length_unit))
print('Set units: energy=%s, length=%s' % (energy_unit, length_unit),
file=fd)
register_userdefined_quantity(OCT_ENERGY_UNIT_NAME, energy_unit)
register_userdefined_quantity(OCT_LENGTH_UNIT_NAME, length_unit)
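# Minimal sketch (assuming the elided branch handles the non-atomic case):
#   Units = atomic      -> energy_unit = 'hartree', length_unit = 'bohr'
#   Units = ev_angstrom -> energy_unit = 'eV',      length_unit = 'angstrom'
# The chosen units are registered under OCT_ENERGY_UNIT_NAME / OCT_LENGTH_UNIT_NAME
# so that SimpleMatcher group names carrying those unit suffixes convert the
# captured numbers correctly.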
metadata_dtypes = {'b': bool,
'C': str,
'f': float} # Integer?
# Convert (<normalized name>, <extracted string>) into
# (<real metadata name>, <value of correct type>)
def regularize_metadata_entry(normalized_name, value):
realname = normalized2real[normalized_name]
assert realname in metaInfoEnv, 'No such metadata: %s' % realname
metainfo = metaInfoEnv[realname]
dtype = metainfo['dtypeStr']
converted_value = metadata_dtypes[dtype](value)
return realname, converted_value
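# Minimal sketch (metadata name and value are hypothetical):
#   regularize_metadata_entry('x_octopus_input_spacing', '0.35')
#   -> ('x_octopus_input_Spacing', 0.35)
# The dtypeStr of the metainfo entry ('b', 'C' or 'f') selects the Python type;
# integer-typed metadata is not handled yet, as the "Integer?" comment above notes.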
def register_octopus_keywords(pew, category, kwargs):
skip = set(['mixingpreconditioner', 'mixinterval'])
for keyword in kwargs:
if keyword in skip: # XXXX
continue
# How do we get the metadata type?
pew.addValue('x_octopus_%s_%s' % (category, keyword),
kwargs[keyword])
normalized_name = 'x_octopus_%s_%s' % (category, keyword)
name, value = regularize_metadata_entry(normalized_name, kwargs[keyword])
pew.addValue(name, value)
def parse(fname):
pew = JsonParseEventsWriterBackend(metaInfoEnv)
def parse(fname, fd):
# fname refers to the static/info file.
pew = JsonParseEventsWriterBackend(metaInfoEnv,
fileOut=open('json-writer.log', 'w'))
# this context manager shamelessly copied from GPAW parser
# Where should Python code be put if it is used by multiple parsers?
@@ -162,28 +222,29 @@ def parse(fname):
with open_section('section_run'):
pew.addValue('program_name', 'Octopus')
print()
print(file=fd)
staticdirname, _basefname = os.path.split(fname)
dirname, _static = os.path.split(staticdirname)
inp_path = os.path.join(dirname, 'inp')
parser_log_path = os.path.join(dirname, 'exec', 'parser.log')
print('Read Octopus keywords from input file %s' % inp_path)
print('Read Octopus keywords from input file %s' % inp_path,
file=fd)
kwargs = read_input_file(inp_path)
register_octopus_keywords(pew, 'input', kwargs)
print('Read processed Octopus keywords from octparse logfile %s'
% parser_log_path)
% parser_log_path, file=fd)
parser_log_kwargs = read_parser_log(parser_log_path)
register_octopus_keywords(pew, 'parserlog', parser_log_kwargs)
print('Override certain keywords with processed keywords')
kwargs = override_keywords(kwargs, parser_log_kwargs)
print('Override certain keywords with processed keywords', file=fd)
kwargs = override_keywords(kwargs, parser_log_kwargs, fd)
register_units(kwargs)
register_units(kwargs, fd)
print('Read as ASE calculator')
print('Read as ASE calculator', file=fd)
calc = aseoct.Octopus(dirname)
atoms = calc.get_atoms()
@@ -198,6 +259,18 @@ def parse(fname):
nspins = calc.get_number_of_spins()
nkpts = len(calc.get_k_point_weights())
print('Parse info file using SimpleMatcher', file=fd)
parse_infofile(metaInfoEnv, pew, fname)
logfile = find_octopus_logfile(dirname)
if logfile is None:
print('No stdout logfile found', file=fd)
else:
print('Found stdout logfile %s' % logfile, file=fd)
print('Parse logfile using SimpleMatcher', file=fd)
parse_logfile(metaInfoEnv, pew, logfile)
print('Add parsed values', file=fd)
with open_section('section_system'):
# The Atoms object will always have a cell, even if it was not
# used in the Octopus calculation! Thus, to be more honest,
@@ -216,7 +289,8 @@
with open_section('section_single_configuration_calculation'):
with open_section('section_method'):
pew.addValue('number_of_spin_channels', nspins)
#pew.addValue('total_charge', XXX)
pew.addValue('total_charge',
float(parser_log_kwargs['excesscharge']))
oct_theory_level = kwargs.get('theorylevel', 'dft')
theory_levels = dict(#independent_particles='',
@@ -237,25 +311,16 @@
default_xc = ['lda_x_1d + lda_c_1d_csc',
'lda_x_2d + lda_c_2d_amgb',
'lda_x + lda_c_pz_mod'][ndim - 1]
pew.addValue('XC_functional',
kwargs.get('xcfunctional', default_xc))
# atom_positions
#section_frame_sequence ######### only enc
#XC_functional_name
#simulation_cell
#section_system
#program_version
#section_method
#atom_labels
#program_basis_set_type
#section_XC_functionals
#configuration_periodic_dimensions
#section_sampling_method ######### only enc
xcfunctional = kwargs.get('xcfunctional', default_xc)
xcfunctional = ''.join(xcfunctional.split()).upper()
with open_section('section_XC_functionals'):
pew.addValue('XC_functional_name', xcfunctional)
# Convergence parameters?
with open_section('section_eigenvalues'):
pew.addValue('eigenvalues_kind', 'normal') # XXX huh?
if kwargs.get('theorylevel', 'dft') == 'dft':
pew.addValue('eigenvalues_kind', 'normal')
eig = np.zeros((nspins, nkpts, nbands))
occ = np.zeros((nspins, nkpts, nbands))
@@ -272,4 +337,6 @@ def parse(fname):
if __name__ == '__main__':
fname = sys.argv[1]
parse(fname)
logfname = 'parse.log'
with open(logfname, 'w') as fd:
parse(fname, fd)
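# --- util.py: new helper module shared by the info and logfile parsers ---
# (filename inferred from the 'from util import ...' statements above)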
OCT_ENERGY_UNIT_NAME = 'usrOctEnergyUnit'
f_num = r'[-+]?(\d*\.\d+|\d+\.\d*)' # e.g.: 0.7 1. -.1
e_num = r'[-+]?\d*\.\d+[EeDd][-+]\d*' # e.g.: -7.642e-300
i_num = r'[-+\d]*'
def numpattern(id, unit=None, pattern=f_num):
if unit is None:
pat = r'(?P<%(id)s>%(pattern)s)'
else:
pat = r'(?P<%(id)s__%(unit)s>%(pattern)s)'
return pat % dict(id=id, unit=unit, pattern=pattern)
def pat(meta, regex):
return '(?P<%s>%s)' % (meta, regex)
def word(meta):
return pat(meta, regex=r'\S*')
def integer(meta):
return pat(meta, regex=i_num)
def floating(meta):
return pat(meta, regex='%s|%s' % (f_num, e_num))
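# Illustrative sketch of what these helpers expand to (not part of the module):
#   numpattern('energy_total', unit='usrOctEnergyUnit')
#     -> r'(?P<energy_total__usrOctEnergyUnit>[-+]?(\d*\.\d+|\d+\.\d*))'
#   integer('x_octopus_log_svn_revision')
#     -> r'(?P<x_octopus_log_svn_revision>[-+\d]*)'
#   word('program_version')
#     -> r'(?P<program_version>\S*)'
# Note that e_num requires an explicit sign after the exponent letter, so it
# matches '-7.642e-300' but not '1.5e10'.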