diff --git a/.gitignore b/.gitignore
index 6a421b3996164e8d9dafb38c729702d5bdf16393..0a7765d13ab62d1dcb5e85da05d3cc4d0b44c105 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,7 @@ syntax: glob
 *.pyc
 *.bk
 *.swp
+*.egg-info
 .DS_Store
 *.annotate
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfb9ee2d0118f895954c9d67348e884d436f309e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,35 @@
+# Copyright 2016-2019 Hjorth Larsen
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from setuptools import setup, find_packages
+
+def main():
+    setup(
+        name='siestaparser',
+        version='0.1',
+        description='NOMAD parser implementation for Siesta.',
+        author='Hjorth Larsen',
+        author_email='asklarsen@gmail.com',
+        license='GPL3',
+        package_dir={'': './'},
+        packages=find_packages(),
+        include_package_data=True,
+        install_requires=[
+            'nomadcore',
+        ],
+    )
+
+
+if __name__ == '__main__':
+    main()
diff --git a/siestaparser/__init__.py b/siestaparser/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..64dbafb1fac0b2bb592cb40569fa8f4f0a2e656a
--- /dev/null
+++ b/siestaparser/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2016-2018 Lauri Himanen, Fawzi Mohamed, Ankit Kariryaa
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +from siestaparser.main import SiestaParser diff --git a/siestaparser/check.sh b/siestaparser/check.sh new file mode 100755 index 0000000000000000000000000000000000000000..866138698f44116fe1891777a2645056e698bb19 --- /dev/null +++ b/siestaparser/check.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +python main.py test/H2O/out --annotate 2>&1 | tee test/H2O/json +python main.py test/H2O-relax/out --annotate 2>&1 | tee test/H2O-relax/json +python main.py test/Al-bulk/out --annotate 2>&1 | tee test/Al-bulk/json +python main.py test/Al-slab/out --annotate 2>&1 | tee test/Al-slab/json +python main.py test/MgO/out --annotate 2>&1 | tee test/MgO/json +python main.py test/smeagol-Au-leads/Au.out --annotate 2>&1 | tee test/smeagol-Au-leads/json +python main.py test/smeagol-Au-scregion/Au.out --annotate 2>&1 | tee test/smeagol-Au-scregion/json diff --git a/siestaparser/inputvars.py b/siestaparser/inputvars.py new file mode 100644 index 0000000000000000000000000000000000000000..7c729a01d5d731193286881a00f4ae8c7bb3b979 --- /dev/null +++ b/siestaparser/inputvars.py @@ -0,0 +1,386 @@ +# Copyright 2016-2018 Ask Hjorth Larsen, Fawzi Mohamed +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Main author and maintainer: Ask Hjorth Larsen <asklarsen@gmail.com> + +varlist = [ + '%endblock', + 'Atom-Setup-Only', + 'Atom.Debug.KB.Generation', + 'AtomCoorFormatOut', + 'AtomLeftVcte', + 'AtomRightVcte', + 'AtomicCoordinatesFormat', + 'BasisPressure', + 'BornCharge', + 'BuildSuperCell', + 'BulkLead', + 'BulkTransport', + 'BulkTransvCellSize', + 'BulkTransvCellSizeX', + 'BulkTransvCellSizeY', + 'BulkTransvCellSizeZ', + 'CB.MaxKappa', + 'CB.WriteComplexBands', + 'CDFT', + 'CDFT.MaxIter', + 'COOP.Write', + 'CalcIETS', + 'CalcMPSH', + 'ChangeKgridInMD', + 'Compat-pre-v4-DM-H', + 'DM.AllowExtrapolation', + 'DM.AllowReuse', + 'DM.EnergyTolerance', + 'DM.FIRE.Mixing', + 'DM.FormattedFiles', + 'DM.FormattedInput', + 'DM.FormattedOutput', + 'DM.HarrisTolerance', + 'DM.KickMixingWeight', + 'DM.MixSCF1', + 'DM.MixingWeight', + 'DM.NormalizationTolerance', + 'DM.NormalizeDuringSCF', + 'DM.NumberBroyden', + 'DM.NumberKick', + 'DM.NumberPulay', + 'DM.OccupancyTolerance', + 'DM.Pulay.Avoid.First.After.Kick', + 'DM.PulayOnFile', + 'DM.RequireEnergyConvergence', + 'DM.RequireHarrisConvergence', + 'DM.Tolerance', + 'DM.UseSaveDM', + 'Delta', + 'DeltaWorkfunction', + 'Diag.AllInOne', + 'Diag.DivideAndConquer', + 'Diag.Memory', + 'Diag.NoExpert', + 'Diag.ParallelOverK', + 'Diag.PreRotate', + 'Diag.Use2D', + 'DiagMemory', + 'DiagScale', + 'DirectPhi', + 'DivideAndConquer', + 'EM.AddRhoGate', + 'EM.AddVgIsolatedLocalCharges', + 'EM.COOPCalculate', + 'EM.COOPNumberOfBonds', + 'EM.DebugRhoGate', + 'EM.NetRhoGateCharge', + 'EM.PrintLimits', + 'EM.RhoGateLxMax', + 'EM.RhoGateLxMin', + 'EM.RhoGateLyMax', + 'EM.RhoGateLyMin', + 'EM.RhoGateLzMax', + 'EM.RhoGateLzMin', + 'EM.TRCAddVCDFT', + 'EM.TimeReversal', + 'EM.Timings', + 'EM.addV', + 'EMPDOSKSO', + 'EMTransport', + 'ElectronicTemperature', + 'EnergLowestBound', + 'FilterCutoff', + 'FilterTol', + 
'FinalTransmRange', + 'FixAuxiliaryCell', + 'FixAuxillaryCell', + 'FixSpin', + 'ForceAuxCell', + 'FullRamp', + 'HSetupOnly', + 'Harris_functional', + 'HartreeLeadsBottom', + 'HartreeLeadsLeft', + 'HartreeLeadsRight', + 'Ik_Select', + 'InitTransmRange', + 'InitTransport', + 'KB.New.Reference.Orbitals', + 'KB.Rmax', + 'LDAU.units', + 'LDAUForces', + 'LDAU_METHOD', + 'LatticeConstant', + 'LongOutput', + 'MD.AnnealOption', + 'MD.BulkModulus', + 'MD.FCAcousticSumRule', + 'MD.FCAtomRestart', + 'MD.FCAxisRestart', + 'MD.FCDispl', + 'MD.FCEigenVectors', + 'MD.FCIR', + 'MD.FCRead', + 'MD.FCfirst', + 'MD.FClast', + 'MD.FinalTimeStep', + 'MD.FireQuench', + 'MD.InitialTemperature', + 'MD.InitialTimeStep', + 'MD.LengthTimeStep', + 'MD.MaxCGDispl', + 'MD.MaxForceTol', + 'MD.MaxStressTol', + 'MD.NoseMass', + 'MD.NumCGsteps', + 'MD.NumNEBsteps', + 'MD.ParrinelloRahmanMass', + 'MD.Quench', + 'MD.RelaxCellOnly', + 'MD.RemoveIntraMolecularPressure', + 'MD.TRCSampling', + 'MD.TRCSkip', + 'MD.TargetPressure', + 'MD.TargetTemperature', + 'MD.TauRelax', + 'MD.Timing', + 'MD.TypeOfRun', + 'MD.UseSaveCG', + 'MD.UseSaveNEB', + 'MD.UseSaveXV', + 'MD.UseSaveZM', + 'MD.UseStructFile', + 'MD.VariableCell', + 'MM.Cutoff', + 'MM.UnitsDistance', + 'MM.UnitsEnergy', + 'MPSHAtomFirst', + 'MPSHAtomLast', + 'MPSHOrbFirst', + 'MPSHOrbLast', + 'MaxBondDistance', + 'MaxSCFIterations', + 'MeshCutoff', + 'MeshSubDivisions', + 'MinSCFIterations', + 'MixCharge', + 'MixHamiltonian', + 'MixedParallel', + 'MonitorForcesInSCF', + 'MullikenInSCF', + 'NC.OrbitalRotationEnd', + 'NC.OrbitalRotationStart', + 'NEB.SkipEdge', + 'NEnergReal', + 'NIVPoints', + 'NPoles', + 'NSlices', + 'NTransmPoints', + 'NaiveAuxiliaryCell', + 'NeglNonOverlapInt', + 'NenergImCircle', + 'NenergImLine', + 'NetCharge', + 'NonCollinearSpin', + 'NumSkipWriteDM', + 'NumberLinearMix', + 'NumberOfAtoms', + 'NumberOfEigenStates', + 'NumberOfSpecies', + 'Number_of_species', + 'ON.ChemicalPotential', + 'ON.ChemicalPotentialOrder', + 'ON.ChemicalPotentialRc', + 'ON.ChemicalPotentialTemperature', + 'ON.ChemicalPotentialUse', + 'ON.MaxNumIter', + 'ON.UseSaveLWF', + 'ON.eta', + 'ON.eta_alpha', + 'ON.eta_beta', + 'ON.etol', + 'ON.functional', + 'OccupationFunction', + 'On.RcLWF', + 'OpticalCalculation', + 'Optim.Broyden', + 'Output-Structure-Only', + 'PAO.BasisSize', + 'PAO.BasisType', + 'PAO.EnergyShift', + 'PAO.Filter', + 'PAO.FixSplitTable', + 'PAO.Keep.Findp.Bug', + 'PAO.NewSplitCode', + 'PAO.OldStylePolorbs', + 'PAO.SoftDefault', + 'PAO.SoftInnerRadius', + 'PAO.SoftPotential', + 'PAO.SplitNorm', + 'PAO.SplitNormH', + 'PAO.SplitTailNorm', + 'PS.SIC', + 'ParallelOverK', + 'PartialChargesAtEveryGeometry', + 'PartialChargesAtEveryScfStep', + 'PoissonMultigrid', + 'Print_ldau', + 'ProcessorGridX', + 'ProcessorGridY', + 'ProcessorGridZ', + 'ProjectionInSCF', + 'ProjectionMethod', + 'RcSpatial', + 'ReInitialiseDM', + 'ReadHamiltonian', + 'ReadKPIN', + 'ReparametrizePseudos', + 'Restricted.Radial.Grid', + 'Rmax.Radial.Grid', + 'RotateSpin.Phi', + 'RotateSpin.Theta', + 'SCF.LinearMixingAfterPulay', + 'SCF.MixAfterConvergence', + 'SCF.MixingWeightAfterPulay', + 'SCF.Pulay.Damping', + 'SCF.Pulay.DebugSVD', + 'SCF.Pulay.RcondSVD', + 'SCF.Pulay.UseSVD', + 'SCF.PulayDmaxRegion', + 'SCF.PulayMinimumHistory', + 'SCF.Read.Charge.NetCDF', + 'SCF.Read.Deformation.Charge.NetCDF', + 'SCF.Recompute-H-After-Scf', + 'SCF.Want.Variational.EKS', + 'SCFMustConverge', + 'SIC.Flavour', + 'SIC.Lambda', + 'SIC.NoRelaxation', + 'SIC.Npop', + 'SIC.PopDMConv', + 'SIC.PopKgridFactor', + 
'SIC.PopSmatSparsity', + 'SIC.ProjectionMode', + 'SIC.ProjectionType', + 'SIC.Rot_Inv', + 'STT.Calculation', + 'STT.LinearResponse', + 'SaveBaderCharge', + 'SaveDeltaRho', + 'SaveElectrostaticPotential', + 'SaveHS', + 'SaveInitialChargeDensity', + 'SaveIonicCharge', + 'SaveNeutralAtomPotential', + 'SaveRho', + 'SaveRhoXC', + 'SaveTotalCharge', + 'SaveTotalPotential', + 'Scissor.Operator', + 'SetBulkTransvCell', + 'Siesta2Wannier90.NumberOfBands', + 'Siesta2Wannier90.NumberOfBandsDown', + 'Siesta2Wannier90.NumberOfBandsUp', + 'Siesta2Wannier90.WriteAmn', + 'Siesta2Wannier90.WriteEig', + 'Siesta2Wannier90.WriteMmn', + 'Siesta2Wannier90.WriteUnk', + 'Sigma.DSigmaDE', + 'SignatureRecords', + 'SimulateDoping', + 'SingleExcitation', + 'SkipLastIter', + 'SlabDipoleCorrection', + 'SolutionMethod', + 'SpinConfLeads', + 'SpinOrbit', + 'SpinPolarized', + 'SystemLabel', + 'SystemName', + 'TS.MixH', + 'TimeReversal', + 'TimeReversalSymmetryForKpoints', + 'TrCoefficients', + 'TryMemoryIncrease', + 'UseDomainDecomposition', + 'UseNewDiagk', + 'UseSaveData', + 'UseSpatialDecomposition', + 'UseStructFile', + 'UseTreeTimer', + 'VFinal', + 'VGate', + 'VInitial', + 'Vna.Filter', + 'WarningMinimumAtomicDistance', + 'WriteBands', + 'WriteCoorCerius', + 'WriteCoorInitial', + 'WriteCoorStep', + 'WriteCoorXmol', + 'WriteDM', + 'WriteDM.History.NetCDF', + 'WriteDM.NetCDF', + 'WriteDMHS.History.NetCDF', + 'WriteDMHS.NetCDF', + 'WriteDMT', + 'WriteDenchar', + 'WriteDiagdS', + 'WriteEDM', + 'WriteEigenvalues', + 'WriteForces', + 'WriteHSDeriv', + 'WriteHamiltonPop', + 'WriteHirshfeldPop', + 'WriteIonPlotFiles', + 'WriteKbands', + 'WriteKpoints', + 'WriteMDXmol', + 'WriteMDhistory', + 'WriteMullikenPop', + 'WriteProjections', + 'WriteSpinMulliken', + 'WriteSpinSCF', + 'WriteVNA', + 'WriteVoronoiPop', + 'WriteWaveFunctions', + 'XML.AbortOnErrors', + 'XML.AbortOnWarnings', + 'XML.Write', + 'ZBroadeningG', + 'ZLeftVcte', + 'ZM.CalcAllForces', + 'ZM.ForceTolAngle', + 'ZM.ForceTolLength', + 'ZM.MaxDisplAngle', + 'ZM.MaxDisplLength', + 'ZM.UnitsAngle', + 'ZM.UnitsLength', + 'ZRightVcte', + 'ZVGateL', + 'ZVGateR', + 'ZeemanTermBx', + 'ZeemanTermBy', + 'ZeemanTermBz', + 'alloc_report_level', + 'alloc_report_threshold', + 'blocksize', + 'compat-pre-v4-dynamics', + 'fdf-debug', + 'kgrid_cutoff', + 'processorY', + 'timer_report_threshold', + 'user-basis', + 'user-basis-netcdf', + 'xc.authors', + 'xc.functional', +] diff --git a/siestaparser/main.py b/siestaparser/main.py new file mode 100644 index 0000000000000000000000000000000000000000..aeffb80e8ac047bc4bfec910fbad8d745e696435 --- /dev/null +++ b/siestaparser/main.py @@ -0,0 +1,697 @@ +# Copyright 2016-2018 Ask Hjorth Larsen, Fawzi Mohamed +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# +# Main author and maintainer: Ask Hjorth Larsen <asklarsen@gmail.com> + +from __future__ import print_function +import os +import sys +import logging +from glob import glob +import re + +import numpy as np +from ase.data import chemical_symbols +from ase import Atoms + +from nomadcore.simple_parser import (mainFunction, SimpleMatcher as SM, + AncillaryParser) +from nomadcore.local_meta_info import loadJsonFile, InfoKindEl +from nomadcore.unit_conversion.unit_conversion \ + import register_userdefined_quantity, convert_unit + +from siestaparser.inputvars import varlist + +# metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../../../../nomad-meta-info/meta_info/nomad_meta_info/siesta.nomadmetainfo.json")) +# metaInfoEnv, warnings = loadJsonFile(filePath=metaInfoPath, +# dependencyLoader=None, +# extraArgsHandling=InfoKindEl.ADD_EXTRA_ARGS, +# uri=None) + +parser_info = {'name':'siesta-parser', 'version': '1.0'} + + +def siesta_energy(title, meta, **kwargs): + return SM(r'siesta:\s*%s\s*=\s*(?P<%s__eV>\S+)' % (title, meta), + name=meta, **kwargs) + + +def line_iter(fd, linepattern = re.compile(r'\s*([^#]+)')): + # Strip off comments and whitespace, return only non-empty strings + for line in fd: + match = linepattern.match(line) + if match: + line = match.group().rstrip() + if line: + yield line + + +def get_input_metadata(inputvars_file, use_new_format): + inputvars = {} + blocks = {} + + varset = set(varlist) + + lower_vars = {} + for var in varlist: + lower_vars[var.lower()] = var + + def addvar(tokens): + name = tokens[0] + val = ' '.join(tokens[1:]) + name = name.lower() + if name in lower_vars: + name = lower_vars[name] + inputvars[name] = val + + currentblock = None + + with open(inputvars_file) as fd: + lines = line_iter(fd) + + for line in lines: + tokens = line.split() + assert len(tokens) > 0 + + if tokens[0].startswith('%'): + if tokens[0].lower() == '%block': + #assert currentblock == None + currentblock = [] + blocks[tokens[1]] = currentblock + elif tokens[0].lower() == '%endblock': + currentblock = None + else: + raise ValueError('Unknown: %s' % tokens[0]) + else: + if use_new_format: + if line.startswith(' '): + if currentblock is None: + continue # Ignore. Probably some warning + #assert currentblock is not None, line + currentblock.append(tokens) + else: + currentblock = None + addvar(tokens) + else: + if currentblock is not None: + currentblock.append(tokens) + else: + addvar(tokens) + + return inputvars, blocks + + +""" +%block PAO.Basis # Define Basis set +O 2 # Species label, number of l-shells + n=2 0 2 # n, l, Nzeta + 3.305 2.479 + 1.000 1.000 + n=2 1 2 P 1 # n, l, Nzeta, Polarization, NzetaPol + 3.937 2.542 + 1.000 1.000 +H 1 # Species label, number of l-shells + n=1 0 2 P 1 # n, l, Nzeta, Polarization, NzetaPol + 4.709 3.760 + 1.000 1.000 +%endblock PAO.Basis +""" + + + +class SiestaContext(object): + def __init__(self): + self.fname = None # The file that we are parsing + self.dirname = None # Base directory of calculations + #self.parser = None # The parser object + self.format = None # 'old' or 'new'; when parsing version + + # label and things determined by label + self.label = None + self.files = None # Dict of files + self.blocks = None # Dict of input blocks (coords, cell, etc.) 
+ + self.data = {} + self.special_input_vars = {} + + self.system_meta = {} + self.section_refs = {} # {name: gindex, ...} + self.simulation_type = None + + def adhoc_format_new(self, parser): + assert self.format is None + self.format = 'new' + + def adhoc_format_old(self, parser): + assert self.format is None + self.format = 'old' + + def adhoc_set_label(self, parser): + # ASSUMPTION: the parser fIn is in the 'root' of whatever was uploaded. + # This may not be true. Figure out how to do this in general. + line = parser.fIn.readline() + assert line.startswith('reinit: System Label:') + self.label = label = line.split()[-1] + dirname = self.dirname + + files = {} + + for fileid in ['EIG', 'KP']: + path = os.path.join(dirname, '%s.%s' % (label, fileid)) + if os.path.isfile(path): + files[fileid] = path + # Warn if files are not present? + # + # Also: input file + # input parser logfile + # + # what else? We already get force/stress/positions from stdout. + if self.format == 'new': + inplogfiles = glob('%s/fdf-*.log' % dirname) + assert len(inplogfiles) == 1 + if inplogfiles: + inplogfiles.sort() + files['inputlog'] = inplogfiles[0] + else: + assert self.format == 'old', self.format + files['inputlog'] = os.path.join(dirname, 'out.fdf') + self.files = files + + def adhoc_set_simulation_type(self, parser): + line = parser.fIn.readline() + + if self.simulation_type is not None: + return + + line = line.strip() + + if line.startswith('Single-point'): + self.simulation_type = 'singlepoint' + elif 'opt' in line or 'move' in line: + self.simulation_type = 'optimization' + else: + raise ValueError('Todo: recognize simulation type "%s"' % line) + + def startedParsing(self, fname, parser): + self.fname = fname + path = os.path.abspath(fname) + self.dirname, _ = os.path.split(path) + #self.parser = parser + + #def onClose_x_siesta_section_xc_authors(self, backend, gindex, section): + + def onClose_section_frame_sequence(self, backend, gindex, section): + backend.addValue('frame_sequence_to_sampling_ref', + self.section_refs['sampling_method']) + + def onOpen_section_sampling_method(self, backend, gindex, section): + self.section_refs['sampling_method'] = gindex + + def onOpen_section_frame_sequence(self, backend, gindex, section): + self.section_refs['frame_sequence'] = gindex + + def onClose_section_sampling_method(self, backend, gindex, section): + simtype = self.simulation_type + assert simtype is not None + if simtype == 'optimization': + backend.addValue('sampling_method', 'geometry_optimization') + elif simtype == 'singlepoint': + pass + else: + raise ValueError('XXX: %s' % simtype) + + def onClose_section_eigenvalues(self, backend, gindex, section): + self.read_eigenvalues(backend) + + def onOpen_section_method(self, backend, gindex, section): + self.section_refs['method'] = gindex + + def onClose_section_method(self, backend, gindex, section): + temp = self.special_input_vars['ElectronicTemperature'] + temp, unit = temp.split() + assert unit == 'Ry' # Siesta always converts to Ry here I think + temp = float(temp) + temp = convert_unit(temp, 'rydberg') + backend.addValue('smearing_width', temp) + + #simtype = self.special_input_vars['MD.TypeOfRun'] + #print('SIMTYPE', simtype) + #sdfsdf + + def onOpen_section_system(self, backend, gindex, section): + self.section_refs['system'] = gindex + + def onClose_section_system(self, backend, gindex, section): + data = self.data + meta = self.system_meta + + latvec = data.pop('block_lattice_vectors', None) + if latvec is not None: + latvec = 
latvec.astype(float) + size = self.special_input_vars['LatticeConstant'] + size, unit = size.split() + #assert unit == 'ang', unit + unit = {'ang': 'angstrom', + 'Bohr': 'bohr'}[unit] + + size = float(size) + size = convert_unit(size, unit) + meta['simulation_cell'] = latvec * size + + cell = data.pop('outcell_ang', None) + cell2 = data.pop('auto_unit_cell_ang', None) + if cell2 is not None: + cell = cell2 + + if cell is not None: + cell = cell.astype(float) + cell = convert_unit(cell, 'angstrom') + meta['simulation_cell'] = cell + + labels = data.pop('block_species_label', None) + if labels is not None: + assert labels.shape[1] == 1 + labels = labels[:, 0] + self.labels = labels + + block_coords_and_species = self.data.pop('block_coords_and_species_from_inputlog', None) + + block_coords_and_species = data.pop('block_coords_and_species', block_coords_and_species) + coords_and_species = data.pop('coords_and_species', None) + + if coords_and_species is None: + coords_and_species = block_coords_and_species + + if coords_and_species is not None: + coords = coords_and_species[:, :3].astype(float) + + unit = self.special_input_vars['AtomicCoordinatesFormat'] + if unit == 'Ang': + coords = convert_unit(coords, 'angstrom') + elif unit in ['Fractional', 'ScaledCartesian']: + a = Atoms('%dX' % len(coords), + scaled_positions=coords, + cell=meta['simulation_cell']) + coords = a.positions + else: + raise ValueError('Unknown: %s' % unit) + meta['atom_positions'] = coords + + species_index = coords_and_species[:, 3].astype(int) + + atom_labels = np.array([self.labels[i - 1] for i in species_index]) + meta['atom_labels'] = atom_labels + + positions = self.data.pop('outcoord_ang', None) + if positions is not None: + positions = convert_unit(positions.astype(float), 'angstrom') + meta['atom_positions'] = positions + + for key, value in meta.items(): + backend.addArrayValues(key, value) + + backend.addArrayValues('configuration_periodic_dimensions', + np.ones(3, bool)) + + assert len(self.data) == 0, self.data + + def onClose_section_run(self, backend, gindex, section): + pass + + def onClose_section_single_configuration_calculation(self, backend, + gindex, section): + forces = self.data.pop('forces_ev_ang', None) + if forces is not None: + forces = forces.astype(float) + forces = convert_unit(forces, 'eV/angstrom') + backend.addArrayValues('atom_forces_free_raw', forces) + + stress = self.data.pop('stress_tensor_ev_ang', None) + if stress is not None: + stress = stress.astype(float) + stress = convert_unit(stress, 'eV/angstrom^3') + backend.addArrayValues('stress_tensor', stress) + + backend.addValue('single_configuration_to_calculation_method_ref', + self.section_refs['method']) + backend.addValue('single_configuration_calculation_to_system_ref', + self.section_refs['system']) + + def onClose_x_siesta_section_input(self, backend, gindex, section): + inputvars_file = self.files.get('inputlog') + if inputvars_file is None: + raise ValueError('no input logfile!') + + inputvars, blocks = get_input_metadata(inputvars_file, + self.format == 'new') + for varname, value in inputvars.items(): + backend.addValue('x_siesta_input_%s' % varname, value) + + for special_name in ['LatticeConstant', + 'AtomicCoordinatesFormat', + 'AtomicCoordinatesFormatOut', + 'ElectronicTemperature']: + self.special_input_vars[special_name] = inputvars.get(special_name) + + self.blocks = blocks + self.data['block_coords_and_species_from_inputlog'] = np.array(blocks['AtomicCoordinatesAndAtomicSpecies'], str) + + authors = 
section['x_siesta_xc_authors'] + if authors is None: + raise ValueError('XC authors not found!') + + assert len(authors) == 1 + authors = authors[0] + + # XXX Case sensitive? + mapping = {'CA': ('LDA_X', 'LDA_C_PZ'), + 'PZ': ('LDA_X', 'LDA_C_PZ'), + 'PW92': ('LDA_X', 'LDA_C_PW'), + #'PW91': '', + 'PBE': ('GGA_X_PBE', 'GGA_C_PBE'), + 'revPBE': ('GGA_X_PBE_R', 'GGA_C_PBE'), + 'RPBE': ('GGA_X_RPBE', 'GGA_C_PBE'), + #'WC': ('GGA_X_WC', ), + # Siesta does not mention which correlation is used with + # the WC functional. Is it just the PBE one? + 'AM05': ('GGA_X_AM05', 'GGA_C_AM05'), + 'PBEsol': ('GGA_X_PBE_SOL', 'GGA_C_PBE_SOL'), + 'BLYP': ('GGA_X_B88', 'GGA_C_LYP'), + 'DF1': ('gga_x_pbe_r', 'vdw_c_df1'), + 'DRSLL': ('gga_x_pbe_r', 'vdw_c_df1'), + 'LMKLL': ('gga_x_rpw86', 'vdw_c_df2'), + 'DF2': ('gga_x_rpw86', 'vdw_c_df2'), + 'KBM': ('GGA_X_OPTB88_VDW', 'vdw_c_df1'), + 'C09': ('GGA_X_C09X', 'vdw_c_df1'), + 'BH': ('GGA_X_LV_RPW86', 'vdw_c_df1'), + } + xc = mapping.get(authors) + + if xc is None: + raise ValueError('XC functional %s unsupported by parser' + % authors) + + for funcname in xc: + gid = backend.openSection('section_XC_functionals') + backend.addValue('XC_functional_name', funcname) + backend.closeSection('section_XC_functionals', gid) + + def read_eigenvalues(self, backend): + eigfile = self.files.get('EIG') + if eigfile is None: + return + + with open(eigfile) as fd: + eFermi = float(next(fd).strip()) + nbands, nspins, nkpts = [int(n) for n in next(fd).split()] + + tokens = [] + for line in fd: + tokens.extend(line.split()) + + tokens = iter(tokens) + + eps = np.empty((nspins, nkpts, nbands)) + for k in range(nkpts): + kindex = int(next(tokens)) + assert k + 1 == kindex + for s in range(nspins): + eps[s, k, :] = [next(tokens) for _ in range(nbands)] + unread = list(tokens) + assert len(unread) == 0 + assert s == nspins - 1 + assert k == nkpts - 1 + + # Where does SIESTA store the occupations? + backend.addArrayValues('eigenvalues_values', convert_unit(eps, 'eV')) + + kpfile = self.files.get('KP') + if kpfile is None: + return + + + with open(kpfile) as fd: + tokens = fd.read().split() + + nkpts = int(tokens[0]) + data = np.array(tokens[1:], object).reshape(-1, 5) + coords = data[:, 1:4].astype(float) + weights = data[:, 4].astype(float) + backend.addArrayValues('eigenvalues_kpoints', coords) + # XXX metadata for Fermi level? + # k-point weights? + + def save_array(self, key, dtype=float, istart=0, iend=None, + unit=None): + return get_array(key, dtype=dtype, istart=istart, iend=iend, unit=unit, + storage=self.data) + + def multi_sm(self, name, startpattern, linepattern, endmatcher=None, + conflict='fail', # 'fail', 'keep', 'overwrite' + *args, **kwargs): + + pat = re.compile(linepattern) # XXX how to get compiled pattern? 
+ ngroups = pat.groups + + allgroups = [] + def addline(parser): + line = parser.fIn.readline() + match = pat.match(line) + assert match is not None + thislinegroups = match.groups() + assert len(thislinegroups) == ngroups + allgroups.append(thislinegroups) + + def savearray(parser): + arr = np.array(allgroups, dtype=object) + del allgroups[:] + if name in self.data: + if conflict == 'fail': + raise ValueError('grrr %s %s' % (name, self.data[name])) + elif conflict == 'keep': + return # Do not save array + elif conflict == 'overwrite': + pass + else: + raise ValueError('Unknown keyword %s' % conflict) + if arr.size > 0: + self.data[name] = arr + + if endmatcher is None: + endmatcher = r'.*' + + if hasattr(endmatcher, 'swapcase'): + endmatcher = SM(endmatcher, + endReStr='', + forwardMatch=True, + name='%s-end' % name, + adHoc=savearray) + + sm = SM(startpattern, + name=name, + subMatchers=[ + SM(linepattern, + name='%s-line' % name, + repeats=True, + forwardMatch=True, + required=True, + adHoc=addline), + endmatcher, + ], **kwargs) + return sm + +context = SiestaContext() + +def get_header_matcher(): + m = SM(r'', + name='header', + fixedStartValues={'program_name': 'Siesta', + 'program_basis_set_type': 'numeric AOs'}, + weak=True, + forwardMatch=True, + subMatchers=[ + SM(r'Siesta Version: (?P<program_version>\S+)', + name='version', + adHoc=context.adhoc_format_new), + SM(r'SIESTA\s*(?P<program_version>.+)', + name='alt-version', adHoc=context.adhoc_format_old), + SM(r'Architecture\s*:\s*(?P<x_siesta_arch>.+)', name='arch'), + SM(r'Compiler flags\s*:\s*(?P<x_siesta_compilerflags>.+)', + name='flags'), + ]) + return m + + +def anycase(string): + tokens = [] + for letter in list(string): + if letter.isalpha(): + tokens.append('[%s%s]' % (letter.upper(), + letter.lower())) + else: + tokens.append(letter) + return ''.join(tokens) + +welcome_pattern = r'\s*\*\s*WELCOME TO SIESTA\s*\*' + +def get_input_matcher(): + m = SM(welcome_pattern, + name='welcome', + sections=['section_method', 'x_siesta_section_input'], + fixedStartValues={'electronic_structure_method': 'DFT', + 'smearing_kind': 'fermi'}, + subFlags=SM.SubFlags.Unordered, + subMatchers=[ + SM(r'NumberOfAtoms\s*(?P<number_of_atoms>\d+)', + name='natoms'), + context.multi_sm('block_species_label', + anycase(r'%block ChemicalSpeciesLabel'), + r'\s*\d+\s*\d+\s*(\S+)', + conflict='keep'), + context.multi_sm('block_coords_and_species', + anycase(r'%block AtomicCoordinatesAndAtomicSpecies'), + r'\s*(\S+)\s*(\S+)\s*(\S+)\s*(\d+)'), + context.multi_sm('block_lattice_vectors', + anycase(r'%block LatticeVectors'), + r'(?!%)\s*(\S+)\s*(\S+)\s*(\S+)'), + SM(r'%s\s*(?P<x_siesta_xc_authors>\S+)' % anycase('xc\.authors'), + name='xc authors', + fixedStartValues={'x_siesta_xc_authors': 'CA'}), + #SM(r'MD.TypeOfRun\s*(?P<x_siesta_typeofrun>\S+)', + # fixedStartValues={'x_siesta_typeofrun': 'none'}), + SM(r'reinit: System Name:\s*(?P<system_name>.+)', + name='sysname'), + SM(r'reinit: System Label:\s*(?P<x_siesta_system_label>\S+)', + name='syslabel', forwardMatch=True, + adHoc=context.adhoc_set_label), + context.multi_sm('coords_and_species', + r'siesta: Atomic coordinates \(Bohr\) and species', + r'siesta:\s*(\S+)\s*(\S+)\s*(\S+)\s*(\d+)'), + context.multi_sm('auto_unit_cell_ang', + r'siesta: Automatic unit cell vectors \(Ang\):', + r'siesta:\s*(\S+)\s*(\S+)\s*(\S+)'), + # XXX must be an array with number of spin channels! 
+ #SM(r'Total number of electrons:\s*(?P<number_of_electrons>\S+)', + # name='nelectrons'), + ]) + return m + + +step_pattern = r'\s*(Single-point calculation|Begin[^=]+=\s*\d+)' + +def get_step_matcher(): + m = SM(step_pattern, + name='step', + forwardMatch=True, + adHoc=context.adhoc_set_simulation_type, + sections=['section_single_configuration_calculation'], + subFlags=SM.SubFlags.Unordered, + subMatchers=[ + SM(r'\s*=============+', name='====='), + context.multi_sm('outcoord_ang', + r'outcoor: Atomic coordinates \(Ang\):', + r'\s*(\S+)\s*(\S+)\s*(\S+)'), + context.multi_sm('outcell_ang', + r'outcell: Unit cell vectors \(Ang\):', + r'\s*(\S+)\s*(\S+)\s*(\S+)'), + SM(r'siesta: E_KS\(eV\)\s*=\s*(?P<energy_total__eV>\S+)', + name='etotal'), + context.multi_sm('forces_ev_ang', + r'siesta: Atomic forces \(eV/Ang\):', + r'(?:siesta:)?\s+\d+\s*(\S+)\s*(\S+)\s*(\S+)'), + context.multi_sm('stress_tensor_ev_ang', + r'siesta: Stress tensor \(static\) \(eV/Ang\*\*3\):', + r'siesta:\s*(\S+)\s*(\S+)\s*(\S+)'), + SM(r'siesta: Final energy \(eV\):', + endReStr='siesta:\s*Total\s*=\s*\S+', + name='Efinal', + subMatchers=[ + siesta_energy('Band Struct\.', 'energy_sum_eigenvalues'), + siesta_energy('Kinetic', 'electronic_kinetic_energy'), + siesta_energy('Hartree', 'energy_electrostatic'), + #siesta_energy('Ext\. field', ''), + siesta_energy('Exch\.-corr\.', 'energy_XC'), + #siesta_energy('Ion-electron', ''), + #siesta_energy('Ion-Ion', ''), + #siesta_energy('Ekinion', ''), + # energy_total was matched above already + #siesta_energy('Total', 'energy_total'), + SM(r'', weak=True, name='trigger_readeig', + sections=['section_eigenvalues']), + ]), + ]) + return m + + +mainFileDescription = SM( + r'', + name='root', + #weak=True, + sections=['section_run'], + subMatchers=[ + get_header_matcher(), + SM(r'(%s|%s)' % (welcome_pattern, step_pattern), + name='system-section', + #weak=True, + forwardMatch=True, + repeats=True, + required=True, + sections=['section_system', 'section_frame_sequence', 'section_sampling_method'], + subMatchers=[ + get_input_matcher(), + get_step_matcher(), + ]), + SM(r'x^', # Make sure whole file is parsed + name='end') + ]) + + +import nomad_meta_info +metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(nomad_meta_info.__file__)), "siesta.nomadmetainfo.json")) +metaInfoEnv, warnings = loadJsonFile(filePath = metaInfoPath, dependencyLoader = None, extraArgsHandling = InfoKindEl.ADD_EXTRA_ARGS, uri = None) + +class SiestaParser(): + """ A proper class envolop for running this parser from within python. 
""" + def __init__(self, backend, **kwargs): + self.backend_factory = backend + + def parse(self, mainfile): + from unittest.mock import patch + logging.info('siesta parser started') + logging.getLogger('nomadcore').setLevel(logging.WARNING) + backend = self.backend_factory(metaInfoEnv) + with patch.object(sys, 'argv', ['<exe>', '--uri', 'nmd://uri', mainfile]): + mainFunction( + mainFileDescription, + metaInfoEnv, + parser_info, + cachingLevelForMetaName={}, + superContext=SiestaContext(), + superBackend=backend) + + return backend + + +def main(**kwargs): + mainFunction(mainFileDescription=mainFileDescription, + metaInfoEnv=metaInfoEnv, + parserInfo=parser_info, + cachingLevelForMetaName={}, + superContext=context, + **kwargs) + +if __name__ == '__main__': + main() + diff --git a/siestaparser/mkjson.py b/siestaparser/mkjson.py new file mode 100644 index 0000000000000000000000000000000000000000..42295db6660949e477c3e90a6242c5faf0b300eb --- /dev/null +++ b/siestaparser/mkjson.py @@ -0,0 +1,123 @@ +# Copyright 2016-2018 Ask Hjorth Larsen, Fawzi Mohamed +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Main author and maintainer: Ask Hjorth Larsen <asklarsen@gmail.com> + +from __future__ import print_function + +# TEMPLATES TAKEN FROM OCTOPUS. KEEP SYNCHRONIZED. +json_header = """{ + "type": "nomad_meta_info_1_0", + "description": "autogenerated nomad meta info for %(parser)s parser. 
The file which generates this maintained in the parser's git repository.", + "dependencies": [ { + "relativePath": "common.nomadmetainfo.json" + }], + "metaInfos": [ %(info)s] +}""" + + +json_template = """{ + "description": "%(description)s", + "dtypeStr": "%(dtypeStr)s", + "name": "%(name)s", + "repeats": false, + "shape": [], + "superNames": [ + "%(supername)s" + ] + }""" + +json_section_template = """{ + "description": "%(description)s", + "kindStr": "type_abstract_document_content", + "name": "%(name)s", + "superNames": [ + "x_siesta_section_input" + ] + }, { + "description": "input section", + "kindStr": "type_section", + "name": "x_siesta_section_input", + "superNames": [ + "section_method" + ] + }""" +# END OF TEMPLATES FROM OCTOPUS + +import sys +#fname = sys.argv[1] + +inputvars_fd = open('inputvars.py', 'w') +print('varlist = [', file=inputvars_fd) + +def getlines(): + fd = open('test/H2O/fdf-95611.log') + for line in fd: + if not line[:1].isalpha(): # Get rid of blocks + continue + yield line + + withinblock = False + fd = open('test/smeagol-Au-leads/out.fdf') + for line in fd: + if line.startswith('%block'): + withinblock = True + elif line.startswith('%endblock'): + withinblock = False + if withinblock: + continue + yield line + +seen = set() + +#with open(fname) as fd: +#for line in getlines(): +if 1: + varnames = [] + for line in getlines(): + + # Get rid of comments + line = line.strip().rsplit('#', 1)[0].strip() + if not line: + continue + varname = line.split()[0] + lowervarname = varname.lower() + if lowervarname in seen: + continue # Already registered + + seen.add(lowervarname) + varnames.append(varname) + varnames = list(set(varnames)) + varnames.sort() + + jsontokens = [] + for var in varnames: + print(" '%s'," % var, file=inputvars_fd) + json = json_template % dict(description=r'siesta input variable \"%s\"' + % var, + name='x_siesta_input_%s' % var, + dtypeStr='C', + supername='x_siesta_input') + jsontokens.append(json) + section = json_section_template % dict(description='siesta input ' + 'variables', + name='x_siesta_input') + jsontokens.append(section) + + txt = json_header % dict(parser='siesta', + info=', '.join(jsontokens)) + print(txt) + +print(']', file=inputvars_fd) diff --git a/siestaparser/pycheck.py b/siestaparser/pycheck.py new file mode 100755 index 0000000000000000000000000000000000000000..eb14e4e0646c59fd8fa94de547866b89b3246b4e --- /dev/null +++ b/siestaparser/pycheck.py @@ -0,0 +1,35 @@ +# Copyright 2016-2018 Ask Hjorth Larsen, Fawzi Mohamed +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# Main author and maintainer: Ask Hjorth Larsen <asklarsen@gmail.com> + +from __future__ import print_function +import os + +from main import main + +testdir = '../../test/examples' +dirnames = ['H2O', + 'H2O-relax', + 'Al-slab', + 'Al-uc', + 'Fe', + 'MgO'] + + +for dirname in dirnames: + fname = os.path.join(testdir, dirname, 'out') + with open('out.pycheck.%s.txt' % dirname, 'w') as outfd: + main(mainFile=fname, outF=outfd)
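
A minimal usage sketch, not part of the diff above: after installing the package next to setup.py (for example with "pip install -e .", with nomadcore and the nomad-meta-info definitions available), the parser can be driven from Python using the same call pattern that pycheck.py uses. The test path below is a placeholder, and the mainFile/outF keywords are simply those that main() forwards to nomadcore's mainFunction, so they assume a nomadcore version that accepts them.

    # Hedged example: run the SIESTA parser on one stdout file and write the
    # parsed JSON stream to a file, mirroring pycheck.py and check.sh above.
    from siestaparser.main import main

    outfile = 'test/H2O/out'  # placeholder path to a SIESTA stdout file

    with open('H2O.parsed.json', 'w') as fd:
        # Roughly equivalent to "python main.py test/H2O/out" in check.sh,
        # but with the output redirected to a file instead of the terminal.
        main(mainFile=outfile, outF=fd)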