Skip to content
Snippets Groups Projects
Select Git revision
  • a6f432f5356db5ea5f29b9a401f007dbb539ac8c
  • master default protected
  • legacy-metainfo
  • nomad-coe
  • preMetaRename
  • 1.2.0
  • 1.1.0
  • 1.0
  • 0.0.0
9 results

OnetepCellParser.py

Blame
  • OnetepCellParser.py 20.48 KiB
    # Copyright 2016-2018 Martina Stella, Fawzi Mohamed
    #
    #   Licensed under the Apache License, Version 2.0 (the "License");
    #   you may not use this file except in compliance with the License.
    #   You may obtain a copy of the License at
    #
    #     http://www.apache.org/licenses/LICENSE-2.0
    #
    #   Unless required by applicable law or agreed to in writing, software
    #   distributed under the License is distributed on an "AS IS" BASIS,
    #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    #   See the License for the specific language governing permissions and
    #   limitations under the License.
    
    from builtins import range
    from builtins import object
    import numpy as np
    import nomadcore.ActivateLogging
    from nomadcore.caching_backend import CachingLevel
    from nomadcore.simple_parser import mainFunction
    from nomadcore.simple_parser import SimpleMatcher as SM
    from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
    from .OnetepCommon import get_metaInfo
    import logging, os, re, sys
    
    
    
    ############################################################
    # This is the parser for the *.cell file of Onetep.
    ############################################################
    
    # Module-level logger registered under the name "nomad.OnetepCellParser".
    logger = logging.getLogger("nomad.OnetepCellParser")
    
    class OnetepCellParserContext(object):
        """Context for parsing the Onetep *.cell file.

        Lengths read from the file are converted to metres and collected in
        instance attributes:
            cell_store: Cell vectors, each a list of floats.
            onetep_atom_positions_store: Atom positions, each a list of floats.
            atom_labels_store: Atom labels with runs of whitespace collapsed.
            at_nr: Number of atom positions found in the last closed section_system
                that contained positions (an empty list until then).

        The onClose_ functions allow processing of cached values after a section is closed.
        They take the following arguments:
            backend: Class that takes care of writing and caching of metadata.
            gIndex: Index of the section that is closed.
            section: The cached values and sections that were found in the section that is closed.
        """

        # Conversion factors from the length units recognised in the file to metres.
        _BOHR_TO_M = 5.29177211e-11
        _ANG_TO_M = 1e-10
        _UNIT_TO_M = {'bohr': _BOHR_TO_M, 'ang': _ANG_TO_M}

        def __init__(self, writeMetaData = True):
            """Args:
                writeMetaData: Determines if metadata is written or stored in class attributes.
            """
            self.writeMetaData = writeMetaData
            self.cell_store = []
            # Kept as an empty list (falsy) until a section_system with positions is closed.
            self.at_nr = []
            self.onetep_atom_positions_store = []
            self.atom_labels_store = []

        def startedParsing(self, fInName, parser):
            """Function is called when the parsing starts and the compiled parser is obtained.

            Args:
                fInName: The file name on which the current parser is running.
                parser: The compiled parser. Is an object of the class SimpleParser in nomadcore.simple_parser.py.
            """
            self.parser = parser

        @classmethod
        def _length_factor(cls, units):
            """Return the factor that converts the cached length unit to metres.

            Args:
                units: Cached list of unit names (only the first entry is used),
                    or None / an empty list when no unit line was matched.

            Returns:
                The bohr factor when no unit was cached, the factor for 'bohr' or
                'ang' (compared case-insensitively), or None for any other name.
            """
            if not units:
                return cls._BOHR_TO_M
            # NOTE(review): unit names are compared case-insensitively on the
            # assumption that Onetep input is case-insensitive -- confirm.
            return cls._UNIT_TO_M.get(str(units[0]).strip().lower())

        @staticmethod
        def _to_metres(line, factor):
            """Split a whitespace separated line of numbers and scale each value by factor."""
            # The cell-vector regex admits Fortran style exponents ("1.0d0"),
            # which float() rejects, so they are rewritten to "e" first.
            return [float(token.replace('d', 'e').replace('D', 'e')) * factor
                    for token in line.split()]

        def onClose_x_onetep_section_cell(self, backend, gIndex, section):
            """Trigger called when x_onetep_section_cell is closed.

            Converts the first three cached cell vectors to metres and appends
            them, vector by vector, to self.cell_store. The cached section values
            themselves are left untouched.
            """
            vectors = section['x_onetep_cell_vector']
            if not vectors or len(vectors) < 3:
                # No complete cell was matched; there is nothing to reconstruct.
                return
            factor = self._length_factor(section['x_onetep_units'])
            if factor is None:
                # Unrecognised unit name: store nothing rather than guess a scale.
                return
            self.cell_store.extend(self._to_metres(vector, factor) for vector in vectors[:3])

        def onClose_section_system(self, backend, gIndex, section):
            """Trigger called when section_system is closed.

            Converts the cached atom positions to metres exactly once, appends
            them to self.onetep_atom_positions_store and records their count in
            self.at_nr. Cached atom labels are whitespace-normalised and appended
            to self.atom_labels_store.
            """
            positions = section['x_onetep_store_atom_positions']
            if positions:
                factor = self._length_factor(section['x_onetep_units_atom_position'])
                if factor is None:
                    # Unrecognised unit names fall back to bohr.
                    factor = self._BOHR_TO_M
                self.at_nr = len(positions)
                self.onetep_atom_positions_store.extend(
                    self._to_metres(position, factor) for position in positions)

            # A section without any matched atom line yields no label list.
            labels = section['x_onetep_store_atom_labels'] or []
            self.atom_labels_store.extend(
                re.sub(r'\s+', ' ', label).strip() for label in labels)
    
    
    
    def build_OnetepCellFileSimpleMatcher():
        """Builds the SimpleMatcher to parse the *.cell file of Onetep.

        The top level matcher opens section_system and holds, in this order,
        eight matchers for the lattice_cart block followed by eight matchers for
        the positions_abs block. The variants differ only in the spelling of the
        block header (lower/upper case, optional leading whitespace, one or any
        number of whitespace characters after "%block").

        Returns:
           SimpleMatcher that parses *.cell file of Onetep.
        """
        # A single number of a cell vector. 'd' and 'D' are included for Fortran
        # style exponents; all three components use the same character class so
        # that an exponent on the last component is not cut off.
        number = r"[-+0-9.eEdD]+"
        cellVectorReStr = (r"\s*(?P<x_onetep_cell_vector>"
                           + number + r"\s+" + number + r"\s+" + number + r")")
        # An atom line: label followed by three coordinates.
        atomLineReStr = (r"(?P<x_onetep_store_atom_labels>[A-Za-z0-9]+)\s*"
                         r"(?P<x_onetep_store_atom_positions>[-\d\.]+\s+[-\d\.]+\s+[-\d\.]+)")

        def cellMatcher(startReStr):
            """Matcher for one spelling of the lattice_cart block header."""
            return SM(name = 'cellInformation',
                      startReStr = startReStr,
                      sections = ["x_onetep_section_cell"],
                      subMatchers = [
                          SM(r"(?P<x_onetep_units>[A-Za-z]+)"),
                          SM(cellVectorReStr,
                             repeats = True),
                      ])

        def positionsMatcher(startReStr, atomLinePrefix):
            """Matcher for one spelling of the positions_abs block header."""
            return SM(startReStr = startReStr,
                      forwardMatch = True,
                      sections = ["x_onetep_section_atom_positions"],
                      endReStr = "\n",
                      subMatchers = [
                          SM(r"(?P<x_onetep_units_atom_position>[A-Za-z]+)"),
                          SM(atomLinePrefix + atomLineReStr,
                             repeats = True),
                      ])

        # NOTE(review): several of these header patterns look like they subsume
        # each other; the full list and its order are kept unchanged -- confirm
        # against SimpleMatcher semantics before pruning.
        cellStartReStrs = [
            r"\%block\slattice\_cart\s*",
            r"\s*\%block\slattice\_cart\s*",
            r"\%block\s*lattice\_cart\s*",
            r"\s*\%block\s*lattice\_cart\s*",
            r"\%BLOCK\sLATTICE\_CART\s*",
            r"\s*\%BLOCK\sLATTICE\_CART\s*",
            r"\%BLOCK\s*LATTICE\_CART\s*",
            r"\s*\%BLOCK\s*LATTICE\_CART\s*",
        ]
        # (block header pattern, prefix of the atom line pattern)
        positionsStartReStrs = [
            (r"\%block\spositions\_abs\s*", r""),
            (r"\s*\%block\spositions\_abs\s*", r"\s*"),
            (r"\%block\s*positions\_abs\s*", r""),
            (r"\s*\%block\s*positions\_abs\s*", r"\s*"),
            (r"\%BLOCK\sPOSITIONS\_ABS", r""),
            (r"\s*\%BLOCK\sPOSITIONS\_ABS", r"\s*"),
            (r"\%BLOCK\s*POSITIONS\_ABS", r""),
            (r"\s*\%BLOCK\s*POSITIONS\_ABS", r"\s*"),
        ]

        cellMatchers = [cellMatcher(startReStr) for startReStr in cellStartReStrs]
        positionsMatchers = [positionsMatcher(startReStr, prefix)
                             for startReStr, prefix in positionsStartReStrs]

        return SM(name = "systemDescription",
                  startReStr = "",
                  forwardMatch = True,
                  sections = ["section_system"],
                  subMatchers = cellMatchers + positionsMatchers)
    
    
    
    
    
    def get_cachingLevelForMetaName(metaInfoEnv, CachingLvl):
        """Builds the mapping from metadata name to caching level.

        Args:
            metaInfoEnv: Metadata environment; its infoKinds attribute is iterated to obtain the metadata names.
            CachingLvl: Caching level that is assigned to section_system.

        Returns:
            Dictionary with metaname as key and caching level as value.
        """
        # section_system gets the level requested by the caller
        levelByMetaName = {'section_system': CachingLvl}
        # every Onetep specific quantity is cached, as it is post-processed in the onClose_ triggers
        levelByMetaName.update(
            (metaName, CachingLevel.Cache)
            for metaName in metaInfoEnv.infoKinds
            if metaName.startswith('x_onetep_'))
        return levelByMetaName
    
    
    def main(CachingLvl):
        """Main function.

        Builds the matcher for the Onetep *.cell file, loads the metadata and
        hands everything over to mainFunction, which runs the parsing.

        Args:
            CachingLvl: Caching level that is used for section_system
                (see get_cachingLevelForMetaName).
        """
        # description of the *.cell file
        cellFileMatcher = build_OnetepCellFileSimpleMatcher()
        # the metadata file is located relative to the directory of this module
        moduleDir = os.path.dirname(os.path.abspath(__file__))
        metaInfoPath = os.path.normpath(os.path.join(moduleDir, "../../../../nomad-meta-info/meta_info/nomad_meta_info/onetep.nomadmetainfo.json"))
        metaInfoEnv = get_metaInfo(metaInfoPath)
        # run the parser with a fresh context
        mainFunction(mainFileDescription = cellFileMatcher,
                     metaInfoEnv = metaInfoEnv,
                     parserInfo = {'name':'Onetep-cell-parser', 'version': '1.0'},
                     cachingLevelForMetaName = get_cachingLevelForMetaName(metaInfoEnv, CachingLvl),
                     superContext = OnetepCellParserContext())
    
    # Script entry point: CachingLevel.Forward is handed to main() as the
    # caching level that get_cachingLevelForMetaName assigns to section_system.
    if __name__ == "__main__":
        main(CachingLevel.Forward)