Commit 59e5ec08 authored by Alvin Noe Ladines's avatar Alvin Noe Ladines
Browse files

Implemented new gpaw parser

parent a1a5478c
Pipeline #95136 passed with stages
in 23 minutes and 55 seconds
Subproject commit be75c7bf0cb4d1d26ee375cc1719e4fdcd50bc12
Subproject commit b3850be63dab9ce9a823eb2a32ff7ced541265be
from .file_parser import FileParser
from .text_parser import TextParser, DataTextParser, Quantity, ParsePattern
from .xml_parser import XMLParser
from .tar_parser import TarParser
......@@ -20,28 +20,39 @@ from typing import Any, Dict
import gzip
import bz2
import lzma
import tarfile
class FileParser:
'''
Base class for parsers. The parse method implemented here simply sets the parsed
quantities as attributes of the class. The parse method specific to a file type
Base class for parsers. The parse method specific to a file type
should be implemented in the corresponding child class. The parsed quantities are
stored in results. One can access a quantity by using the get method.
stored in results. One can access a quantity by using the get method or as attribute.
Arguments:
mainfile: the file to be parsed
logger: optional logger
'''
def __init__(self, mainfile: str, logger=None):
self._mainfile: str = os.path.abspath(mainfile) if mainfile else mainfile
self.logger = logger if logger else logging
def __init__(self, mainfile=None, logger=None, open=None):
self._mainfile: Any = None
self._mainfile_obj: Any = None
if isinstance(mainfile, str):
self._mainfile = os.path.abspath(mainfile)
self._mainfile_obj = None
elif hasattr(mainfile, 'name'):
self._mainfile = mainfile.name
self._mainfile_obj = mainfile
self._open = open
self.logger = logger if logger is not None else logging
self._results: Dict[str, Any] = None
# a key is necessary for xml parsers, where parsing is done dynamically
self._key: str = None
self._kwargs: Dict[str, Any] = None
self._file_handler: Any = None
def init_parameters(self):
pass
@property
def results(self):
if self._results is None:
......@@ -55,12 +66,22 @@ class FileParser:
def maindir(self):
return os.path.dirname(self._mainfile)
@property
def mainfile_obj(self):
if self._mainfile_obj is None:
try:
self._mainfile_obj = self.open(self._mainfile)
except Exception:
pass
return self._mainfile_obj
@property
def mainfile(self):
if self._mainfile is None:
return
if not os.path.isfile(self._mainfile):
if self._mainfile_obj is None and not os.path.isfile(self._mainfile):
return
return self._mainfile
......@@ -68,19 +89,29 @@ class FileParser:
def mainfile(self, val):
self._results = None
self._file_handler = None
self._mainfile = os.path.abspath(val) if val is not None else val
@property
def open(self):
if self.mainfile.endswith('.gz'):
self._mainfile = None
if isinstance(val, str):
self._mainfile = os.path.abspath(val)
self._mainfile_obj = None
elif hasattr(val, 'name'):
self._mainfile = val.name
self._mainfile_obj = val
self.init_parameters()
def open(self, mainfile):
open_file = self._open
if open_file is None:
if mainfile.endswith('.gz'):
open_file = gzip.open
elif self.mainfile.endswith('.bz2'):
elif mainfile.endswith('.bz2'):
open_file = bz2.open
elif self.mainfile.endswith('.xz'):
elif mainfile.endswith('.xz'):
open_file = lzma.open
elif mainfile.endswith('.tar'):
open_file = tarfile.open
else:
open_file = open
return open_file
return open_file(mainfile)
def get(self, key: str, default: Any = None, unit: str = None, **kwargs):
'''
......@@ -118,13 +149,10 @@ class FileParser:
elif isinstance(key, int):
return self[int]
def parse(self, quantity_key: str = None):
'''
Sets quantities in result as class attributes.
'''
for key, val in self._results.items():
try:
setattr(self, key, val)
except Exception:
def __getattr__(self, key):
if key not in self._results:
self.parse(key)
return self._results.get(key)
def parse(self, quantity_key: str = None, **kwargs):
pass
return self
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tarfile
from .file_parser import FileParser
class TarParser(FileParser):
    '''
    Parser that exposes the members of a tar archive.

    The archive is opened through the base class using tarfile.open. Each call to
    parse extracts a single member and stores the resulting file object (or None
    if the member cannot be extracted) in results under the requested key.

    Arguments:
        mainfile: the tar archive to be read (path or file-like object with a name)
        logger: optional logger
    '''
    def __init__(self, mainfile=None, logger=None):
        super().__init__(mainfile, logger, tarfile.open)
        self._names_map = None

    def init_parameters(self):
        # FileParser calls init_parameters() from its mainfile setter; drop the
        # cached name map so it is rebuilt for the newly assigned archive.
        self._names_map = None

    @property
    def names_map(self):
        '''
        Mapping of lower-cased member names to the member names as stored in the
        archive. Returns None if the archive cannot be opened.
        '''
        if self._names_map is None:
            archive = self.mainfile_obj
            if archive is not None:
                self._names_map = {name.lower(): name for name in archive.getnames()}
        return self._names_map

    def _extract(self, name):
        '''
        Returns the file object for the archive member name, or None on any failure.
        '''
        try:
            return self.mainfile_obj.extractfile(name)
        except Exception:
            # best effort: a missing or unreadable member is reported as None
            return None

    def parse(self, key, **kwargs):
        '''
        Extracts the archive member identified by key and stores it in results.

        The lookup first uses key as given (against the lower-cased name map, then
        as a literal member name). If that yields nothing, the lower-cased key is
        tried so that members differing only in case are still found.

        Arguments:
            key: name of the archive member
            kwargs: accepted for compatibility with FileParser.parse, unused
        '''
        if self._results is None:
            self._results = dict()

        if self.mainfile_obj is None:
            return

        names_map = self.names_map or {}
        name = names_map.get(key)
        if name is not None:
            val = self._extract(name)
        else:
            val = self._extract(key)
            if val is None and isinstance(key, str):
                # names_map is keyed by lower-cased names, so fold the key as well
                folded = names_map.get(key.lower())
                if folded is not None:
                    val = self._extract(folded)

        self._results[key] = val
......@@ -232,9 +232,10 @@ class DataTextParser(FileParser):
def __init__(self, **kwargs):
self._dtype: Type = kwargs.get('dtype', float)
mainfile: str = kwargs.get('mainfile', None)
self._mainfile_contents: str = kwargs.get('mainfile_contents', None)
logger = kwargs.get('logger', None)
logger = logger if logger is not None else logging
super().__init__(mainfile, logger=logger)
super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self.init_parameters()
def init_parameters(self):
......@@ -249,11 +250,13 @@ class DataTextParser(FileParser):
Returns the loaded data
'''
if self._file_handler is None:
if self.mainfile is None:
return
try:
self._file_handler = np.loadtxt(self.mainfile, dtype=self._dtype)
if self.mainfile is not None:
self._file_handler = np.loadtxt(self.mainfile)
else:
if self._mainfile_contents is None:
self._mainfile_contents = self.mainfile_obj.read()
self._file_handler = np.fromstring(self._mainfile_contents, dtype=self._dtype)
except Exception:
return
......@@ -278,7 +281,7 @@ class TextParser(FileParser):
file_length: length of the chunk to be read from the file
'''
def __init__(self, mainfile=None, quantities=None, logger=None, findall=True, **kwargs):
super().__init__(mainfile, logger)
super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self._quantities: List[Quantity] = quantities
self.findall: bool = findall
self._kwargs = kwargs
......@@ -530,5 +533,4 @@ class TextParser(FileParser):
if quantity.name not in self._results:
self._parse_quantity(quantity)
super().parse()
return self
......@@ -17,6 +17,7 @@ import os
import re
import numpy as np
from xml.etree import ElementTree
from lxml import etree
from nomad.parsing.file_parser import FileParser
......@@ -30,9 +31,9 @@ class XMLParser(FileParser):
logger: logger
convert: specifies if quantities are converted automatically
'''
def __init__(self, mainfile: str = None, logger=None, convert: bool = True):
super().__init__(mainfile, logger=logger)
self.convert = convert
def __init__(self, mainfile: str = None, logger=None, **kwargs):
super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self.convert = kwargs.get('convert', True)
self.init_parameters()
def init_parameters(self):
......@@ -50,8 +51,16 @@ class XMLParser(FileParser):
if self.mainfile is None:
return
try:
self._file_handler = ElementTree.parse(self.open(self.mainfile)).getroot()
self._file_handler = ElementTree.parse(self.mainfile_obj).getroot()
except Exception:
try:
# I cannot use the lxml XMLParser directly because it is not compatible with
# the ElementTree implementation.
xml = etree.parse(self.mainfile_mainfile_obj, parser=etree.XMLParser(recover=True))
self._file_handler = ElementTree.fromstring(etree.tostring(xml))
except Exception:
pass
self.logger.error('failed to load xml file')
self.init_parameters()
......
......@@ -36,6 +36,7 @@ from crystalparser import CrystalParser
from fhiaimsparser import FHIAimsParser
from excitingparser import ExcitingParser
from abinitparser import AbinitParser
from gpawparser import GPAWParser
try:
# these packages are not available without parsing extra, which is ok, if the
......@@ -262,19 +263,20 @@ parsers = [
# r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
),
# match gpaw2 first, other .gpw files are then considered to be "gpaw1"
LegacyParser(
name='parsers/gpaw2', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
parser_class_name='gpawparser.GPAWParser2Wrapper',
mainfile_binary_header=b'GPAW',
mainfile_name_re=(r'^.*\.(gpw2|gpw)$'),
mainfile_mime_re=r'application/(x-tar|octet-stream)'
),
LegacyParser(
name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
parser_class_name='gpawparser.GPAWParserWrapper',
mainfile_name_re=(r'^.*\.gpw$'),
mainfile_mime_re=r'application/(x-tar|octet-stream)'
),
# LegacyParser(
# name='parsers/gpaw2', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
# parser_class_name='gpawparser.GPAWParser2Wrapper',
# mainfile_binary_header=b'GPAW',
# mainfile_name_re=(r'^.*\.(gpw2|gpw)$'),
# mainfile_mime_re=r'application/(x-tar|octet-stream)'
# ),
# LegacyParser(
# name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
# parser_class_name='gpawparser.GPAWParserWrapper',
# mainfile_name_re=(r'^.*\.gpw$'),
# mainfile_mime_re=r'application/(x-tar|octet-stream)'
# ),
GPAWParser(),
LegacyParser(
name='parsers/atk', code_name='AtomistixToolKit', code_homepage='https://www.synopsys.com/silicon/quantumatk.html',
parser_class_name='atkparser.ATKParserWrapper',
......
......@@ -39,6 +39,7 @@ mdtraj
mdanalysis
nomadcore
nomad_dos_fingerprints
lxml
# [infrastructure]
optimade==0.8.1
......
......@@ -60,7 +60,7 @@ parser_examples = [
('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
('parsers/gpaw', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment