Commit 59e5ec08 authored by Alvin Noe Ladines's avatar Alvin Noe Ladines
Browse files

Implemented new gpaw parser

parent a1a5478c
Pipeline #95136 passed with stages
in 23 minutes and 55 seconds
Subproject commit be75c7bf0cb4d1d26ee375cc1719e4fdcd50bc12 Subproject commit b3850be63dab9ce9a823eb2a32ff7ced541265be
from .file_parser import FileParser from .file_parser import FileParser
from .text_parser import TextParser, DataTextParser, Quantity, ParsePattern from .text_parser import TextParser, DataTextParser, Quantity, ParsePattern
from .xml_parser import XMLParser from .xml_parser import XMLParser
from .tar_parser import TarParser
...@@ -20,28 +20,39 @@ from typing import Any, Dict ...@@ -20,28 +20,39 @@ from typing import Any, Dict
import gzip import gzip
import bz2 import bz2
import lzma import lzma
import tarfile
class FileParser: class FileParser:
''' '''
Base class for parsers. The parse method implemented here simply sets the parsed Base class for parsers. The parse method specific to a file type
quantities as attributes of the class. The parse method specific to a file type
should be implemented in the corresponding child class. The parsed quantities are should be implemented in the corresponding child class. The parsed quantities are
stored in results. One can access a quantity by using the get method. stored in results. One can access a quantity by using the get method or as attribute.
Arguments: Arguments:
mainfile: the file to be parsed mainfile: the file to be parsed
logger: optional logger logger: optional logger
''' '''
def __init__(self, mainfile: str, logger=None): def __init__(self, mainfile=None, logger=None, open=None):
self._mainfile: str = os.path.abspath(mainfile) if mainfile else mainfile self._mainfile: Any = None
self.logger = logger if logger else logging self._mainfile_obj: Any = None
if isinstance(mainfile, str):
self._mainfile = os.path.abspath(mainfile)
self._mainfile_obj = None
elif hasattr(mainfile, 'name'):
self._mainfile = mainfile.name
self._mainfile_obj = mainfile
self._open = open
self.logger = logger if logger is not None else logging
self._results: Dict[str, Any] = None self._results: Dict[str, Any] = None
# a key is necessary for xml parsers, where parsing is done dynamically # a key is necessary for xml parsers, where parsing is done dynamically
self._key: str = None self._key: str = None
self._kwargs: Dict[str, Any] = None self._kwargs: Dict[str, Any] = None
self._file_handler: Any = None self._file_handler: Any = None
def init_parameters(self):
pass
@property @property
def results(self): def results(self):
if self._results is None: if self._results is None:
...@@ -55,12 +66,22 @@ class FileParser: ...@@ -55,12 +66,22 @@ class FileParser:
def maindir(self): def maindir(self):
return os.path.dirname(self._mainfile) return os.path.dirname(self._mainfile)
@property
def mainfile_obj(self):
if self._mainfile_obj is None:
try:
self._mainfile_obj = self.open(self._mainfile)
except Exception:
pass
return self._mainfile_obj
@property @property
def mainfile(self): def mainfile(self):
if self._mainfile is None: if self._mainfile is None:
return return
if not os.path.isfile(self._mainfile): if self._mainfile_obj is None and not os.path.isfile(self._mainfile):
return return
return self._mainfile return self._mainfile
...@@ -68,19 +89,29 @@ class FileParser: ...@@ -68,19 +89,29 @@ class FileParser:
def mainfile(self, val): def mainfile(self, val):
self._results = None self._results = None
self._file_handler = None self._file_handler = None
self._mainfile = os.path.abspath(val) if val is not None else val self._mainfile = None
if isinstance(val, str):
@property self._mainfile = os.path.abspath(val)
def open(self): self._mainfile_obj = None
if self.mainfile.endswith('.gz'): elif hasattr(val, 'name'):
open_file = gzip.open self._mainfile = val.name
elif self.mainfile.endswith('.bz2'): self._mainfile_obj = val
open_file = bz2.open self.init_parameters()
elif self.mainfile.endswith('.xz'):
open_file = lzma.open def open(self, mainfile):
else: open_file = self._open
open_file = open if open_file is None:
return open_file if mainfile.endswith('.gz'):
open_file = gzip.open
elif mainfile.endswith('.bz2'):
open_file = bz2.open
elif mainfile.endswith('.xz'):
open_file = lzma.open
elif mainfile.endswith('.tar'):
open_file = tarfile.open
else:
open_file = open
return open_file(mainfile)
def get(self, key: str, default: Any = None, unit: str = None, **kwargs): def get(self, key: str, default: Any = None, unit: str = None, **kwargs):
''' '''
...@@ -118,13 +149,10 @@ class FileParser: ...@@ -118,13 +149,10 @@ class FileParser:
elif isinstance(key, int): elif isinstance(key, int):
return self[int] return self[int]
def parse(self, quantity_key: str = None): def __getattr__(self, key):
''' if key not in self._results:
Sets quantities in result as class attributes. self.parse(key)
''' return self._results.get(key)
for key, val in self._results.items():
try: def parse(self, quantity_key: str = None, **kwargs):
setattr(self, key, val) pass
except Exception:
pass
return self
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tarfile
from .file_parser import FileParser
class TarParser(FileParser):
    '''
    Parser exposing the members of a tar archive as quantities. Parsing a key
    extracts the archive member of that name and stores the resulting file object
    (or None if it cannot be extracted) in results under the key.

    Arguments:
        mainfile: the tar archive to be parsed
        logger: optional logger
    '''
    def __init__(self, mainfile=None, logger=None):
        # tarfile.open is passed as the opener so that the archive is always read
        # as a tar file, independent of the extension of mainfile
        super().__init__(mainfile, logger, tarfile.open)
        self._names_map = None

    @property
    def names_map(self):
        '''
        Mapping of the lower-cased member names to the names as stored in the
        archive. It is built on first access and stays None as long as the
        archive cannot be opened.
        '''
        if self._names_map is None:
            archive = self.mainfile_obj
            if archive is not None:
                self._names_map = {name.lower(): name for name in archive.getnames()}
        return self._names_map

    def parse(self, key):
        '''
        Extracts the archive member corresponding to key and stores it in results.

        The key is first resolved exactly as given. If that member cannot be
        extracted and key is a string, the lookup is retried with the lower-cased
        key, since names_map is keyed by lower-cased names. The stored value is
        None if no candidate can be extracted.
        '''
        if self._results is None:
            self._results = dict()

        archive = self.mainfile_obj
        if archive is None:
            return

        names = self.names_map or {}
        # first candidate reproduces the direct lookup; the case-folded one is only
        # a fallback so that results for exact names are unaffected
        candidates = [names.get(key, key)]
        if isinstance(key, str):
            folded = names.get(key.lower())
            if folded is not None and folded not in candidates:
                candidates.append(folded)

        val = None
        for name in candidates:
            try:
                val = archive.extractfile(name)
            except Exception:
                # best effort: a missing or unreadable member resolves to None
                continue
            break

        self._results[key] = val
...@@ -232,9 +232,10 @@ class DataTextParser(FileParser): ...@@ -232,9 +232,10 @@ class DataTextParser(FileParser):
def __init__(self, **kwargs): def __init__(self, **kwargs):
self._dtype: Type = kwargs.get('dtype', float) self._dtype: Type = kwargs.get('dtype', float)
mainfile: str = kwargs.get('mainfile', None) mainfile: str = kwargs.get('mainfile', None)
self._mainfile_contents: str = kwargs.get('mainfile_contents', None)
logger = kwargs.get('logger', None) logger = kwargs.get('logger', None)
logger = logger if logger is not None else logging logger = logger if logger is not None else logging
super().__init__(mainfile, logger=logger) super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self.init_parameters() self.init_parameters()
def init_parameters(self): def init_parameters(self):
...@@ -249,11 +250,13 @@ class DataTextParser(FileParser): ...@@ -249,11 +250,13 @@ class DataTextParser(FileParser):
Returns the loaded data Returns the loaded data
''' '''
if self._file_handler is None: if self._file_handler is None:
if self.mainfile is None:
return
try: try:
self._file_handler = np.loadtxt(self.mainfile, dtype=self._dtype) if self.mainfile is not None:
self._file_handler = np.loadtxt(self.mainfile)
else:
if self._mainfile_contents is None:
self._mainfile_contents = self.mainfile_obj.read()
self._file_handler = np.fromstring(self._mainfile_contents, dtype=self._dtype)
except Exception: except Exception:
return return
...@@ -278,7 +281,7 @@ class TextParser(FileParser): ...@@ -278,7 +281,7 @@ class TextParser(FileParser):
file_length: length of the chunk to be read from the file file_length: length of the chunk to be read from the file
''' '''
def __init__(self, mainfile=None, quantities=None, logger=None, findall=True, **kwargs): def __init__(self, mainfile=None, quantities=None, logger=None, findall=True, **kwargs):
super().__init__(mainfile, logger) super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self._quantities: List[Quantity] = quantities self._quantities: List[Quantity] = quantities
self.findall: bool = findall self.findall: bool = findall
self._kwargs = kwargs self._kwargs = kwargs
...@@ -530,5 +533,4 @@ class TextParser(FileParser): ...@@ -530,5 +533,4 @@ class TextParser(FileParser):
if quantity.name not in self._results: if quantity.name not in self._results:
self._parse_quantity(quantity) self._parse_quantity(quantity)
super().parse()
return self return self
...@@ -17,6 +17,7 @@ import os ...@@ -17,6 +17,7 @@ import os
import re import re
import numpy as np import numpy as np
from xml.etree import ElementTree from xml.etree import ElementTree
from lxml import etree
from nomad.parsing.file_parser import FileParser from nomad.parsing.file_parser import FileParser
...@@ -30,9 +31,9 @@ class XMLParser(FileParser): ...@@ -30,9 +31,9 @@ class XMLParser(FileParser):
logger: logger logger: logger
convert: specifies if quantities are converted automatically convert: specifies if quantities are converted automatically
''' '''
def __init__(self, mainfile: str = None, logger=None, convert: bool = True): def __init__(self, mainfile: str = None, logger=None, **kwargs):
super().__init__(mainfile, logger=logger) super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self.convert = convert self.convert = kwargs.get('convert', True)
self.init_parameters() self.init_parameters()
def init_parameters(self): def init_parameters(self):
...@@ -50,8 +51,16 @@ class XMLParser(FileParser): ...@@ -50,8 +51,16 @@ class XMLParser(FileParser):
if self.mainfile is None: if self.mainfile is None:
return return
try: try:
self._file_handler = ElementTree.parse(self.open(self.mainfile)).getroot() self._file_handler = ElementTree.parse(self.mainfile_obj).getroot()
except Exception: except Exception:
try:
# I cannot use the lxml XMLParser directly because it is not compatible with
# the ElementTree implementation.
xml = etree.parse(self.mainfile_mainfile_obj, parser=etree.XMLParser(recover=True))
self._file_handler = ElementTree.fromstring(etree.tostring(xml))
except Exception:
pass
self.logger.error('failed to load xml file') self.logger.error('failed to load xml file')
self.init_parameters() self.init_parameters()
......
...@@ -36,6 +36,7 @@ from crystalparser import CrystalParser ...@@ -36,6 +36,7 @@ from crystalparser import CrystalParser
from fhiaimsparser import FHIAimsParser from fhiaimsparser import FHIAimsParser
from excitingparser import ExcitingParser from excitingparser import ExcitingParser
from abinitparser import AbinitParser from abinitparser import AbinitParser
from gpawparser import GPAWParser
try: try:
# these packages are not available without parsing extra, which is ok, if the # these packages are not available without parsing extra, which is ok, if the
...@@ -262,19 +263,20 @@ parsers = [ ...@@ -262,19 +263,20 @@ parsers = [
# r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file. # r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
), ),
# match gpaw2 first, other .gpw files are then considered to be "gpaw1" # match gpaw2 first, other .gpw files are then considered to be "gpaw1"
LegacyParser( # LegacyParser(
name='parsers/gpaw2', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/', # name='parsers/gpaw2', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
parser_class_name='gpawparser.GPAWParser2Wrapper', # parser_class_name='gpawparser.GPAWParser2Wrapper',
mainfile_binary_header=b'GPAW', # mainfile_binary_header=b'GPAW',
mainfile_name_re=(r'^.*\.(gpw2|gpw)$'), # mainfile_name_re=(r'^.*\.(gpw2|gpw)$'),
mainfile_mime_re=r'application/(x-tar|octet-stream)' # mainfile_mime_re=r'application/(x-tar|octet-stream)'
), # ),
LegacyParser( # LegacyParser(
name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/', # name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
parser_class_name='gpawparser.GPAWParserWrapper', # parser_class_name='gpawparser.GPAWParserWrapper',
mainfile_name_re=(r'^.*\.gpw$'), # mainfile_name_re=(r'^.*\.gpw$'),
mainfile_mime_re=r'application/(x-tar|octet-stream)' # mainfile_mime_re=r'application/(x-tar|octet-stream)'
), # ),
GPAWParser(),
LegacyParser( LegacyParser(
name='parsers/atk', code_name='AtomistixToolKit', code_homepage='https://www.synopsys.com/silicon/quantumatk.html', name='parsers/atk', code_name='AtomistixToolKit', code_homepage='https://www.synopsys.com/silicon/quantumatk.html',
parser_class_name='atkparser.ATKParserWrapper', parser_class_name='atkparser.ATKParserWrapper',
......
...@@ -39,6 +39,7 @@ mdtraj ...@@ -39,6 +39,7 @@ mdtraj
mdanalysis mdanalysis
nomadcore nomadcore
nomad_dos_fingerprints nomad_dos_fingerprints
lxml
# [infrastructure] # [infrastructure]
optimade==0.8.1 optimade==0.8.1
......
...@@ -60,7 +60,7 @@ parser_examples = [ ...@@ -60,7 +60,7 @@ parser_examples = [
('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'), ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'), ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'), ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'), ('parsers/gpaw', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
('parsers/atk', 'tests/data/parsers/atk/Si2.nc'), ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
('parsers/gulp', 'tests/data/parsers/gulp/example6.got'), ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'), ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment