Commit 64d27a46 authored by Markus Scheidgen
Browse files

Merge branch 'reprocess' into 'v0.10.1'

Reprocess

See merge request !284
parents bf3c06fa a77f662c
Pipeline #95944 passed with stages
in 23 minutes and 5 seconds
......@@ -48,8 +48,8 @@
branch = master
[submodule "dependencies/parsers/quantum-espresso"]
path = dependencies/parsers/quantum-espresso
url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-quantum-espresso.git
branch = nomad-fair
url = https://github.com/nomad-coe/nomad-parser-quantum-espresso.git
branch = master
[submodule "dependencies/parsers/abinit"]
path = dependencies/parsers/abinit
url = https://github.com/nomad-coe/nomad-parser-abinit.git
......
Subproject commit f7221709b1c0abe678a0cc9059545ea9edb612c1
Subproject commit 8fc5f723a72e5fe3722a43a68ca9246a7633a760
Subproject commit be75c7bf0cb4d1d26ee375cc1719e4fdcd50bc12
Subproject commit b3850be63dab9ce9a823eb2a32ff7ced541265be
Subproject commit 400ccf334212382aac2b1b424e5834dc979cf5af
Subproject commit 2fec8017097f8483c75496b6fc4d4917a9581c1c
Subproject commit 271105a96fb0d6a1689cf11042db5ef3d01e7d08
Subproject commit f425f19f2f83eb68481a0bf7248cbcfe6592443d
Subproject commit 08a51152d946dbbdba7a8b4bbeffcbe3907c1cee
Subproject commit ae88b8cccddbb4da886edf9c926f7f0c792b3002
......@@ -139,6 +139,8 @@ class OptimadeNormalizer(SystemBasedNormalizer):
def get_value(quantity_def, default: Any = None, numpy: bool = False, unit=None) -> Any:
try:
value = self.section_run.section_system[-1].m_get(quantity_def)
if value is None:
return
if type(value) == np.ndarray and not numpy:
return value.tolist()
if isinstance(value, list) and numpy:
......@@ -147,7 +149,7 @@ class OptimadeNormalizer(SystemBasedNormalizer):
if numpy and unit is not None:
if isinstance(value, pint.quantity._Quantity):
value = value.to(unit)
else:
elif value is not None:
value = value * unit
return value
......@@ -157,6 +159,7 @@ class OptimadeNormalizer(SystemBasedNormalizer):
from nomad.normalizing.system import normalized_atom_labels
nomad_species = get_value(section_system.atom_labels)
nomad_species = [] if nomad_species is None else nomad_species
# elements
atoms = normalized_atom_labels(nomad_species)
......@@ -196,7 +199,7 @@ class OptimadeNormalizer(SystemBasedNormalizer):
for species_label in set(nomad_species):
match = re.match(species_re, species_label)
element_label = match.group(1)
element_label = match.group(1) if match else species_label
species = optimade.m_create(Species)
species.name = species_label
......
from .file_parser import FileParser
from .text_parser import TextParser, DataTextParser, Quantity, ParsePattern
from .xml_parser import XMLParser
from .tar_parser import TarParser
......@@ -20,28 +20,39 @@ from typing import Any, Dict
import gzip
import bz2
import lzma
import tarfile
class FileParser:
'''
Base class for parsers. The parse method implemented here simply sets the parsed
quantities as attributes of the class. The parse method specific to a file type
Base class for parsers. The parse method specific to a file type
should be implemented in the corresponding child class. The parsed quantities are
stored in results. One can access a quantity by using the get method.
stored in results. One can access a quantity by using the get method or as attribute.
Arguments:
mainfile: the file to be parsed
logger: optional logger
'''
def __init__(self, mainfile: str, logger=None):
self._mainfile: str = os.path.abspath(mainfile) if mainfile else mainfile
self.logger = logger if logger else logging
def __init__(self, mainfile=None, logger=None, open=None):
self._mainfile: Any = None
self._mainfile_obj: Any = None
if isinstance(mainfile, str):
self._mainfile = os.path.abspath(mainfile)
self._mainfile_obj = None
elif hasattr(mainfile, 'name'):
self._mainfile = mainfile.name
self._mainfile_obj = mainfile
self._open = open
self.logger = logger if logger is not None else logging
self._results: Dict[str, Any] = None
# a key is necessary for xml parsers, where parsing is done dynamically
self._key: str = None
self._kwargs: Dict[str, Any] = None
self._file_handler: Any = None
def init_parameters(self):
pass
@property
def results(self):
if self._results is None:
......@@ -55,12 +66,22 @@ class FileParser:
def maindir(self):
return os.path.dirname(self._mainfile)
@property
def mainfile_obj(self):
if self._mainfile_obj is None:
try:
self._mainfile_obj = self.open(self._mainfile)
except Exception:
pass
return self._mainfile_obj
@property
def mainfile(self):
if self._mainfile is None:
return
if not os.path.isfile(self._mainfile):
if self._mainfile_obj is None and not os.path.isfile(self._mainfile):
return
return self._mainfile
......@@ -68,19 +89,29 @@ class FileParser:
def mainfile(self, val):
self._results = None
self._file_handler = None
self._mainfile = os.path.abspath(val) if val is not None else val
@property
def open(self):
if self.mainfile.endswith('.gz'):
open_file = gzip.open
elif self.mainfile.endswith('.bz2'):
open_file = bz2.open
elif self.mainfile.endswith('.xz'):
open_file = lzma.open
else:
open_file = open
return open_file
self._mainfile = None
if isinstance(val, str):
self._mainfile = os.path.abspath(val)
self._mainfile_obj = None
elif hasattr(val, 'name'):
self._mainfile = val.name
self._mainfile_obj = val
self.init_parameters()
def open(self, mainfile):
open_file = self._open
if open_file is None:
if mainfile.endswith('.gz'):
open_file = gzip.open
elif mainfile.endswith('.bz2'):
open_file = bz2.open
elif mainfile.endswith('.xz'):
open_file = lzma.open
elif mainfile.endswith('.tar'):
open_file = tarfile.open
else:
open_file = open
return open_file(mainfile)
def get(self, key: str, default: Any = None, unit: str = None, **kwargs):
'''
......@@ -118,13 +149,10 @@ class FileParser:
elif isinstance(key, int):
return self[int]
def parse(self, quantity_key: str = None):
'''
Sets quantities in result as class attributes.
'''
for key, val in self._results.items():
try:
setattr(self, key, val)
except Exception:
pass
return self
def __getattr__(self, key):
if key not in self._results:
self.parse(key)
return self._results.get(key)
def parse(self, quantity_key: str = None, **kwargs):
pass
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tarfile
from .file_parser import FileParser
class TarParser(FileParser):
    '''
    Parser that exposes the members of a tar archive as parsed quantities.

    The archive is opened with tarfile.open. Parsing a key extracts the archive
    member of that name and stores the object returned by extractfile (or None if
    the extraction fails) in results under that key.

    Arguments:
        mainfile: the tar archive to be parsed
        logger: optional logger
    '''
    def __init__(self, mainfile=None, logger=None):
        super().__init__(mainfile, logger, tarfile.open)
        self._names_map = None

    def init_parameters(self):
        '''
        Drops the cached member names. FileParser invokes this hook when mainfile is
        reassigned, so the names of the previous archive are not reused.
        '''
        self._names_map = None

    @property
    def names_map(self):
        '''
        Mapping from lower-cased member name to the member name as stored in the
        archive. Built on first access; stays None while no archive can be opened.
        '''
        if self._names_map is None:
            archive = self.mainfile_obj
            if archive is not None:
                self._names_map = {name.lower(): name for name in archive.getnames()}
        return self._names_map

    def parse(self, key=None, **kwargs):
        '''
        Extracts the archive member corresponding to key and stores it in results.

        Arguments:
            key: name of the member to extract; it is first looked up in names_map
                and used verbatim when it is not found there
            **kwargs: accepted for compatibility with FileParser.parse, not used
        '''
        if self._results is None:
            self._results = dict()

        archive = self.mainfile_obj
        if archive is None:
            return

        # NOTE(review): names_map keys are lower-cased while key is looked up as
        # given, so only lower-case keys are resolved case-insensitively - confirm
        # that this is intended.
        name = self.names_map.get(key, key)
        try:
            val = archive.extractfile(name)
        except Exception:
            # best effort: a member that is missing or cannot be read yields None
            val = None

        self._results[key] = val
......@@ -232,9 +232,10 @@ class DataTextParser(FileParser):
def __init__(self, **kwargs):
self._dtype: Type = kwargs.get('dtype', float)
mainfile: str = kwargs.get('mainfile', None)
self._mainfile_contents: str = kwargs.get('mainfile_contents', None)
logger = kwargs.get('logger', None)
logger = logger if logger is not None else logging
super().__init__(mainfile, logger=logger)
super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self.init_parameters()
def init_parameters(self):
......@@ -249,11 +250,13 @@ class DataTextParser(FileParser):
Returns the loaded data
'''
if self._file_handler is None:
if self.mainfile is None:
return
try:
self._file_handler = np.loadtxt(self.mainfile, dtype=self._dtype)
if self.mainfile is not None:
self._file_handler = np.loadtxt(self.mainfile)
else:
if self._mainfile_contents is None:
self._mainfile_contents = self.mainfile_obj.read()
self._file_handler = np.fromstring(self._mainfile_contents, dtype=self._dtype)
except Exception:
return
......@@ -278,7 +281,7 @@ class TextParser(FileParser):
file_length: length of the chunk to be read from the file
'''
def __init__(self, mainfile=None, quantities=None, logger=None, findall=True, **kwargs):
super().__init__(mainfile, logger)
super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self._quantities: List[Quantity] = quantities
self.findall: bool = findall
self._kwargs = kwargs
......@@ -530,5 +533,4 @@ class TextParser(FileParser):
if quantity.name not in self._results:
self._parse_quantity(quantity)
super().parse()
return self
......@@ -31,9 +31,9 @@ class XMLParser(FileParser):
logger: logger
convert: specifies if quantities are converted automatically
'''
def __init__(self, mainfile: str = None, logger=None, convert: bool = True):
super().__init__(mainfile, logger=logger)
self.convert = convert
def __init__(self, mainfile: str = None, logger=None, **kwargs):
super().__init__(mainfile, logger=logger, open=kwargs.get('open', None))
self.convert = kwargs.get('convert', True)
self.init_parameters()
def init_parameters(self):
......@@ -51,8 +51,16 @@ class XMLParser(FileParser):
if self.mainfile is None:
return
try:
self._file_handler = ElementTree.parse(self.open(self.mainfile)).getroot()
self._file_handler = ElementTree.parse(self.mainfile_obj).getroot()
except Exception:
try:
# I cannot use the lxml XMLParser directly because it is not compatible with
# the ElementTree implementation.
xml = etree.parse(self.mainfile_mainfile_obj, parser=etree.XMLParser(recover=True))
self._file_handler = ElementTree.fromstring(etree.tostring(xml))
except Exception:
pass
self.logger.error('failed to load xml file')
try:
# I cannot use the lxml XMLParser directly because it is not compatible with
......
......@@ -36,6 +36,11 @@ from crystalparser import CrystalParser
from fhiaimsparser import FHIAimsParser
from excitingparser import ExcitingParser
from abinitparser import AbinitParser
from quantumespressoparser import QuantumEspressoParser
from gaussianparser import GaussianParser
from gpawparser import GPAWParser
from octopusparser import OctopusParser
from orcaparser import OrcaParser
try:
# these packages are not available without parsing extra, which is ok, if the
......@@ -209,36 +214,10 @@ parsers = [
name='parsers/band', code_name='BAND', code_homepage='https://www.scm.com/product/band_periodicdft/',
parser_class_name='bandparser.BANDParser',
mainfile_contents_re=r' +\* +Amsterdam Density Functional +\(ADF\)'),
LegacyParser(
name='parsers/gaussian', code_name='Gaussian', code_homepage='http://gaussian.com/',
parser_class_name='gaussianparser.GaussianParser',
mainfile_mime_re=r'.*',
mainfile_contents_re=(
r'\s*Cite this work as:'
r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9\.]*,')
),
LegacyParser(
name='parsers/quantumespresso', code_name='Quantum Espresso', code_homepage='https://www.quantum-espresso.org/',
parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
mainfile_contents_re=(
r'(Program PWSCF.*starts)|'
r'(Current dimensions of program PWSCF are)')
# r'^(.*\n)*'
# r'\s*Program (\S+)\s+v\.(\S+)(?:\s+\(svn\s+rev\.\s+'
# r'(\d+)\s*\))?\s+starts[^\n]+'
# r'(?:\s*\n?)*This program is part of the open-source Quantum')
),
QuantumEspressoParser(),
GaussianParser(),
AbinitParser(),
LegacyParser(
name='parsers/orca', code_name='ORCA', code_homepage='https://orcaforum.kofo.mpg.de/',
parser_class_name='orcaparser.OrcaParser',
mainfile_contents_re=(
r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
r'\s+\* O R C A \*\s*'
r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
r'\s*'
r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
),
OrcaParser(),
LegacyParser(
name='parsers/castep', code_name='CASTEP', code_homepage='http://www.castep.org/',
parser_class_name='castepparser.CastepParser',
......@@ -254,27 +233,22 @@ parsers = [
parser_class_name='libatomsparser.LibAtomsParserWrapper',
mainfile_contents_re=(r'\s*<GAP_params\s')
),
LegacyParser(
name='parsers/octopus', code_name='Octopus', code_homepage='https://octopus-code.org/',
parser_class_name='octopusparser.OctopusParserWrapper',
mainfile_contents_re=(r'\|0\) ~ \(0\) \|')
# We decided to use the octopus eyes instead of
# r'\*{32} Grid \*{32}Simulation Box:' since it was so far down in the file.
),
OctopusParser(),
# match gpaw2 first, other .gpw files are then considered to be "gpaw1"
LegacyParser(
name='parsers/gpaw2', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
parser_class_name='gpawparser.GPAWParser2Wrapper',
mainfile_binary_header=b'GPAW',
mainfile_name_re=(r'^.*\.(gpw2|gpw)$'),
mainfile_mime_re=r'application/(x-tar|octet-stream)'
),
LegacyParser(
name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
parser_class_name='gpawparser.GPAWParserWrapper',
mainfile_name_re=(r'^.*\.gpw$'),
mainfile_mime_re=r'application/(x-tar|octet-stream)'
),
# LegacyParser(
# name='parsers/gpaw2', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
# parser_class_name='gpawparser.GPAWParser2Wrapper',
# mainfile_binary_header=b'GPAW',
# mainfile_name_re=(r'^.*\.(gpw2|gpw)$'),
# mainfile_mime_re=r'application/(x-tar|octet-stream)'
# ),
# LegacyParser(
# name='parsers/gpaw', code_name='GPAW', code_homepage='https://wiki.fysik.dtu.dk/gpaw/',
# parser_class_name='gpawparser.GPAWParserWrapper',
# mainfile_name_re=(r'^.*\.gpw$'),
# mainfile_mime_re=r'application/(x-tar|octet-stream)'
# ),
GPAWParser(),
LegacyParser(
name='parsers/atk', code_name='AtomistixToolKit', code_homepage='https://www.synopsys.com/silicon/quantumatk.html',
parser_class_name='atkparser.ATKParserWrapper',
......
......@@ -60,7 +60,7 @@ parser_examples = [
('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
('parsers/gpaw', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment