Commit f0e4d4a1 authored by Daniel Speckhard

Added GPAW2 and ATK parsers.

parent 91fe042a
@@ -79,3 +79,6 @@
[submodule "dependencies/parsers/gpaw"]
path = dependencies/parsers/gpaw
url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-gpaw
[submodule "dependencies/parsers/atk"]
path = dependencies/parsers/atk
url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-atk
Subproject commit bab03a285bac95ee7c3d01727537065c8ae7f4d6
@@ -93,17 +93,13 @@ def match_parser(mainfile: str, upload_files: files.StagingUploadFiles) -> 'Pars
"""
with upload_files.raw_file(mainfile, 'rb') as f:
compression, open_compressed = _compressions.get(f.read(3), (None, open))
mainfile_path = upload_files.raw_file_object(mainfile).os_path
with open_compressed(mainfile_path, 'rb') as f:
buffer = f.read(2048)
mime_type = magic.from_buffer(buffer, mime=True)
if mime_type.startswith('application') and not mime_type.endswith('xml'):
return None
for parser in parsers:
if parser.is_mainfile(mainfile_path, mime_type, buffer.decode('utf-8'), compression):
if parser.is_mainfile(mainfile_path, mime_type, buffer, compression):
# TODO: deal with multiple possible parser specs
return parser
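
Sketch (not part of the commit) of the sniffing step that match_parser performs before asking each parser whether a file is its mainfile. The gzip/bz2 entries of the _compressions table are an assumption, since the table itself is not shown in this diff:

import bz2
import gzip
import magic  # python-magic

# Assumed reconstruction of _compressions: the first three bytes identify a
# gzip or bz2 archive and select the matching opener; anything else is read
# with the plain built-in open().
_compressions = {
    b'\x1f\x8b\x08': ('gz', gzip.open),
    b'BZh': ('bz2', bz2.open),
}

def sniff(path: str):
    """Return (compression, mime_type, buffer) for a candidate mainfile."""
    with open(path, 'rb') as f:
        compression, open_compressed = _compressions.get(f.read(3), (None, open))
    with open_compressed(path, 'rb') as f:
        buffer = f.read(2048)  # only the head of the file is inspected
    mime_type = magic.from_buffer(buffer, mime=True)
    return compression, mime_type, buffer
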
@@ -117,7 +113,7 @@ parsers = [
LegacyParser(
name='parsers/phonopy',
parser_class_name='phonopyparser.PhonopyParserWrapper',
mainfile_contents_re=r'', # Empty regex since this code calls other DFT codes.
# mainfile_contents_re=r'', # Empty regex since this code calls other DFT codes.
mainfile_name_re=(r'.*/phonopy-FHI-aims-displacement-0*1/control.in$')
),
LegacyParser(
@@ -153,7 +149,8 @@ parsers = [
mainfile_contents_re=(
r'^(.*\n)*'
r'?\s*Invoking FHI-aims \.\.\.'
r'?\s*Version')
r'?\s*Version'),
mainfile_name_re= r'^.(?!.*phonopy-FHI-aims-displacement)'
),
LegacyParser(
name='parsers/cp2k',
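
The two regex changes above work as a pair: parsers/phonopy now claims control.in files inside phonopy-FHI-aims-displacement directories purely by name, and the FHI-aims entry gains a negative lookahead so it no longer claims those same files. A small demonstration, not part of the commit:

import re

# The name patterns from the two entries above.
phonopy_name_re = re.compile(r'.*/phonopy-FHI-aims-displacement-0*1/control.in$')
aims_name_re = re.compile(r'^.(?!.*phonopy-FHI-aims-displacement)')

ordinary = 'uploads/run1/control.in'
displacement = 'uploads/phonopy-FHI-aims-displacement-01/control.in'

print(bool(aims_name_re.match(ordinary)), bool(phonopy_name_re.match(ordinary)))          # True False
print(bool(aims_name_re.match(displacement)), bool(phonopy_name_re.match(displacement)))  # False True
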
@@ -247,13 +244,14 @@ parsers = [
LegacyParser(
name='parsers/gaussian',
parser_class_name='gaussianparser.GaussianParser',
mainfile_contents_re=(
r'\s*Cite this work as:'
r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,'
r'\s\*\*\*\*\*\*\*\*\*\*\*\**'
r'\s*Gaussian\s*([0-9]+):\s*([A-Za-z0-9-.]+)\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)'
r'\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)')
),
mainfile_contents_re=
# The previous pattern matched text that appears too far down the output to fall inside the scanned buffer.
# r'\s*Cite this work as:'
# r'\s*Gaussian [0-9]+, Revision [A-Za-z0-9.]*,'
# r'\s\*\*\*\*\*\*\*\*\*\*\*\**'
# r'\s*Gaussian\s*([0-9]+):\s*([A-Za-z0-9-.]+)\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)'
# r'\s*([0-9][0-9]?\-[A-Z][a-z][a-z]\-[0-9]+)')
r'Gaussian, Inc'),
LegacyParser(
name='parsers/quantumespresso',
parser_class_name='quantumespressoparser.QuantumEspressoParserPWSCF',
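
The Gaussian change replaces the multi-line citation pattern with a single banner phrase; presumably, per the comment above, the citation block sits too far into the output to land in the 2048-byte buffer that match_parser scans. A toy illustration, not from the commit:

import re

# Fabricated stand-in for a Gaussian log: a banner near the top, with the
# citation block pushed past the 2048-byte window that match_parser reads.
banner = ' Entering Gaussian System\n Copyright ... Gaussian, Inc.\n'
padding = ' #P B3LYP/6-31G* opt\n' * 200  # pushes later text past 2048 bytes
citation = ' Cite this work as:\n Gaussian 09, Revision D.01,\n'
head = (banner + padding + citation)[:2048]

print(re.search(r'Gaussian, Inc', head) is not None)          # True
print(re.search(r'\s*Cite this work as:', head) is not None)  # False
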
@@ -301,9 +299,24 @@ parsers = [
),
LegacyParser(
name='parsers/gpaw',
parser_class_name='gpawparser.GpawParserWrapper',
mainfile_contents_re=r'', # We can't read .gpw as txt - of UlmGPAW|AFFormatGPAW'
mainfile_name_re=(r'.gpw$')
parser_class_name='gpawparser.GPAWParserWrapper',
mainfile_name_re=(r'^.*\.gpw$'),
mainfile_mime_re=r'application/x-tar'
),
LegacyParser(
name='parsers/gpaw2',
parser_class_name='gpawparser.GPAWParser2Wrapper',
# mainfile_contents_re=r'', # We can't read .gpw2 to match AFFormatGPAW'
mainfile_name_re=(r'^.*\.gpw2$'),
mainfile_mime_re=r'application/x-tar'
),
LegacyParser(
name='parsers/atk',
parser_class_name='atkparser.ATKParserWrapper',
# mainfile_contents_re=r'', # We can't read the binary .nc file as text to match its contents.
mainfile_name_re=r'^.*\.nc',
# The previously used mime type r'application/x-netcdf' is not what the magic library reports for these files.
mainfile_mime_re=r'application/octet-stream'
)
]
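
The gpaw, gpaw2 and atk entries cannot use a contents regex because their mainfiles are binary, so they rely on file-name and mime-type matching instead. A small diagnostic sketch, not part of the commit, for checking what the magic library actually reports; the example results in the comments come from the regexes and comments in this commit rather than from running the code:

import magic  # python-magic

def report_mime(path: str) -> str:
    # Mirror match_parser: classify only the first 2048 bytes of the file.
    with open(path, 'rb') as f:
        return magic.from_buffer(f.read(2048), mime=True)

# report_mime('tests/data/parsers/gpaw/Fe2.gpw')  # expected: 'application/x-tar'
# report_mime('tests/data/parsers/atk/Si2.nc')    # expected: 'application/octet-stream', not 'application/x-netcdf'
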
@@ -79,7 +79,7 @@ class LegacyParser(Parser):
"""
def __init__(
self, name: str, parser_class_name: str,
mainfile_contents_re: str,
mainfile_contents_re: str = None,
mainfile_mime_re: str = r'text/.*',
mainfile_name_re: str = r'.*',
supported_compressions: List[str] = []) -> None:
@@ -88,13 +88,23 @@ class LegacyParser(Parser):
self.parser_class_name = parser_class_name
self._mainfile_mime_re = re.compile(mainfile_mime_re)
self._mainfile_name_re = re.compile(mainfile_name_re)
self._mainfile_contents_re = re.compile(mainfile_contents_re)
self._mainfile_contents_re = mainfile_contents_re
if self._mainfile_contents_re is not None:
self._mainfile_contents_re = re.compile(self._mainfile_contents_re)
self._supported_compressions = supported_compressions
def is_mainfile(self, filename: str, mime: str, buffer: str, compression: str = None) -> bool:
return self._mainfile_name_re.match(filename) is not None and \
self._mainfile_mime_re.match(mime) is not None and \
self._mainfile_contents_re.search(buffer) is not None and \
def is_mainfile(self, filename: str, mime: str, buffer: bytes, compression: str = None) -> bool:
if self._mainfile_contents_re is not None:
try:  # Try to decode the buffer as text for regex matching.
buffer = buffer.decode('utf-8')
except UnicodeDecodeError:
return False  # A contents regex cannot match a buffer that is not valid UTF-8 text.
if self._mainfile_contents_re.search(buffer) is None:
return False
return self._mainfile_mime_re.match(mime) is not None and \
self._mainfile_name_re.match(filename) is not None and \
(compression is None or compression in self._supported_compressions)
def run(self, mainfile: str, logger=None) -> LocalBackend:
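
With mainfile_contents_re now optional and the buffer passed as bytes, the contents check only runs when the buffer decodes as UTF-8. A standalone sketch of that rule, not the class method itself; the helper name is made up:

import re
from typing import Optional

def matches_contents(buffer: bytes, contents_re: Optional[re.Pattern]) -> bool:
    if contents_re is None:
        return True  # parsers for binary formats skip the contents check entirely
    try:
        text = buffer.decode('utf-8')
    except UnicodeDecodeError:
        return False  # undecodable binary data cannot satisfy a text regex
    return contents_re.search(text) is not None

print(matches_contents(b'Invoking FHI-aims ...', re.compile(r'FHI-aims')))  # True
print(matches_contents(b'\x89PNG\r\n\x1a\n', re.compile(r'FHI-aims')))      # False
print(matches_contents(b'\x89PNG\r\n\x1a\n', None))                         # True
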
@@ -50,14 +50,16 @@ parser_examples = [
('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
('parsers/phonopy',
'tests/data/parsers/phonopy/control.in'),
('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw')]
('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
('parsers/atk', 'tests/data/parsers/atk/Si2.nc')]
faulty_unknown_one_d_matid_example = [
('parsers/template', 'tests/data/normalizers/no_sim_cell_boolean_positions.json')
]
# TODO @dts: Phonopy mainfiles are being counted twice; tmp.out in the phonopy example directory is matched as well.
correct_num_output_files = 27
correct_num_output_files = 30
class TestLocalBackend(object):