Commit afb86ccb authored by Markus Scheidgen
Browse files

VASP parser now matches all files whose name ends in `.xml`, optionally followed by one further extension. Added a test.

parent da312fff
.DS_Store
.pyenv/
__pycache__
.mypy_cache
......
......@@ -42,7 +42,7 @@ Parsers in NOMAD-coe use a *backend* to create output.
.. autoclass:: nomad.parsing.LocalBackend
"""
from typing import TextIO, Tuple, List, Any, Callable
from typing import TextIO, Tuple, List, Any, Callable, IO
from abc import ABCMeta, abstractmethod
from io import StringIO
import json
......@@ -578,12 +578,12 @@ class Parser():
self._main_file_re = re.compile(main_file_re)
self._main_contents_re = re.compile(main_contents_re)
def is_mainfile(self, upload, filename: str) -> bool:
def is_mainfile(self, filename: str, open: Callable[[str], IO[Any]]) -> bool:
""" Checks if a file is a mainfile via the parsers ``main_contents_re``. """
if self._main_file_re.match(filename):
file = None
try:
file = upload.open_file(filename)
file = open(filename)
return self._main_contents_re.match(file.read(500)) is not None
finally:
if file:
......@@ -623,7 +623,7 @@ parsers = [
Parser(
python_git=dependencies['parsers/vasp'],
parser_class_name='vaspparser.VASPParser',
main_file_re=r'^.*\.xml$',
main_file_re=r'^.*\.xml(\.[^\.]*)?$',
main_contents_re=(
r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*'
r'?\s*<modeling>'
......
......@@ -67,7 +67,7 @@ def extracting_task(task: Task, proc: UploadProc) -> UploadProc:
# TODO: deal with multiple possible parser specs
for filename in upload.filelist:
for parser in parsers:
if parser.is_mainfile(upload, filename):
if parser.is_mainfile(filename, lambda fn: upload.open_file(fn)):
tmp_mainfile = upload.get_path(filename)
calc_proc = CalcProc(filename, parser.name, tmp_mainfile)
proc.calc_procs.append(calc_proc)
......
......@@ -108,7 +108,7 @@ def test_presigned_url(upload_id):
def test_upload(uploaded_id: str):
with files.Upload(uploaded_id) as upload:
assert len(upload.filelist) == 106
assert len(upload.filelist) == 5
# now just try to open the first file (not directory), without error
for filename in upload.filelist:
if filename.endswith('.xml'):
......
......@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from nomad.parsing import JSONStreamWriter, parser_dict
import os
from io import StringIO
import json
import pytest
import sys
from nomadcore.local_meta_info import loadJsonFile
from nomad.parsing import JSONStreamWriter, parser_dict
from nomad.parsing import LocalBackend, BadContextURI
......@@ -189,3 +189,17 @@ def test_vasp_parser(parsed_vasp_example: LocalBackend):
assert status == 'ParseSuccess'
assert errors is None or len(errors) == 0
def test_match():
    """ Checks that the VASP parser accepts exactly six mainfiles in the example data. """
    vasp_parser = parser_dict['parsers/vasp']
    examples_root = './data/examples_vasp_6'

    def opener(path):
        # is_mainfile expects a callable that turns a file name into a file object
        return open(path)

    # every file below the examples directory, in os.walk order
    candidates = (
        os.path.join(folder, name)
        for folder, _, names in os.walk(examples_root)
        for name in names)

    matched = sum(
        1 for path in candidates
        if vasp_parser.is_mainfile(path, opener))

    assert matched == 6
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment