diff --git a/.gitignore b/.gitignore index e8f39e893340f7ac4800a89d4d1b073e4fce037e..371e937230509c797b09fb59729fed1b3bff0be1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.DS_Store .pyenv/ __pycache__ .mypy_cache diff --git a/nomad/parsing.py b/nomad/parsing.py index 87a373789e13863655284a64e52e084c09e7f35f..2f3a1f9dafe2ab5b1ef5da16d19c453f3ea8e822 100644 --- a/nomad/parsing.py +++ b/nomad/parsing.py @@ -42,7 +42,7 @@ Parsers in NOMAD-coe use a *backend* to create output. .. autoclass:: nomad.parsing.LocalBackend """ -from typing import TextIO, Tuple, List, Any, Callable +from typing import TextIO, Tuple, List, Any, Callable, IO from abc import ABCMeta, abstractmethod from io import StringIO import json @@ -578,12 +578,12 @@ class Parser(): self._main_file_re = re.compile(main_file_re) self._main_contents_re = re.compile(main_contents_re) - def is_mainfile(self, upload, filename: str) -> bool: + def is_mainfile(self, filename: str, open: Callable[[str], IO[Any]]) -> bool: """ Checks if a file is a mainfile via the parsers ``main_contents_re``. """ if self._main_file_re.match(filename): file = None try: - file = upload.open_file(filename) + file = open(filename) return self._main_contents_re.match(file.read(500)) is not None finally: if file: @@ -623,7 +623,7 @@ parsers = [ Parser( python_git=dependencies['parsers/vasp'], parser_class_name='vaspparser.VASPParser', - main_file_re=r'^.*\.xml$', + main_file_re=r'^.*\.xml(\.[^\.]*)?$', main_contents_re=( r'^\s*<\?xml version="1\.0" encoding="ISO-8859-1"\?>\s*' r'?\s*<modeling>' diff --git a/nomad/processing/tasks.py b/nomad/processing/tasks.py index f1ea03c13aa0f2908ea82abddaf53822acd87959..705712c346477dfba660fe036a9e532373ce70df 100644 --- a/nomad/processing/tasks.py +++ b/nomad/processing/tasks.py @@ -67,7 +67,7 @@ def extracting_task(task: Task, proc: UploadProc) -> UploadProc: # TODO: deal with multiple possible parser specs for filename in upload.filelist: for parser in parsers: - if parser.is_mainfile(upload, filename): + if parser.is_mainfile(filename, lambda fn: upload.open_file(fn)): tmp_mainfile = upload.get_path(filename) calc_proc = CalcProc(filename, parser.name, tmp_mainfile) proc.calc_procs.append(calc_proc) diff --git a/tests/test_files.py b/tests/test_files.py index f7921f7a3f5e9617887c3bbfd8721abb0437ae6d..7845b5b6391c92fe50c15734ed988a977c5f10cf 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -108,7 +108,7 @@ def test_presigned_url(upload_id): def test_upload(uploaded_id: str): with files.Upload(uploaded_id) as upload: - assert len(upload.filelist) == 106 + assert len(upload.filelist) == 5 # now just try to open the first file (not directory), without error for filename in upload.filelist: if filename.endswith('.xml'): diff --git a/tests/test_parsing.py b/tests/test_parsing.py index 1ea26b5b4be7d5dde2047360c5adabb5939b7717..12f5cccc95da706a6b9a89f4d65ff99f98b6dab9 100644 --- a/tests/test_parsing.py +++ b/tests/test_parsing.py @@ -12,14 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nomad.parsing import JSONStreamWriter, parser_dict +import os from io import StringIO import json import pytest -import sys from nomadcore.local_meta_info import loadJsonFile +from nomad.parsing import JSONStreamWriter, parser_dict from nomad.parsing import LocalBackend, BadContextURI @@ -189,3 +189,17 @@ def test_vasp_parser(parsed_vasp_example: LocalBackend): assert status == 'ParseSuccess' assert errors is None or len(errors) == 0 + + +def test_match(): + vasp_parser = parser_dict['parsers/vasp'] + directory = './data/examples_vasp_6' + + count = 0 + for dirpath, _, filenames in os.walk(directory): + for filename in filenames: + fullname = os.path.join(dirpath, filename) + if vasp_parser.is_mainfile(fullname, lambda fn: open(fn)): + count += 1 + + assert count == 6