test_parsing.py 9.78 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
3
4
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
5
6
7
8
9
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
Markus Scheidgen's avatar
Markus Scheidgen committed
10
#     http://www.apache.org/licenses/LICENSE-2.0
11
12
#
# Unless required by applicable law or agreed to in writing, software
Markus Scheidgen's avatar
Markus Scheidgen committed
13
# distributed under the License is distributed on an "AS IS" BASIS,
14
15
16
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markus Scheidgen's avatar
Markus Scheidgen committed
17
#
18

19
20
import json
import pytest
21
import os
22
from shutil import copyfile
23

24
from nomad import utils, files, datamodel
Markus Scheidgen's avatar
Markus Scheidgen committed
25
from nomad.datamodel import EntryArchive, EntryMetadata
26
from nomad.parsing import BrokenParser
Markus Scheidgen's avatar
Markus Scheidgen committed
27
from nomad.parsing.parsers import parser_dict, match_parser
Markus Scheidgen's avatar
Markus Scheidgen committed
28
from nomad.utils import dump_json
29

30
# Pairs of (parser name, mainfile path). This list is the source for the
# parametrization of ``test_parser`` and of the ``parsed_example`` fixture.
# NOTE(review): the first mainfile starts with 'test/' while all other local
# examples start with 'tests/' -- confirm whether that is intentional.
parser_examples = [
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/templates/template.json'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vasp', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/fhi-vibes', 'tests/data/parsers/fhi-vibes/molecular_dynamics.nc'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/fluor/12-difluoroethane.out'),
    ('parsers/eels', 'tests/data/parsers/eels.json'),
    ('parsers/lobster', 'tests/data/parsers/lobster/NaCl/lobsterout'),
    ('parsers/aflow', 'tests/data/parsers/aflow/Ag1Co1O2_ICSD_246157/aflowlib.json'),
    ('parsers/mp', 'tests/data/parsers/mp/mp-1/materials.json'),
    ('parsers/asr', 'tests/data/parsers/asr/archive_ccdc26c4f32546c5a00ad03a093b73dc.json'),
    ('parsers/psi4', 'tests/data/parsers/psi4/adc1/output.ref'),
    ('parsers/yambo', 'tests/data/parsers/yambo/hBN/r-10b_1Ry_HF_and_locXC_gw0_em1d_ppa'),
    ('parsers/archive', 'tests/data/parsers/archive.json'),
]
81

82
83
# Mainfiles that live outside of this repository's test data might not exist in
# all testing environments (e.g. in the nomad docker image). Only examples whose
# mainfile path starts with 'tests' or that exist on disk are kept.
fixed_parser_examples = [
    (example_parser, example_mainfile)
    for example_parser, example_mainfile in parser_examples
    if example_mainfile.startswith('tests') or os.path.exists(example_mainfile)
]
parser_examples = fixed_parser_examples
89

Markus Scheidgen's avatar
Markus Scheidgen committed
90
# Number of raw files that test_match expects to be matched by a parser that is
# not a BrokenParser after adding 'tests/data/parsers' to a staging upload.
correct_num_output_files = 122
91

92
93
94
95
96
97
98
99

def create_reference(data, pretty):
    '''Serialize ``data`` to a JSON string.

    Arguments:
        data: Any object that ``json.dumps`` can serialize.
        pretty: If truthy, the output is indented by two spaces. Otherwise the
            compact separators ``','`` and ``':'`` are used.

    Returns: The JSON text.
    '''
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


100
101
@pytest.fixture(scope='function')
def assert_parser_result(caplog):
    '''Provide a callable that checks the log records captured during the test call.

    The returned callable takes the parsed archive (which it does not inspect),
    ``has_errors`` (whether ERROR or CRITICAL records are expected, default ``False``)
    and ``has_warnings`` (whether WARNING records are expected; ``None`` skips
    the warning check).
    '''
    def _assert(entry_archive: EntryArchive, has_errors: bool = False, has_warnings: bool = None):
        logged_levels = [record.levelname for record in caplog.get_records(when='call')]

        errors_exist = any(level in ['ERROR', 'CRITICAL'] for level in logged_levels)
        warnings_exist = any(level in ['WARNING'] for level in logged_levels)

        assert has_errors == errors_exist
        if has_warnings is not None:
            assert has_warnings == warnings_exist

    return _assert
115
116


Daniel Speckhard's avatar
Daniel Speckhard committed
117
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''Assert working directory has not been changed from parser.

    Arguments:
        previous_wd: The working directory recorded before parsing.
        current_wd: The working directory observed after parsing.

    Raises:
        AssertionError: If both values differ. The message names both
            directories, so that the failure is readable even where pytest does
            not rewrite the assertion.
    '''
    assert previous_wd == current_wd, (
        'Working directory changed from %r to %r' % (previous_wd, current_wd))


122
def run_parser(parser_name, mainfile):
    '''Parse ``mainfile`` with the parser registered under ``parser_name``.

    A fresh ``EntryArchive`` with an ``EntryMetadata`` section is created and
    handed to the parser. If the parser leaves the metadata domain unset, the
    parser's own domain is used. The archive is returned after ``add_metadata``
    has been applied with the given parser name.
    '''
    selected_parser = parser_dict[parser_name]

    archive = EntryArchive()
    entry_metadata = archive.m_create(EntryMetadata)

    test_logger = utils.get_logger(__name__)
    selected_parser.parse(mainfile, archive, logger=test_logger)

    if entry_metadata.domain is None:
        entry_metadata.domain = selected_parser.domain

    return add_metadata(archive, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
131
132


133
@pytest.fixture
def parsed_vasp_example() -> EntryArchive:
    '''Archive obtained by running 'parsers/vasp' on the perovskite XML example.'''
    vasp_mainfile = 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml'
    return run_parser('parsers/vasp', vasp_mainfile)
Markus Scheidgen's avatar
Markus Scheidgen committed
137
138


139
@pytest.fixture
def parsed_template_example() -> EntryArchive:
    '''Archive obtained by running 'parsers/template' on the template JSON example.'''
    template_mainfile = 'tests/data/templates/template.json'
    return run_parser('parsers/template', template_mainfile)
143
144


Markus Scheidgen's avatar
Markus Scheidgen committed
145
def parse_file(parser_name_and_mainfile) -> EntryArchive:
    '''Run ``run_parser`` for a ``(parser_name, mainfile)`` pair and return its archive.'''
    name, path = parser_name_and_mainfile
    archive = run_parser(name, path)
    return archive


150
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> EntryArchive:
    '''Archive for each ``(parser_name, mainfile)`` pair in ``parser_examples``.'''
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
155
156


157
def add_metadata(entry_archive: EntryArchive, **kwargs) -> EntryArchive:
    '''Fill the metadata of ``entry_archive`` with fixed test values.

    The upload id, entry id, entry hash and mainfile are set to placeholder
    strings first. Afterwards ``kwargs`` are applied through ``m_update``.

    Returns: The same ``entry_archive`` that was passed in.
    '''
    placeholder_values = (
        ('upload_id', 'test_upload_id'),
        ('entry_id', 'test_entry_id'),
        ('entry_hash', 'test_entry_hash'),
        ('mainfile', 'test/mainfile.txt'),
    )

    entry_metadata = entry_archive.metadata
    for quantity_name, placeholder in placeholder_values:
        setattr(entry_metadata, quantity_name, placeholder)

    entry_metadata.m_update(**kwargs)
    return entry_archive
165
166


167
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile, assert_parser_result):
    '''Run each example through its parser and check logs and working directory.'''
    wd_before_parsing = os.getcwd()

    archive = run_parser(parser_name, mainfile)
    assert_parser_result(archive)

    # The parser must leave the working directory as it found it.
    wd_after_parsing = os.getcwd()
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=wd_after_parsing)
Markus Scheidgen's avatar
Markus Scheidgen committed
174
175


176
def test_broken_xml_vasp(assert_parser_result):
    '''Parse the broken VASP XML example and expect warnings but no errors in the log.'''
    wd_before_parsing = os.getcwd()

    archive = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(archive, has_warnings=True)

    # The parser must leave the working directory as it found it.
    wd_after_parsing = os.getcwd()
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=wd_after_parsing)


185
186
187
188
189
190
191
192
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    '''Place a copy of ``latin-1.out`` in ``tests/data/parsers`` for the duration of a test.

    The copy is created before the test body runs and removed again afterwards.
    '''
    temporary_copy = 'tests/data/parsers/latin-1.out'
    copyfile('tests/data/latin-1.out', temporary_copy)
    yield
    os.remove(temporary_copy)


def test_match(raw_files, with_latin_1_file, no_warn):
    '''Match all files in ``tests/data/parsers`` and compare the number of matches.

    The directory is added to a staging upload. Every raw file is passed to
    ``match_parser``; files without a parser and files matched by a
    ``BrokenParser`` are not counted. The remaining count has to equal
    ``correct_num_output_files``.
    '''
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for path_info in upload_files.raw_directory_list(recursive=True, files_only=True):
        raw_path = path_info.path
        os_path = upload_files.raw_file_object(raw_path).os_path
        matched = match_parser(os_path)
        if matched is None or isinstance(matched, BrokenParser):
            continue
        matched_mainfiles[raw_path] = matched

    # On failure, the message lists every matched parser with its mainfile.
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join(
        '%s: %s' % (matched.name, raw_path)
        for raw_path, matched in matched_mainfiles.items())
Markus Scheidgen's avatar
Markus Scheidgen committed
207
208
209
210
211
212
213
214
215
216
217
218
219
220


def parser_in_dir(dir):
    '''Try to parse every matching file below ``dir`` and print the outcome.

    Only paths that contain ``'test'`` are considered. For each file with a
    matching parser, the file is parsed into a new archive and the archive is
    dumped to JSON. A line with the path, the parser and either ``SUCCESS`` or
    ``FAILURE`` (followed by the exception and a traceback) is printed.
    '''
    import traceback

    for root, _, file_names in os.walk(dir):
        candidate_paths = (os.path.join(root, file_name) for file_name in file_names)
        for file_path in candidate_paths:
            if 'test' not in file_path:
                continue

            parser = match_parser(file_path)
            if parser is None:
                continue

            try:
                archive = datamodel.EntryArchive()
                parser.parse(file_path, entry_archive=archive)
                # check if the result can be dumped
                dump_json(archive.m_to_dict())
            except Exception as e:
                # Best-effort diagnostics: report the failure and go on with the next file.
                print(file_path, parser, 'FAILURE', e)
                traceback.print_exc()
            else:
                print(file_path, parser, 'SUCCESS')


# Script entry point: try all parsers on the files below the directory that is
# given as the only command line argument.
if __name__ == '__main__':
    import sys
    import os

    # Validate explicitly instead of with ``assert``: assert statements are
    # removed when Python runs with -O, which would skip the check entirely.
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        sys.exit('One argument with an directory path is required.')

    parser_in_dir(sys.argv[1])