test_parsing.py 16.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files
23
from nomad.parsing import JSONStreamWriter, parser_dict, match_parser, BrokenParser
24
from nomad.parsing import LocalBackend, BadContextURI
25

26
# Pairs of (parser name, mainfile path) that the parser tests below are
# parametrized with.
parser_examples = [
    # NOTE(review): this path starts with 'test/' (not 'tests/'), so the filter
    # below drops the entry unless the path exists on disk - confirm intended.
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.json'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    # NOTE(review): the 'parser/' prefix (singular) of the fleur, molcas and
    # onetep entries differs from the 'parsers/' prefix used everywhere else;
    # verify against the names registered in parser_dict.
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]

# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image).
# Entries are kept when the mainfile exists on disk or lives below 'tests'.
fixed_parser_examples = [
    (parser, mainfile)
    for parser, mainfile in parser_examples
    if os.path.exists(mainfile) or mainfile.startswith('tests')]
parser_examples = fixed_parser_examples
79

80

81
# Number of mainfiles that test_match expects to be matched by a non-broken
# parser once 'tests/data/parsers' has been added as raw files to an upload.
correct_num_output_files = 50
82

Daniel Speckhard's avatar
Daniel Speckhard committed
83

84
85
86
87
88
89
class TestLocalBackend(object):
    """Tests for section, value and context handling of ``LocalBackend``."""

    @pytest.fixture(scope='function')
    def backend(self, meta_info):
        """Provide a new ``LocalBackend`` (created with ``debug=True``) per test."""
        return LocalBackend(meta_info, debug=True)

    def test_meta_info(self, meta_info, no_warn):
        """The loaded meta info contains the ``section_topology`` definition."""
        assert 'section_topology' in meta_info

    def test_section(self, backend, no_warn):
        """Opening the same section repeatedly yields consecutive indices."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # Values are added to two simultaneously open sections by index.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        """Sub section indices can be queried globally and per parent index."""
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # The second system deliberately gets no symmetry sub section.
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 1
        backend.closeSection('section_symmetry', 1)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 1]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [1]

    def test_section_override(self, backend, no_warn):
        """ Test whether we can overwrite values already in the backend."""
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        output = StringIO()
        backend.write_json(output)
        assert backend.get_value('atom_labels').tolist() == expected_value

    def test_two_sections(self, backend, no_warn):
        """Two different root sections both appear in the written JSON."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_entry_info')
        assert g_index == 0
        backend.addValue('parser_name', 'p0')
        backend.closeSection('section_entry_info', 0)

        assert backend.get_sections('section_run') == [0]
        assert backend.get_sections('section_entry_info') == [0]

        output = StringIO()
        backend.write_json(output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive
        assert 'section_entry_info' in archive

    def test_subsection(self, backend: LocalBackend, no_warn):
        """Check how ``section_method`` instances are spread over two runs."""
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        # Opened after both runs were closed; expected below in the second run.
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        runs = backend.data['section_run']
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: LocalBackend, no_warn):
        """A section can be opened in an already closed parent via ``parent_index``."""
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        runs = backend.data['section_run']
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: LocalBackend, no_warn):
        """``parent_index`` selects the first parent even if a later one exists."""
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        # Open and immediately close a second calculation section.
        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        runs = backend.data['section_run']
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1
        assert 'section_dos' not in run['section_single_configuration_calculation'][1]

    def test_context(self, backend: LocalBackend, no_warn):
        """Values added within an opened context end up in the addressed section."""
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # A nested context URI must be accepted as well.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        runs = backend.data['section_run']
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: LocalBackend, no_warn):
        """Check ``section_method`` data after opening the same context twice."""
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        assert len(backend.data['section_method']) == 1

    def test_bad_context(self, backend: LocalBackend, no_warn):
        """Malformed context URIs are rejected with ``BadContextURI``."""
        for bad_uri in ('section_run/0', 'dsfds'):
            # pytest.raises fails the test if no BadContextURI is raised.
            with pytest.raises(BadContextURI):
                backend.openContext(bad_uri)

280
281
282
283
284
285
286
287
288

def create_reference(data, pretty):
    """Serialize ``data`` to the JSON text the stream writer is compared with.

    With a truthy ``pretty`` the output is indented by two spaces, otherwise
    the most compact separators (``,`` and ``:``) are used.
    """
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


@pytest.mark.parametrize("pretty", [False, True])
def test_stream_generator(pretty, no_warn):
    """JSONStreamWriter output equals ``json.dumps`` of the same structure."""
    example_data = [
        {
            'key1': 'value',
            'key2': 1
        },
        {
            'key': {
                'key': 'value'
            }
        }
    ]

    out = StringIO()
    # Replay example_data event by event through the stream writer.
    writer = JSONStreamWriter(out, pretty=pretty)
    writer.open_array()
    writer.open_object()
    writer.key('key1')
    writer.value('value')
    writer.key('key2')
    writer.value(1)
    writer.close_object()
    writer.open_object()
    writer.key('key')
    writer.open_object()
    writer.key('key')
    writer.value('value')
    writer.close_object()
    writer.close_object()
    writer.close_array()
    writer.close()

    assert create_reference(example_data, pretty) == out.getvalue()

323

324
def assert_parser_result(backend, error=False):
    """Assert that ``backend`` reports a successful parse.

    Args:
        backend: Object whose ``status`` attribute is a ``(status, errors)`` pair.
        error: If true, at least one error must have been recorded; otherwise
            ``errors`` must be ``None`` or empty.

    Raises:
        AssertionError: If the status is not ``'ParseSuccess'`` or the recorded
            errors do not match the ``error`` expectation.
    """
    status, errors = backend.status
    # The status is expected to be 'ParseSuccess' in both modes.
    assert status == 'ParseSuccess'
    if error:
        assert len(errors) > 0
    else:
        assert errors is None or len(errors) == 0
331
332


Daniel Speckhard's avatar
Daniel Speckhard committed
333
334
335
336
337
def assert_parser_dir_unchanged(previous_wd, current_wd):
    """Fail if the working directory after parsing differs from the one before.

    Both arguments are compared for equality; nothing is returned.
    """
    same_directory = previous_wd == current_wd
    assert same_directory


338
339
def run_parser(parser_name, mainfile):
    """Run the parser registered as ``parser_name`` on ``mainfile``.

    The parser is looked up in ``parser_dict``; its ``domain`` is copied onto
    the result, which is then passed through ``add_calculation_info`` with the
    parser name before being returned.
    """
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))
    result.domain = parser.domain
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
343
344


345
346
347
@pytest.fixture
def parsed_vasp_example() -> LocalBackend:
    """Result of running the vasp parser on the perovskite.xml example."""
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
349
350


351
352
353
354
355
356
@pytest.fixture
def parsed_template_example() -> LocalBackend:
    """Result of running the template parser on the template.json test file."""
    mainfile = 'tests/data/parsers/template.json'
    return run_parser('parsers/template', mainfile)


357
358
359
360
361
362
@pytest.fixture(scope="session")
def parsed_template_no_system() -> LocalBackend:
    """Session-scoped template parser result for template_no_system.json."""
    mainfile = 'tests/data/parsers/template_no_system.json'
    return run_parser('parsers/template', mainfile)


363
364
def parse_file(parser_name_and_mainfile) -> LocalBackend:
    """Run a parser given a single ``(parser_name, mainfile)`` pair."""
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


368
369
370
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> LocalBackend:
    """Parametrized fixture yielding the parser result for each example pair."""
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
373
374


375
def add_calculation_info(backend: LocalBackend, **kwargs) -> LocalBackend:
    """Add a ``section_entry_info`` with fixed test ids to ``backend``.

    Besides the fixed upload id, calc id, calc hash and mainfile values, every
    keyword argument is added as an additional value of that section.

    Returns:
        The same ``backend`` instance that was passed in.
    """
    backend.openNonOverlappingSection('section_entry_info')
    backend.addValue('upload_id', 'test_upload_id')
    backend.addValue('calc_id', 'test_calc_id')
    backend.addValue('calc_hash', 'test_calc_hash')
    backend.addValue('mainfile', 'test/mainfile.txt')
    for key, value in kwargs.items():
        backend.addValue(key, value)
    backend.closeNonOverlappingSection('section_entry_info')
    return backend
385
386


387
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    """Each example parses without errors and leaves the cwd untouched."""
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
394
395


396
397
398
399
400
401
402
403
404
def test_broken_xml_vasp():
    """A broken vasp XML still parses, but records at least one error."""
    wd_before_parsing = os.getcwd()
    backend = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(backend, error=True)
    # The parser must not have switched the working directory.
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=os.getcwd())


405
406
407
408
409
410
411
412
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    """Place a copy of latin-1.out in tests/data/parsers for one test.

    The copy is created before the test runs and removed again afterwards.
    """
    temporary_copy = 'tests/data/parsers/latin-1.out'
    copyfile('tests/data/latin-1.out', temporary_copy)
    yield
    os.remove(temporary_copy)


def test_match(raw_files, with_latin_1_file, no_warn):
    """The expected number of raw files is matched by a non-broken parser."""
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    # Collect every mainfile that is matched by a parser other than BrokenParser.
    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(mainfile, upload_files)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    # On failure the message lists all matches to ease finding the culprit.
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])