test_parsing.py 17.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files
23
from nomad.metainfo.legacy import convert
24
from nomad.parsing import JSONStreamWriter, parser_dict, match_parser, BrokenParser
25
26
from nomad.parsing import LocalBackend, BadContextURI, MetainfoBackend

27

28
# Pairs of (parser name, mainfile path) that the parser tests below are
# parametrized with.
parser_examples = [
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]

# Some mainfiles live outside of the 'tests' tree and might not exist in every
# testing environment (e.g. in the nomad docker image). Keep an example only
# if its mainfile path starts with 'tests' or the file is actually present.
fixed_parser_examples = []
for parser, mainfile in parser_examples:
    keep_example = mainfile.startswith('tests') or os.path.exists(mainfile)
    if keep_example:
        fixed_parser_examples.append((parser, mainfile))
parser_examples = fixed_parser_examples
81

82

83
# Number of raw files that test_match expects to be matched by a parser that
# is not a BrokenParser, after adding 'tests/data/parsers' to a staging upload.
correct_num_output_files = 50
84

Daniel Speckhard's avatar
Daniel Speckhard committed
85

86
87
88
89
90
91
class TestLocalBackend(object):
    '''Tests for opening/closing sections and adding values on a LocalBackend.'''

    @pytest.fixture(scope='function')
    def backend(self, meta_info):
        '''Provide a LocalBackend in debug mode, created anew for each test function.'''
        return LocalBackend(meta_info, debug=True)

    def test_meta_info(self, meta_info, no_warn):
        '''The meta_info fixture contains the 'section_topology' definition.'''
        assert 'section_topology' in meta_info

    def test_section(self, backend, no_warn):
        '''Repeatedly opened sections get increasing indices and keep their own values.'''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # Two runs are open at once; the third argument selects the target run.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        '''Sub sections can be listed globally and per parent section index.'''
        backend.openSection('section_run')

        # First system: contains symmetry section 0.
        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # Second system: intentionally left without a symmetry section.
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        # Third system: contains symmetry section 1.
        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 1
        backend.closeSection('section_symmetry', 1)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 1]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [1]

    def test_section_override(self, backend, no_warn):
        '''Test whether we can overwrite values already in the backend.'''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        # The JSON output itself is not inspected; write_json is only exercised.
        output = StringIO()
        backend.write_json(output)
        assert backend.get_value('atom_labels').tolist() == expected_value

    def test_two_sections(self, backend, no_warn):
        '''Two different top-level sections both show up in the written JSON archive.'''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_entry_info')
        assert g_index == 0
        backend.addValue('parser_name', 'p0')
        backend.closeSection('section_entry_info', 0)

        assert backend.get_sections('section_run') == [0]
        assert backend.get_sections('section_entry_info') == [0]

        output = StringIO()
        backend.write_json(output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive
        assert 'section_entry_info' in archive

    def test_subsection(self, backend: LocalBackend, no_warn):
        '''Method sections are stored under the run that was opened last.'''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        # Opened after both runs were closed; asserted below to end up in run 1.
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        runs = backend.data['section_run']
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: LocalBackend, no_warn):
        '''A section opened with parent_index is stored under that (already closed) parent.'''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        runs = backend.data['section_run']
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: LocalBackend, no_warn):
        '''parent_index selects the first parent even if a second one was opened later.'''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        # Open and immediately close a second calculation section.
        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        runs = backend.data['section_run']
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1
        assert 'section_dos' not in run['section_single_configuration_calculation'][1]

    def test_context(self, backend: LocalBackend, no_warn):
        '''Values added inside an opened context end up in the addressed section.'''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # A context on a nested section can be opened and closed as well.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        runs = backend.data['section_run']
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: LocalBackend, no_warn):
        '''Open the same run context twice and add a method section each time.'''
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        assert len(backend.data['section_method']) == 1

    def test_bad_context(self, backend: LocalBackend, no_warn):
        '''Opening a context with one of the malformed URIs below raises BadContextURI.'''
        # pytest.raises fails the test with a clear "DID NOT RAISE" message if
        # openContext accepts the URI, replacing the manual try/assert False.
        for bad_uri in ('section_run/0', 'dsfds'):
            with pytest.raises(BadContextURI):
                backend.openContext(bad_uri)

282
283
284
285
286
287
288
289
290

def create_reference(data, pretty):
    '''
    Serialize ``data`` with json.dumps: indented by 2 if ``pretty`` is truthy,
    otherwise in the most compact form without any whitespace.
    '''
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


@pytest.mark.parametrize("pretty", [False, True])
def test_stream_generator(pretty, no_warn):
    '''
    Write a small nested structure event by event with JSONStreamWriter and
    compare the stream content with the json.dumps based reference.
    '''
    expected = [
        {'key1': 'value', 'key2': 1},
        {'key': {'key': 'value'}},
    ]

    stream = StringIO()
    writer = JSONStreamWriter(stream, pretty=pretty)
    writer.open_array()

    # First array element: a flat object with two keys.
    writer.open_object()
    for name, content in (('key1', 'value'), ('key2', 1)):
        writer.key(name)
        writer.value(content)
    writer.close_object()

    # Second array element: an object nested inside an object.
    writer.open_object()
    writer.key('key')
    writer.open_object()
    writer.key('key')
    writer.value('value')
    writer.close_object()
    writer.close_object()

    writer.close_array()
    writer.close()

    assert create_reference(expected, pretty) == stream.getvalue()

325

326
def assert_parser_result(backend, error=False):
    '''
    Assert that ``backend.status`` reports 'ParseSuccess'. With ``error=True``
    at least one error must be present, otherwise errors must be None or empty.
    '''
    status, errors = backend.status
    assert status == 'ParseSuccess'

    if not error:
        assert errors is None or len(errors) == 0
        return

    assert len(errors) > 0
333
334


Daniel Speckhard's avatar
Daniel Speckhard committed
335
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''Assert that the working directory after parsing equals the one before.'''
    unchanged = previous_wd == current_wd
    assert unchanged


340
def run_parser(parser_name, mainfile, backend_factory=None):
    '''
    Run the parser registered under ``parser_name`` in ``parser_dict`` on ``mainfile``.

    Arguments:
        parser_name: Key of the parser in ``parser_dict``.
        mainfile: Path of the file that is handed to ``parser.run``.
        backend_factory: Optional replacement for the parser's ``backend_factory``
            attribute. It is only applied to parsers that have such an attribute
            and is reverted once the parser has run.

    Returns: The result of ``parser.run`` with its ``domain`` set to the parser's
        domain and the test entry info of ``add_calculation_info`` added.
    '''
    parser = parser_dict[parser_name]

    swap_factory = backend_factory is not None and hasattr(parser, 'backend_factory')
    if swap_factory:
        original_backend_factory = parser.backend_factory
        parser.backend_factory = backend_factory

    try:
        result = parser.run(mainfile, logger=utils.get_logger(__name__))
        result.domain = parser.domain
    finally:
        # The parser object comes from the imported parser_dict; restore its
        # factory even if parsing raised, so that the override cannot leak
        # into later uses of the same parser.
        if swap_factory:
            parser.backend_factory = original_backend_factory

    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
350
351


352
353
354
@pytest.fixture
def parsed_vasp_example() -> LocalBackend:
    '''Provide the result of running the vasp parser on the perovskite.xml example.'''
    mainfile = 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml'
    return run_parser('parsers/vasp', mainfile)
Markus Scheidgen's avatar
Markus Scheidgen committed
356
357


358
359
360
361
362
363
@pytest.fixture
def parsed_template_example() -> LocalBackend:
    '''Provide the result of running the template parser on template.json.'''
    mainfile = 'tests/data/parsers/template.json'
    return run_parser('parsers/template', mainfile)


364
365
366
367
368
369
@pytest.fixture(scope="session")
def parsed_template_no_system() -> LocalBackend:
    '''Provide the (session scoped) template parser result for template_no_system.json.'''
    mainfile = 'tests/data/parsers/template_no_system.json'
    return run_parser('parsers/template', mainfile)


370
371
def parse_file(parser_name_and_mainfile) -> LocalBackend:
    '''Run a parser given as a single ``(parser_name, mainfile)`` pair.'''
    name, path = parser_name_and_mainfile
    return run_parser(name, path)


375
376
377
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> LocalBackend:
    '''Provide the parser result for each ``(parser_name, mainfile)`` in parser_examples.'''
    name, path = request.param
    return run_parser(name, path)
380
381


382
383
384
385
386
387
388
389
390
391
392
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example_metainfo(request) -> LocalBackend:
    '''
    Provide the parser result for each entry of parser_examples, with the
    parser's backend factory replaced by one that creates a MetainfoBackend.
    '''
    def metainfo_backend(env, logger):
        # Build the backend from the converted legacy environment.
        return MetainfoBackend(convert(env), logger=logger)

    name, path = request.param
    return run_parser(name, path, backend_factory=metainfo_backend)


393
def add_calculation_info(backend: LocalBackend, **kwargs) -> LocalBackend:
    '''
    Add a 'section_entry_info' with fixed test ids to ``backend`` and return it.

    Every keyword argument is added as a further value to the same section,
    after the fixed test values.
    '''
    section = 'section_entry_info'
    fixed_values = [
        ('upload_id', 'test_upload_id'),
        ('calc_id', 'test_calc_id'),
        ('calc_hash', 'test_calc_hash'),
        ('mainfile', 'test/mainfile.txt'),
    ]

    backend.openNonOverlappingSection(section)
    for quantity, content in fixed_values + list(kwargs.items()):
        backend.addValue(quantity, content)
    backend.closeNonOverlappingSection(section)

    return backend
403
404


405
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    '''Each example parses without errors and leaves the working directory untouched.'''
    # Remember the working directory so a change made by the parser is detected.
    cwd_before = os.getcwd()

    backend = run_parser(parser_name, mainfile)
    assert_parser_result(backend)

    assert_parser_dir_unchanged(cwd_before, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
412
413


414
415
416
417
def test_parser_metainfo(parsed_example_metainfo):
    '''Passes as soon as the parsed_example_metainfo fixture could be created.'''


418
419
420
421
422
423
424
425
426
def test_broken_xml_vasp():
    '''The vasp parser reports errors for broken.xml and keeps the working directory.'''
    # Remember the working directory so a change made by the parser is detected.
    cwd_before = os.getcwd()

    backend = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(backend, error=True)

    assert_parser_dir_unchanged(cwd_before, current_wd=os.getcwd())


427
428
429
430
431
432
433
434
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    '''Copy latin-1.out into tests/data/parsers for the test and remove it afterwards.'''
    temporary_copy = 'tests/data/parsers/latin-1.out'
    copyfile('tests/data/latin-1.out', temporary_copy)
    yield
    os.remove(temporary_copy)


def test_match(raw_files, with_latin_1_file, no_warn):
    '''
    Add tests/data/parsers to a staging upload and check how many of its raw
    files are matched by a parser that is not a BrokenParser.
    '''
    staging = files.StagingUploadFiles(
        'example_upload_id', create=True, is_authorized=lambda: True)
    staging.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for candidate in staging.raw_file_manifest():
        matched = match_parser(candidate, staging)
        if matched is None or isinstance(matched, BrokenParser):
            continue
        matched_mainfiles[candidate] = matched

    # On failure, list every match to make the unexpected one easy to spot.
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join(
        '%s: %s' % (found.name, path) for path, found in matched_mainfiles.items())