test_parsing.py 13.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20

21
from nomad import utils, files
22
from nomad.parsing import JSONStreamWriter, parser_dict, match_parser, BrokenParser
23
from nomad.parsing import LocalBackend, BadContextURI
24

25
# (parser name, mainfile path) pairs that are fed to the parametrized parser
# tests in this module.
# NOTE(review): the first entry uses the prefix 'test/' (not 'tests/'), so the
# filter below drops it unless that path exists on disk -- confirm whether the
# random parser example is meant to run.
# NOTE(review): three entries use the prefix 'parser/' instead of 'parsers/';
# confirm that this matches the names registered in parser_dict.
parser_examples = [
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]

# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image).
# An example is kept when its mainfile lives under 'tests' or exists on disk.
fixed_parser_examples = [
    example for example in parser_examples
    if example[1].startswith('tests') or os.path.exists(example[1])]
parser_examples = fixed_parser_examples

# Number of mainfiles under tests/data/parsers that test_match expects to be
# matched by a non-broken parser.
correct_num_output_files = 44
78

Daniel Speckhard's avatar
Daniel Speckhard committed
79

80
81
82
83
84
85
class TestLocalBackend:
    """Tests for opening/closing sections, adding values and using context
    URIs on a ``LocalBackend``."""

    @pytest.fixture(scope='function')
    def backend(self, meta_info):
        """Provide a fresh ``LocalBackend`` (created with ``debug=True``) per test."""
        return LocalBackend(meta_info, debug=True)

    def test_meta_info(self, meta_info, no_warn):
        """The ``meta_info`` fixture contains the ``section_topology`` entry."""
        assert 'section_topology' in meta_info

    def test_section(self, backend, no_warn):
        """Repeatedly opened ``section_run`` sections get the indices 0, 1, 2
        and values can be added to a specific open section by its index."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        # Two sections of the same type are open at the same time.
        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # The third argument addresses the section the value is added to.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        """``section_symmetry`` sections opened inside different
        ``section_system`` sections are numbered consecutively, and
        ``get_sections`` can be restricted by a second index argument."""
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # A system section without any symmetry sub section.
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 1
        backend.closeSection('section_symmetry', 1)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 1]
        # NOTE(review): the second argument presumably is the index of the
        # parent section_system -- confirm against LocalBackend.get_sections.
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [1]

    def test_section_override(self, backend, no_warn):
        """Test whether we can overwrite values already in the backend."""
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        # The written JSON is not inspected; the call only has to succeed.
        output = StringIO()
        backend.write_json(output)
        assert backend.get_value('atom_labels').tolist() == expected_value

    def test_two_sections(self, backend, no_warn):
        """A ``section_run`` and a ``section_entry_info`` are indexed
        independently and both appear as keys in the written JSON."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_entry_info')
        assert g_index == 0
        backend.addValue('parser_name', 'p0')
        backend.closeSection('section_entry_info', 0)

        assert backend.get_sections('section_run') == [0]
        assert backend.get_sections('section_entry_info') == [0]

        output = StringIO()
        backend.write_json(output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive
        assert 'section_entry_info' in archive

    def test_subsection(self, backend: LocalBackend, no_warn):
        """``section_method`` sections end up in the expected ``section_run``:
        two in the first run and one in the second."""
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        # Opened after both runs were closed; the assertions below expect it
        # in the second run.
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        runs = backend.data['section_run']
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_context(self, backend: LocalBackend, no_warn):
        """Values added while a context URI is open are stored in the section
        that the URI addresses."""
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # A nested URI only has to be accepted; nothing is added through it.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        runs = backend.data['section_run']
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: LocalBackend, no_warn):
        """The same context is opened twice, a ``section_method`` is added
        each time, and ``backend.data['section_method']`` must have length 1."""
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        assert len(backend.data['section_method']) == 1

    def test_bad_context(self, backend: LocalBackend, no_warn):
        """Opening a malformed context URI raises ``BadContextURI``."""
        # Unlike the URIs used in the other tests, neither of these starts
        # with a '/'.
        for bad_uri in ('section_run/0', 'dsfds'):
            with pytest.raises(BadContextURI):
                backend.openContext(bad_uri)

240
241
242
243
244
245
246
247
248

def create_reference(data, pretty):
    """Serialize ``data`` to the JSON text that the stream writer is compared to.

    With a truthy ``pretty`` the output is indented by two spaces, otherwise
    the most compact separators (``','`` and ``':'``) are used.
    """
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


@pytest.mark.parametrize("pretty", [False, True])
def test_stream_generator(pretty, no_warn):
    """Writing two objects event by event with ``JSONStreamWriter`` produces
    the same text as ``json.dumps`` of the equivalent data."""
    first_object = {'key1': 'value', 'key2': 1}
    second_object = {'key': {'key': 'value'}}
    expected = create_reference([first_object, second_object], pretty)

    stream = StringIO()
    writer = JSONStreamWriter(stream, pretty=pretty)
    writer.open_array()

    # first_object
    writer.open_object()
    writer.key('key1')
    writer.value('value')
    writer.key('key2')
    writer.value(1)
    writer.close_object()

    # second_object, including its nested object
    writer.open_object()
    writer.key('key')
    writer.open_object()
    writer.key('key')
    writer.value('value')
    writer.close_object()
    writer.close_object()

    writer.close_array()
    writer.close()

    assert expected == stream.getvalue()

283

284
def assert_parser_result(backend, error=False):
    """Check the parse status reported by ``backend``.

    ``backend.status`` is unpacked into ``(status, errors)``. The status must
    be ``'ParseSuccess'``. With ``error=True`` at least one error has to be
    present; otherwise ``errors`` must be ``None`` or empty.
    """
    status, errors = backend.status
    assert status == 'ParseSuccess'

    if not error:
        assert errors is None or len(errors) == 0
        return

    assert len(errors) > 0
291
292


Daniel Speckhard's avatar
Daniel Speckhard committed
293
294
295
296
297
def assert_parser_dir_unchanged(previous_wd, current_wd):
    """Assert working directory has not been changed from parser.

    ``previous_wd`` is the directory recorded before parsing and
    ``current_wd`` the one observed afterwards. On a mismatch the assertion
    message names both directories.
    """
    assert previous_wd == current_wd, (
        'working directory changed from %r to %r' % (previous_wd, current_wd))


298
299
def run_parser(parser_name, mainfile):
    """Run the parser registered as ``parser_name`` on ``mainfile``.

    The parser is looked up in ``parser_dict`` and run with a logger named
    after this module. The result is passed through
    ``add_calculation_info`` (with ``parser_name`` as an extra value) and
    returned.
    """
    selected_parser = parser_dict[parser_name]
    backend = selected_parser.run(mainfile, logger=utils.get_logger(__name__))
    return add_calculation_info(backend, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
302
303


304
305
306
@pytest.fixture
def parsed_vasp_example() -> LocalBackend:
    """Fixture: the result of running the vasp parser on the perovskite
    example file that lives in the parser's dependency directory."""
    mainfile = 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml'
    return run_parser('parsers/vasp', mainfile)
Markus Scheidgen's avatar
Markus Scheidgen committed
308
309


310
311
312
313
314
315
@pytest.fixture
def parsed_template_example() -> LocalBackend:
    """Fixture: the result of running the template parser on the template
    example from the test data."""
    mainfile = 'tests/data/parsers/template.json'
    return run_parser('parsers/template', mainfile)


316
317
def parse_file(parser_name_and_mainfile) -> LocalBackend:
    """Run a parser for a ``(parser_name, mainfile)`` pair given as one argument.

    The pair is unpacked and forwarded to ``run_parser``, whose result is
    returned.
    """
    name, path = parser_name_and_mainfile
    return run_parser(name, path)


321
322
323
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> LocalBackend:
    """Fixture parametrized over ``parser_examples``: the result of running
    each example's parser on its mainfile."""
    name, path = request.param
    return run_parser(name, path)
326
327


328
def add_calculation_info(backend: LocalBackend, **kwargs) -> LocalBackend:
    """Add a ``section_entry_info`` with fixed test values to ``backend``.

    The fixed ids, hash and mainfile are added first, followed by every
    keyword argument as an additional ``(key, value)`` pair. The same
    ``backend`` object is returned.
    """
    fixed_values = [
        ('upload_id', 'test_upload_id'),
        ('calc_id', 'test_calc_id'),
        ('calc_hash', 'test_calc_hash'),
        ('mainfile', 'test/mainfile.txt'),
    ]

    backend.openNonOverlappingSection('section_entry_info')
    # Concatenate instead of merging into a dict so that a keyword argument
    # that repeats a fixed key is still added as a separate value.
    for name, value in fixed_values + list(kwargs.items()):
        backend.addValue(name, value)
    backend.closeNonOverlappingSection('section_entry_info')

    return backend
338
339


340
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    """Each example parses successfully without errors and the parser leaves
    the working directory untouched."""
    wd_before_parsing = os.getcwd()

    backend = run_parser(parser_name, mainfile)
    assert_parser_result(backend)

    # Compare against the working directory as it is after parsing.
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
347
348


349
350
351
352
353
354
355
356
357
def test_broken_xml_vasp():
    """Parsing the broken vasp XML example still reports ``'ParseSuccess'``
    but with at least one error, and the working directory stays the same."""
    wd_before_parsing = os.getcwd()

    backend = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(backend, error=True)

    # Compare against the working directory as it is after parsing.
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=os.getcwd())


358
359
360
361
362
def test_match(raw_files, no_warn):
    """Matching all raw files of the parser test data yields exactly
    ``correct_num_output_files`` mainfiles with a non-broken parser."""
    staging_files = files.StagingUploadFiles(
        'example_upload_id', create=True, is_authorized=lambda: True)
    staging_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for candidate in staging_files.raw_file_manifest():
        matched = match_parser(candidate, staging_files)
        # Skip files without a match and files matched by a BrokenParser.
        if matched is None or isinstance(matched, BrokenParser):
            continue
        matched_mainfiles[candidate] = matched

    # On failure the message lists every matched parser and its mainfile.
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join(
        '%s: %s' % (matched.name, candidate)
        for candidate, matched in matched_mainfiles.items())