test_parsing.py 12.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20

21
from nomad import utils, files
22
from nomad.parsing import JSONStreamWriter, parser_dict, match_parser, BrokenParser
23
from nomad.parsing import LocalBackend, BadContextURI
24

25
# Each entry pairs a parser name with the path of an example mainfile for it.
# NOTE(review): the first path starts with 'test/' while all other in-repo
# paths start with 'tests/'; the filter applied to this list further down only
# keeps non-existing mainfiles that start with 'tests' -- confirm whether the
# random parser example is meant to be dropped by it.
parser_examples = [
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out'),
]
67

68
69
# Some examples use external mainfiles that might not exist in every testing
# environment (e.g. in the nomad docker image). Keep an example only if its
# mainfile exists on disk or its path starts with 'tests'.
fixed_parser_examples = [
    (parser, mainfile)
    for parser, mainfile in parser_examples
    if os.path.exists(mainfile) or mainfile.startswith('tests')]
parser_examples = fixed_parser_examples
75

76

77
# Number of mainfiles that ``test_match`` expects to be matched by a
# non-broken parser after adding 'tests/data/parsers' as raw files.
correct_num_output_files = 44
78

Daniel Speckhard's avatar
Daniel Speckhard committed
79

80
81
82
83
84
85
class TestLocalBackend(object):
    """Tests for section, value, and context handling of ``LocalBackend``."""

    @pytest.fixture(scope='function')
    def backend(self, meta_info):
        """Provide a new ``LocalBackend`` (debug mode) for each test function."""
        return LocalBackend(meta_info, debug=True)

    def test_meta_info(self, meta_info, no_warn):
        """The ``meta_info`` fixture must contain ``section_topology``."""
        assert 'section_topology' in meta_info

    def test_section(self, backend, no_warn):
        """Opening ``section_run`` three times yields indices 0, 1, 2 with separate values."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # Two sections are open at once, so the target index is given explicitly.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_section_override(self, backend, no_warn):
        """ Test whether we can overwrite values already in the backend."""
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        # The written JSON is not inspected; the call only has to succeed.
        output = StringIO()
        backend.write_json(output)
        assert backend.get_value('atom_labels').tolist() == expected_value

    def test_two_sections(self, backend, no_warn):
        """``section_run`` and ``section_entry_info`` are indexed independently and both written to JSON."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_entry_info')
        assert g_index == 0
        backend.addValue('parser_name', 'p0')
        backend.closeSection('section_entry_info', 0)

        assert backend.get_sections('section_run') == [0]
        assert backend.get_sections('section_entry_info') == [0]

        output = StringIO()
        backend.write_json(output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive
        assert 'section_entry_info' in archive

    def test_subsection(self, backend: LocalBackend, no_warn):
        """``section_method`` sections end up under the expected ``section_run``.

        The asserts pin two method sections for the first run and one for the
        second run, which is opened later.
        """
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        runs = backend.data['section_run']
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_context(self, backend: LocalBackend, no_warn):
        """Values added inside an opened context land in the run addressed by the URI."""
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # A nested section URI must be accepted as a context as well.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        runs = backend.data['section_run']
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: LocalBackend, no_warn):
        """The same context is opened twice, adding a ``section_method`` each time."""
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        assert len(backend.data['section_method']) == 1

    def test_bad_context(self, backend: LocalBackend, no_warn):
        """Opening a context with either of these URIs must raise ``BadContextURI``."""
        for bad_uri in ('section_run/0', 'dsfds'):
            with pytest.raises(BadContextURI):
                backend.openContext(bad_uri)

218
219
220
221
222
223
224
225
226

def create_reference(data, pretty):
    """Return the reference JSON text for ``data``.

    Args:
        data: JSON-serializable object.
        pretty: If truthy, serialize with an indent of 2; otherwise use the
            most compact separators (``','`` and ``':'``).

    Returns:
        The serialized JSON string.
    """
    if not pretty:
        return json.dumps(data, separators=(',', ':'))
    return json.dumps(data, indent=2)


@pytest.mark.parametrize("pretty", [False, True])
def test_stream_generator(pretty, no_warn):
    """Write a small nested structure with ``JSONStreamWriter`` and compare it
    with the ``json.dumps`` based reference from ``create_reference``."""
    example_data = [
        {
            'key1': 'value',
            'key2': 1
        },
        {
            'key': {
                'key': 'value'
            }
        }
    ]

    out = StringIO()
    # The writer calls below mirror the structure of ``example_data``.
    writer = JSONStreamWriter(out, pretty=pretty)
    writer.open_array()
    writer.open_object()
    writer.key('key1')
    writer.value('value')
    writer.key('key2')
    writer.value(1)
    writer.close_object()
    writer.open_object()
    writer.key('key')
    writer.open_object()
    writer.key('key')
    writer.value('value')
    writer.close_object()
    writer.close_object()
    writer.close_array()
    writer.close()

    assert create_reference(example_data, pretty) == out.getvalue()

261

262
def assert_parser_result(backend, error=False):
    """Assert that a parser run finished and reported errors as expected.

    Args:
        backend: Object whose ``status`` attribute unpacks into a
            ``(status, errors)`` pair.
        error: If true, at least one error must have been reported;
            otherwise ``errors`` must be ``None`` or empty.

    Raises:
        AssertionError: If the status is not ``'ParseSuccess'`` or the
            reported errors do not match ``error``.
    """
    status, errors = backend.status
    assert status == 'ParseSuccess'
    if error:
        # Check for ``None`` first so that a missing error list fails the
        # assertion instead of raising ``TypeError`` from ``len``.
        assert errors is not None and len(errors) > 0
    else:
        assert errors is None or len(errors) == 0
269
270


Daniel Speckhard's avatar
Daniel Speckhard committed
271
272
273
274
275
def assert_parser_dir_unchanged(previous_wd, current_wd):
    """Fail unless the working directory after parsing equals the one before.

    Args:
        previous_wd: Working directory recorded before the parser ran.
        current_wd: Working directory recorded after the parser ran.

    Raises:
        AssertionError: If the two values differ.
    """
    same_directory = previous_wd == current_wd
    assert same_directory


276
277
def run_parser(parser_name, mainfile):
    """Run the named parser on ``mainfile`` and add test entry info to the result.

    Args:
        parser_name: Key of the parser in ``parser_dict``.
        mainfile: Path of the mainfile handed to the parser's ``run``.

    Returns:
        The parser result after ``add_calculation_info`` was applied to it.

    Raises:
        KeyError: If ``parser_name`` is not a key of ``parser_dict``.
    """
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
280
281


282
283
284
@pytest.fixture
def parsed_vasp_example() -> LocalBackend:
    """Fixture returning the result of running 'parsers/vasp' on the perovskite example.

    The mainfile lives under 'dependencies/', i.e. outside of 'tests/'.
    """
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
286
287


288
289
290
291
292
293
@pytest.fixture
def parsed_template_example() -> LocalBackend:
    """Fixture returning the result of running 'parsers/template' on the template example."""
    parser_name = 'parsers/template'
    mainfile = 'tests/data/parsers/template.json'
    return run_parser(parser_name, mainfile)


294
295
def parse_file(parser_name_and_mainfile) -> LocalBackend:
    """Run a parser given a ``(parser_name, mainfile)`` pair.

    Args:
        parser_name_and_mainfile: Two-element iterable holding the parser
            name and the mainfile path, in that order.

    Returns:
        The result of ``run_parser`` for that pair.
    """
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


299
300
301
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> LocalBackend:
    """Fixture parametrized over ``parser_examples``; returns the parsed result of each.

    Test ids are built as '<parser name>-<mainfile>'.
    """
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
304
305


306
def add_calculation_info(backend: LocalBackend, **kwargs) -> LocalBackend:
    """Add a ``section_entry_info`` with fixed test values to ``backend``.

    Args:
        backend: Backend to add the section to.
        **kwargs: Additional key/value pairs added to the same section
            after the fixed values.

    Returns:
        The same ``backend`` object that was passed in.
    """
    backend.openNonOverlappingSection('section_entry_info')
    backend.addValue('upload_id', 'test_upload_id')
    backend.addValue('calc_id', 'test_calc_id')
    backend.addValue('calc_hash', 'test_calc_hash')
    backend.addValue('mainfile', 'test/mainfile.txt')
    for key, value in kwargs.items():
        backend.addValue(key, value)
    backend.closeNonOverlappingSection('section_entry_info')
    return backend
316
317


318
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    """Run each example through its parser; expect success, no errors, and an unchanged cwd."""
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
325
326


327
328
329
330
331
332
333
334
335
def test_broken_xml_vasp():
    """Parsing the broken VASP XML example must report errors and leave the cwd untouched."""
    # Remember the working directory so a change made by the parser is detected.
    wd_before_parsing = os.getcwd()
    backend = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(backend, error=True)
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=os.getcwd())


336
337
338
339
340
def test_match(raw_files, no_warn):
    """Match every raw file under 'tests/data/parsers' and check the number of matches.

    A file counts as matched when ``match_parser`` returns a parser that is
    not a ``BrokenParser``. On failure the assertion message lists every
    matched '<parser name>: <mainfile>' pair.
    """
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(mainfile, upload_files)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])