test_parsing.py 12.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20

21
from nomad import utils, files
22
from nomad.parsing import JSONStreamWriter, parser_dict, match_parser, BrokenParser
23
from nomad.parsing import LocalBackend, BadContextURI
24

25
# Pairs of (parser name, mainfile path) used to parametrize the parser tests.
parser_examples = [
    # NOTE(review): this path starts with 'test/' rather than 'tests/'. The
    # filter below only keeps it if the path exists on disk — confirm intended.
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    # NOTE(review): some entries below use a 'parser/' (singular) prefix;
    # presumably this matches the names registered in parser_dict — verify.
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]

# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image). Paths that
# start with 'tests' are always kept, whether or not they exist on disk.
fixed_parser_examples = []
for parser, mainfile in parser_examples:
    if os.path.exists(mainfile) or mainfile.startswith('tests'):
        fixed_parser_examples.append((parser, mainfile))
parser_examples = fixed_parser_examples

# Number of mainfiles that test_match expects to be matched to a parser.
correct_num_output_files = 43
78

Daniel Speckhard's avatar
Daniel Speckhard committed
79

80
81
82
83
84
85
class TestLocalBackend(object):
    """Tests for section, value, and context handling of ``LocalBackend``."""

    @pytest.fixture(scope='function')
    def backend(self, meta_info):
        """Provide a new ``LocalBackend`` (debug mode) for every test function."""
        return LocalBackend(meta_info, debug=True)

    def test_meta_info(self, meta_info, no_warn):
        """The meta info fixture contains the ``section_topology`` definition."""
        assert 'section_topology' in meta_info

    def test_section(self, backend, no_warn):
        """Opening ``section_run`` repeatedly yields indices 0, 1, 2.

        Values are added to the section opened last (no index given) or to an
        explicitly indexed open section, and can be read back per index.
        """
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        # Two sections of the same type are open at the same time.
        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # The third argument selects which of the open sections gets the value.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_section_override(self, backend, no_warn):
        """Test whether we can overwrite values already in the backend."""
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        # The written JSON is not inspected; the call only has to succeed.
        output = StringIO()
        backend.write_json(output)
        assert backend.get_value('atom_labels').tolist() == expected_value

    def test_two_sections(self, backend, no_warn):
        """Two different section types both start at index 0 and both appear
        in the JSON written by the backend."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_entry_info')
        assert g_index == 0
        backend.addValue('parser_name', 'p0')
        backend.closeSection('section_entry_info', 0)

        assert backend.get_sections('section_run') == [0]
        assert backend.get_sections('section_entry_info') == [0]

        output = StringIO()
        backend.write_json(output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive
        assert 'section_entry_info' in archive

    def test_subsection(self, backend: LocalBackend, no_warn):
        """``section_method`` sections end up under the expected ``section_run``.

        Two methods are opened while only run 0 is open and a third one after
        a second run was opened; the assertions expect 2 methods in run 0 and
        1 method in run 1.
        """
        backend.openSection('section_run')
        # NOTE(review): index -1 presumably addresses the section opened last
        # — confirm against the LocalBackend implementation.
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        # This method is opened after both runs have been closed.
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        runs = backend.data['section_run']
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_context(self, backend: LocalBackend, no_warn):
        """Values can be added to already closed sections through a context URI."""
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # A nested section can be addressed as a context as well.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        runs = backend.data['section_run']
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: LocalBackend, no_warn):
        """The same context can be opened twice, with a ``section_method``
        opened and closed each time.

        Afterwards ``backend.data['section_method']`` is expected to have
        length 1.
        """
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        assert len(backend.data['section_method']) == 1

    def test_bad_context(self, backend: LocalBackend, no_warn):
        """Opening a context with a malformed URI raises ``BadContextURI``."""
        for bad_uri in ('section_run/0', 'dsfds'):
            # pytest.raises fails the test if no BadContextURI is raised.
            with pytest.raises(BadContextURI):
                backend.openContext(bad_uri)

218
219
220
221
222
223
224
225
226

def create_reference(data, pretty):
    """Serialize ``data`` to the JSON string used as reference output.

    With a truthy ``pretty`` the JSON is indented by two spaces; otherwise
    the most compact separators (no whitespace) are used.
    """
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


@pytest.mark.parametrize("pretty", [False, True])
def test_stream_generator(pretty, no_warn):
    """``JSONStreamWriter`` output equals the ``json.dumps`` reference.

    The writer calls below rebuild ``example_data`` event by event; the
    resulting text is compared with ``create_reference`` for both the compact
    and the pretty variant.
    """
    example_data = [
        {
            'key1': 'value',
            'key2': 1
        },
        {
            'key': {
                'key': 'value'
            }
        }
    ]

    out = StringIO()
    writer = JSONStreamWriter(out, pretty=pretty)
    writer.open_array()
    # First object: {'key1': 'value', 'key2': 1}
    writer.open_object()
    writer.key('key1')
    writer.value('value')
    writer.key('key2')
    writer.value(1)
    writer.close_object()
    # Second object: {'key': {'key': 'value'}}
    writer.open_object()
    writer.key('key')
    writer.open_object()
    writer.key('key')
    writer.value('value')
    writer.close_object()
    writer.close_object()
    writer.close_array()
    writer.close()

    assert create_reference(example_data, pretty) == out.getvalue()

261

262
263
def assert_parser_result(backend):
    """Assert that ``backend`` reports a successful parse without errors.

    ``backend.status`` is unpacked as a ``(status, errors)`` pair; the status
    must be ``'ParseSuccess'`` and ``errors`` must be ``None`` or empty.
    """
    status, errors = backend.status
    assert status == 'ParseSuccess', 'unexpected parser status: %r' % (status,)
    assert errors is None or len(errors) == 0, 'parser reported errors: %r' % (errors,)
266
267


Daniel Speckhard's avatar
Daniel Speckhard committed
268
269
270
271
272
def assert_parser_dir_unchanged(previous_wd, current_wd):
    """Fail if the working directory differs from the one recorded earlier.

    ``previous_wd`` is the directory noted before the parser ran and
    ``current_wd`` the directory afterwards; both must compare equal.
    """
    unchanged = previous_wd == current_wd
    assert unchanged


273
274
def run_parser(parser_name, mainfile):
    """Run the parser registered as ``parser_name`` on ``mainfile``.

    The parser is looked up in ``parser_dict`` and run with a logger named
    after this module. The result is passed through ``add_calculation_info``
    (with ``parser_name`` as an extra value) and returned.
    """
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
277
278


279
280
281
@pytest.fixture
def parsed_vasp_example() -> LocalBackend:
    """Fixture: result of running the vasp parser on the perovskite example."""
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
283
284


285
286
287
288
289
290
@pytest.fixture
def parsed_template_example() -> LocalBackend:
    """Fixture: result of running the template parser on its JSON example."""
    parser_name = 'parsers/template'
    mainfile = 'tests/data/parsers/template.json'
    return run_parser(parser_name, mainfile)


291
292
def parse_file(parser_name_and_mainfile) -> LocalBackend:
    """Run a parser for a ``(parser_name, mainfile)`` pair and return the result."""
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


296
297
298
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> LocalBackend:
    """Fixture parametrized over ``parser_examples``.

    Each parameter is a ``(parser_name, mainfile)`` pair; the fixture value is
    the result of running that parser on that mainfile.
    """
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
301
302


303
def add_calculation_info(backend: LocalBackend, **kwargs) -> LocalBackend:
    """Add a ``section_entry_info`` with fixed test values to ``backend``.

    The section gets constant test ids (``upload_id``, ``calc_id``,
    ``calc_hash``) and a constant ``mainfile``. Every keyword argument is
    added to the same section as an additional value.

    Returns:
        The same ``backend`` object that was passed in.
    """
    backend.openNonOverlappingSection('section_entry_info')
    backend.addValue('upload_id', 'test_upload_id')
    backend.addValue('calc_id', 'test_calc_id')
    backend.addValue('calc_hash', 'test_calc_hash')
    backend.addValue('mainfile', 'test/mainfile.txt')
    for key, value in kwargs.items():
        backend.addValue(key, value)
    backend.closeNonOverlappingSection('section_entry_info')
    return backend
313
314


315
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    """Each example parses successfully and leaves the working directory unchanged."""
    previous_wd = os.getcwd()  # Get working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
322
323


324
325
326
327
328
def test_match(raw_files, no_warn):
    """Parser matching over ``tests/data/parsers`` finds the expected number of mainfiles.

    All files below ``tests/data/parsers`` are added to a staging upload.
    Every file in the raw file manifest is passed to ``match_parser``; matches
    that are ``None`` or a ``BrokenParser`` are ignored. The number of
    remaining matches must equal ``correct_num_output_files``.
    """
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(mainfile, upload_files)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    # On failure the message lists every matched parser with its mainfile.
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])