test_parsing.py 12.8 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
21

from nomadcore.local_meta_info import loadJsonFile
22
import nomad_meta_info
23

24
from nomad import utils, files
25
from nomad.parsing import JSONStreamWriter, parser_dict, match_parser, BrokenParser
26
from nomad.parsing import LocalBackend, BadContextURI
27

28
# Pairs of (parser name, mainfile path) that the parser tests below are
# parametrized with. The parser name is used as a key into ``parser_dict``.
parser_examples = [
    # NOTE(review): this path starts with 'test/' rather than 'tests/', so the
    # filter below keeps it only if the path exists on disk -- confirm intended.
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),  # causes spglib to segfault
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]

# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image).
# Entries under 'tests' are always kept; any other entry is kept only when its
# mainfile is actually present on disk.
fixed_parser_examples = []
for parser, mainfile in parser_examples:
    if os.path.exists(mainfile) or mainfile.startswith('tests'):
        fixed_parser_examples.append((parser, mainfile))
parser_examples = fixed_parser_examples

# Number of mainfiles that ``test_match`` expects to be matched by a parser.
correct_num_output_files = 41
81

Daniel Speckhard's avatar
Daniel Speckhard committed
82

83
84
85
86
class TestLocalBackend(object):
    """Tests for sections, values and contexts on a ``LocalBackend``."""

    @pytest.fixture(scope='session')
    def meta_info(self):
        """Load ``all.nomadmetainfo.json`` from the ``nomad_meta_info`` package directory."""
        file_dir = os.path.dirname(os.path.abspath(nomad_meta_info.__file__))
        path = os.path.join(file_dir, 'all.nomadmetainfo.json')
        meta_info, _ = loadJsonFile(path)
        return meta_info

    @pytest.fixture(scope='function')
    def backend(self, meta_info):
        """Provide a fresh ``LocalBackend`` (created with ``debug=True``) for each test."""
        return LocalBackend(meta_info, debug=True)

    def test_meta_info(self, meta_info, no_warn):
        """The loaded meta info must contain ``section_topology``."""
        assert 'section_topology' in meta_info

    def test_section(self, backend, no_warn):
        """Open three ``section_run`` sections and read their values back by index."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # Two sections are open at once; values are addressed by explicit index.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_section_override(self, backend, no_warn):
        """ Test whether we can overwrite values already in the backend."""
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        # Writing the archive is exercised here; its content is not inspected.
        output = StringIO()
        backend.write_json(output)
        assert backend.get_value('atom_labels').tolist() == expected_value

    def test_two_sections(self, backend, no_warn):
        """Two different top-level sections both appear in the JSON output."""
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_entry_info')
        assert g_index == 0
        backend.addValue('parser_name', 'p0')
        backend.closeSection('section_entry_info', 0)

        assert backend.get_sections('section_run') == [0]
        assert backend.get_sections('section_entry_info') == [0]

        output = StringIO()
        backend.write_json(output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive
        assert 'section_entry_info' in archive

    def test_subsection(self, backend: LocalBackend, no_warn):
        """Check how ``section_method`` sub-sections are distributed over two runs."""
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        runs = backend.data['section_run']
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_context(self, backend: LocalBackend, no_warn):
        """Values added inside a context end up in the section named by the context URI."""
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # A nested URI must also be accepted.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        runs = backend.data['section_run']
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: LocalBackend, no_warn):
        """Open the same context twice, adding a ``section_method`` each time."""
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        assert len(backend.data['section_method']) == 1

    def test_bad_context(self, backend: LocalBackend, no_warn):
        """Opening a malformed context URI must raise ``BadContextURI``."""
        # pytest.raises fails the test if the call does not raise.
        with pytest.raises(BadContextURI):
            backend.openContext('section_run/0')

        with pytest.raises(BadContextURI):
            backend.openContext('dsfds')

228
229
230
231
232
233
234
235
236

def create_reference(data, pretty):
    """Serialize ``data`` to the JSON text the stream writer is compared against.

    A truthy ``pretty`` yields output indented by two spaces; otherwise the
    most compact form is produced, with no whitespace after ``,`` or ``:``.
    """
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


@pytest.mark.parametrize("pretty", [False, True])
def test_stream_generator(pretty, no_warn):
    """Output of ``JSONStreamWriter`` must equal ``json.dumps`` of the same data.

    The writer calls below reproduce ``example_data`` event by event; the
    result is compared with ``create_reference`` for both pretty and compact
    output.
    """
    example_data = [
        {
            'key1': 'value',
            'key2': 1
        },
        {
            'key': {
                'key': 'value'
            }
        }
    ]

    out = StringIO()
    writer = JSONStreamWriter(out, pretty=pretty)
    writer.open_array()
    # First element: a flat object with a string and an integer value.
    writer.open_object()
    writer.key('key1')
    writer.value('value')
    writer.key('key2')
    writer.value(1)
    writer.close_object()
    # Second element: an object nested inside an object.
    writer.open_object()
    writer.key('key')
    writer.open_object()
    writer.key('key')
    writer.value('value')
    writer.close_object()
    writer.close_object()
    writer.close_array()
    writer.close()

    assert create_reference(example_data, pretty) == out.getvalue()

271

272
273
def assert_parser_result(backend):
    """Assert that ``backend`` reports a successful parse without errors.

    ``backend.status`` is unpacked as a ``(status, errors)`` pair; the status
    must be ``'ParseSuccess'`` and ``errors`` must be ``None`` or empty.
    """
    status, errors = backend.status
    # The messages make a failing parser visible in the test report.
    assert status == 'ParseSuccess', 'unexpected parser status %s (errors: %s)' % (status, errors)
    assert errors is None or len(errors) == 0, 'parser reported errors: %s' % (errors,)
276
277


Daniel Speckhard's avatar
Daniel Speckhard committed
278
279
280
281
282
def assert_parser_dir_unchanged(previous_wd, current_wd):
    """Assert working directory has not been changed from parser.

    Args:
        previous_wd: Working directory recorded before the parser ran.
        current_wd: Working directory observed after the parser ran.

    Raises:
        AssertionError: If the two directories differ; the message names both.
    """
    assert previous_wd == current_wd, (
        'working directory changed from %s to %s' % (previous_wd, current_wd))


283
284
def run_parser(parser_name, mainfile):
    """Run the parser registered under ``parser_name`` on ``mainfile``.

    The parser is looked up in ``parser_dict`` and its result is passed through
    ``add_calculation_info``, which also records ``parser_name``.

    Returns:
        The parser result as returned by ``add_calculation_info``.
    """
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
287
288


289
290
291
@pytest.fixture
def parsed_vasp_example() -> LocalBackend:
    """Result of running the VASP parser on the perovskite example from the parser dependency."""
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
293
294


295
296
297
298
299
300
@pytest.fixture
def parsed_template_example() -> LocalBackend:
    """Result of running the template parser on its example mainfile."""
    parser_name = 'parsers/template'
    mainfile = 'tests/data/parsers/template.json'
    return run_parser(parser_name, mainfile)


301
302
303
# Function used by normalizer tests.
def parse_file(parser_name_and_mainfile) -> LocalBackend:
    """Run a parser for a ``(parser_name, mainfile)`` pair.

    Args:
        parser_name_and_mainfile: Two-element sequence, unpacked into the
            parser name and the mainfile path.

    Returns:
        Whatever ``run_parser`` returns for that pair.
    """
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


307
308
309
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> LocalBackend:
    """Result of ``run_parser`` for each entry of ``parser_examples``.

    Each fixture id is formatted as ``<parser name>-<mainfile>``.
    """
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
312
313


314
def add_calculation_info(backend: LocalBackend, **kwargs) -> LocalBackend:
    """Add a ``section_entry_info`` with fixed test ids to ``backend``.

    Args:
        backend: The backend to write to; it is modified in place.
        **kwargs: Additional key/value pairs added to the same section after
            the fixed test values.

    Returns:
        The same ``backend`` object that was passed in.
    """
    backend.openNonOverlappingSection('section_entry_info')
    backend.addValue('upload_id', 'test_upload_id')
    backend.addValue('calc_id', 'test_calc_id')
    backend.addValue('calc_hash', 'test_calc_hash')
    backend.addValue('mainfile', 'test/mainfile.txt')
    for key, value in kwargs.items():
        backend.addValue(key, value)
    backend.closeNonOverlappingSection('section_entry_info')
    return backend
324
325


326
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    """Run one parser example; it must succeed and leave the working directory unchanged."""
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
333
334


335
336
337
338
339
def test_match(raw_files, no_warn):
    """Match every raw file under ``tests/data/parsers`` and check the number of hits.

    Files for which ``match_parser`` returns ``None`` or a ``BrokenParser`` are
    not counted. On failure the assertion message lists each matched parser
    name with its mainfile.
    """
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(mainfile, upload_files)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])