test_parsing.py 15.7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an"AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files
23
from nomad.parsing import parser_dict, match_parser, BrokenParser, BadContextUri, Backend
24

25

26
# Pairs of (parser name, mainfile path). Each pair is fed to ``run_parser`` by
# the parametrized tests and fixtures further down in this module.
parser_examples = [
    # Fixed: this path used to start with 'test/' instead of 'tests/'. It
    # therefore did not pass the ``startswith('tests')`` check below and the
    # example was dropped from the list unless such a path happened to exist.
    ('parsers/random', 'tests/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    # NOTE(review): the fleur, molcas and onetep entries use the singular
    # 'parser/' prefix; presumably that matches the names registered in
    # ``parser_dict`` - verify before normalizing.
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out'),
]

# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image).
# Entries below 'tests' are always kept; anything else only if it exists.
fixed_parser_examples = [
    (parser_name, mainfile_path)
    for parser_name, mainfile_path in parser_examples
    if os.path.exists(mainfile_path) or mainfile_path.startswith('tests')]
parser_examples = fixed_parser_examples

# Number of mainfiles that ``test_match`` expects to be matched by a
# (non-broken) parser.
correct_num_output_files = 50
82

Daniel Speckhard's avatar
Daniel Speckhard committed
83

84
class TestBackend:
    '''Tests for opening/closing sections and adding values on a ``Backend``.'''

    @pytest.fixture(scope='function')
    def backend(self):
        '''Provide a fresh ``Backend('common')`` for every test function.'''
        return Backend('common')

    def test_meta_info(self, no_warn):
        '''The metainfo environment knows the ``section_topology`` definition.'''
        from nomad.datamodel.metainfo import m_env
        assert 'section_topology' in m_env.all_definitions_by_name

    def test_section(self, backend, no_warn):
        '''Sections of one type get increasing indices; values can be added by index.'''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # Two runs are open at the same time, so the target index is explicit.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        '''Sub-section indices restart at 0 within each parent section.'''
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # The second system deliberately gets no symmetry sub-section.
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 0]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [0]

    def test_section_override(self, backend, no_warn):
        ''' Test whether we can overwrite values already in the backend.'''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        assert backend.get_value('atom_labels') == expected_value

    def test_two_sections(self, backend, no_warn):
        '''Two different root sections both show up in the serialized archive.'''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_entry_info')
        assert g_index == 0
        backend.addValue('parser_name', 'p0')
        backend.closeSection('section_entry_info', 0)

        assert backend.get_sections('section_run') == [0]
        assert backend.get_sections('section_entry_info') == [0]

        # Round-trip through JSON text to check what actually gets serialized.
        output = StringIO()
        json.dump(backend.resource.m_to_dict(), output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive
        assert 'section_entry_info' in archive

    def test_subsection(self, backend: Backend, no_warn):
        '''Methods opened after a second run was opened are attached to that run.'''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: Backend, no_warn):
        '''``parent_index`` attaches a new section to an already closed parent.'''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: Backend, no_warn):
        '''``parent_index`` picks the first parent even after a second one was opened.'''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        # Open and immediately close a second calculation section.
        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert len(run['section_single_configuration_calculation'][0].section_dos) == 1
        assert len(run['section_single_configuration_calculation'][1].section_dos) == 0

    def test_context(self, backend: Backend, no_warn):
        '''Values added inside an opened context end up in the addressed section.'''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # Opening a nested context only has to work; nothing is added to it.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: Backend, no_warn):
        '''Opening the same context twice adds sub-sections to the same run.'''
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs[0].section_method) == 2

    def test_bad_context(self, backend: Backend, no_warn):
        '''Opening these two malformed context URIs raises ``BadContextUri``.'''
        # pytest.raises fails the test when nothing is raised, replacing the
        # former try / assert False / except pattern.
        with pytest.raises(BadContextUri):
            backend.openContext('section_run/0')

        with pytest.raises(BadContextUri):
            backend.openContext('dsfds')

284
285
286
287
288
289
290
291

def create_reference(data, pretty):
    '''
    Serialize ``data`` to a JSON string.

    A truthy ``pretty`` yields output indented by two spaces; otherwise the
    most compact form (no whitespace after ``,`` and ``:``) is produced.
    '''
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


292
def assert_parser_result(backend, error=False):
    '''
    Assert that ``backend.status`` reports ``'ParseSuccess'``.

    With ``error=False`` (default) the reported errors must be ``None`` or
    empty; with ``error=True`` at least one error must have been reported.
    '''
    parse_status, reported_errors = backend.status
    assert parse_status == 'ParseSuccess'

    if not error:
        assert reported_errors is None or len(reported_errors) == 0
        return

    assert len(reported_errors) > 0
299
300


Daniel Speckhard's avatar
Daniel Speckhard committed
301
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''Assert that the two given working directories are equal.'''
    unchanged = previous_wd == current_wd
    assert unchanged


306
def run_parser(parser_name, mainfile):
    '''
    Run the parser registered under ``parser_name`` on ``mainfile``.

    The parser's ``domain`` is copied onto the result, which is then passed
    through ``add_calculation_info`` (with ``parser_name``) and returned.
    '''
    selected_parser = parser_dict[parser_name]
    logger = utils.get_logger(__name__)
    backend = selected_parser.run(mainfile, logger=logger)
    backend.domain = selected_parser.domain
    return add_calculation_info(backend, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
311
312


313
@pytest.fixture
def parsed_vasp_example() -> Backend:
    '''Fixture: result of running the vasp parser on the perovskite example.'''
    mainfile = 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml'
    return run_parser('parsers/vasp', mainfile)
Markus Scheidgen's avatar
Markus Scheidgen committed
317
318


319
@pytest.fixture
def parsed_template_example() -> Backend:
    '''Fixture: result of running the template parser on ``template.json``.'''
    mainfile = 'tests/data/parsers/template.json'
    return run_parser('parsers/template', mainfile)


325
@pytest.fixture(scope="session")
def parsed_template_no_system() -> Backend:
    '''
    Session-scoped fixture: result of running the template parser on
    ``template_no_system.json``.
    '''
    mainfile = 'tests/data/parsers/template_no_system.json'
    return run_parser('parsers/template', mainfile)


331
def parse_file(parser_name_and_mainfile) -> Backend:
    '''
    Run a parser given as a single ``(parser_name, mainfile)`` pair and
    return what ``run_parser`` returns for it.
    '''
    name, path = parser_name_and_mainfile
    backend = run_parser(name, path)
    return backend


336
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> Backend:
    '''
    Parametrized fixture: one parse result per entry of ``parser_examples``.

    ``request.param`` is the ``(parser_name, mainfile)`` pair of the current
    parametrization.
    '''
    name, path = request.param
    return run_parser(name, path)
341
342


343
def add_calculation_info(backend: Backend, **kwargs) -> Backend:
    '''
    Add a ``section_entry_info`` with fixed test ids to ``backend``.

    The fixed values are written first, followed by every keyword argument
    as an additional ``key``/``value`` pair. The same backend is returned.
    '''
    fixed_values = (
        ('upload_id', 'test_upload_id'),
        ('calc_id', 'test_calc_id'),
        ('calc_hash', 'test_calc_hash'),
        ('mainfile', 'test/mainfile.txt'),
    )

    backend.openNonOverlappingSection('section_entry_info')
    # Keep the order: fixed values first, then the caller's extras.
    for name, content in (*fixed_values, *kwargs.items()):
        backend.addValue(name, content)
    backend.closeNonOverlappingSection('section_entry_info')

    return backend
353
354


355
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    '''
    Run each example through its parser; expect an error-free result and
    an unchanged working directory.
    '''
    # Remember the working directory before the parser runs.
    wd_before = os.getcwd()

    backend = run_parser(parser_name, mainfile)
    assert_parser_result(backend)

    # Check that cwd has not changed.
    assert_parser_dir_unchanged(wd_before, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
362
363


364
365
366
367
368
369
370
371
372
def test_broken_xml_vasp():
    '''
    Parse the broken vasp XML example; expect reported errors and an
    unchanged working directory.
    '''
    # Remember the working directory before the parser runs.
    wd_before = os.getcwd()

    backend = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(backend, error=True)

    # Check that cwd has not changed.
    assert_parser_dir_unchanged(wd_before, current_wd=os.getcwd())


373
374
375
376
377
378
379
380
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    '''
    Copy ``tests/data/latin-1.out`` into ``tests/data/parsers`` for the
    duration of a test and remove the copy afterwards.
    '''
    copied_file = 'tests/data/parsers/latin-1.out'
    copyfile('tests/data/latin-1.out', copied_file)
    yield
    os.remove(copied_file)


def test_match(raw_files, with_latin_1_file, no_warn):
    '''
    Add ``tests/data/parsers`` to a staging upload and check that exactly
    ``correct_num_output_files`` raw files are matched by a parser that is
    not a ``BrokenParser``.
    '''
    upload_files = files.StagingUploadFiles(
        'example_upload_id', create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        os_path = upload_files.raw_file_object(mainfile).os_path
        parser = match_parser(os_path)
        # Skip files without a match and files matched only by a BrokenParser.
        if parser is None or isinstance(parser, BrokenParser):
            continue
        matched_mainfiles[mainfile] = parser

    # On failure, list every matched file with its parser to ease debugging.
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])