# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
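'''
Tests for the parsing Backend and for running the registered parsers on example mainfiles.
'''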

from io import StringIO
import json
import os
from shutil import copyfile

import numpy as np
import pytest

from nomad import utils, files, datamodel
from nomad.parsing import parser_dict, match_parser, BrokenParser, BadContextUri, Backend
from nomad.app import dump_json


parser_examples = [
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70 MB file, too big for git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]

# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image)
fixed_parser_examples = []
for parser, mainfile in parser_examples:
    if os.path.exists(mainfile) or mainfile.startswith('tests'):
        fixed_parser_examples.append((parser, mainfile))
parser_examples = fixed_parser_examples


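# Number of mainfiles in tests/data/parsers that are expected to match a parser (checked in test_match)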
correct_num_output_files = 50


class TestBackend(object):
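    '''Tests for the Backend that parsers use to create sections, values, and contexts.'''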

    @pytest.fixture(scope='function')
    def backend(self):
        return Backend('common')

    def test_meta_info(self, no_warn):
        from nomad.datamodel.metainfo import m_env
        assert 'section_topology' in m_env.all_definitions_by_name

    def test_section(self, backend, no_warn):
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 0]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [0]

    def test_section_override(self, backend, no_warn):
        '''Test whether we can overwrite values already in the backend.'''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        assert backend.get_value('atom_labels') == expected_value

    def test_two_sections(self, backend, no_warn):
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1
        backend.addValue('program_name', 't1')
        backend.closeSection('section_run', 1)

        assert backend.get_sections('section_run') == [0, 1]

        output = StringIO()
        json.dump(backend.resource.m_to_dict(), output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive['EntryArchive']

    def test_subsection(self, backend: Backend, no_warn):
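        '''Subsections opened without an explicit parent are added to the most recently created parent section.'''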
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: Backend, no_warn):
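        '''The parent_index argument selects the parent instance that a new subsection is attached to.'''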
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: Backend, no_warn):
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert len(run['section_single_configuration_calculation'][0].section_dos) == 1
        assert len(run['section_single_configuration_calculation'][1].section_dos) == 0

    def test_context(self, backend: Backend, no_warn):
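        '''Existing sections can be reopened for additional values via context URIs such as /section_run/0.'''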
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: Backend, no_warn):
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs[0].section_method) == 2

    def test_bad_context(self, backend: Backend, no_warn):
        with pytest.raises(BadContextUri):
            backend.openContext('section_run/0')

        with pytest.raises(BadContextUri):
            backend.openContext('dsfds')


def create_reference(data, pretty):
    if pretty:
        return json.dumps(data, indent=2)
    else:
        return json.dumps(data, separators=(',', ':'))


def assert_parser_result(backend, error=False):
    status, errors = backend.status
    assert status == 'ParseSuccess'
    if error:
        assert len(errors) > 0
    else:
        assert errors is None or len(errors) == 0


def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''Assert that the parser did not change the working directory.'''
    assert previous_wd == current_wd


def run_parser(parser_name, mainfile):
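    '''Runs the named parser on the given mainfile and returns the resulting backend.'''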
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))
    result.domain = parser.domain
    return add_calculation_info(result, parser_name=parser_name)


@pytest.fixture
def parsed_vasp_example() -> Backend:
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')


@pytest.fixture
def parsed_template_example() -> Backend:
    return run_parser(
        'parsers/template', 'tests/data/parsers/template.json')


@pytest.fixture(scope="session")
def parsed_template_no_system() -> Backend:
    return run_parser(
        'parsers/template', 'tests/data/parsers/template_no_system.json')


def parse_file(parser_name_and_mainfile) -> Backend:
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> Backend:
    parser_name, mainfile = request.param
    result = run_parser(parser_name, mainfile)
    return result


def add_calculation_info(backend: Backend, **kwargs) -> Backend:
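    '''Adds mock entry metadata (upload and calc ids, calc hash, mainfile) to the parsed backend.'''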
    entry_metadata = backend.entry_archive.m_create(datamodel.EntryMetadata)
    entry_metadata.upload_id = 'test_upload_id'
    entry_metadata.calc_id = 'test_calc_id'
    entry_metadata.calc_hash = 'test_calc_hash'
    entry_metadata.mainfile = 'test/mainfile.txt'
    entry_metadata.m_update(**kwargs)
    return backend


@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    previous_wd = os.getcwd()  # Get the working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())


def test_broken_xml_vasp():
    parser_name, mainfile = 'parsers/vasp', 'tests/data/parsers/vasp/broken.xml'
    previous_wd = os.getcwd()  # Get the working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example, error=True)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())


@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
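    '''Temporarily adds a Latin-1 encoded example file to the parser test data.'''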
    copyfile('tests/data/latin-1.out', 'tests/data/parsers/latin-1.out')
    yield
    os.remove('tests/data/parsers/latin-1.out')


def test_match(raw_files, with_latin_1_file, no_warn):
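    '''Matches all files in tests/data/parsers and checks that the expected number of mainfiles is recognized.'''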
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(upload_files.raw_file_object(mainfile).os_path)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])


def parser_in_dir(dir):
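    '''Tries to match and run a parser on every file below dir whose path contains "test" and prints the result.'''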
    for root, _, files in os.walk(dir):
        for file_name in files:
            file_path = os.path.join(root, file_name)

            if 'test' not in file_path:
                continue

            parser = match_parser(file_path)
            if parser is not None:

                try:
                    backend = parser.run(file_path)
                    # check if the result can be dumped
                    dump_json(backend.entry_archive.m_to_dict())
                    backend.resource.unload()
                except Exception as e:
                    print(file_path, parser, 'FAILURE', e)
                    import traceback
                    traceback.print_exc()
                else:
                    print(file_path, parser, 'SUCCESS')


if __name__ == '__main__':
    import sys
    import os

    assert len(sys.argv) == 2 and os.path.isdir(sys.argv[1]), \
        'One argument with a directory path is required.'

    parser_in_dir(sys.argv[1])