test_parsing.py 17.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files, datamodel
Markus Scheidgen's avatar
Markus Scheidgen committed
23
24
from nomad.parsing import BrokenParser, BadContextUri, Backend
from nomad.parsing.parsers import parser_dict, match_parser
25
from nomad.app import dump_json
26
from nomad.metainfo import MSection
27

28
# Pairs of (parser name, mainfile path) that the parser tests are run against.
# The parser name is used as a key into ``parser_dict`` (see ``run_parser``).
parser_examples = [
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]
73

74
75
# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image).
# An example is kept if its mainfile exists on disk or if its path starts with
# 'tests'; every other example is dropped from the parametrization.
fixed_parser_examples = []
for parser, mainfile in parser_examples:
    if os.path.exists(mainfile) or mainfile.startswith('tests'):
        fixed_parser_examples.append((parser, mainfile))
parser_examples = fixed_parser_examples
81

82

83
# Number of raw files that ``test_match`` expects to be matched by a parser
# (excluding ``BrokenParser`` matches) after adding 'tests/data/parsers' to an upload.
correct_num_output_files = 114
84

Daniel Speckhard's avatar
Daniel Speckhard committed
85

86
class TestBackend(object):
    ''' Tests for opening/closing sections, adding values, and contexts on ``Backend``. '''

    @pytest.fixture(scope='function')
    def backend(self):
        ''' Provides a new ``Backend('common')`` instance for each test function. '''
        return Backend('common')

    def test_meta_info(self, no_warn):
        ''' Checks that 'section_topology' is among the definitions known to ``m_env``. '''
        from nomad.datamodel.metainfo import m_env
        assert 'section_topology' in m_env.all_definitions_by_name

    def test_section(self, backend, no_warn):
        ''' Opens three section_run sections (two of them open at the same time)
        and checks the returned indices and the values stored per index. '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        ''' Creates three section_system sections, the first and last with a
        section_symmetry, and checks the indices reported by ``get_sections``. '''
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 0]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [0]

    def test_section_override(self, backend, no_warn):
        ''' Test whether we can overwrite values already in the backend.'''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        assert backend.get_value('atom_labels') == expected_value

    def test_two_sections(self, backend, no_warn):
        ''' Opens and closes two section_run sections one after the other and
        checks that the archive dict survives a JSON round trip. '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1
        backend.addValue('program_name', 't1')
        backend.closeSection('section_run', 1)

        assert backend.get_sections('section_run') == [0, 1]

        output = StringIO()
        json.dump(backend.resource.m_to_dict(), output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive['EntryArchive']

    def test_subsection(self, backend: Backend, no_warn):
        ''' Opens section_method sections before and after a second section_run
        was opened and checks how many methods each run ends up with. '''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: Backend, no_warn):
        ''' Opens a section_dos with an explicit ``parent_index`` after its parent
        section was already closed and checks that it is attached to that parent. '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: Backend, no_warn):
        ''' Like ``test_open_section_of_specific_parent``, but with a second parent
        section created in between; the section_dos must go to the first parent. '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert len(run['section_single_configuration_calculation'][0].section_dos) == 1
        assert len(run['section_single_configuration_calculation'][1].section_dos) == 0

    def test_context(self, backend: Backend, no_warn):
        ''' Adds values to already closed section_run sections through contexts
        and checks that each value ends up in the addressed run. '''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: Backend, no_warn):
        ''' Opens the same context twice, adds a section_method each time, and
        checks that both methods end up in the addressed run. '''
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs[0].section_method) == 2

    def test_bad_context(self, backend: Backend, no_warn):
        ''' Checks that opening a context with a bad URI raises ``BadContextUri``. '''
        # pytest.raises fails the test if nothing is raised. The previous
        # ``assert False`` inside a try block is stripped under ``python -O``,
        # which would have let these checks pass without verifying anything.
        with pytest.raises(BadContextUri):
            backend.openContext('section_run/0')

        with pytest.raises(BadContextUri):
            backend.openContext('dsfds')

284
285
286
287
288
289
290
291

def create_reference(data, pretty):
    ''' Serializes data as JSON: indented by two spaces if pretty is truthy,
    otherwise in the most compact form without whitespace after separators. '''
    dump_options = dict(indent=2) if pretty else dict(separators=(',', ':'))
    return json.dumps(data, **dump_options)


292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
@pytest.fixture(scope='function')
def assert_parser_result(caplog):
    '''
    Provides a function ``_assert(backend, error=False)`` that checks a parser result.

    The backend status must always be 'ParseSuccess'. With ``error=False`` the
    backend must not report errors. With ``error=True`` there must be at least one
    error; if the backend reports none, the messages of log records with level
    WARNING, ERROR, or CRITICAL captured during the test call are used instead.
    '''
    problem_levels = ['WARNING', 'ERROR', 'CRITICAL']

    def _assert(backend, error=False):
        status, errors = backend.status
        assert status == 'ParseSuccess'

        if not error:
            assert errors is None or len(errors) == 0
            return

        if not errors:
            # fall back to what was logged while the test was running
            errors = [
                record.msg
                for record in caplog.get_records(when='call')
                if record.levelname in problem_levels]
        assert len(errors) > 0

    return _assert
308
309


Daniel Speckhard's avatar
Daniel Speckhard committed
310
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''Assert working directory has not been changed from parser.

    Arguments:
        previous_wd: The working directory recorded before the parser was run.
        current_wd: The working directory after the parser was run.

    Raises:
        AssertionError: If both directories differ.
    '''
    assert previous_wd == current_wd, (
        'working directory changed from %s to %s' % (previous_wd, current_wd))


315
def run_parser(parser_name, mainfile):
    '''
    Runs the parser registered under the given name in ``parser_dict`` on the
    given mainfile and returns the resulting backend with test entry metadata
    added via ``add_calculation_info``.

    If the parser returns a metainfo section (``MSection``) instead of a backend,
    the section is added to the entry archive and resource of a new ``Backend``.
    '''
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))
    if isinstance(result, MSection):
        backend = Backend(parser._metainfo_env, parser.domain)
        # the name of the result's section definition selects the sub section
        # of EntryArchive that the result is added to
        root_section = result.m_def.name
        section_def = getattr(datamodel.EntryArchive, root_section)
        backend.entry_archive.m_add_sub_section(section_def, result)
        backend.resource.add(result)
        result = backend
    result.domain = parser.domain
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
327
328


329
@pytest.fixture
def parsed_vasp_example() -> Backend:
    ''' Provides the result of running the vasp parser on the perovskite example. '''
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
333
334


335
@pytest.fixture
def parsed_template_example() -> Backend:
    ''' Provides the result of running the template parser on template.json. '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template.json')


341
@pytest.fixture(scope="session")
def parsed_template_no_system() -> Backend:
    ''' Provides the result of running the template parser on template_no_system.json;
    computed once per test session. '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template_no_system.json')


347
def parse_file(parser_name_and_mainfile) -> Backend:
    ''' Runs a parser on a mainfile, both given as one ``(parser_name, mainfile)`` pair. '''
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


352
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> Backend:
    ''' Provides the parser result for each ``(parser_name, mainfile)`` pair in
    ``parser_examples``; the test id is built from both values. '''
    parser_name, mainfile = request.param
    result = run_parser(parser_name, mainfile)
    return result
357
358


359
def add_calculation_info(backend: Backend, **kwargs) -> Backend:
    '''
    Creates an ``EntryMetadata`` section in the backend's entry archive and fills
    it with fixed test values for upload id, calc id, calc hash, and mainfile.

    Arguments:
        backend: The backend whose entry archive receives the metadata.
        **kwargs: Additional metadata values, applied with ``m_update`` after the
            fixed values have been set.

    Returns: The given backend.
    '''
    entry_metadata = backend.entry_archive.m_create(datamodel.EntryMetadata)
    entry_metadata.upload_id = 'test_upload_id'
    entry_metadata.calc_id = 'test_calc_id'
    entry_metadata.calc_hash = 'test_calc_hash'
    entry_metadata.mainfile = 'test/mainfile.txt'
    entry_metadata.m_update(**kwargs)
    return backend
367
368


369
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile, assert_parser_result):
    ''' Runs each example parser on its mainfile, expects a result without errors,
    and checks that the parser did not change the working directory. '''
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
376
377


378
def test_broken_xml_vasp(assert_parser_result):
    ''' Runs the vasp parser on broken.xml, expects the result to carry errors,
    and checks that the parser did not change the working directory. '''
    parser_name, mainfile = 'parsers/vasp', 'tests/data/parsers/vasp/broken.xml'
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example, error=True)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())


387
388
389
390
391
392
393
394
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    ''' Copies the latin-1 example file into the parser test data for the
    duration of a test and removes the copy again afterwards. '''
    source_path, target_path = 'tests/data/latin-1.out', 'tests/data/parsers/latin-1.out'
    copyfile(source_path, target_path)
    yield
    os.remove(target_path)


def test_match(raw_files, with_latin_1_file, no_warn):
    ''' Adds 'tests/data/parsers' to a staging upload, tries to match a parser to
    each raw file, and checks that the number of files matched by a parser other
    than ``BrokenParser`` equals ``correct_num_output_files``. '''
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(upload_files.raw_file_object(mainfile).os_path)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    # on failure, the message lists all matches to help find the unexpected one
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])
Markus Scheidgen's avatar
Markus Scheidgen committed
408
409
410
411
412
413
414
415
416
417
418
419
420
421


def parser_in_dir(dir):
    '''
    Walks the given directory and runs the matching parser on every file that has
    'test' in its path and that a parser matches. For each such file, either
    SUCCESS or FAILURE (with the exception and its traceback) is printed.

    Arguments:
        dir: Path of the directory to walk.
    '''
    # the file list is not called ``files`` to avoid shadowing the nomad.files import
    for root, _, file_names in os.walk(dir):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)

            if 'test' not in file_path:
                continue

            parser = match_parser(file_path)
            if parser is None:
                continue

            try:
                backend = parser.run(file_path)
                # check if the result can be dumped
                dump_json(backend.entry_archive.m_to_dict())
                backend.resource.unload()
            except Exception as e:
                # deliberately broad: report the failure and go on with the next file
                print(file_path, parser, 'FAILURE', e)
                import traceback
                traceback.print_exc()
            else:
                print(file_path, parser, 'SUCCESS')


if __name__ == '__main__':
    # Script entry point: runs all matching parsers on the files in the directory
    # given as the only command line argument.
    import sys
    import os

    # Validate explicitly instead of with ``assert``; assert statements are
    # removed under ``python -O`` and would let invalid arguments pass through.
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        sys.exit('One argument with an directory path is required.')

    parser_in_dir(sys.argv[1])