test_parsing.py 16.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files, datamodel
23
from nomad.parsing import parser_dict, match_parser, BrokenParser, BadContextUri, Backend
24
from nomad.app import dump_json
25
from nomad.metainfo import MSection
26

27
# Pairs of (parser name, example mainfile) used to parametrize the parser tests.
parser_examples = [
    # NOTE(review): this path starts with 'test/' rather than 'tests/', so the
    # filter below only keeps it if the path exists -- confirm this is intended.
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    # NOTE(review): the 'parser/' prefix (instead of 'parsers/') on fleur, molcas
    # and onetep presumably mirrors the registered parser names -- verify against
    # parser_dict before "fixing" it.
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]

# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image)
fixed_parser_examples = []
for parser, mainfile in parser_examples:
    # Keep an example if its mainfile is present, or if it lives below 'tests'
    # (those are kept without an existence check).
    if os.path.exists(mainfile) or mainfile.startswith('tests'):
        fixed_parser_examples.append((parser, mainfile))
parser_examples = fixed_parser_examples


# Number of matched mainfiles that test_match expects for 'tests/data/parsers'.
correct_num_output_files = 50
83

Daniel Speckhard's avatar
Daniel Speckhard committed
84

85
class TestBackend:
    '''
    Tests for opening and closing sections, adding values, and addressing
    sections via context URIs on a :class:`Backend`.
    '''

    @pytest.fixture(scope='function')
    def backend(self):
        ''' Provide a fresh ``Backend('common')`` for each test function. '''
        return Backend('common')

    def test_meta_info(self, no_warn):
        ''' The metainfo environment is expected to know 'section_topology'. '''
        from nomad.datamodel.metainfo import m_env
        assert 'section_topology' in m_env.all_definitions_by_name

    def test_section(self, backend, no_warn):
        '''
        Repeatedly opened 'section_run' sections get the indices 0, 1, 2 and
        values can be added to a specific section by passing its index.
        '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        # Two sections of the same kind are open at the same time from here on.
        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        '''
        'section_symmetry' indices are expected to restart at 0 within each
        parent 'section_system', and can be queried per parent index.
        '''
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # Second system intentionally has no symmetry sub section.
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 0]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [0]

    def test_section_override(self, backend, no_warn):
        ''' Test whether we can overwrite values already in the backend. '''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        assert backend.get_value('atom_labels') == expected_value

    def test_two_sections(self, backend, no_warn):
        '''
        Two consecutively opened and closed runs are both recorded, and the
        backend's resource can be serialized to JSON with a 'section_run' entry.
        '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1
        backend.addValue('program_name', 't1')
        backend.closeSection('section_run', 1)

        assert backend.get_sections('section_run') == [0, 1]

        # Round trip through JSON text to check that the dict is serializable.
        output = StringIO()
        json.dump(backend.resource.m_to_dict(), output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive['EntryArchive']

    def test_subsection(self, backend: Backend, no_warn):
        '''
        'section_method' sections are expected to be attached to the most
        recently opened run, even after that run has been closed.
        '''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        # Opened after both runs were closed; expected to land in the second run.
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: Backend, no_warn):
        '''
        A 'section_dos' opened with ``parent_index`` is attached to that
        calculation section, even though the parent was already closed.
        '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: Backend, no_warn):
        '''
        With two calculation sections present, a 'section_dos' opened with
        ``parent_index`` of the first one is attached to the first, not to the
        most recently created one.
        '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        # A second calculation section that is opened and closed right away.
        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert len(run['section_single_configuration_calculation'][0].section_dos) == 1
        assert len(run['section_single_configuration_calculation'][1].section_dos) == 0

    def test_context(self, backend: Backend, no_warn):
        '''
        Values added inside an opened context are expected to end up in the
        section addressed by the context URI.
        '''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # Nested URIs only need to open and close without raising.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: Backend, no_warn):
        '''
        Sub sections opened in two separate contexts of the same run are both
        expected to be added to that run.
        '''
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs[0].section_method) == 2

    def test_bad_context(self, backend: Backend, no_warn):
        '''
        Context URIs that differ from the '/section/index' form used elsewhere
        in this class are expected to raise :class:`BadContextUri`.
        '''
        # Same as a valid URI, but without the leading slash.
        with pytest.raises(BadContextUri):
            backend.openContext('section_run/0')

        with pytest.raises(BadContextUri):
            backend.openContext('dsfds')

283
284
285
286
287
288
289
290

def create_reference(data, pretty):
    '''
    Serialize ``data`` to a JSON string.

    A truthy ``pretty`` yields output indented by two spaces; otherwise the
    most compact form (no whitespace after ',' and ':') is produced.
    '''
    dump_options = dict(indent=2) if pretty else dict(separators=(',', ':'))
    return json.dumps(data, **dump_options)


291
def assert_parser_result(backend, error=False):
    '''
    Assert that ``backend.status`` reports a successful parse.

    Arguments:
        backend: Object whose ``status`` attribute unpacks into
            ``(status, errors)``.
        error: If true, at least one error must have been reported; otherwise
            ``errors`` must be ``None`` or empty.

    Raises:
        AssertionError: If the status is not 'ParseSuccess' or the errors do
            not match the expectation given by ``error``.
    '''
    status, errors = backend.status
    assert status == 'ParseSuccess'
    if error:
        # Check for None first, so that missing errors fail as an assertion
        # instead of a TypeError from len(None).
        assert errors is not None and len(errors) > 0
    else:
        assert errors is None or len(errors) == 0
298
299


Daniel Speckhard's avatar
Daniel Speckhard committed
300
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''
    Assert working directory has not been changed from parser.

    Arguments:
        previous_wd: Working directory recorded before the parser ran.
        current_wd: Working directory after the parser ran.

    Raises:
        AssertionError: If both values differ.
    '''
    assert previous_wd == current_wd


305
def run_parser(parser_name, mainfile):
    '''
    Run the parser registered as ``parser_name`` on ``mainfile``.

    Arguments:
        parser_name: Key of the parser in ``parser_dict``.
        mainfile: Path of the file to parse.

    Returns:
        The parser result as a backend, with ``domain`` set and the test entry
        metadata of :func:`add_calculation_info` attached.
    '''
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))

    if isinstance(result, MSection):
        # The parser returned a plain metainfo section instead of a backend:
        # wrap it in a new Backend so callers always get the same interface.
        backend = Backend(parser._metainfo_env, parser.domain)
        root_section = datamodel.domains[parser.domain]['root_section']
        setattr(backend.entry_archive, root_section, result)
        backend.resource.add(result)
        result = backend

    result.domain = parser.domain
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
316
317


318
@pytest.fixture
def parsed_vasp_example() -> Backend:
    ''' Fixture with the parsed result of the VASP perovskite.xml example. '''
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
322
323


324
@pytest.fixture
def parsed_template_example() -> Backend:
    ''' Fixture with the template parser's result for template.json. '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template.json')


330
@pytest.fixture(scope='session')
def parsed_template_no_system() -> Backend:
    '''
    Session-scoped fixture with the template parser's result for
    template_no_system.json.
    '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template_no_system.json')


336
def parse_file(parser_name_and_mainfile) -> Backend:
    '''
    Run a parser for a ``(parser_name, mainfile)`` pair.

    The pair is unpacked and passed on to :func:`run_parser`.
    '''
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


341
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> Backend:
    '''
    Fixture parametrized over all ``parser_examples``; provides the parsed
    result of the respective ``(parser_name, mainfile)`` pair.
    '''
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
346
347


348
def add_calculation_info(backend: Backend, **kwargs) -> Backend:
    '''
    Create an ``EntryMetadata`` section with fixed test values in the entry
    archive of ``backend``.

    Arguments:
        backend: The backend whose ``entry_archive`` receives the metadata.
        **kwargs: Additional values applied to the metadata via ``m_update``
            after the fixed test values are set.

    Returns:
        The given ``backend``.
    '''
    entry_metadata = backend.entry_archive.m_create(datamodel.EntryMetadata)
    entry_metadata.upload_id = 'test_upload_id'
    entry_metadata.calc_id = 'test_calc_id'
    entry_metadata.calc_hash = 'test_calc_hash'
    entry_metadata.mainfile = 'test/mainfile.txt'
    entry_metadata.m_update(**kwargs)
    return backend
356
357


358
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    '''
    Each example must parse successfully without errors and without the
    parser changing the current working directory.
    '''
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
365
366


367
368
369
370
371
372
373
374
375
def test_broken_xml_vasp():
    '''
    The broken VASP XML example must still yield a successful parse status,
    report at least one error, and leave the working directory untouched.
    '''
    wd_before_parsing = os.getcwd()
    broken_result = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(broken_result, error=True)
    wd_after_parsing = os.getcwd()
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=wd_after_parsing)


376
377
378
379
380
381
382
383
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    '''
    Copy the latin-1 example into the parsers test data directory for the
    duration of a test and remove the copy afterwards.
    '''
    source_path = 'tests/data/latin-1.out'
    target_path = 'tests/data/parsers/latin-1.out'
    copyfile(source_path, target_path)
    yield
    os.remove(target_path)


def test_match(raw_files, with_latin_1_file, no_warn):
    '''
    Match a parser against every raw file of 'tests/data/parsers' and compare
    the number of matched mainfiles with ``correct_num_output_files``.

    Files without a matching parser and files matched by a ``BrokenParser``
    are not counted. On failure, the assertion message lists all matches.
    '''
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(upload_files.raw_file_object(mainfile).os_path)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])
Markus Scheidgen's avatar
Markus Scheidgen committed
397
398
399
400
401
402
403
404
405
406
407
408
409
410


def parser_in_dir(dir):
    '''
    Run the matching parser on the files below ``dir`` and print the outcome.

    Only files with 'test' somewhere in their path are considered, and files
    without a matching parser are skipped. For each remaining file either a
    'SUCCESS' or a 'FAILURE' line is printed; failures also print a traceback.

    Arguments:
        dir: Path of the directory to walk recursively.
    '''
    import traceback

    # Named ``file_names`` rather than ``files`` so that the loop variable does
    # not shadow the module-level ``files`` import.
    for root, _, file_names in os.walk(dir):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)

            if 'test' not in file_path:
                continue

            parser = match_parser(file_path)
            if parser is None:
                continue

            try:
                backend = parser.run(file_path)
                # check if the result can be dumped
                dump_json(backend.entry_archive.m_to_dict())
                backend.resource.unload()
            except Exception as e:
                # Deliberately broad: a failing file is reported and the walk
                # continues with the remaining files.
                print(file_path, parser, 'FAILURE', e)
                traceback.print_exc()
            else:
                print(file_path, parser, 'SUCCESS')


if __name__ == '__main__':
    # Script usage: pass one directory; every parsable test file below it is
    # run through its matching parser (see parser_in_dir).
    import sys
    import os

    # Validate explicitly instead of with ``assert``: assertions are stripped
    # when Python runs with -O, which would let invalid arguments through.
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        sys.exit('One argument with a directory path is required.')

    parser_in_dir(sys.argv[1])