test_parsing.py 16.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files, datamodel
23
from nomad.parsing import parser_dict, match_parser, BrokenParser, BadContextUri, Backend
24

25

26
# Pairs of (parser name, example mainfile) that are used to parametrize the
# parser tests and fixtures in this module.
parser_examples = [
    # NOTE(review): this path starts with 'test/' and not 'tests/'. Unless the
    # file exists, the entry is dropped by the os.path.exists/startswith('tests')
    # filter applied to this list further down -- confirm this is intended.
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    # NOTE(review): fleur, molcas and onetep use the prefix 'parser/' instead of
    # 'parsers/'; presumably this mirrors the names the parsers are registered
    # under in parser_dict -- confirm before "fixing" it.
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out'),
]
71

72
73
# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image).
# An example is kept if its mainfile exists on disk or if its path starts with
# 'tests' (those are kept without checking for existence).
fixed_parser_examples = [
    (parser_name, mainfile_path)
    for parser_name, mainfile_path in parser_examples
    if os.path.exists(mainfile_path) or mainfile_path.startswith('tests')]
parser_examples = fixed_parser_examples
79

80

81
# Number of raw files from tests/data/parsers that test_match expects to be
# matched by a parser that is not a BrokenParser.
correct_num_output_files = 50
82

Daniel Speckhard's avatar
Daniel Speckhard committed
83

84
class TestBackend(object):
    ''' Tests for opening/closing sections and adding values on a ``Backend``. '''

    @pytest.fixture(scope='function')
    def backend(self):
        ''' Provides a new ``Backend('common')`` for each test function. '''
        return Backend('common')

    def test_meta_info(self, no_warn):
        ''' The metainfo environment has to contain ``section_topology``. '''
        from nomad.datamodel.metainfo import m_env
        assert 'section_topology' in m_env.all_definitions_by_name

    def test_section(self, backend, no_warn):
        ''' Opened sections get consecutive indices; values can be added by index. '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        # Two runs are open at the same time; values are addressed via g_index.
        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        ''' Sub section indices are counted per parent section. '''
        backend.openSection('section_run')

        # First system: with one symmetry section.
        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # Second system: without a symmetry section.
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        # Third system: with one symmetry section, its index starts at 0 again.
        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 0]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [0]

    def test_section_override(self, backend, no_warn):
        ''' Test whether we can overwrite values already in the backend.'''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        assert backend.get_value('atom_labels') == expected_value

    def test_two_sections(self, backend, no_warn):
        ''' Two consecutive runs both end up in the backend; the result is JSON serializable. '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1
        backend.addValue('program_name', 't1')
        backend.closeSection('section_run', 1)

        assert backend.get_sections('section_run') == [0, 1]

        # Round trip through JSON text to make sure the dict is serializable.
        archive = json.loads(json.dumps(backend.resource.m_to_dict()))
        assert 'section_run' in archive['EntryArchive']

    def test_subsection(self, backend: Backend, no_warn):
        ''' Methods opened after a second run was opened are added to that second run. '''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        # Opened after both runs are closed; the assertions below expect this
        # method to show up in the second run.
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: Backend, no_warn):
        ''' A section can be opened in an already closed parent via ``parent_index``. '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: Backend, no_warn):
        ''' ``parent_index`` selects the first of two parents, not the latest one. '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        # Open and directly close a second calculation section.
        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert len(run['section_single_configuration_calculation'][0].section_dos) == 1
        assert len(run['section_single_configuration_calculation'][1].section_dos) == 0

    def test_context(self, backend: Backend, no_warn):
        ''' Values added within an opened context are stored in the addressed section. '''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # Contexts can also address nested sections.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: Backend, no_warn):
        ''' Sections opened in two separate contexts of the same run are both kept. '''
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs[0].section_method) == 2

    def test_bad_context(self, backend: Backend, no_warn):
        ''' Opening a context with a malformed URI has to raise ``BadContextUri``. '''
        for bad_uri in ('section_run/0', 'dsfds'):
            # pytest.raises fails the test if no BadContextUri is raised.
            with pytest.raises(BadContextUri):
                backend.openContext(bad_uri)

282
283
284
285
286
287
288
289

def create_reference(data, pretty):
    '''
    Serializes ``data`` to a JSON string.

    With a truthy ``pretty`` the output is indented by two spaces; otherwise
    the most compact separators (``,`` and ``:`` without spaces) are used.
    '''
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


290
def assert_parser_result(backend, error=False):
    '''
    Asserts that the given backend reports a successful parse.

    Arguments:
        backend: Object whose ``status`` unpacks into ``(status, errors)``.
        error: If true, at least one error is expected. Otherwise the errors
            have to be ``None`` or empty.
    '''
    status, errors = backend.status
    assert status == 'ParseSuccess'
    if error:
        # Check for None first, so that missing errors are reported as a
        # regular assertion failure and not as a TypeError from len(None).
        assert errors is not None and len(errors) > 0
    else:
        assert errors is None or len(errors) == 0
297
298


Daniel Speckhard's avatar
Daniel Speckhard committed
299
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''
    Assert working directory has not been changed from parser.

    Arguments:
        previous_wd: The working directory recorded before parsing.
        current_wd: The working directory after parsing.
    '''
    assert previous_wd == current_wd


304
def run_parser(parser_name, mainfile):
    '''
    Runs the parser registered under ``parser_name`` on ``mainfile``.

    The parser's domain is set on the result and test calculation metadata
    (including the parser name) is added via ``add_calculation_info``.

    Returns: The parser result with the added metadata.
    '''
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))
    result.domain = parser.domain
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
309
310


311
@pytest.fixture
def parsed_vasp_example() -> Backend:
    ''' Provides the result of the vasp parser run on the perovskite example. '''
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
315
316


317
@pytest.fixture
def parsed_template_example() -> Backend:
    ''' Provides the result of the template parser run on ``template.json``. '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template.json')


323
@pytest.fixture(scope="session")
def parsed_template_no_system() -> Backend:
    '''
    Provides the result of the template parser run on ``template_no_system.json``.
    The fixture is session scoped, i.e. the file is parsed once per test session.
    '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template_no_system.json')


329
def parse_file(parser_name_and_mainfile) -> Backend:
    '''
    Runs a parser for the given ``(parser_name, mainfile)`` pair.

    Arguments:
        parser_name_and_mainfile: A sequence of exactly two items, the parser
            name and the mainfile path.
    '''
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


334
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> Backend:
    '''
    Provides the parse result for each entry of ``parser_examples``. The test
    ids are built from parser name and mainfile.
    '''
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
339
340


341
def add_calculation_info(backend: Backend, **kwargs) -> Backend:
    '''
    Adds an ``EntryMetadata`` section with fixed test values to the entry
    archive of the given backend.

    Arguments:
        backend: The backend whose ``entry_archive`` receives the metadata.
        **kwargs: Additional metadata values, applied via ``m_update`` after
            the fixed test values are set.

    Returns: The given backend.
    '''
    entry_metadata = backend.entry_archive.m_create(datamodel.EntryMetadata)
    entry_metadata.upload_id = 'test_upload_id'
    entry_metadata.calc_id = 'test_calc_id'
    entry_metadata.calc_hash = 'test_calc_hash'
    entry_metadata.mainfile = 'test/mainfile.txt'
    entry_metadata.m_update(**kwargs)
    return backend
349
350


351
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    ''' Each example has to parse without errors and without changing the cwd. '''
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
358
359


360
361
362
363
364
365
366
367
368
def test_broken_xml_vasp():
    '''
    Parsing the broken vasp xml example has to succeed with reported errors
    and must not change the working directory.
    '''
    wd_before_parsing = os.getcwd()
    backend = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(backend, error=True)
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=os.getcwd())


369
370
371
372
373
374
375
376
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    '''
    Copies the latin-1 example file into ``tests/data/parsers`` for the
    duration of a test and removes the copy afterwards.
    '''
    copied_file = 'tests/data/parsers/latin-1.out'
    copyfile('tests/data/latin-1.out', copied_file)
    yield
    os.remove(copied_file)


def test_match(raw_files, with_latin_1_file, no_warn):
    '''
    Matches all files under ``tests/data/parsers`` against the parsers and
    compares the number of matched mainfiles with ``correct_num_output_files``.
    '''
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    # Matches with a BrokenParser are not counted.
    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(upload_files.raw_file_object(mainfile).os_path)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    # On failure, the message lists all matched mainfiles with their parser.
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join(
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items())
Markus Scheidgen's avatar
Markus Scheidgen committed
390
391
392
393
394
395
396
397
398
399
400
401
402
403


def parser_in_dir(dir):
    '''
    Walks the given directory, runs the matching parser on every file with
    'test' in its path, and prints SUCCESS or FAILURE for each parsed file.

    Arguments:
        dir: The path of the directory to walk.
    '''
    # The file names are bound to file_names rather than files, so that the
    # files module imported at the top of this file is not shadowed.
    for root, _, file_names in os.walk(dir):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)

            if 'test' not in file_path:
                continue

            parser = match_parser(file_path)
            if parser is None:
                continue

            try:
                backend = parser.run(file_path)
                backend.resource.unload()
            except Exception as e:
                # Deliberately broad: a failing file is reported and the walk
                # continues with the remaining files.
                print(file_path, parser, 'FAILURE', e)
            else:
                print(file_path, parser, 'SUCCESS')


if __name__ == '__main__':
    import sys
    import os

    # Validate the command line explicitly; an assert would be stripped when
    # Python runs with -O and the check would silently disappear.
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        sys.exit('One argument with a directory path is required.')

    parser_in_dir(sys.argv[1])