test_parsing.py 15 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files, datamodel
23
from nomad.parsing import BrokenParser, Backend
Markus Scheidgen's avatar
Markus Scheidgen committed
24
from nomad.parsing.parsers import parser_dict, match_parser
25
from nomad.app import dump_json
26

27
# (parser name, mainfile path) pairs used to parametrize the parser tests below.
# NOTE(review): the first entry's path starts with 'test/' rather than 'tests/';
# confirm this is intended.
# NOTE(review): the fleur, molcas and onetep entries use the prefix 'parser/'
# instead of 'parsers/'; left untouched because the names have to match the keys
# of ``parser_dict`` -- confirm against the parser registry.
parser_examples = [
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out'),
]
72

73
74
# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image).
# Entries whose path starts with 'tests' are always kept; any other entry is
# kept only if its mainfile actually exists on disk.
fixed_parser_examples = []
for parser, mainfile in parser_examples:
    # The prefix check comes first so that in-repo examples need no filesystem call.
    if mainfile.startswith('tests') or os.path.exists(mainfile):
        fixed_parser_examples.append((parser, mainfile))
parser_examples = fixed_parser_examples
80

81

82
# Number of raw files that test_match expects to be matched by a parser that is
# not a BrokenParser, after adding 'tests/data/parsers' to the staging upload.
correct_num_output_files = 114
83

Daniel Speckhard's avatar
Daniel Speckhard committed
84

85
class TestBackend(object):
    '''Tests for opening/closing sections and reading values on a ``Backend``.'''

    @pytest.fixture(scope='function')
    def backend(self):
        '''Provide a fresh ``Backend('common')`` for every test function.'''
        return Backend('common')

    def test_meta_info(self, no_warn):
        '''Check that 'section_topology' is among the known metainfo definitions.'''
        from nomad.datamodel.metainfo import m_env
        assert 'section_topology' in m_env.all_definitions_by_name

    def test_section(self, backend, no_warn):
        '''Open three 'section_run' sections and check their indices and values.

        The second and third section are open at the same time, so their
        values are added with an explicit section index.
        '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # Both sections are still open: address them by index.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        '''Check the indices of 'section_symmetry' within three 'section_system'.

        Only the first and the third system get a symmetry section; each of
        those is expected to receive index 0.
        '''
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # Second system stays without a symmetry section.
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 0]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [0]

    def test_section_override(self, backend, no_warn):
        '''Test whether we can overwrite values already in the backend.'''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        # The second call passes override=True and is expected to replace the first value.
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        assert backend.get_value('atom_labels') == expected_value

    def test_two_sections(self, backend, no_warn):
        '''Open and close two consecutive runs and check the JSON-dumped archive.'''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1
        backend.addValue('program_name', 't1')
        backend.closeSection('section_run', 1)

        assert backend.get_sections('section_run') == [0, 1]

        # Round-trip through JSON to make sure the archive is serializable.
        output = StringIO()
        json.dump(backend.resource.m_to_dict(), output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive['EntryArchive']

    def test_subsection(self, backend: Backend, no_warn):
        '''Check how 'section_method' sub sections are distributed over two runs.

        Two methods are opened while only the first run exists, a third one
        after the second run was opened (and both runs were closed again).
        The expected result is two methods in the first and one in the second run.
        '''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: Backend, no_warn):
        '''Open a 'section_dos' with an explicit ``parent_index``.

        The parent calculation section is already closed when the dos section
        is opened; the dos section is still expected to end up inside it.
        '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: Backend, no_warn):
        '''Open a 'section_dos' in the first of two calculation sections.

        A second calculation section is opened and closed before the dos
        section is created with ``parent_index`` pointing at the first one.
        '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        # Second calculation section, opened and immediately closed again.
        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert len(run['section_single_configuration_calculation'][0].section_dos) == 1
        assert len(run['section_single_configuration_calculation'][1].section_dos) == 0
226

227
228
229
230
231
232
233
234

def create_reference(data, pretty):
    '''Serialize ``data`` to a JSON string.

    A truthy ``pretty`` produces output indented by two spaces; otherwise the
    most compact form (no whitespace after ',' and ':') is returned.
    '''
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
@pytest.fixture(scope='function')
def assert_parser_result(caplog):
    '''Fixture that returns a function to check the status of a parser backend.

    The returned ``_assert(backend, error=False)`` always requires the status
    'ParseSuccess'. With ``error=False`` no errors may be reported. With
    ``error=True`` at least one error is required; if the backend reports none,
    the messages of captured log records with level WARNING or above are used
    instead.
    '''
    problem_levels = ['WARNING', 'ERROR', 'CRITICAL']

    def _assert(backend, error=False):
        status, errors = backend.status
        assert status == 'ParseSuccess'

        if not error:
            assert errors is None or len(errors) == 0
            return

        if not errors:
            # Fall back to what was logged during the test call.
            errors = [
                record.msg
                for record in caplog.get_records(when='call')
                if record.levelname in problem_levels]
        assert len(errors) > 0

    return _assert
251
252


Daniel Speckhard's avatar
Daniel Speckhard committed
253
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''Assert working directory has not been changed from parser.

    Arguments:
        previous_wd: The working directory recorded before parsing.
        current_wd: The working directory recorded after parsing.

    Raises:
        AssertionError: If both directories differ.
    '''
    assert previous_wd == current_wd, (
        'working directory changed from %s to %s' % (previous_wd, current_wd))


258
def run_parser(parser_name, mainfile):
    '''Run the parser registered under ``parser_name`` on ``mainfile``.

    The parser is looked up in ``parser_dict``. The parser's domain is copied
    onto the result, and test entry metadata (including the parser name) is
    attached via ``add_calculation_info`` before the result is returned.
    '''
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))

    result.domain = parser.domain
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
264
265


266
@pytest.fixture
def parsed_vasp_example() -> Backend:
    '''Provide the result of running the VASP parser on the perovskite example.'''
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
270
271


272
@pytest.fixture
def parsed_template_example() -> Backend:
    '''Provide the result of running the template parser on template.json.'''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template.json')


278
@pytest.fixture(scope="session")
def parsed_template_no_system() -> Backend:
    '''Provide the template parser result for template_no_system.json.

    Session scoped: the file is parsed once and the result is shared.
    '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template_no_system.json')


284
def parse_file(parser_name_and_mainfile) -> Backend:
    '''Run a parser given as a single ``(parser_name, mainfile)`` pair.'''
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


289
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> Backend:
    '''Provide the parsed result for each entry of ``parser_examples``.

    The fixture is parametrized; test ids have the form '<parser>-<mainfile>'.
    '''
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
294
295


296
def add_calculation_info(backend: Backend, **kwargs) -> Backend:
    '''Attach test entry metadata to the archive of ``backend``.

    Creates an ``EntryMetadata`` section on ``backend.entry_archive``, fills
    it with fixed test values, and applies ``kwargs`` through ``m_update``.

    Returns:
        The same ``backend`` that was passed in.
    '''
    entry_metadata = backend.entry_archive.m_create(datamodel.EntryMetadata)
    entry_metadata.upload_id = 'test_upload_id'
    entry_metadata.calc_id = 'test_calc_id'
    entry_metadata.calc_hash = 'test_calc_hash'
    entry_metadata.mainfile = 'test/mainfile.txt'
    # Applied last, after the fixed values above have been set.
    entry_metadata.m_update(**kwargs)
    return backend
304
305


306
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile, assert_parser_result):
    '''Run every example parser, expect success and an unchanged working directory.'''
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
313
314


315
def test_broken_xml_vasp(assert_parser_result):
    '''Parse a broken VASP xml file and expect reported errors.

    The working directory must be the same before and after parsing.
    '''
    parser_name, mainfile = 'parsers/vasp', 'tests/data/parsers/vasp/broken.xml'
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example, error=True)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())


324
325
326
327
328
329
330
331
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    '''Copy the latin-1 example next to the parser test data for one test.

    The copy is created before the test runs and removed again afterwards.
    '''
    source_path = 'tests/data/latin-1.out'
    target_path = 'tests/data/parsers/latin-1.out'

    copyfile(source_path, target_path)
    yield
    os.remove(target_path)


def test_match(raw_files, with_latin_1_file, no_warn):
    '''Match all files under tests/data/parsers and check the number of matches.

    The files are added to a staging upload. Every raw file for which
    ``match_parser`` returns a parser that is not a ``BrokenParser`` counts as
    a match. The total must equal ``correct_num_output_files``; on failure the
    assertion message lists all matched parser/mainfile pairs.
    '''
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(upload_files.raw_file_object(mainfile).os_path)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])
Markus Scheidgen's avatar
Markus Scheidgen committed
345
346
347
348
349
350
351
352
353
354
355
356
357
358


def parser_in_dir(dir):
    '''Try to parse every matching file below ``dir`` and print the outcome.

    Walks ``dir`` recursively. Files whose path does not contain 'test' are
    skipped, as are files no parser matches. Each remaining file is parsed and
    the resulting archive is dumped to JSON; 'SUCCESS' or 'FAILURE' (plus a
    traceback) is printed per file. Failures are reported but never raised, so
    the walk always continues with the next file.

    Arguments:
        dir: Path of the directory to walk. The name shadows the builtin but
            is kept to leave the signature unchanged.
    '''
    # The third item is named ``file_names`` so that it does not shadow the
    # ``files`` module imported from nomad.
    for root, _, file_names in os.walk(dir):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)

            if 'test' not in file_path:
                continue

            parser = match_parser(file_path)
            if parser is None:
                continue

            try:
                backend = parser.run(file_path)
                # check if the result can be dumped
                dump_json(backend.entry_archive.m_to_dict())
                backend.resource.unload()
            except Exception as e:
                # Deliberately broad: this is a best-effort survey tool.
                print(file_path, parser, 'FAILURE', e)
                import traceback
                traceback.print_exc()
            else:
                print(file_path, parser, 'SUCCESS')


if __name__ == '__main__':
    # Script entry point: parse all test files below the directory given as the
    # only command line argument.
    import sys
    import os

    # Validate explicitly instead of using ``assert``: assertions are stripped
    # under ``python -O``, which would let an invalid invocation fall through.
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        sys.exit('One argument with an directory path is required.')

    parser_in_dir(sys.argv[1])