test_parsing.py 15.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files, datamodel
23
from nomad.parsing import BrokenParser, Backend
Markus Scheidgen's avatar
Markus Scheidgen committed
24
from nomad.parsing.parsers import parser_dict, match_parser
25
from nomad.app import dump_json
26
from nomad.metainfo import MSection
27

28
# Pairs of (parser name, example mainfile path) used to parametrize the parser
# tests in this module.
parser_examples = [
    # NOTE(review): this path starts with 'test/' rather than 'tests/'; unless
    # the path exists, the availability filter applied to this list drops the
    # example - confirm that this is intended.
    ('parsers/random', 'test/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),  # 70Mb file 2big4git
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    # NOTE(review): the fleur, molcas and onetep entries use the 'parser/'
    # prefix instead of 'parsers/'; the names are used as keys into
    # parser_dict, so verify them against the parser registry before changing.
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out')
]
73

74
75
# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image).
# An example is kept if its mainfile exists or if its path starts with 'tests'.
fixed_parser_examples = [
    (parser_name, mainfile)
    for parser_name, mainfile in parser_examples
    if os.path.exists(mainfile) or mainfile.startswith('tests')]
parser_examples = fixed_parser_examples
81

82

83
# Number of raw files for which test_match expects match_parser to return a
# parser that is not a BrokenParser, after adding 'tests/data/parsers' to a
# staging upload.
correct_num_output_files = 114
84

Daniel Speckhard's avatar
Daniel Speckhard committed
85

86
class TestBackend(object):
    ''' Tests for opening/closing sections and adding values on a :class:`Backend`. '''

    @pytest.fixture(scope='function')
    def backend(self):
        ''' Provides a new ``Backend('common')`` instance for each test function. '''
        return Backend('common')

    def test_meta_info(self, no_warn):
        ''' Checks that ``section_topology`` is a known metainfo definition. '''
        from nomad.datamodel.metainfo import m_env
        assert 'section_topology' in m_env.all_definitions_by_name

    def test_section(self, backend, no_warn):
        '''
        Opens three ``section_run`` sections (the last two are open at the same
        time), adds ``program_name`` values by explicit index, and checks the
        returned indices and stored values.
        '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # both sections are still open, values are addressed via their index
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(0, 3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        '''
        Creates three ``section_system`` sections in one run, where only the
        first and the third contain a ``section_symmetry``, and checks the
        indices reported by ``get_sections`` with and without parent index.
        '''
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # the second system has no symmetry section
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 0]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [0]

    def test_section_override(self, backend, no_warn):
        ''' Test whether we can overwrite values already in the backend.'''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        assert backend.get_value('atom_labels') == expected_value

    def test_two_sections(self, backend, no_warn):
        '''
        Opens and closes two ``section_run`` sections one after the other and
        checks that the resulting archive can be written as JSON and read back.
        '''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1
        backend.addValue('program_name', 't1')
        backend.closeSection('section_run', 1)

        assert backend.get_sections('section_run') == [0, 1]

        output = StringIO()
        json.dump(backend.resource.m_to_dict(), output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive['EntryArchive']

    def test_subsection(self, backend: Backend, no_warn):
        '''
        Adds ``section_method`` sub sections before and after a second
        ``section_run`` is opened and checks how many methods each run holds.
        '''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: Backend, no_warn):
        '''
        Opens a ``section_dos`` with an explicit ``parent_index`` after the
        parent calculation section was closed and checks that it is found
        under that calculation.
        '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: Backend, no_warn):
        '''
        Like :func:`test_open_section_of_specific_parent`, but a second
        calculation section is created before the ``section_dos`` is opened
        with the index of the first one as ``parent_index``.
        '''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        # open and directly close a second calculation section
        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert len(run['section_single_configuration_calculation'][0].section_dos) == 1
        assert len(run['section_single_configuration_calculation'][1].section_dos) == 0
227

228
229
230
231
232
233
234
235

def create_reference(data, pretty):
    '''
    Serializes ``data`` into a JSON string.

    A truthy ``pretty`` produces output indented by two spaces, otherwise the
    output is written with ``,`` and ``:`` separators without any whitespace.
    '''
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
@pytest.fixture(scope='function')
def assert_parser_result(caplog):
    '''
    Provides a function ``_assert(backend, error=False)`` that checks the
    status of a parser result.

    The status has to be ``'ParseSuccess'`` in any case. With ``error=False``
    no errors must have been reported by the backend. With ``error=True`` at
    least one error is required; if the backend reports none, the messages of
    log records with level WARNING, ERROR or CRITICAL captured during the test
    call are used instead.
    '''
    problem_levels = ('WARNING', 'ERROR', 'CRITICAL')

    def _assert(backend, error=False):
        status, errors = backend.status
        assert status == 'ParseSuccess'

        if not error:
            assert errors is None or len(errors) == 0
            return

        if not errors:
            # the backend reported nothing, fall back to the captured log records
            errors = [
                record.msg
                for record in caplog.get_records(when='call')
                if record.levelname in problem_levels]
        assert len(errors) > 0

    return _assert
252
253


Daniel Speckhard's avatar
Daniel Speckhard committed
254
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''
    Assert working directory has not been changed from parser.

    Arguments:
        previous_wd: The working directory recorded before the parser ran.
        current_wd: The working directory after the parser ran.

    Raises:
        AssertionError: If both directories differ.
    '''
    assert previous_wd == current_wd, (
        'working directory changed from %s to %s' % (previous_wd, current_wd))


259
def run_parser(parser_name, mainfile):
    '''
    Runs the parser registered under ``parser_name`` in ``parser_dict`` on the
    given ``mainfile`` and returns the result with test entry metadata added.

    If the parser returns a metainfo section (:class:`MSection`) instead of a
    backend, the section is placed into a newly created :class:`Backend`, which
    is then used as the result.
    '''
    parser = parser_dict[parser_name]
    result = parser.run(mainfile, logger=utils.get_logger(__name__))

    if isinstance(result, MSection):
        backend = Backend(parser._metainfo_env, parser.domain)
        # the sub section of EntryArchive is looked up by the name of the
        # definition of the returned root section
        root_section = result.m_def.name
        section_def = getattr(datamodel.EntryArchive, root_section)
        backend.entry_archive.m_add_sub_section(section_def, result)
        backend.resource.add(result)
        result = backend

    result.domain = parser.domain
    return add_calculation_info(result, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
271
272


273
@pytest.fixture
def parsed_vasp_example() -> Backend:
    ''' Provides the result of the vasp parser run on the perovskite.xml example. '''
    return run_parser(
        'parsers/vasp', 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml')
Markus Scheidgen's avatar
Markus Scheidgen committed
277
278


279
@pytest.fixture
def parsed_template_example() -> Backend:
    ''' Provides the result of the template parser run on template.json. '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template.json')


285
@pytest.fixture(scope="session")
def parsed_template_no_system() -> Backend:
    '''
    Provides the result of the template parser run on template_no_system.json.
    The fixture is session scoped, i.e. the file is only parsed once.
    '''
    return run_parser(
        'parsers/template', 'tests/data/parsers/template_no_system.json')


291
def parse_file(parser_name_and_mainfile) -> Backend:
    '''
    Runs a parser on a mainfile via :func:`run_parser`.

    Arguments:
        parser_name_and_mainfile: A pair of parser name and mainfile path.
    '''
    parser_name, mainfile = parser_name_and_mainfile
    return run_parser(parser_name, mainfile)


296
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> Backend:
    '''
    Provides the parser result for each (parser name, mainfile) pair in
    ``parser_examples``; the test id is built from both values.
    '''
    parser_name, mainfile = request.param
    return run_parser(parser_name, mainfile)
301
302


303
def add_calculation_info(backend: Backend, **kwargs) -> Backend:
    '''
    Creates an :class:`datamodel.EntryMetadata` section in the entry archive of
    the given backend and fills it with fixed test values.

    Arguments:
        backend: The backend whose entry archive receives the metadata.
        **kwargs: Additional metadata values; they are applied last via
            ``m_update``.

    Returns:
        The given backend.
    '''
    entry_metadata = backend.entry_archive.m_create(datamodel.EntryMetadata)
    entry_metadata.upload_id = 'test_upload_id'
    entry_metadata.calc_id = 'test_calc_id'
    entry_metadata.calc_hash = 'test_calc_hash'
    entry_metadata.mainfile = 'test/mainfile.txt'
    entry_metadata.m_update(**kwargs)
    return backend
311
312


313
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile, assert_parser_result):
    '''
    Runs each example parser on its mainfile, checks that parsing succeeded
    without errors, and that the working directory was left unchanged.
    '''
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
320
321


322
def test_broken_xml_vasp(assert_parser_result):
    '''
    Runs the vasp parser on a broken XML file and checks that errors are
    reported while the working directory is left unchanged.
    '''
    parser_name, mainfile = 'parsers/vasp', 'tests/data/parsers/vasp/broken.xml'
    previous_wd = os.getcwd()  # Get Working directory before parsing.
    parsed_example = run_parser(parser_name, mainfile)
    assert_parser_result(parsed_example, error=True)
    # Check that cwd has not changed.
    assert_parser_dir_unchanged(previous_wd, current_wd=os.getcwd())


331
332
333
334
335
336
337
338
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    '''
    Copies ``tests/data/latin-1.out`` into ``tests/data/parsers`` for the
    duration of a test and removes the copy afterwards.
    '''
    copied_file = 'tests/data/parsers/latin-1.out'
    copyfile('tests/data/latin-1.out', copied_file)
    yield
    os.remove(copied_file)


def test_match(raw_files, with_latin_1_file, no_warn):
    '''
    Adds all files in ``tests/data/parsers`` to a staging upload and checks
    that the number of files matched by a parser other than
    :class:`BrokenParser` equals ``correct_num_output_files``.
    '''
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for mainfile in upload_files.raw_file_manifest():
        parser = match_parser(upload_files.raw_file_object(mainfile).os_path)
        if parser is not None and not isinstance(parser, BrokenParser):
            matched_mainfiles[mainfile] = parser

    # on failure, the assertion message lists all matched parser/mainfile pairs
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join([
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items()])
Markus Scheidgen's avatar
Markus Scheidgen committed
352
353
354
355
356
357
358
359
360
361
362
363
364
365


def parser_in_dir(dir):
    '''
    Walks the given directory, runs the matching parser on every file that has
    ``test`` somewhere in its path, and prints one SUCCESS or FAILURE line per
    matched file. Failures also print the traceback; they do not stop the walk.

    Arguments:
        dir: Path of the directory to walk.
    '''
    import traceback

    # the file list is not called `files`, because that would shadow the
    # `files` module imported from nomad
    for root, _, file_names in os.walk(dir):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)

            if 'test' not in file_path:
                continue

            parser = match_parser(file_path)
            if parser is None:
                continue

            try:
                backend = parser.run(file_path)
                # check if the result can be dumped
                dump_json(backend.entry_archive.m_to_dict())
                backend.resource.unload()
            except Exception as e:
                # deliberately broad: one failing file must not abort the run
                print(file_path, parser, 'FAILURE', e)
                traceback.print_exc()
            else:
                print(file_path, parser, 'SUCCESS')


if __name__ == '__main__':
    # Script usage: run all matching parsers on the files of the directory
    # given as the only command line argument.
    import sys

    # An explicit check instead of `assert`, because assertions are removed
    # when Python runs with -O.
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        sys.exit('One argument with a directory path is required.')

    parser_in_dir(sys.argv[1])