test_parsing.py 16.9 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
16
from io import StringIO
import json
17
import numpy as np
18
import pytest
19
import os
20
from shutil import copyfile
21

22
from nomad import utils, files, datamodel
Markus Scheidgen's avatar
Markus Scheidgen committed
23
24
from nomad.parsing import BrokenParser, BadContextUri, Backend
from nomad.parsing.parsers import parser_dict, match_parser
25
from nomad.app import dump_json
26
from nomad.metainfo import MSection
27

28
# Pairs of (parser name, example mainfile) that are run through ``run_parser``.
# NOTE(review): the entries for fleur, molcas, and onetep use a singular
# 'parser/' prefix, unlike all other entries — presumably this mirrors the
# names the parsers are registered under; confirm against ``parser_dict``.
parser_examples = [
    # The path used to start with 'test/' (missing 's'), which made the filter
    # below silently drop this example unless a 'test' directory existed.
    ('parsers/random', 'tests/data/parsers/random_0'),
    ('parsers/template', 'tests/data/parsers/template.json'),
    ('parsers/eels', 'tests/data/parsers/eels.txt'),
    ('parsers/aptfim', 'tests/data/parsers/aptfim.aptfim'),
    ('parsers/mpes', 'tests/data/parsers/mpes.meta'),
    ('parsers/exciting', 'tests/data/parsers/exciting/Ag/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/GW/INFO.OUT'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_nitrogen'),
    ('parsers/exciting', 'tests/data/parsers/exciting/nitrogen/INFO.OUT_carbon'),
    ('parsers/vasp', 'tests/data/parsers/vasp/vasp.xml'),
    ('parsers/vasp', 'tests/data/parsers/vasp_compressed/vasp.xml.gz'),
    ('parsers/vaspoutcar', 'tests/data/parsers/vasp_outcar/OUTCAR'),
    ('parsers/fhi-aims', 'tests/data/parsers/fhi-aims/aims.out'),
    ('parsers/cp2k', 'tests/data/parsers/cp2k/si_bulk8.out'),
    ('parsers/crystal', 'tests/data/parsers/crystal/si.out'),
    ('parsers/cpmd', 'tests/data/parsers/cpmd/geo_output.out'),
    ('parsers/nwchem', 'tests/data/parsers/nwchem/single_point/output.out'),
    ('parsers/bigdft', 'tests/data/parsers/bigdft/n2_output.out'),
    ('parsers/wien2k', 'tests/data/parsers/wien2k/AlN/AlN_ZB.scf'),
    ('parsers/band', 'tests/data/parsers/band_adf.out'),
    ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
    ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
    ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
    ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
    ('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep'),
    # ('parsers/dl-poly', 'tests/data/parsers/dl-poly/OUTPUT'),  # timeout on Matid System Classification
    ('parsers/lib-atoms', 'tests/data/parsers/lib-atoms/gp.xml'),
    ('parsers/octopus', 'tests/data/parsers/octopus/stdout.txt'),
    ('parsers/phonopy', 'tests/data/parsers/phonopy/phonopy-FHI-aims-displacement-01/control.in'),
    ('parsers/gpaw', 'tests/data/parsers/gpaw/Fe2.gpw'),
    ('parsers/gpaw2', 'tests/data/parsers/gpaw2/H2_lcao.gpw2'),
    ('parsers/atk', 'tests/data/parsers/atk/Si2.nc'),
    ('parsers/gulp', 'tests/data/parsers/gulp/example6.got'),
    ('parsers/siesta', 'tests/data/parsers/siesta/Fe/out'),
    ('parsers/elk', 'tests/data/parsers/elk/Al/INFO.OUT'),
    # External mainfile (70 MB, too big for git); only used where it exists.
    ('parsers/elastic', 'dependencies/parsers/elastic/test/examples/2nd/INFO_ElaStic'),
    ('parsers/turbomole', 'tests/data/parsers/turbomole/acrolein.out'),
    ('parsers/gamess', 'tests/data/parsers/gamess/exam01.out'),
    ('parsers/dmol', 'tests/data/parsers/dmol3/h2o.outmol'),
    ('parser/fleur', 'tests/data/parsers/fleur/out'),
    ('parser/molcas', 'tests/data/parsers/molcas/test000.input.out'),
    ('parsers/qbox', 'tests/data/parsers/qbox/01_h2ogs.r'),
    ('parser/onetep', 'tests/data/parsers/onetep/single_point_2.out'),
]

# We need to remove some cases with external mainfiles, which might not exist
# in all testing environments (e.g. in the nomad docker image). Everything
# below 'tests' is always kept. A comprehension is used so that no loop
# variables leak into the module namespace.
fixed_parser_examples = [
    (parser_name, mainfile)
    for parser_name, mainfile in parser_examples
    if os.path.exists(mainfile) or mainfile.startswith('tests')]
parser_examples = fixed_parser_examples

# Number of raw files from tests/data/parsers that test_match expects to be
# matched by a (non broken) parser.
correct_num_output_files = 114
84

Daniel Speckhard's avatar
Daniel Speckhard committed
85

86
class TestBackend(object):
    '''Tests for opening/closing sections, adding values, and contexts on :class:`Backend`.'''

    @pytest.fixture(scope='function')
    def backend(self):
        '''Provides a fresh ``Backend('common')`` for every test function.'''
        return Backend('common')

    def test_meta_info(self, no_warn):
        '''The metainfo environment contains the ``section_topology`` definition.'''
        from nomad.datamodel.metainfo import m_env
        assert 'section_topology' in m_env.all_definitions_by_name

    def test_section(self, backend, no_warn):
        '''Opened runs get consecutive indices; values can be added by explicit index.'''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1

        g_index = backend.openSection('section_run')
        assert g_index == 2

        # Two runs are open at the same time, values are addressed by index.
        backend.addValue('program_name', 't1', 1)
        backend.addValue('program_name', 't2', 2)

        backend.closeSection('section_run', 1)
        backend.closeSection('section_run', 2)

        assert backend.get_sections('section_run') == [0, 1, 2]
        for i in range(3):
            assert backend.get_value('program_name', i) == 't%d' % i

    def test_sub_section(self, backend, no_warn):
        '''Sub section indices are reported per parent section.'''
        backend.openSection('section_run')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        # The second system deliberately gets no symmetry sub section.
        backend.openNonOverlappingSection('section_system')
        backend.closeNonOverlappingSection('section_system')

        backend.openNonOverlappingSection('section_system')
        assert backend.openSection('section_symmetry') == 0
        backend.closeSection('section_symmetry', 0)
        backend.closeNonOverlappingSection('section_system')

        assert backend.get_sections('section_system') == [0, 1, 2]
        assert backend.get_sections('section_symmetry') == [0, 0]
        assert backend.get_sections('section_symmetry', 0) == [0]
        assert backend.get_sections('section_symmetry', 1) == []
        assert backend.get_sections('section_symmetry', 2) == [0]

    def test_section_override(self, backend, no_warn):
        ''' Test whether we can overwrite values already in the backend.'''
        expected_value = ['Cl', 'Zn']
        backend.openSection('section_run')
        backend.openSection('section_system')
        backend.addArrayValues('atom_labels', np.array(['Al', 'Zn']))
        backend.addArrayValues('atom_labels', np.array(expected_value), override=True)
        backend.closeSection('section_system', 0)

        backend.closeSection('section_run', 0)
        assert backend.get_value('atom_labels') == expected_value

    def test_two_sections(self, backend, no_warn):
        '''Two consecutive runs are kept and the resource can be dumped as JSON.'''
        g_index = backend.openSection('section_run')
        assert g_index == 0
        backend.addValue('program_name', 't0')
        backend.closeSection('section_run', 0)

        g_index = backend.openSection('section_run')
        assert g_index == 1
        backend.addValue('program_name', 't1')
        backend.closeSection('section_run', 1)

        assert backend.get_sections('section_run') == [0, 1]

        # Round trip through JSON to check that the data is serializable.
        output = StringIO()
        json.dump(backend.resource.m_to_dict(), output)
        archive = json.loads(output.getvalue())
        assert 'section_run' in archive['EntryArchive']

    def test_subsection(self, backend: Backend, no_warn):
        '''Methods opened without explicit parent are distributed over two runs.'''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', 0)
        backend.closeSection('section_run', 1)

        # Opened after both runs were closed; expected in the second run.
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 2
        assert len(runs[0]['section_method']) == 2
        assert len(runs[1]['section_method']) == 1

    def test_open_section_of_specific_parent(self, backend: Backend, no_warn):
        '''``parent_index`` allows to add a sub section to an already closed parent.'''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)
        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 1
        assert 'section_dos' in run['section_single_configuration_calculation'][0]
        assert len(run['section_single_configuration_calculation'][0]['section_dos']) == 1

    def test_open_section_of_specific_parent2(self, backend: Backend, no_warn):
        '''``parent_index`` selects the first of two parents, not the latest one.'''
        run_index = backend.openSection('section_run')
        scc_index = backend.openSection('section_single_configuration_calculation')
        backend.closeSection('section_single_configuration_calculation', scc_index)

        # A second, empty calculation that must not receive the dos section.
        backend.closeSection(
            'section_single_configuration_calculation',
            backend.openSection('section_single_configuration_calculation'))

        dos_index = backend.openSection('section_dos', parent_index=scc_index)
        backend.closeSection('section_dos', dos_index)
        backend.closeSection('section_run', run_index)

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs) == 1
        run = runs[0]
        assert len(run['section_single_configuration_calculation']) == 2
        assert len(run['section_single_configuration_calculation'][0].section_dos) == 1
        assert len(run['section_single_configuration_calculation'][1].section_dos) == 0

    def test_context(self, backend: Backend, no_warn):
        '''Values added within an opened context end up in the addressed run.'''
        backend.openSection('section_run')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeSection('section_run', -1)

        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.addValue('program_name', 't1')
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/1')
        backend.addValue('program_name', 't2')
        backend.closeContext('/section_run/1')

        # Nested context URIs can be opened and closed as well.
        backend.openContext('/section_run/0/section_method/0')
        backend.closeContext('/section_run/0/section_method/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert runs[0]['program_name'] == 't1'
        assert runs[1]['program_name'] == 't2'

    def test_multi_context(self, backend: Backend, no_warn):
        '''Opening the same context twice adds sub sections to the same run.'''
        backend.openSection('section_run')
        backend.closeSection('section_run', -1)

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        backend.openContext('/section_run/0')
        backend.openSection('section_method')
        backend.closeSection('section_method', -1)
        backend.closeContext('/section_run/0')

        from nomad.datamodel.metainfo.public import section_run
        runs = backend.resource.all(section_run)
        assert len(runs[0].section_method) == 2

    def test_bad_context(self, backend: Backend, no_warn):
        '''Context URIs without leading slash or without index raise ``BadContextUri``.'''
        # pytest.raises fails the test if no exception is raised, which
        # replaces the former try / assert False / except pattern.
        for bad_uri in ('section_run/0', 'dsfds'):
            with pytest.raises(BadContextUri):
                backend.openContext(bad_uri)

284
285
286
287
288
289
290
291

def create_reference(data, pretty):
    '''
    Serializes ``data`` to a JSON string.

    Arguments:
        data: Any JSON serializable object.
        pretty: If truthy, the output is indented by two spaces; otherwise the
            most compact form without any whitespace is produced.
    '''
    dump_options = {'indent': 2} if pretty else {'separators': (',', ':')}
    return json.dumps(data, **dump_options)


292
def assert_parser_result(backend, error=False):
    '''
    Asserts that the given backend reports a successful parse.

    Arguments:
        backend: Object whose ``status`` is a ``(status, errors)`` pair.
        error: If True, at least one error is expected; otherwise the errors
            must be None or empty.
    '''
    parse_status, parse_errors = backend.status
    assert parse_status == 'ParseSuccess'

    if not error:
        assert parse_errors is None or len(parse_errors) == 0
        return

    assert len(parse_errors) > 0
299
300


Daniel Speckhard's avatar
Daniel Speckhard committed
301
def assert_parser_dir_unchanged(previous_wd, current_wd):
    '''Asserts that the working directory after parsing equals the one before.'''
    wd_unchanged = previous_wd == current_wd
    assert wd_unchanged


306
def run_parser(parser_name, mainfile):
    '''
    Runs the parser registered under ``parser_name`` on ``mainfile``.

    Arguments:
        parser_name: Key of the parser in ``parser_dict``.
        mainfile: Path of the file that is handed to the parser.

    Returns: The result of ``add_calculation_info`` for the parser's backend.
    '''
    parser = parser_dict[parser_name]
    parse_logger = utils.get_logger(__name__)
    outcome = parser.run(mainfile, logger=parse_logger)

    if isinstance(outcome, MSection):
        # The parser returned a plain metainfo section instead of a backend:
        # wrap it into a new Backend and attach it as the domain's root section.
        wrapper = Backend(parser._metainfo_env, parser.domain)
        root_section_name = datamodel.domains[parser.domain]['root_section']
        setattr(wrapper.entry_archive, root_section_name, outcome)
        wrapper.resource.add(outcome)
        outcome = wrapper

    outcome.domain = parser.domain
    return add_calculation_info(outcome, parser_name=parser_name)
Markus Scheidgen's avatar
Markus Scheidgen committed
317
318


319
@pytest.fixture
def parsed_vasp_example() -> Backend:
    '''Fixture with the parse result of the perovskite example of the vasp parser.'''
    vasp_mainfile = 'dependencies/parsers/vasp/test/examples/xml/perovskite.xml'
    return run_parser('parsers/vasp', vasp_mainfile)
Markus Scheidgen's avatar
Markus Scheidgen committed
323
324


325
@pytest.fixture
def parsed_template_example() -> Backend:
    '''Fixture with the parse result of the template parser example.'''
    template_mainfile = 'tests/data/parsers/template.json'
    return run_parser('parsers/template', template_mainfile)


331
@pytest.fixture(scope="session")
def parsed_template_no_system() -> Backend:
    '''Session scoped fixture with the parse result of ``template_no_system.json``.'''
    template_mainfile = 'tests/data/parsers/template_no_system.json'
    return run_parser('parsers/template', template_mainfile)


337
def parse_file(parser_name_and_mainfile) -> Backend:
    '''
    Runs a parser on a mainfile.

    Arguments:
        parser_name_and_mainfile: A ``(parser_name, mainfile)`` pair.
    '''
    name, path = parser_name_and_mainfile
    backend = run_parser(name, path)
    return backend


342
@pytest.fixture(params=parser_examples, ids=lambda spec: '%s-%s' % spec)
def parsed_example(request) -> Backend:
    '''Parametrized fixture with the parse result for each entry of ``parser_examples``.'''
    example_parser, example_mainfile = request.param
    return run_parser(example_parser, example_mainfile)
347
348


349
def add_calculation_info(backend: Backend, **kwargs) -> Backend:
    '''
    Creates an ``EntryMetadata`` section in the backend's entry archive and
    fills it with fixed test values.

    Arguments:
        backend: The backend whose ``entry_archive`` receives the metadata.
        **kwargs: Additional values applied via ``m_update`` after the fixed
            values were set.

    Returns: The given backend.
    '''
    metadata = backend.entry_archive.m_create(datamodel.EntryMetadata)

    fixed_test_values = (
        ('upload_id', 'test_upload_id'),
        ('calc_id', 'test_calc_id'),
        ('calc_hash', 'test_calc_hash'),
        ('mainfile', 'test/mainfile.txt'),
    )
    for quantity_name, value in fixed_test_values:
        setattr(metadata, quantity_name, value)

    metadata.m_update(**kwargs)
    return backend
357
358


359
@pytest.mark.parametrize('parser_name, mainfile', parser_examples)
def test_parser(parser_name, mainfile):
    '''Each example parses without errors and leaves the working directory untouched.'''
    wd_before_parsing = os.getcwd()

    backend = run_parser(parser_name, mainfile)
    assert_parser_result(backend)

    # Parsers must not leave the process in a different working directory.
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=os.getcwd())
Markus Scheidgen's avatar
Markus Scheidgen committed
366
367


368
369
370
371
372
373
374
375
376
def test_broken_xml_vasp():
    '''The broken vasp XML example parses with errors and keeps the working directory.'''
    wd_before_parsing = os.getcwd()

    backend = run_parser('parsers/vasp', 'tests/data/parsers/vasp/broken.xml')
    assert_parser_result(backend, error=True)

    # Parsers must not leave the process in a different working directory.
    assert_parser_dir_unchanged(wd_before_parsing, current_wd=os.getcwd())


377
378
379
380
381
382
383
384
@pytest.fixture(scope='function')
def with_latin_1_file(raw_files):
    '''Copies the latin-1 example into the parsers test data for the duration of a test.'''
    temporary_copy = 'tests/data/parsers/latin-1.out'
    copyfile('tests/data/latin-1.out', temporary_copy)
    yield
    # Remove the copy again after the test has used it.
    os.remove(temporary_copy)


def test_match(raw_files, with_latin_1_file, no_warn):
    '''The number of raw files matched by a non broken parser equals the expected count.'''
    example_upload_id = 'example_upload_id'
    upload_files = files.StagingUploadFiles(example_upload_id, create=True, is_authorized=lambda: True)
    upload_files.add_rawfiles('tests/data/parsers')

    matched_mainfiles = {}
    for raw_path in upload_files.raw_file_manifest():
        matched = match_parser(upload_files.raw_file_object(raw_path).os_path)
        # Skip files without parser and those only matched by a BrokenParser.
        if matched is None or isinstance(matched, BrokenParser):
            continue
        matched_mainfiles[raw_path] = matched

    # The message lists all matches to ease debugging a changed count.
    assert len(matched_mainfiles) == correct_num_output_files, ', '.join(
        '%s: %s' % (parser.name, mainfile)
        for mainfile, parser in matched_mainfiles.items())
Markus Scheidgen's avatar
Markus Scheidgen committed
398
399
400
401
402
403
404
405
406
407
408
409
410
411


def parser_in_dir(dir):
    '''
    Walks ``dir``, runs the matching parser on every file with 'test' in its
    path, and prints SUCCESS or FAILURE for each file that a parser matched.

    Arguments:
        dir: Path of the directory to walk recursively.
    '''
    import traceback

    candidate_paths = (
        os.path.join(root, file_name)
        for root, _, file_names in os.walk(dir)
        for file_name in file_names)

    for file_path in candidate_paths:
        if 'test' not in file_path:
            continue

        parser = match_parser(file_path)
        if parser is None:
            continue

        try:
            backend = parser.run(file_path)
            # check if the result can be dumped
            dump_json(backend.entry_archive.m_to_dict())
            backend.resource.unload()
        except Exception as e:
            # Report the failure and keep going with the remaining files.
            print(file_path, parser, 'FAILURE', e)
            traceback.print_exc()
        else:
            print(file_path, parser, 'SUCCESS')


if __name__ == '__main__':
    # Script usage: run all matching parsers on the files of the given directory.
    import sys

    # Validate explicitly instead of with assert, which is stripped when
    # Python runs with -O and would let a missing argument slip through.
    if len(sys.argv) != 2 or not os.path.isdir(sys.argv[1]):
        sys.exit('One argument with an directory path is required.')

    parser_in_dir(sys.argv[1])