parse.py 4 KB
Newer Older
Markus Scheidgen's avatar
Markus Scheidgen committed
1
2
import typing
import os
3
import json
4
5
6
import click
import sys

Markus Scheidgen's avatar
Markus Scheidgen committed
7
8
from nomad import utils, parsing, normalizing, datamodel

Markus Scheidgen's avatar
Markus Scheidgen committed
9
import nomadcore
10

11
from .cli import cli
12
13
14


def parse(
        mainfile_path: str,
        parser_name: str = None,
        backend_factory: typing.Callable = None,
        strict: bool = True, logger=None):
    '''
    Run a parser on the given mainfile and return the backend it produces.
    If no parser is given, do parser matching and use the respective parser.

    Arguments:
        mainfile_path: Path of the mainfile; handed to the parser matching and
            to the parser's ``run`` method.
        parser_name: Key into ``parsers.parser_dict``. If ``None``, the parser
            is selected with ``parsers.match_parser``.
        backend_factory: Value assigned to the parser's ``backend_factory``
            attribute, if the parser has such an attribute.
        strict: Forwarded to ``parsers.match_parser``.
        logger: Logger to use; defaults to ``utils.get_logger(__name__)``.

    Returns:
        The object returned by the parser's ``run`` method. It is returned
        even if its status does not indicate success (an error is logged).

    Raises:
        AssertionError: If ``parser_name`` is not a known parser, or if no
            parser matches the mainfile.
    '''
    from nomad.parsing import parsers

    mainfile = os.path.basename(mainfile_path)

    if logger is None:
        logger = utils.get_logger(__name__)

    if parser_name is not None:
        parser = parsers.parser_dict.get(parser_name)
        assert parser is not None, 'the given parser must exist'
    else:
        parser = parsers.match_parser(mainfile_path, strict=strict)
        # Reject a failed match before deriving a name from the result.
        assert parser is not None, 'there is no parser matching %s' % mainfile
        if isinstance(parser, parsing.MatchingParser):
            parser_name = parser.name
        else:
            parser_name = parser.__class__.__name__

    # Prefer the parser's own name; fall back to the name determined above for
    # parsers that do not define a ``name`` attribute.
    logger = logger.bind(parser=getattr(parser, 'name', parser_name))
    logger.info('identified parser')

    # NOTE: the attribute is overwritten even when backend_factory is None.
    if hasattr(parser, 'backend_factory'):
        setattr(parser, 'backend_factory', backend_factory)

    parser_backend = parser.run(mainfile_path, logger=logger)

    if not parser_backend.status[0] == 'ParseSuccess':
        logger.error('parsing was not successful', status=parser_backend.status)

    logger.info('ran parser')
    return parser_backend


def normalize(
        normalizer: typing.Union[str, typing.Callable], parser_backend=None,
        logger=None):
    '''
    Run a single normalizer on the entry archive of the given backend.

    Arguments:
        normalizer: Either a callable that is invoked with
            ``parser_backend.entry_archive`` and returns an object with a
            ``normalize(logger=...)`` method, or the name of an entry in
            ``normalizing.normalizers``.
        parser_backend: The backend whose ``entry_archive`` is normalized.
        logger: Logger to use; defaults to ``utils.get_logger(__name__)``.

    Returns:
        The given ``parser_backend``.

    Raises:
        AssertionError: If ``normalizer`` is ``None`` or is a name that matches
            no entry of ``normalizing.normalizers``.
    '''
    if logger is None:
        logger = utils.get_logger(__name__)

    # Keep the original request so the assertion message can report it.
    requested = normalizer
    if isinstance(normalizer, str):
        # Match on the entry's own ``__name__`` (the usual case when entries
        # are classes) and, for backward compatibility, on the name of the
        # entry's type. ``None`` as default lets the assert below report an
        # unknown name instead of leaking a bare StopIteration.
        normalizer = next(
            (
                candidate for candidate in normalizing.normalizers
                if requested in (
                    getattr(candidate, '__name__', None),
                    candidate.__class__.__name__)),
            None)

    assert normalizer is not None, 'there is no normalizer %s' % str(requested)
    normalizer_instance = typing.cast(typing.Callable, normalizer)(parser_backend.entry_archive)
    logger = logger.bind(normalizer=normalizer_instance.__class__.__name__)
    logger.info('identified normalizer')

    normalizer_instance.normalize(logger=logger)
    logger.info('ran normalizer')
    return parser_backend


Markus Scheidgen's avatar
Markus Scheidgen committed
75
def normalize_all(parser_backend=None, logger=None):
    '''
    Run all normalizers from ``normalizing.normalizers`` whose ``domain``
    equals the domain of the given backend, in list order, and return the
    resulting backend.
    '''
    backend = parser_backend
    for candidate in normalizing.normalizers:
        # Skip normalizers that belong to a different domain.
        if not candidate.domain == backend.domain:
            continue
        backend = normalize(candidate, parser_backend=backend, logger=logger)

    return backend


@cli.command(help='Run parsing and normalizing locally.', name='parse')
@click.argument('MAINFILE', nargs=1, required=True, type=str)
@click.option('--show-backend', is_flag=True, default=False, help='Print the backend data.')
@click.option('--show-metadata', is_flag=True, default=False, help='Print the extracted repo metadata.')
@click.option('--skip-normalizers', is_flag=True, default=False, help='Do not run the normalizer.')
@click.option('--not-strict', is_flag=True, help='Do also match artificial parsers.')
@click.option('--parser', help='Skip matching and use the provided parser')
@click.option('--annotate', is_flag=True, help='Sub-matcher based parsers will create a .annotate file.')
def _parse(
        mainfile, show_backend, show_metadata, skip_normalizers, not_strict, parser,
        annotate):
    # CLI entry point: parse the mainfile, optionally run the normalizers, and
    # optionally dump the backend and/or the entry metadata as JSON to stdout.

    # The annotate switch is a module-level setting of nomadcore.
    nomadcore.simple_parser.annotate = annotate

    backend = parse(mainfile, strict=not not_strict, parser_name=parser)

    run_normalizers = not skip_normalizers
    if run_normalizers:
        normalize_all(backend)

    if show_backend:
        backend_data = backend.resource.m_to_dict()
        json.dump(backend_data, sys.stdout, indent=2)

    if show_metadata:
        metadata = datamodel.EntryMetadata(domain='dft')  # TODO take domain from matched parser
        metadata.apply_domain_metadata(backend)
        metadata_data = metadata.m_to_dict()
        json.dump(metadata_data, sys.stdout, indent=4)