FploInputParser.py 27.7 KB
Newer Older
Henning Glawe's avatar
Henning Glawe committed
1
#!/usr/bin/env python
Henning Glawe's avatar
Henning Glawe committed
2
3
4
5
6
7
8
9
10
11
12
13
"""FPLO uses C-inspired input files. They are not quite C, so no conventional
C parser library can be used.
Among the more complex features are nested structs, as well
as arrays-of-struct.

This module is implemented as follows:
1) tokenizer for the used C subset/dialect
2) transformation of tokenized output to concrete syntax tree
3) transformation of concrete syntax tree to abstract syntax tree (AST)
TODO:
4) transform AST to metaInfo backend calls
"""
Henning Glawe's avatar
Henning Glawe committed
14
15
16
17
18
19
20
21
22
23
24
import setup_paths
import re
import sys
import os
import logging
import json
from nomadcore.match_highlighter import ANSI

LOGGER = logging.getLogger(__name__)


25
26
27
28
29
30
31
32
33
34
35
36
class TokenMatchError(Exception):
    """Raised when a token class does not match the input at a position."""


class token(object):
    """Base class of all lexer tokens.

    Subclasses set ``regex`` to a compiled pattern and may override
    ``match2value`` to turn the matched text into a Python value.
    """
    # ANSI sequence written in front of the token text by highlighted()
    highlight_start = ''
    highlight_end = ANSI.RESET
    regex = None
    # splits matched text into (text, trailing newlines), so that the
    # highlighting is switched off before the line break
    cRE_end_newline = re.compile(r'(.*?)(\n*)$')

    def __init__(self, line, pos_in_line):
        """Match ``self.regex`` against ``line`` starting at ``pos_in_line``.

        Raises TokenMatchError if the pattern does not match there.
        """
        match = self.regex.match(line, pos_in_line)
        if match is None:
            raise TokenMatchError
        self._match = match
        self.value = self.match2value()

    def highlighted(self):
        """return ANSI-highlighted token"""
        m = self.cRE_end_newline.match(self._match.group(0))
        return self.highlight_start + m.group(1) + self.highlight_end + m.group(2)

    def match_end(self):
        """Return the position in the line just after the matched text."""
        return self._match.end()

    def match2value(self):
        """Return the token value; the base class has none."""
        return None

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return "%10s %s" % (self.__class__.__name__, repr(self.value))
59

60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

class token_literal(token):
    """Literal value: quoted string, float, hex/octal/decimal integer or
    t/f logical."""
    regex = re.compile(
        r'\s*' + r'(?:' + r'|'.join([
            # alternates for literals, tried from left to right
            r'"(?P<str_d>[^"\\]*(?:\\\\|\\"|[^"]*)*)"',
            r"'(?P<str_s>[^'\\]*(?:\\\\|\\'|[^']*)*)'",
            r'(?P<float>' + (
                r'[+-]?' +  # optional sign
                # positive lookahead: either decimal point or exponential
                # part must follow
                r'\d+(?=[\.eE])' +
                r'(?:\.\d*)?' +  # cover decimals if present
                # exponential part if present; the exponent sign is
                # optional, so that '1e5' is one float and not '1' + 'e5'
                r'(?:[eE][+-]?\d+)?'
            ) + r')',
            r'0x(?P<hex_int>[0-9a-fA-F]+)',
            r'0(?P<octal_int>[0-7]+)',
            r'(?P<decimal_int>[+-]?\d+)',  # integer with optional sign
            r'(?P<logical>[tf])(?=\W)',
        ]) + r')'
    )

    def match2value(self):
        """Convert the matched literal to str, float, int or bool.

        Raises RuntimeError if none of the named groups matched.
        """
        match = self._match
        if match.group('str_d') is not None:
            return match.group('str_d')
        if match.group('str_s') is not None:
            return match.group('str_s')
        if match.group('float') is not None:
            return float(match.group('float'))
        if match.group('hex_int') is not None:
            return int(match.group('hex_int'), base=16)
        if match.group('octal_int') is not None:
            return int(match.group('octal_int'), base=8)
        if match.group('decimal_int') is not None:
            return int(match.group('decimal_int'))
        if match.group('logical') is not None:
            return match.group('logical') == 't'
        raise RuntimeError('no idea what to do with literal "%s"' % (match.group(0)))


class token_datatype(token):
    """Identifier that names one of the datatypes in ``subtype_list``."""
    regex = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)')
    # known datatype names, and the reverse lookup name -> list index
    subtype_list = []
    subtype_dict = {}

    def match2value(self):
        """Return the datatype name and remember its index in ``value_index``.

        Raises TokenMatchError if the identifier is not a known datatype.
        """
        value_index = self.subtype_dict.get(self._match.group(1), None)
        if value_index is None:
            raise TokenMatchError
        self.value_index = value_index
        return self.subtype_list[value_index]

# datatype names accepted by token_datatype
token_datatype.subtype_list = [
    'char',
    'int',
    'real',
    'logical',
    'flag',
]

# reverse lookup: datatype name -> position in subtype_list
token_datatype.subtype_dict = {
    name: index for index, name in enumerate(token_datatype.subtype_list)
}


class token_keyword(token):
    """Identifier that is one of the keywords in ``subtype_list``."""
    regex = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)')
    # known keywords, and the reverse lookup keyword -> list index
    subtype_list = []
    subtype_dict = {}

    def match2value(self):
        """Return the keyword and remember its index in ``value_index``.

        Raises TokenMatchError if the identifier is not a known keyword.
        """
        value_index = self.subtype_dict.get(self._match.group(1), None)
        if value_index is None:
            raise TokenMatchError
        self.value_index = value_index
        return self.subtype_list[value_index]

# keywords accepted by token_keyword
token_keyword.subtype_list = [
    'section',
    'struct',
]

# reverse lookup: keyword -> position in subtype_list
token_keyword.subtype_dict = {
    name: index for index, name in enumerate(token_keyword.subtype_list)
}


class token_identifier(token):
    """C-style identifier; the value is the identifier text."""
    regex = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)')

    def match2value(self):
        """Return the identifier without the leading whitespace."""
        return self._match.group(1)
153

Henning Glawe's avatar
Henning Glawe committed
154

155
156
157
158
159
160
161
162
163
164
165
166
class token_subscript_begin(token):
    """Opening bracket of a subscript."""
    # leading whitespace is skipped like for the other structural tokens;
    # without it 'name [3]' would be classified as bad input
    regex = re.compile(r'\s*\[')


class token_subscript_end(token):
    """Closing bracket of a subscript."""
    # leading whitespace is skipped like for the other structural tokens;
    # without it '[3 ]' would be classified as bad input
    regex = re.compile(r'\s*\]')


class token_operator(token):
    """Operator or list separator; the value is the operator text."""
    regex = re.compile(r'\s*(\+=|\-=|=|,|-|\+|/|\*)')

    def match2value(self):
        """Return the operator without the leading whitespace."""
        return self._match.group(1)
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185


class token_block_begin(token):
    """Opening brace of a block, with optional leading whitespace."""
    regex = re.compile(r'\s*\{')


class token_block_end(token):
    """Closing brace of a block, with optional leading whitespace."""
    regex = re.compile(r'\s*\}')


class token_statement_end(token):
    """Semicolon terminating a statement, with optional leading whitespace."""
    regex = re.compile(r'\s*;')


class token_line_comment(token):
    """Comment introduced by '//', '#' or '/*', running to the end of line."""
    regex = re.compile(r'\s*(?:(//|#)|(/\*))(?P<comment>.*)')

    def match2value(self):
        """Return the comment text following the comment marker."""
        return self._match.group('comment')
187
188
189
190
191
192
193
194

class token_trailing_whitespace(token):
    """Whitespace reaching up to the end of the line."""
    regex = re.compile(r'\s+$')

class token_bad_input(token):
    """Catch-all token: the rest of the line that no other token matched."""
    regex = re.compile('(.+)$')

    def match2value(self):
        """Return the unmatched text."""
        return self._match.group(1)
196
197
198
199
200

class token_flag_value(token):
    """Flag state written as '(+)' or '(-)'."""
    regex = re.compile(r'\(([+-])\)')

    def match2value(self):
        """Return True for '(+)' and False for '(-)'."""
        return self._match.group(1) == '+'


# ANSI sequences used by token.highlighted() for the individual token classes
token_literal.highlight_start = ANSI.FG_MAGENTA
token_datatype.highlight_start = ANSI.FG_YELLOW
token_keyword.highlight_start = ANSI.FG_BRIGHT_YELLOW
token_identifier.highlight_start = ANSI.FG_CYAN
token_subscript_begin.highlight_start = ANSI.FG_BRIGHT_GREEN
token_subscript_end.highlight_start = ANSI.FG_BRIGHT_GREEN
token_operator.highlight_start = ANSI.BEGIN_INVERT + ANSI.FG_YELLOW
token_block_begin.highlight_start = ANSI.FG_BRIGHT_CYAN
token_block_end.highlight_start = ANSI.FG_BRIGHT_CYAN
token_statement_end.highlight_start = ANSI.FG_BRIGHT_YELLOW
token_line_comment.highlight_start = ANSI.FG_BLUE
token_trailing_whitespace.highlight_start = ANSI.BG_BLUE
token_bad_input.highlight_start = ANSI.BEGIN_INVERT + ANSI.FG_BRIGHT_RED
token_flag_value.highlight_start = ANSI.FG_MAGENTA

222

223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
class AST_node(dict):
    """base class for abstract syntax tree nodes"""
    def __init__(self, name=None):
        self.name = name
        # ordered list of children; entries may be None or plain values
        self.child = []

    def indented_str(self, indent=''):
        """Return a dump of this node and its children, one node per line.

        Children are indented by two more spaces; None children are skipped.
        """
        result = ANSI.BG_YELLOW + indent + ANSI.RESET + ('%-20s' % (self.__class__.__name__))
        if self.name is not None:
            result = result + ' ' + self.name
        result = result + '\n'
        child_indent = indent + '  '
        for child in self.child:
            if child is not None:
                result = result + child.indented_str(child_indent)
        return result

    def append(self, newchild):
        """Add ``newchild`` as the last child of this node."""
        self.child.append(newchild)

    def __len__(self):
        """Return the number of children."""
        return len(self.child)

    def declaration_nomadmetainfo(self, output_file, namespace):
        """Let all AST_node children write their metainfo declarations.

        A named node extends ``namespace`` by '.' and its own name for its
        children; an unnamed node passes ``namespace`` on unchanged.
        """
        if self.name is not None:
            child_namespace = namespace + '.' + self.name
        else:
            child_namespace = namespace
        for child in self.child:
            if isinstance(child, AST_node):
                child.declaration_nomadmetainfo(output_file, child_namespace)

255
256
257
258
259
260
261
262

class AST_block(AST_node):
    """generic block (sequence of statements) in AST"""
    def append_block(self, src_block):
        """Append every child of ``src_block`` to this block, in order."""
        for node in src_block.child:
            self.append(node)


263
class AST_root(AST_block):
    """root block of the AST"""
    def declaration_nomadmetainfo(self, output_file, namespace):
        """Write a complete nomadmetainfo JSON document to ``output_file``.

        Writes the document header and a section named ``namespace``, lets
        the children add their declarations, then closes the document.
        """
        output_file.write((
            '{\n' +
            '  "type": "nomad_meta_info_1_0",\n' +
            '  "description": "FPLO input metainfo, autogenerated",\n' +
            '  "dependencies": [{\n' +
            '    "relativePath": "public.nomadmetainfo.json"\n' +
            '  }],\n' +
            '  "metainfos": [\n' +
            '    {\n' +
            '      "description": "FPLO input metainfo, autogenerated",\n' +
            '      "name": "%s",\n' +
            '      "kindStr": "type_section",\n' +
            '      "superNames": [ "section_method" ]\n' +
            '    }') % (namespace))
        AST_block.declaration_nomadmetainfo(self, output_file, namespace)
        output_file.write(
            '\n' +
            '  ]\n' +
            '}\n'
        )
285
286


287
288
class AST_section(AST_block):
    """section block (sequence of statements) in AST"""
    def declaration_nomadmetainfo(self, output_file, namespace):
        """Write the section declaration, then the declarations of children.

        The section is named ``namespace`` + '.' + own name and lists
        ``namespace`` as its superName.
        """
        thisname = namespace + '.' + self.name
        output_file.write((
            ', {\n' +
            '      "description": "FPLO input section %s",\n' +
            '      "name": "%s",\n' +
            '      "kindStr": "type_section",\n' +
            '      "superNames": [ "%s" ]\n' +
            '    }') % (thisname, thisname, namespace))
        AST_block.declaration_nomadmetainfo(self, output_file, namespace)
299
300
301


class AST_datatype(AST_node):
    """base class for datatypes of declarations"""
    def nomad_kindStr(self):
        """Return the metainfo kindStr; None in the base class."""
        return None

    def nomad_dtypeStr(self):
        """Return the metainfo dtypeStr; None in the base class."""
        return None
307
308
309


class AST_datatype_primitive(AST_datatype):
    """primitive datatype; the node name is the datatype name"""
    # datatype name -> metainfo dtypeStr
    dtype2nomad = {
        'char': 'C',
        'int': 'i',
        'real': 'f',
        'logical': 'b',
    }

    def nomad_dtypeStr(self):
        """Return the dtypeStr for this datatype (KeyError if unknown)."""
        return self.dtype2nomad[self.name]
319
320
321
322
323
324
325


class AST_datatype_struct(AST_datatype):
    """struct datatype; the children are the member declarations"""
    def append_block(self, src_block):
        """Append every child of ``src_block`` to this struct, in order."""
        for src_child in src_block.child:
            self.append(src_child)

    def nomad_kindStr(self):
        """Return 'type_section'."""
        return 'type_section'


330
class AST_datatype_flag(AST_datatype_struct):
    """'flag' datatype, represented as a struct"""
    def nomad_kindStr(self):
        """Return 'type_section'."""
        return 'type_section'
333

334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349

class AST_declaration(AST_node):
    """variable declaration in abstract syntax tree"""
    # children:
    #   0 - shape
    #   1 - datatype
    def __init__(self, name, datatype, shape=None):
        AST_node.__init__(self, name)
        self.child.append(shape)
        self.child.append(datatype)

    def set_shape(self, shape):
        """Set the shape; raises RuntimeError if a shape is already set."""
        if self.child[0] is not None:
            raise RuntimeError('already has shape: %s' % (self.name))
        self.child[0] = shape

    def declaration_nomadmetainfo(self, output_file, namespace):
        """Write the metainfo declaration of this variable and its children.

        Raises RuntimeError if the datatype provides neither a kindStr nor
        a dtypeStr.
        """
        thisname = namespace + '.' + self.name
        kindStr = self.child[1].nomad_kindStr()
        dtypeStr = self.child[1].nomad_dtypeStr()
        output_file.write((
            ', {\n' +
            '      "description": "FPLO input %s",\n' +
            '      "name": "%s",\n' +
            '      "superNames": [ "%s" ],\n'
            ) % (thisname, thisname, namespace))
        if self.child[0] is not None:
            # any declared shape is emitted as a repeating quantity
            output_file.write('      "repeats": true,\n')
        if kindStr is not None:
            output_file.write('      "kindStr": "%s"\n' % (kindStr))
        elif dtypeStr is not None:
            output_file.write('      "dtypeStr": "%s"\n' % (dtypeStr))
        else:
            raise RuntimeError(
                "neither kindStr nor dtypeStr are defined for %s" % (
                    thisname
                )
            )
        output_file.write('    }')
        AST_node.declaration_nomadmetainfo(self, output_file, namespace)

375
376
377
378
379
380
381
382
383
384

class AST_shape(AST_node):
    """shape of a declaration"""
    # children are ints without indented_str method
    def indented_str(self, indent=''):
        """Return a one-line dump: class name and the list of dimensions."""
        dims = ', '.join(str(dim) for dim in self.child)
        prefix = ANSI.BG_YELLOW + indent + ANSI.RESET
        return prefix + ('%-20s [' % (self.__class__.__name__)) + dims + ']\n'


385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
class AST_value(AST_node):
    """base class for values on the right-hand side of an assignment"""


class AST_value_primitive(AST_value):
    """single literal value"""
    # there is one child, a python literal
    def indented_str(self, indent=''):
        """Return a one-line dump: class name and the literal."""
        prefix = ANSI.BG_YELLOW + indent + ANSI.RESET
        text = '%-20s %s\n' % (self.__class__.__name__, str(self.child[0]))
        return prefix + text


class AST_value_list(AST_value):
    """list of values"""
    def indented_str(self, indent=''):
        """Return a one-line dump: class name and repr of the child list."""
        prefix = ANSI.BG_YELLOW + indent + ANSI.RESET
        text = '%-20s %s\n' % (self.__class__.__name__, repr(self.child))
        return prefix + text


class AST_assignment(AST_node):
    """assignment of a value to a declaration

    children:
      0: target (type AST_declaration)
      1: value (type AST_value)
    """


411
class concrete_node(object):
    """base class for nodes of the concrete syntax tree"""
    def __init__(self, parent):
        self.items = []
        # backref
        self.parent = parent

    def append(self, item):
        """Add ``item`` as the last item of this node."""
        self.items.append(item)

    def indented_dump(self, indent):
        """Return a multi-line dump of this node, '' if it has no items.

        Nested concrete nodes are dumped recursively with two more spaces
        of indentation, all other items via str().
        """
        if len(self.items) < 1:
            return ''
        child_indent = indent + '  '
        lines = [indent + self.__class__.__name__ + ":\n"]
        for item in self.items:
            if isinstance(item, concrete_node):
                lines.append(item.indented_dump(child_indent))
            else:
                lines.append(child_indent + str(item) + '\n')
        return ''.join(lines)

    def to_AST(self):
        """Convert to an AST node; has to be implemented by subclasses."""
        raise NotImplementedError(
            "unimplemented to_AST in %s" % (self.__class__.__name__))

434

435
class concrete_statement(concrete_node):
    """statement in the concrete syntax tree; the items are tokens,
    subscripts and blocks"""

    def to_AST(self):
        """Convert this statement to an AST node.

        Returns None for an empty statement, the declaration (or section)
        if nothing follows it, and an AST_assignment if the declaration is
        followed by '=' and a value.
        Raises RuntimeError for items that do not fit this pattern.
        """
        if len(self.items) < 1:
            return None
        result = None
        pos_in_statement = 0
        # check declarations
        if isinstance(self.items[0], token_keyword):
            if self.items[0].value == 'section':
                result = AST_section(self.items[1].value)  # name of section
                result.append_block(self.items[2].to_AST())  # section-block
                pos_in_statement = 3
            elif self.items[0].value == 'struct':
                struct = AST_datatype_struct()
                struct.append_block(self.items[1].to_AST())
                result = AST_declaration(self.items[2].value, struct)
                pos_in_statement = 3
        elif isinstance(self.items[0], token_datatype) and self.items[0].value == 'flag':
            # special case for non-C-primtype 'flag'
            #   we will map this to struct of logicals, but need to evaluate
            #   RHS to get the names. messy.
            flag = AST_datatype_flag()
            result = AST_declaration(self.items[1].value, flag)
            # the length check keeps a bare 'flag name' from raising IndexError
            if len(self.items) > 2 and isinstance(self.items[2], concrete_subscript):
                # skip array shape
                pos_in_statement = 3
            else:
                pos_in_statement = 2
        elif isinstance(self.items[0], token_datatype):
            primtype = AST_datatype_primitive(self.items[0].value)
            if primtype.name == 'char' and isinstance(self.items[1], concrete_subscript):
                # ignore char length for now
                #   not correct in C, but all declared chars in FPLO input
                #   are char arrays
                declaration_name = self.items[2].value
                pos_in_statement = 3
            else:
                declaration_name = self.items[1].value
                pos_in_statement = 2
            result = AST_declaration(declaration_name, primtype)
        if (
                (len(self.items) > pos_in_statement) and
                isinstance(self.items[pos_in_statement], concrete_subscript)
            ):
            # subscript in LHS declares shape
            if not isinstance(result, AST_declaration):
                raise RuntimeError('encountered subscript on non-declaration')
            result.set_shape(self.items[pos_in_statement].to_AST_shape())
            pos_in_statement = pos_in_statement + 1
        if len(self.items) <= pos_in_statement:
            # we are done, nothing more in statement
            return result
        if not (isinstance(self.items[pos_in_statement], token_operator) and
                self.items[pos_in_statement].value == '='):
            raise RuntimeError('unexpected item following declaration: %s' % (
                repr(self.items[pos_in_statement])))
        # we have an assignment
        new_assignment = AST_assignment()
        new_assignment.append(result)
        pos_in_statement = pos_in_statement + 1
        if len(self.items) <= pos_in_statement:
            raise RuntimeError('missing values in assignment')
        if len(self.items) > pos_in_statement + 1:
            raise RuntimeError('too many values in assignment')
        concrete_values = self.items[pos_in_statement]
        if isinstance(concrete_values, token_literal):
            new_value = AST_value_primitive()
            new_value.append(concrete_values.value)
            new_assignment.append(new_value)
            return new_assignment
        if isinstance(concrete_values, concrete_block):
            new_value = AST_value_list()
            if isinstance(result.child[1], AST_datatype_flag):
                # special case for 'flag' datatype, need to evaluate RHS for names
                (flag_names, flag_values) = concrete_values.flag_names_values()
                for flag_name in flag_names:
                    result.child[1].append(AST_declaration(flag_name, AST_datatype_primitive('logical')))
                for flag_value in flag_values:
                    new_value.append(flag_value)
            else:
                new_value.child = concrete_values.python_value()
            new_assignment.append(new_value)
        return new_assignment

    def python_value(self):
        """Return the comma-separated values of this statement as a list.

        Literals become python values, 'a / b' becomes the quotient, and
        nested blocks become nested lists. Items that cannot be handled
        are logged as errors.
        """
        def eval_accumulated(items):
            if len(items) == 1:
                return items[0]
            elif len(items) == 3 and items[1] == '/':
                return items[0] / items[2]
            else:
                LOGGER.error('concrete_statement.python_value:accum: %s', str(items))
                sys.stderr.write(self.indented_dump(''))
                return None

        result = []
        accum = []
        for item in self.items:
            if isinstance(item, token_literal):
                accum.append(item.value)
            elif isinstance(item, token_operator) and item.value == ',':
                if len(accum) > 0:
                    result.append(eval_accumulated(accum))
                accum = []
            elif isinstance(item, token_operator) and item.value == '/':
                # FPLO input contains fraction constants
                accum.append('/')
            elif isinstance(item, concrete_block):
                result.append(item.python_value())
            else:
                LOGGER.error('concrete_statement.python_value:item: %s', repr(item))
        if len(accum) > 0:
            result.append(eval_accumulated(accum))
        return result

    def flag_names_values(self):
        """Return (names, values) of the comma-separated flag entries.

        Every entry has to consist of an identifier and a flag value;
        entries named 'NOT_USED' are dropped.
        Raises RuntimeError for malformed entries or unexpected items.
        """
        result_names = []
        result_values = []

        def store_pair(pair):
            # validate one accumulated (name, value) entry and record it
            if len(pair) != 2:
                raise RuntimeError('flag_names_values encountered non-pair: ' + str(pair))
            if pair[0] != 'NOT_USED':
                result_names.append(pair[0])
                result_values.append(pair[1])

        accum = []
        for item in self.items:
            if isinstance(item, (token_identifier, token_flag_value)):
                accum.append(item.value)
            elif isinstance(item, token_operator) and item.value == ',':
                store_pair(accum)
                accum = []
            else:
                raise RuntimeError('flag_names_values encountered unhandled item: ' + repr(item))
        if len(accum) > 0:
            # last entry is not followed by a comma
            store_pair(accum)
        return (result_names, result_values)

573
class concrete_block(concrete_node):
    """block in the concrete syntax tree; the items are statements"""

    def to_AST(self):
        """Return an AST_block with the AST nodes of all statements.

        Statements converting to None are skipped. Returns None only if
        the block has no items at all.
        """
        if len(self.items) < 1:
            return None
        result = AST_block()
        for item in self.items:
            item_AST = item.to_AST()
            if item_AST is not None:
                result.append(item_AST)
        # the former 'len(result) is not None' check was always true, so
        # a block without AST children is still returned as empty block
        return result

    def python_value(self):
        """Return the python value of the single statement in this block."""
        if len(self.items) != 1:
            raise RuntimeError('python_value for block containing !=1 statement')
        return self.items[0].python_value()

    def flag_names_values(self):
        """Return flag names/values of the single statement in this block."""
        if len(self.items) != 1:
            raise RuntimeError('flag_names for block containing !=1 statement')
        return self.items[0].flag_names_values()

596

597
598
599
600
601
602
603
604
605
606
607
608
609
610
class concrete_root(concrete_block):
    """root block of the concrete syntax tree"""

    def to_AST(self):
        """Return an AST_root with the AST nodes of all statements.

        Statements converting to None are skipped. Returns None only if
        the root has no items at all.
        """
        if len(self.items) < 1:
            return None
        result = AST_root()
        for item in self.items:
            item_AST = item.to_AST()
            if item_AST is not None:
                result.append(item_AST)
        # the former 'len(result) is not None' check was always true, so
        # a root without AST children is still returned
        return result


611
class concrete_subscript(concrete_statement):
    """bracketed subscript in the concrete syntax tree"""

    def __str__(self):
        quoted = ["'" + str(item) + "'" for item in self.items]
        return '[' + ', '.join(quoted) + ']'

    def __repr__(self):
        quoted = ["'" + repr(item) + "'" for item in self.items]
        result = '[' + ', '.join(quoted) + ']'
        return "%10s %s" % (self.__class__.__name__, result)

    def to_AST_shape(self):
        """Return an AST_shape with one child per item of the subscript.

        Integer literals are taken as they are; '*' and identifiers are
        stored as -1. Raises Exception for any other item.
        """
        result = AST_shape()
        for item in self.items:
            if isinstance(item, token_literal) and isinstance(item.value, int):
                result.append(item.value)
            elif isinstance(item, token_operator) and item.value == '*':
                # denote variable-length dimension by -1
                result.append(int(-1))
            elif isinstance(item, token_identifier):
                # TODO: check if length from identifier needs to be respected
                # for now treat as variable-length
                result.append(int(-1))
            else:
                raise Exception("unknown purpose of item in shape: %s" % (repr(item)))
        return result

Henning Glawe's avatar
Henning Glawe committed
644

Henning Glawe's avatar
Henning Glawe committed
645
646
647
648
649
650
651
652
653
class FploInputParser(object):
    """Parser for C-like FPLO input
    """
    def __init__(self, file_path, annotateFile = None):
        """Set up the parser for ``file_path``.

        If ``annotateFile`` is given, the highlighted tokens are written
        to it while parsing.
        """
        self.input_tree = {}
        self.file_path = file_path
        self.state = self.state_root
        self.__annotateFile = annotateFile
        self.bad_input = False
        # start with root block, and add empty statement to append to
        self.concrete_statements = concrete_root(None)
        self.concrete_statements.append(concrete_statement(self.concrete_statements))
        self.current_concrete_statement = self.concrete_statements.items[-1]

    def parse(self):
        """open file and parse line-by-line"""
        with open(self.file_path, "r") as fIn:
            # process line-by-line
            for line in fIn:
                self.parse_line(line)
        # check if there was input flagged as 'bad'/'syntactically incorrect'
        if self.bad_input:
            # call bad-input hook
            self.onBad_input()
        # call end-of-file hook
        self.onEnd_of_file()

    def parse_line(self, line):
        """parse one line, delegating to the parser state handlers"""
        pos_in_line = 0
        while pos_in_line < len(line):
            new_pos_in_line = self.state(line, pos_in_line)
            # check if anything was parsed, otherwise cancel that line
            if new_pos_in_line is None:
                break
            pos_in_line = new_pos_in_line

    def _annotate(self, what):
        """write string to annotateFile if present"""
        if self.__annotateFile:
            self.__annotateFile.write(what)

    def state_root(self, line, pos_in_line):
        """state: read one token at ``pos_in_line`` and add it to the
        concrete syntax tree

        Returns the position after the token, or None if no token class
        matched.
        """
        this_token = None
        # token classes are tried in this order, the first match is taken
        for try_token in [token_literal, token_flag_value, token_datatype,
                          token_keyword,
                          token_identifier, token_subscript_begin,
                          token_subscript_end, token_operator,
                          token_block_begin, token_block_end,
                          token_statement_end, token_line_comment,
                          token_trailing_whitespace,
                          token_bad_input,
                          ]:
            try:
                this_token = try_token(line, pos_in_line)
            except TokenMatchError:
                continue
            break
        if this_token is None:
            LOGGER.error("cannot match any token type to '%s'",
                         line[pos_in_line:])
            return None
        self._annotate(this_token.highlighted())
        if isinstance(this_token, token_block_begin):
            # open new block with an empty first statement, continue there
            newblock = concrete_block(self.current_concrete_statement)
            newblock.append(concrete_statement(newblock))
            self.current_concrete_statement.append(newblock)
            self.current_concrete_statement = newblock.items[0]
        elif isinstance(this_token, token_block_end):
            # continue in the statement which contains the closed block
            enclosing = self.current_concrete_statement.parent.parent
            if enclosing is None:
                # '}' without matching '{': flag as bad input instead of
                # losing the current statement
                self.bad_input = True
            else:
                self.current_concrete_statement = enclosing
        elif isinstance(this_token, token_subscript_begin):
            newsubscript = concrete_subscript(self.current_concrete_statement)
            self.current_concrete_statement.append(newsubscript)
            self.current_concrete_statement = newsubscript
        elif isinstance(this_token, token_subscript_end):
            self.current_concrete_statement = self.current_concrete_statement.parent
        elif isinstance(this_token, token_statement_end):
            # start a new statement in the same block
            parent_block = self.current_concrete_statement.parent
            parent_block.append(concrete_statement(parent_block))
            self.current_concrete_statement = parent_block.items[-1]
        elif isinstance(this_token, token_bad_input):
            self.bad_input = True
        elif isinstance(this_token, (token_line_comment, token_trailing_whitespace)):
            # skip comments and trailing whitespace
            pass
        elif isinstance(this_token, (
                    token_literal, token_flag_value, token_operator,
                    token_datatype, token_keyword, token_identifier
                )):
            self.current_concrete_statement.append(this_token)
        else:
            raise Exception("Unhandled token type " + this_token.__class__.__name__)
        return this_token.match_end()

    def onBad_input(self):
        """hook: called at the end of parsing if there was any bad input"""
        pass

    def onEnd_of_file(self):
        """hook: called at the end of parsing"""
        self.AST = self.concrete_statements.to_AST()
Henning Glawe's avatar
Henning Glawe committed
748
749

if __name__ == "__main__":
    # command line interface: parse each given FPLO input file and write
    # the requested dumps
    import argparse
    ARGPARSER = argparse.ArgumentParser(description='NOMAD parser for FPLO input')
    ARGPARSER.add_argument('--annotate', action='store_true', default=False,
                           help='write annotated/tokenized input files to stderr')
    ARGPARSER.add_argument('--dump_concrete', action='store_true', default=False,
                           help='write concrete syntax tree to stderr')
    ARGPARSER.add_argument('--dump_ast', action='store_true', default=False,
                           help='write abstract syntax tree to stderr')
    ARGPARSER.add_argument('--metainfo', action='store_true', default=False,
                           help='write nomadmetainfo to stdout')
    ARGPARSER.add_argument('fplo_input', type=str, nargs='+', help='FPLO input files')
    ARGS = ARGPARSER.parse_args()
    if ARGS.annotate:
        ANNOTATEFILE = sys.stderr
    else:
        ANNOTATEFILE = None
    for fplo_in in ARGS.fplo_input:
        parser = FploInputParser(fplo_in, annotateFile=ANNOTATEFILE)
        parser.parse()
        sys.stdout.flush()
        sys.stderr.flush()
        if ARGS.dump_concrete:
            sys.stderr.write('concrete syntax tree:\n')
            sys.stderr.write(parser.concrete_statements.indented_dump('  '))
            sys.stdout.flush()
            sys.stderr.flush()
        if ARGS.dump_ast:
            sys.stderr.write('abstract syntax tree:\n')
            sys.stderr.write(parser.AST.indented_str('  '))
            sys.stdout.flush()
            sys.stderr.flush()
        if ARGS.metainfo:
            parser.AST.declaration_nomadmetainfo(sys.stdout, 'x_fplo_in')