diff --git a/common/python/nomadcore/simple_parser.py b/common/python/nomadcore/simple_parser.py index a431ca48c056ac0a09ab6b4854bef7c30b8d655b..ce31ce48424dceedc15548737e382721c175e2dc 100644 --- a/common/python/nomadcore/simple_parser.py +++ b/common/python/nomadcore/simple_parser.py @@ -737,6 +737,23 @@ class CompiledMatcher(object): result['match'] = 1 # partial match result['matcher_does_nothing'] = True result['matcherName'] = 'coverageIgnore' + # flatten span: + # original: + # -- whole_re -- group1 -- group2 ------ group3 -------- + # [ [ (0,25) ], [ (4,5) ], [], [ (1,3), (10,15) ], ... ] + # flattened: + # [ 1, 0, 25, 1, 4, 5, 0, 2, 1, 3, 10, 15, ... ] + # per group (first one 'global'/whole RE span): + # number_of_captures, start1, end1, ... + # this encoding scheme supports "optional" groups (r"(?P<blah>\d+)?") + # and is future-compatible with multiple-captures-per-group + # supported by modules regex / re2 (r"(?:(?P<blah>\d+)\s+)+") + span_flat = [] + for g in result['span']: + span_flat.append(len(g)) + for s in g: + span_flat.extend(s) + result['span_flat'] = span_flat result['matchFlags'] = ( result['match'] | (result['coverageIgnore'] << 2) | targetStartEnd << 5 | int(result['matcher_does_nothing'] and result['match']) << 6)