diff options
author | Pablo Galindo <Pablogsal@gmail.com> | 2020-12-26 19:11:29 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-12-26 19:11:29 +0000 |
commit | 3bcc4ead3f66a58604b7ce87d14e909406c3b364 (patch) | |
tree | ab8cf61a0a589e5935f080a861b7b7866a6da7e7 /Tools | |
parent | bpo-16396: fix BPO number in changelog (GH-23951) (diff) | |
download | cpython-3bcc4ead3f66a58604b7ce87d14e909406c3b364.tar.gz cpython-3bcc4ead3f66a58604b7ce87d14e909406c3b364.tar.bz2 cpython-3bcc4ead3f66a58604b7ce87d14e909406c3b364.zip |
Add small validator utility for PEG grammars (GH-23519)
Diffstat (limited to 'Tools')
-rwxr-xr-x | Tools/peg_generator/pegen/__main__.py | 3 | ||||
-rw-r--r-- | Tools/peg_generator/pegen/validator.py | 52 |
2 files changed, 55 insertions, 0 deletions
```diff
diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py
index 1dcbaad1c38..c0f3b687587 100755
--- a/Tools/peg_generator/pegen/__main__.py
+++ b/Tools/peg_generator/pegen/__main__.py
@@ -14,6 +14,7 @@ import traceback
 from typing import Tuple
 
 from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
+from pegen.validator import validate_grammar
 
 
 def generate_c_code(
@@ -128,6 +129,8 @@ def main() -> None:
     grammar, parser, tokenizer, gen = args.func(args)
     t1 = time.time()
 
+    validate_grammar(grammar)
+
     if not args.quiet:
         if args.verbose:
             print("Raw Grammar:")
diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py
new file mode 100644
index 00000000000..0e3dd41cca4
--- /dev/null
+++ b/Tools/peg_generator/pegen/validator.py
@@ -0,0 +1,52 @@
+from pegen import grammar
+from pegen.grammar import (
+    Alt,
+    Cut,
+    Gather,
+    GrammarVisitor,
+    Group,
+    Lookahead,
+    NamedItem,
+    NameLeaf,
+    NegativeLookahead,
+    Opt,
+    PositiveLookahead,
+    Repeat0,
+    Repeat1,
+    Rhs,
+    Rule,
+    StringLeaf,
+)
+
+class ValidationError(Exception):
+    pass
+
+class GrammarValidator(GrammarVisitor):
+    def __init__(self, grammar: grammar.Grammar):
+        self.grammar = grammar
+        self.rulename = None
+
+    def validate_rule(self, rulename: str, node: Rule):
+        self.rulename = rulename
+        self.visit(node)
+        self.rulename = None
+
+
+class SubRuleValidator(GrammarValidator):
+    def visit_Rhs(self, node: Rule):
+        for index, alt in enumerate(node.alts):
+            alts_to_consider = node.alts[index+1:]
+            for other_alt in alts_to_consider:
+                self.check_intersection(alt, other_alt)
+
+    def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool:
+        if str(second_alt).startswith(str(first_alt)):
+            raise ValidationError(
+                f"In {self.rulename} there is an alternative that will "
+                f"never be visited:\n{second_alt}")
+
+def validate_grammar(the_grammar: grammar.Grammar):
+    for validator_cls in GrammarValidator.__subclasses__():
+        validator = validator_cls(the_grammar)
+        for rule_name, rule in the_grammar.rules.items():
+            validator.validate_rule(rule_name, rule)
```