aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPablo Galindo <Pablogsal@gmail.com>2020-12-26 19:11:29 +0000
committerGitHub <noreply@github.com>2020-12-26 19:11:29 +0000
commit3bcc4ead3f66a58604b7ce87d14e909406c3b364 (patch)
treeab8cf61a0a589e5935f080a861b7b7866a6da7e7
parentbpo-16396: fix BPO number in changelog (GH-23951) (diff)
downloadcpython-3bcc4ead3f66a58604b7ce87d14e909406c3b364.tar.gz
cpython-3bcc4ead3f66a58604b7ce87d14e909406c3b364.tar.bz2
cpython-3bcc4ead3f66a58604b7ce87d14e909406c3b364.zip
Add small validator utility for PEG grammars (GH-23519)
-rw-r--r--Lib/test/test_peg_generator/test_grammar_validator.py51
-rwxr-xr-xTools/peg_generator/pegen/__main__.py3
-rw-r--r--Tools/peg_generator/pegen/validator.py52
3 files changed, 106 insertions, 0 deletions
diff --git a/Lib/test/test_peg_generator/test_grammar_validator.py b/Lib/test/test_peg_generator/test_grammar_validator.py
new file mode 100644
index 00000000000..2e72ff8df28
--- /dev/null
+++ b/Lib/test/test_peg_generator/test_grammar_validator.py
@@ -0,0 +1,51 @@
+import unittest
+from test import test_tools
+
+test_tools.skip_if_missing('peg_generator')
+with test_tools.imports_under_tool('peg_generator'):
+ from pegen.grammar_parser import GeneratedParser as GrammarParser
+ from pegen.validator import SubRuleValidator, ValidationError
+ from pegen.testutil import parse_string
+ from pegen.grammar import Grammar
+
+
+class TestPegen(unittest.TestCase):
+ def test_rule_with_no_collision(self) -> None:
+ grammar_source = """
+ start: bad_rule
+ sum:
+ | NAME '-' NAME
+ | NAME '+' NAME
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ validator = SubRuleValidator(grammar)
+ for rule_name, rule in grammar.rules.items():
+ validator.validate_rule(rule_name, rule)
+
+ def test_rule_with_simple_collision(self) -> None:
+ grammar_source = """
+ start: bad_rule
+ sum:
+ | NAME '+' NAME
+ | NAME '+' NAME ';'
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ validator = SubRuleValidator(grammar)
+ with self.assertRaises(ValidationError):
+ for rule_name, rule in grammar.rules.items():
+ validator.validate_rule(rule_name, rule)
+
+ def test_rule_with_collision_after_some_other_rules(self) -> None:
+ grammar_source = """
+ start: bad_rule
+ sum:
+ | NAME '+' NAME
+ | NAME '*' NAME ';'
+ | NAME '-' NAME
+ | NAME '+' NAME ';'
+ """
+ grammar: Grammar = parse_string(grammar_source, GrammarParser)
+ validator = SubRuleValidator(grammar)
+ with self.assertRaises(ValidationError):
+ for rule_name, rule in grammar.rules.items():
+ validator.validate_rule(rule_name, rule)
diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py
index 1dcbaad1c38..c0f3b687587 100755
--- a/Tools/peg_generator/pegen/__main__.py
+++ b/Tools/peg_generator/pegen/__main__.py
@@ -14,6 +14,7 @@ import traceback
from typing import Tuple
from pegen.build import Grammar, Parser, Tokenizer, ParserGenerator
+from pegen.validator import validate_grammar
def generate_c_code(
@@ -128,6 +129,8 @@ def main() -> None:
grammar, parser, tokenizer, gen = args.func(args)
t1 = time.time()
+ validate_grammar(grammar)
+
if not args.quiet:
if args.verbose:
print("Raw Grammar:")
diff --git a/Tools/peg_generator/pegen/validator.py b/Tools/peg_generator/pegen/validator.py
new file mode 100644
index 00000000000..0e3dd41cca4
--- /dev/null
+++ b/Tools/peg_generator/pegen/validator.py
@@ -0,0 +1,52 @@
+from pegen import grammar
+from pegen.grammar import (
+ Alt,
+ Cut,
+ Gather,
+ GrammarVisitor,
+ Group,
+ Lookahead,
+ NamedItem,
+ NameLeaf,
+ NegativeLookahead,
+ Opt,
+ PositiveLookahead,
+ Repeat0,
+ Repeat1,
+ Rhs,
+ Rule,
+ StringLeaf,
+)
+
+class ValidationError(Exception):
+ pass
+
+class GrammarValidator(GrammarVisitor):
+ def __init__(self, grammar: grammar.Grammar):
+ self.grammar = grammar
+ self.rulename = None
+
+ def validate_rule(self, rulename: str, node: Rule):
+ self.rulename = rulename
+ self.visit(node)
+ self.rulename = None
+
+
+class SubRuleValidator(GrammarValidator):
+ def visit_Rhs(self, node: Rule):
+ for index, alt in enumerate(node.alts):
+ alts_to_consider = node.alts[index+1:]
+ for other_alt in alts_to_consider:
+ self.check_intersection(alt, other_alt)
+
+ def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool:
+ if str(second_alt).startswith(str(first_alt)):
+ raise ValidationError(
+ f"In {self.rulename} there is an alternative that will "
+ f"never be visited:\n{second_alt}")
+
+def validate_grammar(the_grammar: grammar.Grammar):
+ for validator_cls in GrammarValidator.__subclasses__():
+ validator = validator_cls(the_grammar)
+ for rule_name, rule in the_grammar.rules.items():
+ validator.validate_rule(rule_name, rule)