Merge pull request #1261 from lark-parser/strict_mode
Added strict-mode, enabled using the strict=True flag
erezsh authored Mar 9, 2023
2 parents 0551acf + d97eaa3 commit a9d353e
Showing 9 changed files with 54 additions and 15 deletions.
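In short, strict=True turns the lexer's terminal-collision warning and the LALR shift/reduce warning into hard errors at grammar-load time. A minimal usage sketch, not part of the diff itself; it assumes the optional interegular package is installed (strict mode requires it for the collision check) and uses a trivial, conflict-free grammar:

from lark import Lark

# parser='lalr': per the new test, strict mode is only supported for LALR for now.
# With a clean grammar, construction succeeds exactly as before.
parser = Lark(r'''
start: "hello" "world"
''', parser='lalr', strict=True)

print(parser.parse("helloworld"))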
5 changes: 4 additions & 1 deletion lark/common.py
@@ -38,8 +38,10 @@ class LexerConf(Serialize):
skip_validation: bool
use_bytes: bool
lexer_type: Optional[_LexerArgType]
strict: bool

def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None, callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False):
def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ignore: Collection[str]=(), postlex: 'Optional[PostLex]'=None,
callbacks: Optional[Dict[str, _Callback]]=None, g_regex_flags: int=0, skip_validation: bool=False, use_bytes: bool=False, strict: bool=False):
self.terminals = terminals
self.terminals_by_name = {t.name: t for t in self.terminals}
assert len(self.terminals) == len(self.terminals_by_name)
@@ -50,6 +52,7 @@ def __init__(self, terminals: Collection[TerminalDef], re_module: ModuleType, ig
self.re_module = re_module
self.skip_validation = skip_validation
self.use_bytes = use_bytes
self.strict = strict
self.lexer_type = None

def _deserialize(self):
6 changes: 5 additions & 1 deletion lark/lark.py
Expand Up @@ -54,6 +54,7 @@ class LarkOptions(Serialize):

start: List[str]
debug: bool
strict: bool
transformer: 'Optional[Transformer]'
propagate_positions: Union[bool, str]
maybe_placeholders: bool
@@ -81,6 +82,8 @@ class LarkOptions(Serialize):
debug
Display debug information and extra warnings. Use only when debugging (Default: ``False``)
When used with Earley, it generates a forest graph as "sppf.png", if 'dot' is installed.
strict
Throw an exception on any potential ambiguity, including shift/reduce conflicts, and regex collisions.
transformer
Applies the transformer to every parse tree (equivalent to applying it after the parse, but faster)
propagate_positions
@@ -156,6 +159,7 @@ class LarkOptions(Serialize):
# - Potentially in `lark.tools.__init__`, if it makes sense, and it can easily be passed as a cmd argument
_defaults: Dict[str, Any] = {
'debug': False,
'strict': False,
'keep_all_tokens': False,
'tree_class': None,
'cache': False,
@@ -424,7 +428,7 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
# TODO Deprecate lexer_callbacks?
self.lexer_conf = LexerConf(
self.terminals, re_module, self.ignore_tokens, self.options.postlex,
self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes
self.options.lexer_callbacks, self.options.g_regex_flags, use_bytes=self.options.use_bytes, strict=self.options.strict
)

if self.options.parser:
11 changes: 7 additions & 4 deletions lark/lexer.py
@@ -499,11 +499,14 @@ def __init__(self, conf: 'LexerConf', comparator=None) -> None:
# Mark this pair to not repeat warnings when multiple different BasicLexers see the same collision
comparator.mark(a, b)

# leave it as a warning for the moment
# raise LexError("Collision between Terminals %s and %s" % (a.name, b.name))
# Notify the user
message = f"Collision between Terminals {a.name} and {b.name}. "
example = comparator.get_example_overlap(a, b).format_multiline()
logger.warning(f"Collision between Terminals {a.name} and {b.name}. "
f"The lexer will choose between them arbitrarily.\n" + example)
if conf.strict:
raise LexError(f"{message}\n{example}")
logger.warning("%s The lexer will choose between them arbitrarily.\n%s", message, example)
elif conf.strict:
raise LexError("interegular must be installed for strict mode. Use `pip install 'lark[interegular]'`.")

# Init
self.newline_types = frozenset(t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp()))
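A rough sketch of how this branch surfaces to users, reusing the colliding terminals from the new test in tests/test_parser.py (assumes interegular is installed; without it, strict mode raises the LexError about installing it instead):

from lark import Lark
from lark.exceptions import LexError

# /e?rez/ and /erez?/ both match "erez", so interegular reports a collision.
grammar = r'''
start: A | B
A: /e?rez/
B: /erez?/
'''

Lark(grammar, parser='lalr')                   # still builds; logs the collision warning
try:
    Lark(grammar, parser='lalr', strict=True)  # same grammar, now a hard error
except LexError as err:
    print(err)                                 # message plus an example overlap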
3 changes: 2 additions & 1 deletion lark/parser_frontends.py
@@ -145,8 +145,9 @@ def create_contextual_lexer(lexer_conf, parser, postlex, options):

def create_lalr_parser(lexer_conf, parser_conf, options=None):
debug = options.debug if options else False
strict = options.strict if options else False
cls = (options and options._plugins.get('LALR_Parser')) or LALR_Parser
return cls(parser_conf, debug=debug)
return cls(parser_conf, debug=debug, strict=strict)

_parser_creators['lalr'] = create_lalr_parser

3 changes: 2 additions & 1 deletion lark/parsers/grammar_analysis.py
@@ -122,8 +122,9 @@ def calculate_sets(rules):


class GrammarAnalyzer:
def __init__(self, parser_conf, debug=False):
def __init__(self, parser_conf, debug=False, strict=False):
self.debug = debug
self.strict = strict

root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
for start in parser_conf.start}
16 changes: 12 additions & 4 deletions lark/parsers/lalr_analysis.py
@@ -131,8 +131,8 @@ def traverse(x, S, N, X, R, G, F):


class LALR_Analyzer(GrammarAnalyzer):
def __init__(self, parser_conf, debug=False):
GrammarAnalyzer.__init__(self, parser_conf, debug)
def __init__(self, parser_conf, debug=False, strict=False):
GrammarAnalyzer.__init__(self, parser_conf, debug, strict)
self.nonterminal_transitions = []
self.directly_reads = defaultdict(set)
self.reads = defaultdict(set)
@@ -256,10 +256,18 @@ def compute_lalr1_states(self):
rules = [best[1]]
else:
reduce_reduce.append((state, la, rules))
continue

rule ,= rules
if la in actions:
if self.debug:
if self.strict:
raise GrammarError(f"Shift/Reduce conflict for terminal {la.name}. [strict-mode]\n ")
elif self.debug:
logger.warning('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
logger.warning(' * %s', list(rules)[0])
logger.warning(' * %s', rule)
else:
logger.debug('Shift/Reduce conflict for terminal %s: (resolving as shift)', la.name)
logger.debug(' * %s', rule)
else:
actions[la] = (Reduce, list(rules)[0])
m[state] = { k.name: v for k, v in actions.items() }
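A sketch of the new strict branch for table conflicts, using the shift/reduce grammar from the new test: the trailing "." of start competes with the "."+ inside a, which previously only produced a warning when debug=True:

from lark import Lark
from lark.exceptions import GrammarError

grammar = r'''
start: a "."
a: "."+
'''

try:
    Lark(grammar, parser='lalr', strict=True)
except GrammarError as err:
    print(err)   # Shift/Reduce conflict for terminal ... [strict-mode]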
4 changes: 2 additions & 2 deletions lark/parsers/lalr_parser.py
@@ -14,8 +14,8 @@
###{standalone

class LALR_Parser(Serialize):
def __init__(self, parser_conf, debug=False):
analysis = LALR_Analyzer(parser_conf, debug=debug)
def __init__(self, parser_conf, debug=False, strict=False):
analysis = LALR_Analyzer(parser_conf, debug=debug, strict=strict)
analysis.compute_lalr()
callbacks = parser_conf.callbacks

2 changes: 1 addition & 1 deletion tests/test_logger.py
@@ -41,7 +41,7 @@ def test_debug(self):
self.assertIn("A", log)

def test_non_debug(self):
logger.setLevel(logging.DEBUG)
logger.setLevel(logging.WARNING)
collision_grammar = '''
start: as as
as: a*
19 changes: 19 additions & 0 deletions tests/test_parser.py
@@ -2625,6 +2625,25 @@ def __default__(self, data, children, meta):
b = parser.parse(s)
assert a == b

@unittest.skipIf(PARSER!='lalr', "strict mode is only supported in lalr for now")
def test_strict(self):
# Test regex collision
grammar = r"""
start: A | B
A: /e?rez/
B: /erez?/
"""

self.assertRaises(LexError, _Lark, grammar, strict=True)

# Test shift-reduce collision
grammar = r"""
start: a "."
a: "."+
"""
self.assertRaises(GrammarError, _Lark, grammar, strict=True)


_NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
_TestParser.__name__ = _NAME
