Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up test_parser.py, use xFail instead of skip where appropriate #1428

Merged
merged 3 commits into from
Jun 20, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 37 additions & 115 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,6 @@

from lark import Token, Transformer_NonRecursive, LexError

try:
from cStringIO import StringIO as cStringIO
except ImportError:
# Available only in Python 2.x, 3.x only has io.StringIO from below
cStringIO = None
from io import (
StringIO as uStringIO,
BytesIO,
Expand All @@ -28,6 +23,7 @@
except ImportError:
regex = None


import lark
from lark import logger
from lark.lark import Lark
Expand Down Expand Up @@ -399,6 +395,8 @@ def test_anon(self):

self.assertEqual( g.parse('abc').children[0], 'abc')


@unittest.skipIf(LEXER=='basic', "Requires dynamic lexer")
def test_earley(self):
g = Lark("""start: A "b" c
A: "a"+
Expand All @@ -421,8 +419,7 @@ def test_earley2(self):
l = Lark(grammar, parser='earley', lexer=LEXER)
l.parse(program)


@unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
@unittest.skipIf(LEXER != 'dynamic_complete', "Only relevant for the dynamic_complete parser")
def test_earley3(self):
"""Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

Expand Down Expand Up @@ -758,6 +755,8 @@ def test_ambiguous_intermediate_node_conditionally_inlined_rule(self):
self.assertEqual(ambig_tree.data, '_ambig')
self.assertEqual(set(ambig_tree.children), expected)


@unittest.skipIf(LEXER=='basic', "Requires dynamic lexer")
def test_fruitflies_ambig(self):
grammar = """
start: noun verb noun -> simple
Expand Down Expand Up @@ -913,24 +912,6 @@ def test_cycles_with_child_filter(self):
self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])]))





# @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
# def test_not_all_derivations(self):
# grammar = """
# start: cd+ "e"

# !cd: "c"
# | "d"
# | "cd"

# """
# l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
# x = l.parse('cde')
# assert x.data != '_ambig', x
# assert len(x.children) == 1

_NAME = "TestFullEarley" + LEXER.capitalize()
_TestFullEarley.__name__ = _NAME
globals()[_NAME] = _TestFullEarley
Expand Down Expand Up @@ -1086,11 +1067,6 @@ def test_basic2(self):
assert x.data == 'start' and x.children == ['12', '2'], x


@unittest.skipIf(cStringIO is None, "cStringIO not available")
def test_stringio_bytes(self):
"""Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
_Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

def test_stringio_unicode(self):
"""Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
_Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
Expand Down Expand Up @@ -1140,7 +1116,7 @@ def test_unicode_literal_range_escape(self):
""")
g.parse('abc')

@unittest.skipIf(sys.version_info < (3, 3), "re package did not support 32bit unicode escape sequence before Python 3.3")

def test_unicode_literal_range_escape2(self):
g = _Lark(r"""start: A+
A: "\U0000FFFF".."\U00010002"
Expand All @@ -1153,8 +1129,7 @@ def test_hex_literal_range_escape(self):
""")
g.parse('\x01\x02\x03')

@unittest.skipIf(sys.version_info[0]==2 or sys.version_info[:2]==(3, 4),
"bytes parser isn't perfect in Python2, exceptions don't work correctly")

def test_bytes_utf8(self):
g = r"""
start: BOM? char+
Expand Down Expand Up @@ -1305,49 +1280,6 @@ def test_empty_flatten_list(self):
[list] = r.children
self.assertSequenceEqual([item.data for item in list.children], ())

@unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
def test_single_item_flatten_list(self):
g = _Lark(r"""start: list
list: | item "," list
item : A
A: "a"
""")
r = g.parse("a,")

# Because 'list' is a flatten rule it's top-level element should *never* be expanded
self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

# Sanity check: verify that 'list' contains exactly the one 'item' we've given it
[list] = r.children
self.assertSequenceEqual([item.data for item in list.children], ('item',))

@unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
def test_multiple_item_flatten_list(self):
g = _Lark(r"""start: list
#list: | item "," list
item : A
A: "a"
""")
r = g.parse("a,a,")

# Because 'list' is a flatten rule it's top-level element should *never* be expanded
self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

# Sanity check: verify that 'list' contains exactly the two 'item's we've given it
[list] = r.children
self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

@unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
def test_recurse_flatten(self):
"""Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
g = _Lark(r"""start: a | start a
a : A
A : "a" """)

# Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
# STree data structures, which uses recursion).
g.parse("a" * (sys.getrecursionlimit() // 4))

def test_token_collision(self):
g = _Lark(r"""start: "Hello" NAME
NAME: /\w/+
Expand Down Expand Up @@ -1459,20 +1391,6 @@ def test_g_regex_flags(self):
x1 = g.parse("ABBc")
x2 = g.parse("abdE")

# def test_string_priority(self):
# g = _Lark("""start: (A | /a?bb/)+
# A: "a" """)
# x = g.parse('abb')
# self.assertEqual(len(x.children), 2)

# # This parse raises an exception because the lexer will always try to consume
# # "a" first and will never match the regular expression
# # This behavior is subject to change!!
# # This won't happen with ambiguity handling.
# g = _Lark("""start: (A | /a?ab/)+
# A: "a" """)
# self.assertRaises(LexError, g.parse, 'aab')

def test_rule_collision(self):
g = _Lark("""start: "a"+ "b"
| "a"+ """)
Expand Down Expand Up @@ -1561,13 +1479,6 @@ def test_special_chars(self):
""")
x = g.parse('\n')


# def test_token_recurse(self):
# g = _Lark("""start: A
# A: B
# B: A
# """)

@unittest.skipIf(PARSER == 'cyk', "No empty rules")
def test_empty(self):
# Fails an Earley implementation without special handling for empty rules,
Expand Down Expand Up @@ -1649,13 +1560,6 @@ def test_token_flags(self):
tree = l.parse('aA')
self.assertEqual(tree.children, ['a', 'A'])

# g = """!start: "a"i "a"
# """
# self.assertRaises(GrammarError, _Lark, g)

# g = """!start: /a/i /a/
# """
# self.assertRaises(GrammarError, _Lark, g)

g = """start: NAME "," "a"
NAME: /[a-z_]/i /[a-z0-9_]/i*
Expand All @@ -1666,6 +1570,25 @@ def test_token_flags(self):
tree = l.parse('AB,a')
self.assertEqual(tree.children, ['AB'])

@unittest.skipIf(LEXER in ('basic', 'custom_old', 'custom_new'), "Requires context sensitive terminal selection")
def test_token_flags_collision(self):

g = """!start: "a"i "a"
"""
l = _Lark(g)
self.assertEqual(l.parse('aa').children, ['a', 'a'])
self.assertEqual(l.parse('Aa').children, ['A', 'a'])
self.assertRaises(UnexpectedInput, l.parse, 'aA')
self.assertRaises(UnexpectedInput, l.parse, 'AA')

g = """!start: /a/i /a/
"""
l = _Lark(g)
self.assertEqual(l.parse('aa').children, ['a', 'a'])
self.assertEqual(l.parse('Aa').children, ['A', 'a'])
self.assertRaises(UnexpectedInput, l.parse, 'aA')
self.assertRaises(UnexpectedInput, l.parse, 'AA')

def test_token_flags3(self):
l = _Lark("""!start: ABC+
ABC: "abc"i
Expand Down Expand Up @@ -1754,7 +1677,7 @@ def test_reduce_cycle(self):
self.assertEqual(len(tree.children), 2)


@unittest.skipIf(LEXER != 'basic', "basic lexer prioritization differs from dynamic lexer prioritization")
@unittest.skipIf('dynamic' in LEXER, "basic lexer prioritization differs from dynamic lexer prioritization")
def test_lexer_prioritization(self):
"Tests effect of priority on result"

Expand Down Expand Up @@ -2274,7 +2197,6 @@ def test_ranged_repeat_rules(self):



@unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
def test_priority_vs_embedded(self):
g = """
A.2: "a"
Expand Down Expand Up @@ -2407,7 +2329,7 @@ def test_meddling_unused(self):
parser = _Lark(grammar)


@unittest.skipIf(PARSER!='lalr' or 'custom' in LEXER, "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
@unittest.skipIf(PARSER!='lalr' or LEXER == 'custom_old', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
def test_serialize(self):
grammar = """
start: _ANY b "C"
Expand Down Expand Up @@ -2512,7 +2434,7 @@ def test_regex_width_fallback(self):
"""
self.assertRaises((GrammarError, LexError, re.error), _Lark, g, regex=True)

@unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
@unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment")
def test_parser_interactive_parser(self):

g = _Lark(r'''
Expand Down Expand Up @@ -2549,7 +2471,7 @@ def test_parser_interactive_parser(self):
res = ip_copy.feed_eof()
self.assertEqual(res, Tree('start', ['a', 'b', 'b']))

@unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
@unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
def test_error_with_interactive_parser(self):
def ignore_errors(e):
if isinstance(e, UnexpectedCharacters):
Expand Down Expand Up @@ -2584,18 +2506,18 @@ def ignore_errors(e):
s = "[0 1, 2,@, 3,,, 4, 5 6 ]$"
tree = g.parse(s, on_error=ignore_errors)

@unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
@unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
def test_iter_parse(self):
ab_grammar = '!start: "a"* "b"*'
parser = Lark(ab_grammar, parser="lalr")
parser = _Lark(ab_grammar)
ip = parser.parse_interactive("aaabb")
i = ip.iter_parse()
assert next(i) == 'a'
assert next(i) == 'a'
assert next(i) == 'a'
assert next(i) == 'b'

@unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
@unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment")
def test_interactive_treeless_transformer(self):
grammar = r"""
start: SYM+
Expand All @@ -2617,7 +2539,7 @@ def SYM(self, token):
res = ip.feed_eof()
self.assertEqual(res.children, [1, 2, 1])

@unittest.skipIf(PARSER!='lalr', "Tree-less mode is only supported in lalr")
@unittest.skipIf(PARSER == 'earley', "Tree-less mode is not supported in earley")
def test_default_in_treeless_mode(self):
grammar = r"""
start: expr
Expand All @@ -2643,7 +2565,7 @@ def __default__(self, data, children, meta):
b = parser.parse(s)
assert a == b

@unittest.skipIf(PARSER!='lalr', "strict mode is only supported in lalr for now")
@unittest.skipIf(PARSER != 'lalr', "strict mode is only supported in lalr for now")
def test_strict(self):
# Test regex collision
grammar = r"""
Expand Down Expand Up @@ -2687,7 +2609,7 @@ def test_strict(self):
for _LEXER, _PARSER in _TO_TEST:
_make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
for _LEXER in ('basic', 'dynamic', 'dynamic_complete'):
_make_full_earley_test(_LEXER)

if __name__ == '__main__':
Expand Down
Loading