diff --git a/tests/test_parser.py b/tests/test_parser.py
index 74985015..1ab705b0 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -11,11 +11,6 @@
 from lark import Token, Transformer_NonRecursive, LexError
 
-try:
-    from cStringIO import StringIO as cStringIO
-except ImportError:
-    # Available only in Python 2.x, 3.x only has io.StringIO from below
-    cStringIO = None
 from io import (
         StringIO as uStringIO,
         BytesIO,
     )
@@ -28,6 +23,7 @@
 except ImportError:
     regex = None
 
+import lark
 from lark import logger
 from lark.lark import Lark
 
@@ -399,6 +395,8 @@ def test_anon(self):
 
             self.assertEqual( g.parse('abc').children[0], 'abc')
 
+
+        @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer")
         def test_earley(self):
             g = Lark("""start: A "b" c
                         A: "a"+
@@ -421,8 +419,7 @@ def test_earley2(self):
             l = Lark(grammar, parser='earley', lexer=LEXER)
             l.parse(program)
 
-
-        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
+        @unittest.skipIf(LEXER != 'dynamic_complete', "Only relevant for the dynamic_complete parser")
         def test_earley3(self):
             """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)
 
@@ -758,6 +755,8 @@ def test_ambiguous_intermediate_node_conditionally_inlined_rule(self):
             self.assertEqual(ambig_tree.data, '_ambig')
             self.assertEqual(set(ambig_tree.children), expected)
 
+
+        @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer")
         def test_fruitflies_ambig(self):
             grammar = """
                 start: noun verb noun        -> simple
@@ -913,24 +912,6 @@ def test_cycles_with_child_filter(self):
 
             self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])]))
 
-
-
-
-        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
-        # def test_not_all_derivations(self):
-        #     grammar = """
-        #     start: cd+ "e"
-
-        #     !cd: "c"
-        #        | "d"
-        #        | "cd"
-
-        #     """
-        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
-        #     x = l.parse('cde')
-        #     assert x.data != '_ambig', x
-        #     assert len(x.children) == 1
-
     _NAME = "TestFullEarley" + LEXER.capitalize()
     _TestFullEarley.__name__ = _NAME
     globals()[_NAME] = _TestFullEarley
@@ -1086,11 +1067,6 @@ def test_basic2(self):
 
             assert x.data == 'start' and x.children == ['12', '2'], x
 
-        @unittest.skipIf(cStringIO is None, "cStringIO not available")
-        def test_stringio_bytes(self):
-            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
-            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
-
         def test_stringio_unicode(self):
             """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
             _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
@@ -1140,7 +1116,7 @@ def test_unicode_literal_range_escape(self):
             """)
             g.parse('abc')
 
-        @unittest.skipIf(sys.version_info < (3, 3), "re package did not support 32bit unicode escape sequence before Python 3.3")
+
         def test_unicode_literal_range_escape2(self):
             g = _Lark(r"""start: A+
                           A: "\U0000FFFF".."\U00010002"
@@ -1153,8 +1129,7 @@ def test_hex_literal_range_escape(self):
             """)
             g.parse('\x01\x02\x03')
 
-        @unittest.skipIf(sys.version_info[0]==2 or sys.version_info[:2]==(3, 4),
-                         "bytes parser isn't perfect in Python2, exceptions don't work correctly")
+
        def test_bytes_utf8(self):
            g = r"""
            start: BOM? char+
@@ -1305,49 +1280,6 @@ def test_empty_flatten_list(self):
             [list] = r.children
             self.assertSequenceEqual([item.data for item in list.children], ())
 
-        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-        def test_single_item_flatten_list(self):
-            g = _Lark(r"""start: list
-                            list: | item "," list
-                            item : A
-                            A: "a"
-                         """)
-            r = g.parse("a,")
-
-            # Because 'list' is a flatten rule it's top-level element should *never* be expanded
-            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
-
-            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
-            [list] = r.children
-            self.assertSequenceEqual([item.data for item in list.children], ('item',))
-
-        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-        def test_multiple_item_flatten_list(self):
-            g = _Lark(r"""start: list
-                            #list: | item "," list
-                            item : A
-                            A: "a"
-                         """)
-            r = g.parse("a,a,")
-
-            # Because 'list' is a flatten rule it's top-level element should *never* be expanded
-            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
-
-            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
-            [list] = r.children
-            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
-
-        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-        def test_recurse_flatten(self):
-            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
-            g = _Lark(r"""start: a | start a
-                         a : A
-                         A : "a" """)
-
-            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
-            # STree data structures, which uses recursion).
-            g.parse("a" * (sys.getrecursionlimit() // 4))
-
         def test_token_collision(self):
             g = _Lark(r"""start: "Hello" NAME
                           NAME: /\w/+
@@ -1459,20 +1391,6 @@ def test_g_regex_flags(self):
             x1 = g.parse("ABBc")
             x2 = g.parse("abdE")
 
-        # def test_string_priority(self):
-        #     g = _Lark("""start: (A | /a?bb/)+
-        #                  A: "a" """)
-        #     x = g.parse('abb')
-        #     self.assertEqual(len(x.children), 2)
-
-        #     # This parse raises an exception because the lexer will always try to consume
-        #     # "a" first and will never match the regular expression
-        #     # This behavior is subject to change!!
-        #     # This won't happen with ambiguity handling.
-        #     g = _Lark("""start: (A | /a?ab/)+
-        #                  A: "a" """)
-        #     self.assertRaises(LexError, g.parse, 'aab')
-
         def test_rule_collision(self):
             g = _Lark("""start: "a"+ "b"
                          | "a"+ """)
@@ -1561,13 +1479,6 @@ def test_special_chars(self):
                 """)
             x = g.parse('\n')
 
-
-        # def test_token_recurse(self):
-        #     g = _Lark("""start: A
-        #                  A: B
-        #                  B: A
-        #               """)
-
         @unittest.skipIf(PARSER == 'cyk', "No empty rules")
         def test_empty(self):
             # Fails an Earley implementation without special handling for empty rules,
@@ -1649,13 +1560,6 @@ def test_token_flags(self):
             tree = l.parse('aA')
             self.assertEqual(tree.children, ['a', 'A'])
 
-            # g = """!start: "a"i "a"
-            #     """
-            # self.assertRaises(GrammarError, _Lark, g)
-
-            # g = """!start: /a/i /a/
-            #     """
-            # self.assertRaises(GrammarError, _Lark, g)
 
             g = """start: NAME "," "a"
                    NAME: /[a-z_]/i /[a-z0-9_]/i*
@@ -1666,6 +1570,25 @@
                 """
             l = _Lark(g)
 
             tree = l.parse('AB,a')
             self.assertEqual(tree.children, ['AB'])
 
+        @unittest.skipIf(LEXER in ('basic', 'custom_old', 'custom_new'), "Requires context sensitive terminal selection")
+        def test_token_flags_collision(self):
+
+            g = """!start: "a"i "a"
+                """
+            l = _Lark(g)
+            self.assertEqual(l.parse('aa').children, ['a', 'a'])
+            self.assertEqual(l.parse('Aa').children, ['A', 'a'])
+            self.assertRaises(UnexpectedInput, l.parse, 'aA')
+            self.assertRaises(UnexpectedInput, l.parse, 'AA')
+
+            g = """!start: /a/i /a/
+                """
+            l = _Lark(g)
+            self.assertEqual(l.parse('aa').children, ['a', 'a'])
+            self.assertEqual(l.parse('Aa').children, ['A', 'a'])
+            self.assertRaises(UnexpectedInput, l.parse, 'aA')
+            self.assertRaises(UnexpectedInput, l.parse, 'AA')
+
         def test_token_flags3(self):
             l = _Lark("""!start: ABC+
                          ABC: "abc"i
@@ -1754,7 +1677,7 @@ def test_reduce_cycle(self):
 
             self.assertEqual(len(tree.children), 2)
 
-        @unittest.skipIf(LEXER != 'basic', "basic lexer prioritization differs from dynamic lexer prioritization")
+        @unittest.skipIf('dynamic' in LEXER, "basic lexer prioritization differs from dynamic lexer prioritization")
         def test_lexer_prioritization(self):
             "Tests effect of priority on result"
 
@@ -2274,7 +2197,6 @@ def test_ranged_repeat_rules(self):
 
 
 
-        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
         def test_priority_vs_embedded(self):
             g = """
             A.2: "a"
@@ -2407,7 +2329,7 @@ def test_meddling_unused(self):
 
             parser = _Lark(grammar)
 
-        @unittest.skipIf(PARSER!='lalr' or 'custom' in LEXER, "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
+        @unittest.skipIf(PARSER!='lalr' or LEXER == 'custom_old', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
         def test_serialize(self):
             grammar = """
             start: _ANY b "C"
@@ -2512,7 +2434,7 @@ def test_regex_width_fallback(self):
             """
             self.assertRaises((GrammarError, LexError, re.error), _Lark, g, regex=True)
 
-        @unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
+        @unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment")
         def test_parser_interactive_parser(self):
 
             g = _Lark(r'''
@@ -2549,7 +2471,7 @@ def test_parser_interactive_parser(self):
             res = ip_copy.feed_eof()
             self.assertEqual(res, Tree('start', ['a', 'b', 'b']))
 
-        @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
+        @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
         def test_error_with_interactive_parser(self):
             def ignore_errors(e):
                if isinstance(e, UnexpectedCharacters):
@@ -2584,10 +2506,10 @@ def ignore_errors(e):
             s = "[0 1, 2,@, 3,,, 4, 5 6 ]$"
             tree = g.parse(s, on_error=ignore_errors)
 
-        @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
+        @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
         def test_iter_parse(self):
             ab_grammar = '!start: "a"* "b"*'
-            parser = Lark(ab_grammar, parser="lalr")
+            parser = _Lark(ab_grammar)
             ip = parser.parse_interactive("aaabb")
             i = ip.iter_parse()
             assert next(i) == 'a'
@@ -2595,7 +2517,7 @@ def test_iter_parse(self):
             assert next(i) == 'a'
             assert next(i) == 'b'
 
-        @unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
+        @unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment")
        def test_interactive_treeless_transformer(self):
             grammar = r"""
             start: SYM+
@@ -2617,7 +2539,7 @@ def SYM(self, token):
             res = ip.feed_eof()
             self.assertEqual(res.children, [1, 2, 1])
 
-        @unittest.skipIf(PARSER!='lalr', "Tree-less mode is only supported in lalr")
+        @unittest.skipIf(PARSER == 'earley', "Tree-less mode is not supported in earley")
         def test_default_in_treeless_mode(self):
             grammar = r"""
             start: expr
@@ -2643,7 +2565,7 @@ def __default__(self, data, children, meta):
             b = parser.parse(s)
             assert a == b
 
-        @unittest.skipIf(PARSER!='lalr', "strict mode is only supported in lalr for now")
+        @unittest.skipIf(PARSER != 'lalr', "strict mode is only supported in lalr for now")
         def test_strict(self):
             # Test regex collision
             grammar = r"""
@@ -2687,7 +2609,7 @@
 for _LEXER, _PARSER in _TO_TEST:
     _make_parser_test(_LEXER, _PARSER)
 
-for _LEXER in ('dynamic', 'dynamic_complete'):
+for _LEXER in ('basic', 'dynamic', 'dynamic_complete'):
     _make_full_earley_test(_LEXER)
 
 if __name__ == '__main__':