lark-parser · erezsh · Jun 20, 2024 · Jun 18, 2024 · Jun 20, 2024 · Jun 20, 2024
diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -11,11 +11,6 @@
 
 from lark import Token, Transformer_NonRecursive, LexError
 
-try:
-    from cStringIO import StringIO as cStringIO
-except ImportError:
-    # Available only in Python 2.x, 3.x only has io.StringIO from below
-    cStringIO = None
 from io import (
         StringIO as uStringIO,
         BytesIO,
@@ -28,6 +23,7 @@
 except ImportError:
     regex = None
 
+
 import lark
 from lark import logger
 from lark.lark import Lark
@@ -399,6 +395,8 @@ def test_anon(self):
 
             self.assertEqual( g.parse('abc').children[0], 'abc')
 
+
+        @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer")
         def test_earley(self):
             g = Lark("""start: A "b" c
                         A: "a"+
@@ -421,8 +419,7 @@ def test_earley2(self):
             l = Lark(grammar, parser='earley', lexer=LEXER)
             l.parse(program)
 
-
-        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
+        @unittest.skipIf(LEXER != 'dynamic_complete', "Only relevant for the dynamic_complete parser")
         def test_earley3(self):
             """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)
 
@@ -758,6 +755,8 @@ def test_ambiguous_intermediate_node_conditionally_inlined_rule(self):
             self.assertEqual(ambig_tree.data, '_ambig')
             self.assertEqual(set(ambig_tree.children), expected)
 
+
+        @unittest.skipIf(LEXER=='basic', "Requires dynamic lexer")
         def test_fruitflies_ambig(self):
             grammar = """
                 start: noun verb noun        -> simple
@@ -913,24 +912,6 @@ def test_cycles_with_child_filter(self):
             self.assertEqual(tree, Tree('a', [Tree('x', [Tree('b', [])])]))
 
 
-
-
-
-        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet")  # TODO
-        # def test_not_all_derivations(self):
-        #     grammar = """
-        #     start: cd+ "e"
-
-        #     !cd: "c"
-        #        | "d"
-        #        | "cd"
-
-        #     """
-        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
-        #     x = l.parse('cde')
-        #     assert x.data != '_ambig', x
-        #     assert len(x.children) == 1
-
     _NAME = "TestFullEarley" + LEXER.capitalize()
     _TestFullEarley.__name__ = _NAME
     globals()[_NAME] = _TestFullEarley
@@ -1086,11 +1067,6 @@ def test_basic2(self):
             assert x.data == 'start' and x.children == ['12', '2'], x
 
 
-        @unittest.skipIf(cStringIO is None, "cStringIO not available")
-        def test_stringio_bytes(self):
-            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
-            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
-
         def test_stringio_unicode(self):
             """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
             _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
@@ -1140,7 +1116,7 @@ def test_unicode_literal_range_escape(self):
                           """)
             g.parse('abc')
 
-        @unittest.skipIf(sys.version_info < (3, 3), "re package did not support 32bit unicode escape sequence before Python 3.3")
+
         def test_unicode_literal_range_escape2(self):
             g = _Lark(r"""start: A+
                           A: "\U0000FFFF".."\U00010002"
@@ -1153,8 +1129,7 @@ def test_hex_literal_range_escape(self):
                           """)
             g.parse('\x01\x02\x03')
 
-        @unittest.skipIf(sys.version_info[0]==2 or sys.version_info[:2]==(3, 4),
-                         "bytes parser isn't perfect in Python2, exceptions don't work correctly")
+
         def test_bytes_utf8(self):
             g = r"""
             start: BOM? char+
@@ -1305,49 +1280,6 @@ def test_empty_flatten_list(self):
             [list] = r.children
             self.assertSequenceEqual([item.data for item in list.children], ())
 
-        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-        def test_single_item_flatten_list(self):
-            g = _Lark(r"""start: list
-                            list: | item "," list
-                            item : A
-                            A: "a"
-                         """)
-            r = g.parse("a,")
-
-            # Because 'list' is a flatten rule it's top-level element should *never* be expanded
-            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
-
-            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
-            [list] = r.children
-            self.assertSequenceEqual([item.data for item in list.children], ('item',))
-
-        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-        def test_multiple_item_flatten_list(self):
-            g = _Lark(r"""start: list
-                            #list: | item "," list
-                            item : A
-                            A: "a"
-                         """)
-            r = g.parse("a,a,")
-
-            # Because 'list' is a flatten rule it's top-level element should *never* be expanded
-            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
-
-            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
-            [list] = r.children
-            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
-
-        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
-        def test_recurse_flatten(self):
-            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
-            g = _Lark(r"""start: a | start a
-                         a : A
-                         A : "a" """)
-
-            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
-            # STree data structures, which uses recursion).
-            g.parse("a" * (sys.getrecursionlimit() // 4))
-
         def test_token_collision(self):
             g = _Lark(r"""start: "Hello" NAME
                         NAME: /\w/+
@@ -1459,20 +1391,6 @@ def test_g_regex_flags(self):
             x1 = g.parse("ABBc")
             x2 = g.parse("abdE")
 
-        # def test_string_priority(self):
-        #     g = _Lark("""start: (A | /a?bb/)+
-        #                  A: "a"  """)
-        #     x = g.parse('abb')
-        #     self.assertEqual(len(x.children), 2)
-
-        #     # This parse raises an exception because the lexer will always try to consume
-        #     # "a" first and will never match the regular expression
-        #     # This behavior is subject to change!!
-        #     # This won't happen with ambiguity handling.
-        #     g = _Lark("""start: (A | /a?ab/)+
-        #                  A: "a"  """)
-        #     self.assertRaises(LexError, g.parse, 'aab')
-
         def test_rule_collision(self):
             g = _Lark("""start: "a"+ "b"
                              | "a"+ """)
@@ -1561,13 +1479,6 @@ def test_special_chars(self):
                       """)
             x = g.parse('\n')
 
-
-        # def test_token_recurse(self):
-        #     g = _Lark("""start: A
-        #                  A: B
-        #                  B: A
-        #               """)
-
         @unittest.skipIf(PARSER == 'cyk', "No empty rules")
         def test_empty(self):
             # Fails an Earley implementation without special handling for empty rules,
@@ -1649,13 +1560,6 @@ def test_token_flags(self):
             tree = l.parse('aA')
             self.assertEqual(tree.children, ['a', 'A'])
 
-            # g = """!start: "a"i "a"
-            #     """
-            # self.assertRaises(GrammarError, _Lark, g)
-
-            # g = """!start: /a/i /a/
-            #     """
-            # self.assertRaises(GrammarError, _Lark, g)
 
             g = """start: NAME "," "a"
                    NAME: /[a-z_]/i /[a-z0-9_]/i*
@@ -1666,6 +1570,25 @@ def test_token_flags(self):
             tree = l.parse('AB,a')
             self.assertEqual(tree.children, ['AB'])
 
+        @unittest.skipIf(LEXER in ('basic', 'custom_old', 'custom_new'), "Requires context sensitive terminal selection")
+        def test_token_flags_collision(self):
+
+            g = """!start: "a"i "a"
+                """
+            l = _Lark(g)
+            self.assertEqual(l.parse('aa').children, ['a', 'a'])
+            self.assertEqual(l.parse('Aa').children, ['A', 'a'])
+            self.assertRaises(UnexpectedInput, l.parse, 'aA')
+            self.assertRaises(UnexpectedInput, l.parse, 'AA')
+
+            g = """!start: /a/i /a/
+                """
+            l = _Lark(g)
+            self.assertEqual(l.parse('aa').children, ['a', 'a'])
+            self.assertEqual(l.parse('Aa').children, ['A', 'a'])
+            self.assertRaises(UnexpectedInput, l.parse, 'aA')
+            self.assertRaises(UnexpectedInput, l.parse, 'AA')
+
         def test_token_flags3(self):
             l = _Lark("""!start: ABC+
                       ABC: "abc"i
@@ -1754,7 +1677,7 @@ def test_reduce_cycle(self):
             self.assertEqual(len(tree.children), 2)
 
 
-        @unittest.skipIf(LEXER != 'basic', "basic lexer prioritization differs from dynamic lexer prioritization")
+        @unittest.skipIf('dynamic' in LEXER, "basic lexer prioritization differs from dynamic lexer prioritization")
         def test_lexer_prioritization(self):
             "Tests effect of priority on result"
 
@@ -2274,7 +2197,6 @@ def test_ranged_repeat_rules(self):
 
 
 
-        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now")  # TODO XXX
         def test_priority_vs_embedded(self):
             g = """
             A.2: "a"
@@ -2407,7 +2329,7 @@ def test_meddling_unused(self):
             parser = _Lark(grammar)
 
 
-        @unittest.skipIf(PARSER!='lalr' or 'custom' in LEXER, "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
+        @unittest.skipIf(PARSER!='lalr' or LEXER == 'custom_old', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
         def test_serialize(self):
             grammar = """
                 start: _ANY b "C"
@@ -2512,7 +2434,7 @@ def test_regex_width_fallback(self):
             """
             self.assertRaises((GrammarError, LexError, re.error), _Lark, g, regex=True)
 
-        @unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
+        @unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment")
         def test_parser_interactive_parser(self):
 
             g = _Lark(r'''
@@ -2549,7 +2471,7 @@ def test_parser_interactive_parser(self):
             res = ip_copy.feed_eof()
             self.assertEqual(res, Tree('start', ['a', 'b', 'b']))
 
-        @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
+        @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
         def test_error_with_interactive_parser(self):
             def ignore_errors(e):
                 if isinstance(e, UnexpectedCharacters):
@@ -2584,18 +2506,18 @@ def ignore_errors(e):
             s = "[0 1, 2,@, 3,,, 4, 5 6 ]$"
             tree = g.parse(s, on_error=ignore_errors)
 
-        @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
+        @unittest.skipIf(PARSER != 'lalr', "interactive_parser error handling only works with LALR for now")
         def test_iter_parse(self):
             ab_grammar = '!start: "a"* "b"*'
-            parser = Lark(ab_grammar, parser="lalr")
+            parser = _Lark(ab_grammar)
             ip = parser.parse_interactive("aaabb")
             i = ip.iter_parse()
             assert next(i) == 'a'
             assert next(i) == 'a'
             assert next(i) == 'a'
             assert next(i) == 'b'
 
-        @unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
+        @unittest.skipIf(PARSER != 'lalr', "interactive_parser is only implemented for LALR at the moment")
         def test_interactive_treeless_transformer(self):
             grammar = r"""
                 start: SYM+
@@ -2617,7 +2539,7 @@ def SYM(self, token):
             res = ip.feed_eof()
             self.assertEqual(res.children, [1, 2, 1])
 
-        @unittest.skipIf(PARSER!='lalr', "Tree-less mode is only supported in lalr")
+        @unittest.skipIf(PARSER == 'earley', "Tree-less mode is not supported in earley")
         def test_default_in_treeless_mode(self):
             grammar = r"""
                 start: expr
@@ -2643,7 +2565,7 @@ def __default__(self, data, children, meta):
             b = parser.parse(s)
             assert a == b
 
-        @unittest.skipIf(PARSER!='lalr', "strict mode is only supported in lalr for now")
+        @unittest.skipIf(PARSER != 'lalr', "strict mode is only supported in lalr for now")
         def test_strict(self):
             # Test regex collision
             grammar = r"""
@@ -2687,7 +2609,7 @@ def test_strict(self):
 for _LEXER, _PARSER in _TO_TEST:
     _make_parser_test(_LEXER, _PARSER)
 
-for _LEXER in ('dynamic', 'dynamic_complete'):
+for _LEXER in ('basic', 'dynamic', 'dynamic_complete'):
     _make_full_earley_test(_LEXER)
 
 if __name__ == '__main__':