From 9b24e6ae9f35739f04b29ca13f90f1242e2296d8 Mon Sep 17 00:00:00 2001 From: Ilya Bylich Date: Thu, 8 Jul 2021 19:50:34 +0300 Subject: [PATCH] + lexer.rl: reject `\u` after control/meta escape chars. (#807) This commit tracks upstream commit ruby/ruby@110f242. --- lib/parser/lexer.rl | 5 +++++ test/test_lexer.rb | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/lib/parser/lexer.rl b/lib/parser/lexer.rl index 5e370c95d..8574f95cc 100644 --- a/lib/parser/lexer.rl +++ b/lib/parser/lexer.rl @@ -705,6 +705,11 @@ class Parser::Lexer action unescape_char { codepoint = @source_pts[p - 1] + + if @version >= 30 && (codepoint == 117 || codepoint == 85) # 'u' or 'U' + diagnostic :fatal, :invalid_escape + end + if (@escape = ESCAPES[codepoint]).nil? @escape = encode_escape(@source_buffer.slice(p - 1)) end diff --git a/test/test_lexer.rb b/test/test_lexer.rb index 54db8bb20..9074ccc27 100644 --- a/test/test_lexer.rb +++ b/test/test_lexer.rb @@ -3614,4 +3614,44 @@ def refute_scanned_numbered_parameter(input, message = nil) end end + def test_meta_escape_slash_u__before_30 + setup_lexer(27) + assert_scanned('"\c\u0000"', + :tSTRING, "\u00150000", [0, 10]) + assert_scanned('"\c\U0000"', + :tSTRING, "\u00150000", [0, 10]) + + assert_scanned('"\C-\u0000"', + :tSTRING, "\u00150000", [0, 11]) + assert_scanned('"\C-\U0000"', + :tSTRING, "\u00150000", [0, 11]) + + assert_scanned('"\M-\u0000"', + :tSTRING, "\xF50000", [0, 11]) + assert_scanned('"\M-\U0000"', + :tSTRING, "\xD50000", [0, 11]) + end + + def refute_scanned_meta_escape_slash_u(input) + setup_lexer(30) + source_buffer = Parser::Source::Buffer.new('(refute_scanned_meta_escape_slash_u)', source: input) + @lex.source_buffer = source_buffer + + err = assert_raises Parser::SyntaxError do + @lex.advance + end + + assert_equal :invalid_escape, err.diagnostic.reason + end + + def test_meta_escape_slash_u__after_30 + refute_scanned_meta_escape_slash_u('"\c\u0000"') + refute_scanned_meta_escape_slash_u('"\c\U0000"') + + refute_scanned_meta_escape_slash_u('"\C-\u0000"') + refute_scanned_meta_escape_slash_u('"\C-\U0000"') + + refute_scanned_meta_escape_slash_u('"\M-\u0000"') + refute_scanned_meta_escape_slash_u('"\M-\U0000"') + end end