Skip to content

Commit afaa3b6

Browse files
committed
Prevent ICEs (internal compiler errors) when parsing invalid escapes, closes rust-lang#23620
1 parent 242ed0b commit afaa3b6

File tree

3 files changed

+78
-12
lines changed

3 files changed

+78
-12
lines changed

src/libsyntax/parse/lexer/mod.rs

+29-11
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,7 @@ impl<'a> StringReader<'a> {
742742
let start_bpos = self.last_pos;
743743
let mut accum_int = 0;
744744

745+
let mut valid = true;
745746
for _ in 0..n_digits {
746747
if self.is_eof() {
747748
let last_bpos = self.last_pos;
@@ -750,13 +751,16 @@ impl<'a> StringReader<'a> {
750751
if self.curr_is(delim) {
751752
let last_bpos = self.last_pos;
752753
self.err_span_(start_bpos, last_bpos, "numeric character escape is too short");
754+
valid = false;
753755
break;
754756
}
755757
let c = self.curr.unwrap_or('\x00');
756758
accum_int *= 16;
757759
accum_int += c.to_digit(16).unwrap_or_else(|| {
758760
self.err_span_char(self.last_pos, self.pos,
759761
"illegal character in numeric character escape", c);
762+
763+
valid = false;
760764
0
761765
});
762766
self.bump();
@@ -767,10 +771,11 @@ impl<'a> StringReader<'a> {
767771
self.last_pos,
768772
"this form of character escape may only be used \
769773
with characters in the range [\\x00-\\x7f]");
774+
valid = false;
770775
}
771776

772777
match char::from_u32(accum_int) {
773-
Some(_) => true,
778+
Some(_) => valid,
774779
None => {
775780
let last_bpos = self.last_pos;
776781
self.err_span_(start_bpos, last_bpos, "illegal numeric character escape");
@@ -799,7 +804,18 @@ impl<'a> StringReader<'a> {
799804
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
800805
'x' => self.scan_byte_escape(delim, !ascii_only),
801806
'u' if self.curr_is('{') => {
802-
self.scan_unicode_escape(delim)
807+
let valid = self.scan_unicode_escape(delim);
808+
if valid && ascii_only {
809+
self.err_span_(
810+
escaped_pos,
811+
self.last_pos,
812+
"unicode escape sequences cannot be used as a byte or in \
813+
a byte string"
814+
);
815+
false
816+
} else {
817+
valid
818+
}
803819
}
804820
'\n' if delim == '"' => {
805821
self.consume_whitespace();
@@ -869,6 +885,7 @@ impl<'a> StringReader<'a> {
869885
let start_bpos = self.last_pos;
870886
let mut count = 0;
871887
let mut accum_int = 0;
888+
let mut valid = true;
872889

873890
while !self.curr_is('}') && count <= 6 {
874891
let c = match self.curr {
@@ -884,29 +901,30 @@ impl<'a> StringReader<'a> {
884901
self.fatal_span_(self.last_pos, self.pos,
885902
"unterminated unicode escape (needed a `}`)");
886903
} else {
887-
self.fatal_span_char(self.last_pos, self.pos,
904+
self.err_span_char(self.last_pos, self.pos,
888905
"illegal character in unicode escape", c);
889906
}
907+
valid = false;
908+
0
890909
});
891910
self.bump();
892911
count += 1;
893912
}
894913

895914
if count > 6 {
896-
self.fatal_span_(start_bpos, self.last_pos,
915+
self.err_span_(start_bpos, self.last_pos,
897916
"overlong unicode escape (can have at most 6 hex digits)");
917+
valid = false;
898918
}
899919

900920
self.bump(); // past the ending }
901921

902-
let mut valid = count >= 1 && count <= 6;
903-
if char::from_u32(accum_int).is_none() {
904-
valid = false;
922+
if valid && (char::from_u32(accum_int).is_none() || count == 0) {
923+
self.err_span_(start_bpos, self.last_pos, "illegal unicode character escape");
924+
valid= false;
905925
}
906926

907-
if !valid {
908-
self.fatal_span_(start_bpos, self.last_pos, "illegal unicode character escape");
909-
}
927+
910928
valid
911929
}
912930

@@ -1330,7 +1348,7 @@ impl<'a> StringReader<'a> {
13301348
"unterminated byte constant".to_string());
13311349
}
13321350

1333-
let id = if valid { self.name_from(start) } else { token::intern("??") };
1351+
let id = if valid { self.name_from(start) } else { token::intern("?") };
13341352
self.bump(); // advance curr past token
13351353
return token::Byte(id);
13361354
}
src/test/parse-fail/issue-23620-invalid-escapes.rs

+45-0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
fn main() {
12+
let _ = b"\u{a66e}";
13+
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
14+
15+
let _ = b'\u{a66e}';
16+
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
17+
18+
let _ = b'\u';
19+
//~^ ERROR unknown byte escape: u
20+
21+
let _ = b'\x5';
22+
//~^ ERROR numeric character escape is too short
23+
24+
let _ = b'\xxy';
25+
//~^ ERROR illegal character in numeric character escape: x
26+
//~^^ ERROR illegal character in numeric character escape: y
27+
28+
let _ = '\x5';
29+
//~^ ERROR numeric character escape is too short
30+
31+
let _ = '\xxy';
32+
//~^ ERROR illegal character in numeric character escape: x
33+
//~^^ ERROR illegal character in numeric character escape: y
34+
35+
let _ = b"\u{a4a4} \xf \u";
36+
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
37+
//~^^ ERROR illegal character in numeric character escape:
38+
//~^^^ ERROR unknown byte escape: u
39+
40+
let _ = "\u{ffffff} \xf \u";
41+
//~^ ERROR illegal unicode character escape
42+
//~^^ ERROR illegal character in numeric character escape:
43+
//~^^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
44+
//~^^^^ ERROR unknown character escape: u
45+
}

src/test/parse-fail/new-unicode-escapes-4.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,8 @@
99
// except according to those terms.
1010

1111
pub fn main() {
12-
let s = "\u{lol}"; //~ ERROR illegal character in unicode escape
12+
let s = "\u{lol}";
13+
//~^ ERROR illegal character in unicode escape: l
14+
//~^^ ERROR illegal character in unicode escape: o
15+
//~^^^ ERROR illegal character in unicode escape: l
1316
}

0 commit comments

Comments
 (0)