Skip to content

Commit cde61ba

Browse files
authored
Auto merge of #36485 - nnethercote:char_lit-2, r=nagisa
Overhaul char_lit()

This commit does the following.

- Removes parsing support for '\X12', '\u123456' and '\U12345678' char literals. These are no longer valid Rust and are rejected by the lexer. (This strange-sounding situation occurs because the parser rescans char literals to compute their value.)
- Rearranges the function so that all the escaped values are handled in a single `match`. The error-handling strategy is based on the one used by byte_lit().
2 parents 141012d + 63ded05 commit cde61ba

File tree

1 file changed

+28
-43
lines changed

1 file changed

+28
-43
lines changed

Diff for: src/libsyntax/parse/mod.rs

+28-43
Original file line numberDiff line numberDiff line change
@@ -286,52 +286,37 @@ pub fn tts_to_parser<'a>(sess: &'a ParseSess,
286286
pub fn char_lit(lit: &str) -> (char, isize) {
287287
use std::char;
288288

289-
let mut chars = lit.chars();
290-
match (chars.next(), chars.next()) {
291-
(Some(c), None) if c != '\\' => return (c, 1),
292-
(Some('\\'), Some(c)) => match c {
293-
'"' => return ('"', 2),
294-
'n' => return ('\n', 2),
295-
'r' => return ('\r', 2),
296-
't' => return ('\t', 2),
297-
'\\' => return ('\\', 2),
298-
'\'' => return ('\'', 2),
299-
'0' => return ('\0', 2),
300-
_ => {}
301-
},
302-
_ => panic!("lexer accepted invalid char escape `{}`", lit)
303-
};
304-
305-
fn esc(len: usize, lit: &str) -> Option<(char, isize)> {
306-
u32::from_str_radix(&lit[2..len], 16).ok()
307-
.and_then(char::from_u32)
308-
.map(|x| (x, len as isize))
289+
// Handle non-escaped chars first.
290+
if lit.as_bytes()[0] != b'\\' {
291+
// If the first byte isn't '\\' it might be part of a multi-byte char, so
292+
// get the char with chars().
293+
let c = lit.chars().next().unwrap();
294+
return (c, 1);
309295
}
310296

311-
let unicode_escape = || -> Option<(char, isize)> {
312-
if lit.as_bytes()[2] == b'{' {
313-
let idx = lit.find('}').unwrap_or_else(|| {
314-
panic!("lexer should have rejected a bad character escape {}", lit)
315-
});
316-
317-
let subslice = &lit[3..idx];
318-
u32::from_str_radix(subslice, 16).ok()
319-
.and_then(char::from_u32)
320-
.map(|x| (x, subslice.chars().count() as isize + 4))
321-
} else {
322-
esc(6, lit)
297+
// Handle escaped chars.
298+
match lit.as_bytes()[1] as char {
299+
'"' => ('"', 2),
300+
'n' => ('\n', 2),
301+
'r' => ('\r', 2),
302+
't' => ('\t', 2),
303+
'\\' => ('\\', 2),
304+
'\'' => ('\'', 2),
305+
'0' => ('\0', 2),
306+
'x' => {
307+
let v = u32::from_str_radix(&lit[2..4], 16).unwrap();
308+
let c = char::from_u32(v).unwrap();
309+
(c, 4)
323310
}
324-
};
325-
326-
// Unicode escapes
327-
return match lit.as_bytes()[1] as char {
328-
'x' | 'X' => esc(4, lit),
329-
'u' => unicode_escape(),
330-
'U' => esc(10, lit),
331-
_ => None,
332-
}.unwrap_or_else(|| {
333-
panic!("lexer should have rejected a bad character escape {}", lit)
334-
})
311+
'u' => {
312+
assert!(lit.as_bytes()[2] == b'{');
313+
let idx = lit.find('}').unwrap();
314+
let v = u32::from_str_radix(&lit[3..idx], 16).unwrap();
315+
let c = char::from_u32(v).unwrap();
316+
(c, (idx + 1) as isize)
317+
}
318+
_ => panic!("lexer should have rejected a bad character escape {}", lit)
319+
}
335320
}
336321

337322
/// Parse a string representing a string literal into its final form. Does

0 commit comments

Comments
 (0)