@@ -230,7 +230,7 @@ pub fn stream_to_parser(sess: &ParseSess, stream: TokenStream) -> Parser {
230
230
/// Rather than just accepting/rejecting a given literal, unescapes it as
231
231
/// well. Can take any slice prefixed by a character escape. Returns the
232
232
/// character and the number of characters consumed.
233
- pub fn char_lit ( lit : & str ) -> ( char , isize ) {
233
+ pub fn char_lit ( lit : & str , diag : Option < ( Span , & Handler ) > ) -> ( char , isize ) {
234
234
use std:: char;
235
235
236
236
// Handle non-escaped chars first.
@@ -258,8 +258,19 @@ pub fn char_lit(lit: &str) -> (char, isize) {
258
258
'u' => {
259
259
assert_eq ! ( lit. as_bytes( ) [ 2 ] , b'{' ) ;
260
260
let idx = lit. find ( '}' ) . unwrap ( ) ;
261
- let v = u32:: from_str_radix ( & lit[ 3 ..idx] , 16 ) . unwrap ( ) ;
262
- let c = char:: from_u32 ( v) . unwrap ( ) ;
261
+ let s = & lit[ 3 ..idx] . chars ( ) . filter ( |& c| c != '_' ) . collect :: < String > ( ) ;
262
+ let v = u32:: from_str_radix ( & s, 16 ) . unwrap ( ) ;
263
+ let c = char:: from_u32 ( v) . unwrap_or_else ( || {
264
+ if let Some ( ( span, diag) ) = diag {
265
+ let mut diag = diag. struct_span_err ( span, "invalid unicode character escape" ) ;
266
+ if v > 0x10FFFF {
267
+ diag. help ( "unicode escape must be at most 10FFFF" ) . emit ( ) ;
268
+ } else {
269
+ diag. help ( "unicode escape must not be a surrogate" ) . emit ( ) ;
270
+ }
271
+ }
272
+ '\u{FFFD}'
273
+ } ) ;
263
274
( c, ( idx + 1 ) as isize )
264
275
}
265
276
_ => panic ! ( "lexer should have rejected a bad character escape {}" , lit)
@@ -272,7 +283,7 @@ pub fn escape_default(s: &str) -> String {
272
283
273
284
/// Parse a string representing a string literal into its final form. Does
274
285
/// unescaping.
275
- pub fn str_lit ( lit : & str ) -> String {
286
+ pub fn str_lit ( lit : & str , diag : Option < ( Span , & Handler ) > ) -> String {
276
287
debug ! ( "parse_str_lit: given {}" , escape_default( lit) ) ;
277
288
let mut res = String :: with_capacity ( lit. len ( ) ) ;
278
289
@@ -313,7 +324,7 @@ pub fn str_lit(lit: &str) -> String {
313
324
eat ( & mut chars) ;
314
325
} else {
315
326
// otherwise, a normal escape
316
- let ( c, n) = char_lit ( & lit[ i..] ) ;
327
+ let ( c, n) = char_lit ( & lit[ i..] , diag ) ;
317
328
for _ in 0 ..n - 1 { // we don't need to move past the first \
318
329
chars. next ( ) ;
319
330
}
@@ -385,15 +396,15 @@ pub fn lit_token(lit: token::Lit, suf: Option<Symbol>, diag: Option<(Span, &Hand
385
396
386
397
match lit {
387
398
token:: Byte ( i) => ( true , Some ( LitKind :: Byte ( byte_lit ( & i. as_str ( ) ) . 0 ) ) ) ,
388
- token:: Char ( i) => ( true , Some ( LitKind :: Char ( char_lit ( & i. as_str ( ) ) . 0 ) ) ) ,
399
+ token:: Char ( i) => ( true , Some ( LitKind :: Char ( char_lit ( & i. as_str ( ) , diag ) . 0 ) ) ) ,
389
400
390
401
// There are some valid suffixes for integer and float literals,
391
402
// so all the handling is done internally.
392
403
token:: Integer ( s) => ( false , integer_lit ( & s. as_str ( ) , suf, diag) ) ,
393
404
token:: Float ( s) => ( false , float_lit ( & s. as_str ( ) , suf, diag) ) ,
394
405
395
406
token:: Str_ ( s) => {
396
- let s = Symbol :: intern ( & str_lit ( & s. as_str ( ) ) ) ;
407
+ let s = Symbol :: intern ( & str_lit ( & s. as_str ( ) , diag ) ) ;
397
408
( true , Some ( LitKind :: Str ( s, ast:: StrStyle :: Cooked ) ) )
398
409
}
399
410
token:: StrRaw ( s, n) => {
0 commit comments