@@ -79,7 +79,7 @@ impl<'a> StringReader<'a> {
79
79
/// preceded by whitespace.
80
80
fn next_token ( & mut self ) -> ( Token , bool ) {
81
81
let mut preceded_by_whitespace = false ;
82
-
82
+ let mut swallow_next_invalid = 0 ;
83
83
// Skip trivial (whitespace & comments) tokens
84
84
loop {
85
85
let token = self . cursor . advance_token ( ) ;
@@ -232,19 +232,34 @@ impl<'a> StringReader<'a> {
232
232
rustc_lexer:: TokenKind :: Percent => token:: BinOp ( token:: Percent ) ,
233
233
234
234
rustc_lexer:: TokenKind :: Unknown | rustc_lexer:: TokenKind :: InvalidIdent => {
235
- let c = self . str_from ( start) . chars ( ) . next ( ) . unwrap ( ) ;
235
+ // Don't emit diagnostics for sequences of the same invalid token
236
+ if swallow_next_invalid > 0 {
237
+ swallow_next_invalid -= 1 ;
238
+ continue ;
239
+ }
240
+ let mut it = self . str_from_to_end ( start) . chars ( ) ;
241
+ let c = it. next ( ) . unwrap ( ) ;
242
+ let repeats = it. take_while ( |c1| * c1 == c) . count ( ) ;
236
243
let mut err =
237
- self . struct_err_span_char ( start, self . pos , "unknown start of token" , c) ;
244
+ self . struct_err_span_char ( start, self . pos + Pos :: from_usize ( repeats * c . len_utf8 ( ) ) , "unknown start of token" , c) ;
238
245
// FIXME: the lexer could be used to turn the ASCII version of unicode
239
246
// homoglyphs, instead of keeping a table in `check_for_substitution`, into the
240
247
// token. Ideally, this should be inside `rustc_lexer`. However, we should
241
248
// first remove compound tokens like `<<` from `rustc_lexer`, and then add
242
249
// fancier error recovery to it, as there will be less overall work to do this
243
250
// way.
244
- let token = unicode_chars:: check_for_substitution ( self , start, c, & mut err) ;
251
+ let token = unicode_chars:: check_for_substitution ( self , start, c, & mut err, repeats+ 1 ) ;
245
252
if c == '\x00' {
246
253
err. help ( "source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used" ) ;
247
254
}
255
+ if repeats > 0 {
256
+ if repeats == 1 {
257
+ err. note ( format ! ( "character appears once more" ) ) ;
258
+ } else {
259
+ err. note ( format ! ( "character appears {repeats} more times" ) ) ;
260
+ }
261
+ swallow_next_invalid = repeats;
262
+ }
248
263
err. emit ( ) ;
249
264
if let Some ( token) = token {
250
265
token
@@ -486,6 +501,11 @@ impl<'a> StringReader<'a> {
486
501
& self . src [ self . src_index ( start) ..self . src_index ( end) ]
487
502
}
488
503
504
+ /// Returns the slice of the source text from `start` through the end of `self.src`.
505
+ fn str_from_to_end ( & self , start : BytePos ) -> & str {
506
+ & self . src [ self . src_index ( start) ..]
507
+ }
508
+
489
509
fn report_raw_str_error ( & self , start : BytePos , prefix_len : u32 ) -> ! {
490
510
match rustc_lexer:: validate_raw_str ( self . str_from ( start) , prefix_len) {
491
511
Err ( RawStrError :: InvalidStarter { bad_char } ) => {
0 commit comments