@@ -21,6 +21,7 @@ use crate::{
21
21
SuggestionStyle , TerminalUrl ,
22
22
} ;
23
23
use derive_setters:: Setters ;
24
+ use either:: Either ;
24
25
use rustc_data_structures:: fx:: { FxHashMap , FxIndexMap , FxIndexSet } ;
25
26
use rustc_data_structures:: sync:: { DynSend , IntoDynSyncSend , Lrc } ;
26
27
use rustc_error_messages:: { FluentArgs , SpanLabel } ;
@@ -2559,60 +2560,65 @@ fn num_decimal_digits(num: usize) -> usize {
2559
2560
2560
2561
// We replace some characters so the CLI output is always consistent and underlines aligned.
2561
2562
// Keep the following list in sync with `rustc_span::char_width`.
2563
+ // ATTENTION: keep lexicographically sorted so that the binary search will work.
2562
2564
const OUTPUT_REPLACEMENTS : & [ ( char , & str ) ] = & [
2563
- ( '\t' , " " ) , // We do our own tab replacement
2564
- ( '\u{200D}' , "" ) , // Replace ZWJ with nothing for consistent terminal output of grapheme clusters.
2565
- ( '\u{202A}' , "�" ) , // The following unicode text flow control characters are inconsistently
2566
- ( '\u{202B}' , "�" ) , // supported across CLIs and can cause confusion due to the bytes on disk
2567
- ( '\u{202D}' , "�" ) , // not corresponding to the visible source code, so we replace them always.
2568
- ( '\u{202E}' , "�" ) ,
2565
+ // In terminals without Unicode support the following will be garbled, but in *all* terminals
2566
+ // the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2567
+ // support" gate.
2568
+ ( '\0' , "␀" ) ,
2569
+ ( '\u{1}' , "␁" ) ,
2570
+ ( '\u{2}' , "␂" ) ,
2571
+ ( '\u{3}' , "␃" ) ,
2572
+ ( '\u{4}' , "␄" ) ,
2573
+ ( '\u{5}' , "␅" ) ,
2574
+ ( '\u{6}' , "␆" ) ,
2575
+ ( '\u{7}' , "␇" ) ,
2576
+ ( '\u{8}' , "␈" ) ,
2577
+ ( '\t' , " " ) , // We do our own tab replacement
2578
+ ( '\u{b}' , "␋" ) ,
2579
+ ( '\u{c}' , "␌" ) ,
2580
+ ( '\r' , "␍" ) ,
2581
+ ( '\u{e}' , "␎" ) ,
2582
+ ( '\u{f}' , "␏" ) ,
2583
+ ( '\u{10}' , "␐" ) ,
2584
+ ( '\u{11}' , "␑" ) ,
2585
+ ( '\u{12}' , "␒" ) ,
2586
+ ( '\u{13}' , "␓" ) ,
2587
+ ( '\u{14}' , "␔" ) ,
2588
+ ( '\u{15}' , "␕" ) ,
2589
+ ( '\u{16}' , "␖" ) ,
2590
+ ( '\u{17}' , "␗" ) ,
2591
+ ( '\u{18}' , "␘" ) ,
2592
+ ( '\u{19}' , "␙" ) ,
2593
+ ( '\u{1a}' , "␚" ) ,
2594
+ ( '\u{1b}' , "␛" ) ,
2595
+ ( '\u{1c}' , "␜" ) ,
2596
+ ( '\u{1d}' , "␝" ) ,
2597
+ ( '\u{1e}' , "␞" ) ,
2598
+ ( '\u{1f}' , "␟" ) ,
2599
+ ( '\u{7f}' , "␡" ) ,
2600
+ ( '\u{200d}' , "" ) , // Replace ZWJ for consistent terminal output of grapheme clusters.
2601
+ ( '\u{202a}' , "�" ) , // The following unicode text flow control characters are inconsistently
2602
+ ( '\u{202b}' , "�" ) , // supported across CLIs and can cause confusion due to the bytes on disk
2603
+ ( '\u{202c}' , "�" ) , // not corresponding to the visible source code, so we replace them always.
2604
+ ( '\u{202d}' , "�" ) ,
2605
+ ( '\u{202e}' , "�" ) ,
2569
2606
( '\u{2066}' , "�" ) ,
2570
2607
( '\u{2067}' , "�" ) ,
2571
2608
( '\u{2068}' , "�" ) ,
2572
- ( '\u{202C}' , "�" ) ,
2573
2609
( '\u{2069}' , "�" ) ,
2574
- // In terminals without Unicode support the following will be garbled, but in *all* terminals
2575
- // the underlying codepoint will be as well. We could gate this replacement behind a "unicode
2576
- // support" gate.
2577
- ( '\u{0000}' , "␀" ) ,
2578
- ( '\u{0001}' , "␁" ) ,
2579
- ( '\u{0002}' , "␂" ) ,
2580
- ( '\u{0003}' , "␃" ) ,
2581
- ( '\u{0004}' , "␄" ) ,
2582
- ( '\u{0005}' , "␅" ) ,
2583
- ( '\u{0006}' , "␆" ) ,
2584
- ( '\u{0007}' , "␇" ) ,
2585
- ( '\u{0008}' , "␈" ) ,
2586
- ( '\u{000B}' , "␋" ) ,
2587
- ( '\u{000C}' , "␌" ) ,
2588
- ( '\u{000D}' , "␍" ) ,
2589
- ( '\u{000E}' , "␎" ) ,
2590
- ( '\u{000F}' , "␏" ) ,
2591
- ( '\u{0010}' , "␐" ) ,
2592
- ( '\u{0011}' , "␑" ) ,
2593
- ( '\u{0012}' , "␒" ) ,
2594
- ( '\u{0013}' , "␓" ) ,
2595
- ( '\u{0014}' , "␔" ) ,
2596
- ( '\u{0015}' , "␕" ) ,
2597
- ( '\u{0016}' , "␖" ) ,
2598
- ( '\u{0017}' , "␗" ) ,
2599
- ( '\u{0018}' , "␘" ) ,
2600
- ( '\u{0019}' , "␙" ) ,
2601
- ( '\u{001A}' , "␚" ) ,
2602
- ( '\u{001B}' , "␛" ) ,
2603
- ( '\u{001C}' , "␜" ) ,
2604
- ( '\u{001D}' , "␝" ) ,
2605
- ( '\u{001E}' , "␞" ) ,
2606
- ( '\u{001F}' , "␟" ) ,
2607
- ( '\u{007F}' , "␡" ) ,
2608
2610
] ;
2609
2611
2610
2612
fn normalize_whitespace ( str : & str ) -> String {
2611
- let mut s = str. to_string ( ) ;
2612
- for ( c, replacement) in OUTPUT_REPLACEMENTS {
2613
- s = s. replace ( * c, replacement) ;
2614
- }
2615
- s
2613
+ // Scan the input string for a character in the ordered table above. If it's present, replace
2614
+ // it with its alternative string (it can be more than 1 char!). Otherwise, retain the input
2615
+ // char. At the end, allocate all chars into a string in one operation.
2616
+ str. chars ( )
2617
+ . flat_map ( |c| match OUTPUT_REPLACEMENTS . binary_search_by_key ( & c, |( k, _) | * k) {
2618
+ Ok ( i) => Either :: Left ( OUTPUT_REPLACEMENTS [ i] . 1 . chars ( ) ) ,
2619
+ _ => Either :: Right ( [ c] . into_iter ( ) ) ,
2620
+ } )
2621
+ . collect ( )
2616
2622
}
2617
2623
2618
2624
fn draw_col_separator ( buffer : & mut StyledBuffer , line : usize , col : usize ) {
0 commit comments