-
Notifications
You must be signed in to change notification settings - Fork 12.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Rollup merge of #119033 - Zalathar:unicode, r=davidtwco
coverage: `llvm-cov` expects column numbers to be bytes, not code points. Normally the compiler emits column numbers as a 1-based number of Unicode code points. But when we embed coverage mappings for `-Cinstrument-coverage`, those mappings will ultimately be read by the `llvm-cov` tool. That tool assumes that column numbers are 1-based numbers of *bytes*, and relies on that assumption when slicing up source code to apply highlighting (in HTML reports, and in text-based reports with colour). For the very common case of all-ASCII source code, bytes and code points are the same, so the difference isn't noticeable. But for code that contains non-ASCII characters, emitting column numbers as code points will result in `llvm-cov` slicing strings in the wrong places, producing mangled output or fatal errors. (See taiki-e/cargo-llvm-cov#275 as an example of what can go wrong.)
- Loading branch information
Showing
5 changed files
with
200 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
Function name: unicode::main | ||
Raw bytes (67): 0x[01, 01, 09, 01, 05, 03, 05, 1e, 0d, 22, 09, 03, 05, 11, 1b, 1e, 0d, 22, 09, 03, 05, 09, 01, 0e, 01, 00, 0b, 05, 01, 09, 00, 0c, 03, 00, 10, 00, 1b, 05, 00, 1c, 00, 28, 22, 02, 08, 00, 25, 09, 00, 29, 00, 46, 11, 00, 47, 02, 06, 1b, 02, 06, 00, 07, 17, 02, 05, 01, 02] | ||
Number of files: 1 | ||
- file 0 => global file 1 | ||
Number of expressions: 9 | ||
- expression 0 operands: lhs = Counter(0), rhs = Counter(1) | ||
- expression 1 operands: lhs = Expression(0, Add), rhs = Counter(1) | ||
- expression 2 operands: lhs = Expression(7, Sub), rhs = Counter(3) | ||
- expression 3 operands: lhs = Expression(8, Sub), rhs = Counter(2) | ||
- expression 4 operands: lhs = Expression(0, Add), rhs = Counter(1) | ||
- expression 5 operands: lhs = Counter(4), rhs = Expression(6, Add) | ||
- expression 6 operands: lhs = Expression(7, Sub), rhs = Counter(3) | ||
- expression 7 operands: lhs = Expression(8, Sub), rhs = Counter(2) | ||
- expression 8 operands: lhs = Expression(0, Add), rhs = Counter(1) | ||
Number of file 0 mappings: 9 | ||
- Code(Counter(0)) at (prev + 14, 1) to (start + 0, 11) | ||
- Code(Counter(1)) at (prev + 1, 9) to (start + 0, 12) | ||
- Code(Expression(0, Add)) at (prev + 0, 16) to (start + 0, 27) | ||
= (c0 + c1) | ||
- Code(Counter(1)) at (prev + 0, 28) to (start + 0, 40) | ||
- Code(Expression(8, Sub)) at (prev + 2, 8) to (start + 0, 37) | ||
= ((c0 + c1) - c1) | ||
- Code(Counter(2)) at (prev + 0, 41) to (start + 0, 70) | ||
- Code(Counter(4)) at (prev + 0, 71) to (start + 2, 6) | ||
- Code(Expression(6, Add)) at (prev + 2, 6) to (start + 0, 7) | ||
= ((((c0 + c1) - c1) - c2) + c3) | ||
- Code(Expression(5, Add)) at (prev + 2, 5) to (start + 1, 2) | ||
= (c4 + ((((c0 + c1) - c1) - c2) + c3)) | ||
|
||
Function name: unicode::サビ | ||
Raw bytes (9): 0x[01, 01, 00, 01, 01, 1e, 14, 00, 18] | ||
Number of files: 1 | ||
- file 0 => global file 1 | ||
Number of expressions: 0 | ||
Number of file 0 mappings: 1 | ||
- Code(Counter(0)) at (prev + 30, 20) to (start + 0, 24) | ||
|
||
Function name: unicode::他 (unused) | ||
Raw bytes (9): 0x[01, 01, 00, 01, 00, 1e, 19, 00, 25] | ||
Number of files: 1 | ||
- file 0 => global file 1 | ||
Number of expressions: 0 | ||
Number of file 0 mappings: 1 | ||
- Code(Zero) at (prev + 30, 25) to (start + 0, 37) | ||
|
||
Function name: unicode::申し訳ございません | ||
Raw bytes (9): 0x[01, 01, 00, 01, 01, 18, 01, 02, 02] | ||
Number of files: 1 | ||
- file 0 => global file 1 | ||
Number of expressions: 0 | ||
Number of file 0 mappings: 1 | ||
- Code(Counter(0)) at (prev + 24, 1) to (start + 2, 2) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
LL| |// edition: 2021 | ||
LL| |// ignore-windows - we can't force `llvm-cov` to use ANSI escapes on Windows | ||
LL| |// llvm-cov-flags: --use-color | ||
LL| | | ||
LL| |// Check that column numbers are denoted in bytes, so that they don't cause | ||
LL| |// `llvm-cov` to fail or emit malformed output. | ||
LL| |// | ||
LL| |// Note that when `llvm-cov` prints ^ arrows on a subsequent line, it simply | ||
LL| |// inserts one space character for each "column", with no understanding of | ||
LL| |// Unicode or character widths. So those arrows will tend to be misaligned | ||
LL| |// for non-ASCII source code, regardless of whether column numbers are code | ||
LL| |// points or bytes. | ||
LL| | | ||
LL| 1|fn main() { | ||
LL| [0;35m33[0m| for _İ in 'А'..='Я' { /* Я */ } | ||
^32 ^32 | ||
LL| | | ||
LL| [0;35m1[0m| if 申し訳ございません() && [0;41m申し訳ございません()[0m [0;41m{[0m | ||
^0 | ||
LL| 0|[0;41m println!("true");[0m | ||
LL| 1|[0;41m }[0m | ||
LL| | | ||
LL| 1| サビ(); | ||
LL| 1|} | ||
LL| | | ||
LL| 1|fn 申し訳ございません() -> bool { | ||
LL| 1| std::hint::black_box(false) | ||
LL| 1|} | ||
LL| | | ||
LL| |macro_rules! macro_that_defines_a_function { | ||
LL| | (fn $名:ident () $体:tt) => { | ||
LL| [0;35m1[0m| fn $名 () $体 [0;41mfn 他 () {}[0m | ||
^0 | ||
LL| | } | ||
LL| |} | ||
LL| | | ||
LL| |macro_that_defines_a_function! { | ||
LL| | fn サビ() {} | ||
LL| |} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
// edition: 2021 | ||
// ignore-windows - we can't force `llvm-cov` to use ANSI escapes on Windows | ||
// llvm-cov-flags: --use-color | ||
|
||
// Check that column numbers are denoted in bytes, so that they don't cause | ||
// `llvm-cov` to fail or emit malformed output. | ||
// | ||
// Note that when `llvm-cov` prints ^ arrows on a subsequent line, it simply | ||
// inserts one space character for each "column", with no understanding of | ||
// Unicode or character widths. So those arrows will tend to be misaligned | ||
// for non-ASCII source code, regardless of whether column numbers are code | ||
// points or bytes. | ||
|
||
fn main() { | ||
for _İ in 'А'..='Я' { /* Я */ } | ||
|
||
if 申し訳ございません() && 申し訳ございません() { | ||
println!("true"); | ||
} | ||
|
||
サビ(); | ||
} | ||
|
||
fn 申し訳ございません() -> bool { | ||
std::hint::black_box(false) | ||
} | ||
|
||
macro_rules! macro_that_defines_a_function { | ||
(fn $名:ident () $体:tt) => { | ||
fn $名 () $体 fn 他 () {} | ||
} | ||
} | ||
|
||
macro_that_defines_a_function! { | ||
fn サビ() {} | ||
} |