Skip to content

Commit c2cdba4

Browse files
authored
Rollup merge of #88795 - FabianWolff:issue-88684, r=wesleywiser
Print a note if a character literal contains a variation selector. Fixes #88684.
2 parents 840acd3 + 0d8245b commit c2cdba4

File tree

4 files changed

+137
-14
lines changed

4 files changed

+137
-14
lines changed

Diff for: compiler/rustc_parse/src/lexer/unescape_error_reporting.rs

+47-14
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use std::iter::once;
44
use std::ops::Range;
55

6-
use rustc_errors::{Applicability, Handler};
6+
use rustc_errors::{pluralize, Applicability, Handler};
77
use rustc_lexer::unescape::{EscapeError, Mode};
88
use rustc_span::{BytePos, Span};
99

@@ -49,24 +49,57 @@ pub(crate) fn emit_unescape_error(
4949
.emit();
5050
}
5151
EscapeError::MoreThanOneChar => {
52-
let (prefix, msg) = if mode.is_bytes() {
53-
("b", "if you meant to write a byte string literal, use double quotes")
54-
} else {
55-
("", "if you meant to write a `str` literal, use double quotes")
56-
};
52+
use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
5753

58-
handler
59-
.struct_span_err(
60-
span_with_quotes,
61-
"character literal may only contain one codepoint",
62-
)
63-
.span_suggestion(
54+
let mut has_help = false;
55+
let mut handler = handler.struct_span_err(
56+
span_with_quotes,
57+
"character literal may only contain one codepoint",
58+
);
59+
60+
if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
61+
let escaped_marks =
62+
lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
63+
handler.span_note(
64+
span,
65+
&format!(
66+
"this `{}` is followed by the combining mark{} `{}`",
67+
lit.chars().next().unwrap(),
68+
pluralize!(escaped_marks.len()),
69+
escaped_marks.join(""),
70+
),
71+
);
72+
let normalized = lit.nfc().to_string();
73+
if normalized.chars().count() == 1 {
74+
has_help = true;
75+
handler.span_suggestion(
76+
span,
77+
&format!(
78+
"consider using the normalized form `{}` of this character",
79+
normalized.chars().next().unwrap().escape_default()
80+
),
81+
normalized,
82+
Applicability::MachineApplicable,
83+
);
84+
}
85+
}
86+
87+
if !has_help {
88+
let (prefix, msg) = if mode.is_bytes() {
89+
("b", "if you meant to write a byte string literal, use double quotes")
90+
} else {
91+
("", "if you meant to write a `str` literal, use double quotes")
92+
};
93+
94+
handler.span_suggestion(
6495
span_with_quotes,
6596
msg,
6697
format!("{}\"{}\"", prefix, lit),
6798
Applicability::MachineApplicable,
68-
)
69-
.emit();
99+
);
100+
}
101+
102+
handler.emit();
70103
}
71104
EscapeError::EscapeOnlyChar => {
72105
let (c, char_span) = last_char();

Diff for: src/test/ui/parser/unicode-character-literal.fixed

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Regression test for #88684: Improve diagnostics for combining marks
2+
// in character literals.
3+
4+
// run-rustfix
5+
6+
fn main() {
7+
let _spade = "♠️";
8+
//~^ ERROR: character literal may only contain one codepoint
9+
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
10+
//~| HELP: if you meant to write a `str` literal, use double quotes
11+
12+
let _s = "ṩ̂̊";
13+
//~^ ERROR: character literal may only contain one codepoint
14+
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
15+
//~| HELP: if you meant to write a `str` literal, use double quotes
16+
17+
let _a = 'Å';
18+
//~^ ERROR: character literal may only contain one codepoint
19+
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
20+
//~| HELP: consider using the normalized form `\u{c5}` of this character
21+
}

Diff for: src/test/ui/parser/unicode-character-literal.rs

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Regression test for #88684: Improve diagnostics for combining marks
2+
// in character literals.
3+
4+
// run-rustfix
5+
6+
fn main() {
7+
let _spade = '♠️';
8+
//~^ ERROR: character literal may only contain one codepoint
9+
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
10+
//~| HELP: if you meant to write a `str` literal, use double quotes
11+
12+
let _s = 'ṩ̂̊';
13+
//~^ ERROR: character literal may only contain one codepoint
14+
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
15+
//~| HELP: if you meant to write a `str` literal, use double quotes
16+
17+
let _a = 'Å';
18+
//~^ ERROR: character literal may only contain one codepoint
19+
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
20+
//~| HELP: consider using the normalized form `\u{c5}` of this character
21+
}

Diff for: src/test/ui/parser/unicode-character-literal.stderr

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
error: character literal may only contain one codepoint
2+
--> $DIR/unicode-character-literal.rs:7:18
3+
|
4+
LL | let _spade = '♠️';
5+
| ^^^
6+
|
7+
note: this `♠` is followed by the combining mark `\u{fe0f}`
8+
--> $DIR/unicode-character-literal.rs:7:19
9+
|
10+
LL | let _spade = '♠️';
11+
| ^
12+
help: if you meant to write a `str` literal, use double quotes
13+
|
14+
LL | let _spade = "♠️";
15+
| ~~~
16+
17+
error: character literal may only contain one codepoint
18+
--> $DIR/unicode-character-literal.rs:12:14
19+
|
20+
LL | let _s = 'ṩ̂̊';
21+
| ^^^
22+
|
23+
note: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
24+
--> $DIR/unicode-character-literal.rs:12:15
25+
|
26+
LL | let _s = 'ṩ̂̊';
27+
| ^
28+
help: if you meant to write a `str` literal, use double quotes
29+
|
30+
LL | let _s = "ṩ̂̊";
31+
| ~~~
32+
33+
error: character literal may only contain one codepoint
34+
--> $DIR/unicode-character-literal.rs:17:14
35+
|
36+
LL | let _a = 'Å';
37+
| ^-^
38+
| |
39+
| help: consider using the normalized form `\u{c5}` of this character: `Å`
40+
|
41+
note: this `A` is followed by the combining mark `\u{30a}`
42+
--> $DIR/unicode-character-literal.rs:17:15
43+
|
44+
LL | let _a = 'Å';
45+
| ^
46+
47+
error: aborting due to 3 previous errors
48+

0 commit comments

Comments
 (0)