Skip to content

Commit 48352e3

Browse files
Add nfc_normalize_idents flag
Also fixes rust-lang#6069.
1 parent 5805040 commit 48352e3

File tree

16 files changed

+196
-49
lines changed

16 files changed

+196
-49
lines changed

Cargo.lock

+32-10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+4-1
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,12 @@ thiserror = "1.0.40"
5353
toml = "0.7.4"
5454
tracing = "0.1.37"
5555
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
56+
unicode-normalization = "0.1.22"
57+
unicode-properties = { version = "0.1", default-features = false, features = [
58+
"general-category",
59+
] }
5660
unicode-segmentation = "1.9"
5761
unicode-width = "0.1"
58-
unicode-properties = { version = "0.1", default-features = false, features = ["general-category"] }
5962

6063
rustfmt-config_proc_macro = { version = "0.3", path = "config_proc_macro" }
6164

Configurations.md

+8
Original file line numberDiff line numberDiff line change
@@ -1069,6 +1069,14 @@ Number of lines to check for a `@generated` pragma header, starting from the top
10691069

10701070
See also [format_generated_files](#format_generated_files).
10711071

1072+
## `nfc_normalize_idents`
1073+
1074+
Whether to normalize identifiers to Unicode Normalization Form C (NFC). The compiler treats identifiers whose NFC normalizations are identical as the same identifier.
1075+
1076+
- **Default value**: `false`
1077+
- **Possible values**: `true`, `false`
1078+
- **Stable**: No
1079+
10721080
## `format_macro_matchers`
10731081

10741082
Format the metavariable matching patterns in macros.

src/config/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ create_config! {
153153
format_generated_files: bool, true, false, "Format generated files";
154154
generated_marker_line_search_limit: usize, 5, false, "Number of lines to check for a \
155155
`@generated` marker when `format_generated_files` is enabled";
156+
nfc_normalize_idents: bool, false, false, "Whether to normalize identifiers \
157+
to Unicode Normalization Form C";
156158

157159
// Options that can change the source code beyond whitespace/blocks (somewhat linty things)
158160
merge_derives: bool, true, true, "Merge multiple `#[derive(...)]` into a single one";
@@ -683,6 +685,7 @@ version = "One"
683685
inline_attribute_width = 0
684686
format_generated_files = true
685687
generated_marker_line_search_limit = 5
688+
nfc_normalize_idents = false
686689
merge_derives = true
687690
use_try_shorthand = false
688691
use_field_init_shorthand = false

src/expr.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use crate::types::{rewrite_path, PathContext};
3232
use crate::utils::{
3333
colon_spaces, contains_skip, count_newlines, filtered_str_fits, first_line_ends_with,
3434
inner_attributes, last_line_extendable, last_line_width, mk_sp, outer_attributes,
35-
semicolon_for_expr, unicode_str_width, wrap_str,
35+
rewrite_ident, semicolon_for_expr, unicode_str_width, wrap_str,
3636
};
3737
use crate::vertical::rewrite_with_alignment;
3838
use crate::visitor::FmtVisitor;
@@ -1754,9 +1754,9 @@ pub(crate) fn rewrite_field(
17541754
if !attrs_str.is_empty() {
17551755
attrs_str.push_str(&shape.indent.to_string_with_newline(context.config));
17561756
};
1757-
let name = context.snippet(field.ident.span);
1757+
let name = rewrite_ident(context, field.ident);
17581758
if field.is_shorthand {
1759-
Some(attrs_str + name)
1759+
Some(attrs_str + &name)
17601760
} else {
17611761
let mut separator = String::from(struct_lit_field_separator(context.config));
17621762
for _ in 0..prefix_max_width.saturating_sub(name.len()) {
@@ -1770,7 +1770,7 @@ pub(crate) fn rewrite_field(
17701770
Some(ref e)
17711771
if !is_lit && e.as_str() == name && context.config.use_field_init_shorthand() =>
17721772
{
1773-
Some(attrs_str + name)
1773+
Some(attrs_str + &name)
17741774
}
17751775
Some(e) => Some(format!("{attrs_str}{name}{separator}{e}")),
17761776
None => {

src/imports.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ impl UseSegment {
185185
if name.is_empty() || name == "{{root}}" {
186186
return None;
187187
}
188-
let kind = match name {
188+
let kind = match &*name {
189189
"self" => UseSegmentKind::Slf(None),
190190
"super" => UseSegmentKind::Super(None),
191191
"crate" => UseSegmentKind::Crate(None),
@@ -498,7 +498,7 @@ impl UseTree {
498498
let name = if a.prefix.segments.len() == 2 && leading_modsep {
499499
context.snippet(a.prefix.span).to_owned()
500500
} else {
501-
rewrite_ident(context, path_to_imported_ident(&a.prefix)).to_owned()
501+
rewrite_ident(context, path_to_imported_ident(&a.prefix)).into_owned()
502502
};
503503
let alias = rename.and_then(|ident| {
504504
if ident.name == sym::underscore_imports {
@@ -507,7 +507,7 @@ impl UseTree {
507507
} else if ident == path_to_imported_ident(&a.prefix) {
508508
None
509509
} else {
510-
Some(rewrite_ident(context, ident).to_owned())
510+
Some(rewrite_ident(context, ident).into_owned())
511511
}
512512
});
513513
let kind = match name.as_ref() {

src/items.rs

+13-9
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ impl<'a> FmtVisitor<'a> {
679679
self.block_indent,
680680
Some(one_line_width),
681681
)?,
682-
ast::VariantData::Unit(..) => rewrite_ident(&context, field.ident).to_owned(),
682+
ast::VariantData::Unit(..) => rewrite_ident(&context, field.ident).into_owned(),
683683
};
684684

685685
let variant_body = if let Some(ref expr) = field.disr_expr {
@@ -1160,8 +1160,12 @@ pub(crate) fn format_trait(
11601160
let body_lo = context.snippet_provider.span_after(item.span, "{");
11611161

11621162
let shape = Shape::indented(offset, context.config).offset_left(result.len())?;
1163-
let generics_str =
1164-
rewrite_generics(context, rewrite_ident(context, item.ident), generics, shape)?;
1163+
let generics_str = rewrite_generics(
1164+
context,
1165+
&rewrite_ident(context, item.ident),
1166+
generics,
1167+
shape,
1168+
)?;
11651169
result.push_str(&generics_str);
11661170

11671171
// FIXME(#2055): rustfmt fails to format when there are comments between trait bounds.
@@ -1356,7 +1360,7 @@ pub(crate) fn format_trait_alias(
13561360
let alias = rewrite_ident(context, ident);
13571361
// 6 = "trait ", 2 = " ="
13581362
let g_shape = shape.offset_left(6)?.sub_width(2)?;
1359-
let generics_str = rewrite_generics(context, alias, generics, g_shape)?;
1363+
let generics_str = rewrite_generics(context, &alias, generics, g_shape)?;
13601364
let vis_str = format_visibility(context, vis);
13611365
let lhs = format!("{vis_str}trait {generics_str} =");
13621366
// 1 = ";"
@@ -1749,13 +1753,13 @@ fn rewrite_ty<R: Rewrite>(
17491753
let ident_str = rewrite_ident(context, ident);
17501754

17511755
if generics.params.is_empty() {
1752-
result.push_str(ident_str)
1756+
result.push_str(&ident_str)
17531757
} else {
17541758
// 2 = `= `
17551759
let g_shape = Shape::indented(indent, context.config)
17561760
.offset_left(result.len())?
17571761
.sub_width(2)?;
1758-
let generics_str = rewrite_generics(context, ident_str, generics, g_shape)?;
1762+
let generics_str = rewrite_generics(context, &ident_str, generics, g_shape)?;
17591763
result.push_str(&generics_str);
17601764
}
17611765

@@ -2362,7 +2366,7 @@ fn rewrite_fn_base(
23622366
let fd = fn_sig.decl;
23632367
let generics_str = rewrite_generics(
23642368
context,
2365-
rewrite_ident(context, ident),
2369+
&rewrite_ident(context, ident),
23662370
&fn_sig.generics,
23672371
shape,
23682372
)?;
@@ -3188,7 +3192,7 @@ fn format_header(
31883192
}
31893193
}
31903194

3191-
result.push_str(rewrite_ident(context, ident));
3195+
result.push_str(&rewrite_ident(context, ident));
31923196

31933197
result
31943198
}
@@ -3438,7 +3442,7 @@ pub(crate) fn rewrite_mod(
34383442
let mut result = String::with_capacity(32);
34393443
result.push_str(&*format_visibility(context, &item.vis));
34403444
result.push_str("mod ");
3441-
result.push_str(rewrite_ident(context, item.ident));
3445+
result.push_str(&rewrite_ident(context, item.ident));
34423446
result.push(';');
34433447
rewrite_attrs(context, item, &result, attrs_shape)
34443448
}

src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ extern crate rustc_builtin_macros;
1717
extern crate rustc_data_structures;
1818
extern crate rustc_errors;
1919
extern crate rustc_expand;
20+
extern crate rustc_lexer;
2021
extern crate rustc_parse;
2122
extern crate rustc_session;
2223
extern crate rustc_span;

src/macros.rs

+20-10
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use crate::shape::{Indent, Shape};
3636
use crate::source_map::SpanUtils;
3737
use crate::spanned::Spanned;
3838
use crate::utils::{
39-
filtered_str_fits, format_visibility, indent_next_line, is_empty_line, mk_sp,
39+
filtered_str_fits, format_visibility, indent_next_line, is_empty_line, mk_sp, nfc_normalize,
4040
remove_trailing_white_spaces, rewrite_ident, trim_left_preserve_layout, NodeIdExt,
4141
};
4242
use crate::visitor::FmtVisitor;
@@ -284,7 +284,7 @@ fn rewrite_macro_inner(
284284
},
285285
)
286286
.map(|rw| match position {
287-
MacroPosition::Item => format!("{};", rw),
287+
MacroPosition::Item => format!("{rw};"),
288288
_ => rw,
289289
})
290290
}
@@ -425,7 +425,7 @@ pub(crate) fn rewrite_macro_def(
425425
};
426426

427427
result += " ";
428-
result += rewrite_ident(context, ident);
428+
result += &rewrite_ident(context, ident);
429429

430430
let multi_branch_style = def.macro_rules || parsed_def.branches.len() != 1;
431431

@@ -490,6 +490,7 @@ pub(crate) fn rewrite_macro_def(
490490
}
491491

492492
fn register_metavariable(
493+
context: &RewriteContext<'_>,
493494
map: &mut HashMap<String, String>,
494495
result: &mut String,
495496
name: &str,
@@ -502,14 +503,21 @@ fn register_metavariable(
502503
new_name.push_str(name);
503504
old_name.push_str(name);
504505

506+
// `$` is `NFC_Inert`, so normalizing the whole name leaves the `$` characters unchanged.
507+
let new_name = nfc_normalize(context, &new_name).into_owned();
508+
let old_name = nfc_normalize(context, &old_name).into_owned();
509+
505510
result.push_str(&new_name);
506511
map.insert(old_name, new_name);
507512
}
508513

509514
// Replaces `$foo` with `zfoo`. We must check for name overlap to ensure we
510515
// aren't causing problems.
511516
// This should also work for escaped `$` variables, where we leave earlier `$`s.
512-
fn replace_names(input: &str) -> Option<(String, HashMap<String, String>)> {
517+
fn replace_names(
518+
context: &RewriteContext<'_>,
519+
input: &str,
520+
) -> Option<(String, HashMap<String, String>)> {
513521
// Each substitution will require five or six extra bytes.
514522
let mut result = String::with_capacity(input.len() + 64);
515523
let mut substs = HashMap::new();
@@ -523,23 +531,23 @@ fn replace_names(input: &str) -> Option<(String, HashMap<String, String>)> {
523531
dollar_count += 1;
524532
} else if dollar_count == 0 {
525533
result.push(c);
526-
} else if !c.is_alphanumeric() && !cur_name.is_empty() {
534+
} else if !rustc_lexer::is_id_continue(c) && !cur_name.is_empty() {
527535
// Terminates a name following one or more dollars.
528-
register_metavariable(&mut substs, &mut result, &cur_name, dollar_count);
536+
register_metavariable(context, &mut substs, &mut result, &cur_name, dollar_count);
529537

530538
result.push(c);
531539
dollar_count = 0;
532540
cur_name.clear();
533541
} else if c == '(' && cur_name.is_empty() {
534542
// FIXME: Support macro def with repeat.
535543
return None;
536-
} else if c.is_alphanumeric() || c == '_' {
544+
} else if rustc_lexer::is_id_continue(c) {
537545
cur_name.push(c);
538546
}
539547
}
540548

541549
if !cur_name.is_empty() {
542-
register_metavariable(&mut substs, &mut result, &cur_name, dollar_count);
550+
register_metavariable(context, &mut substs, &mut result, &cur_name, dollar_count);
543551
}
544552

545553
debug!("replace_names `{}` {:?}", result, substs);
@@ -655,7 +663,9 @@ impl MacroArgKind {
655663
};
656664

657665
match *self {
658-
MacroArgKind::MetaVariable(ty, ref name) => Some(format!("${name}:{ty}")),
666+
MacroArgKind::MetaVariable(ty, ref name) => {
667+
Some(format!("${}:{ty}", nfc_normalize(context, name)))
668+
}
659669
MacroArgKind::Repeat(delim_tok, ref args, ref another, ref tok) => {
660670
let (lhs, inner, rhs) = rewrite_delimited_inner(delim_tok, args)?;
661671
let another = another
@@ -1273,7 +1283,7 @@ impl MacroBranch {
12731283
// `$$`). We'll try and format like an AST node, but we'll substitute
12741284
// variables for new names with the same length first.
12751285

1276-
let (body_str, substs) = replace_names(old_body)?;
1286+
let (body_str, substs) = replace_names(context, old_body)?;
12771287

12781288
let mut config = context.config.clone();
12791289
config.set().show_parse_errors(false);

src/patterns.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -162,14 +162,14 @@ impl Rewrite for Pat {
162162
let hi = context.snippet_provider.span_before(self.span, "@");
163163
combine_strs_with_missing_comments(
164164
context,
165-
id_str,
165+
&id_str,
166166
&sub_pat,
167167
mk_sp(ident.span.hi(), hi),
168168
shape,
169169
true,
170170
)?
171171
} else {
172-
id_str.to_owned()
172+
id_str.into_owned()
173173
};
174174

175175
combine_strs_with_missing_comments(

0 commit comments

Comments
 (0)