Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add nfc_normalize_idents flag #6072

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 32 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,12 @@ thiserror = "1.0.40"
toml = "0.7.4"
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
unicode-normalization = "0.1.22"
unicode-properties = { version = "0.1", default-features = false, features = [
"general-category",
] }
unicode-segmentation = "1.9"
unicode-width = "0.1"
unicode-properties = { version = "0.1", default-features = false, features = ["general-category"] }

rustfmt-config_proc_macro = { version = "0.3", path = "config_proc_macro" }

Expand Down
8 changes: 8 additions & 0 deletions Configurations.md
Original file line number Diff line number Diff line change
Expand Up @@ -1069,6 +1069,14 @@ Number of lines to check for a `@generated` pragma header, starting from the top

See also [format_generated_files](#format_generated_files).

## `nfc_normalize_idents`

Whether to normalize identifiers to Unicode Normalization Form C (NFC). The compiler treats identifiers whose NFC normalizations are identical as the same identifier, so this normalization does not change the meaning of the code.

- **Default value**: `false`
- **Possible values**: `true`, `false`
- **Stable**: No

## `format_macro_matchers`

Format the metavariable matching patterns in macros.
Expand Down
3 changes: 3 additions & 0 deletions src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ create_config! {
format_generated_files: bool, true, false, "Format generated files";
generated_marker_line_search_limit: usize, 5, false, "Number of lines to check for a \
`@generated` marker when `format_generated_files` is enabled";
nfc_normalize_idents: bool, false, false, "Whether to normalize identifiers \
to Unicode Normalization Form C";

// Options that can change the source code beyond whitespace/blocks (somewhat linty things)
merge_derives: bool, true, true, "Merge multiple `#[derive(...)]` into a single one";
Expand Down Expand Up @@ -683,6 +685,7 @@ version = "One"
inline_attribute_width = 0
format_generated_files = true
generated_marker_line_search_limit = 5
nfc_normalize_idents = false
merge_derives = true
use_try_shorthand = false
use_field_init_shorthand = false
Expand Down
8 changes: 4 additions & 4 deletions src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use crate::types::{rewrite_path, PathContext};
use crate::utils::{
colon_spaces, contains_skip, count_newlines, filtered_str_fits, first_line_ends_with,
inner_attributes, last_line_extendable, last_line_width, mk_sp, outer_attributes,
semicolon_for_expr, unicode_str_width, wrap_str,
rewrite_ident, semicolon_for_expr, unicode_str_width, wrap_str,
};
use crate::vertical::rewrite_with_alignment;
use crate::visitor::FmtVisitor;
Expand Down Expand Up @@ -1754,9 +1754,9 @@ pub(crate) fn rewrite_field(
if !attrs_str.is_empty() {
attrs_str.push_str(&shape.indent.to_string_with_newline(context.config));
};
let name = context.snippet(field.ident.span);
let name = rewrite_ident(context, field.ident);
if field.is_shorthand {
Some(attrs_str + name)
Some(attrs_str + &name)
} else {
let mut separator = String::from(struct_lit_field_separator(context.config));
for _ in 0..prefix_max_width.saturating_sub(name.len()) {
Expand All @@ -1770,7 +1770,7 @@ pub(crate) fn rewrite_field(
Some(ref e)
if !is_lit && e.as_str() == name && context.config.use_field_init_shorthand() =>
{
Some(attrs_str + name)
Some(attrs_str + &name)
}
Some(e) => Some(format!("{attrs_str}{name}{separator}{e}")),
None => {
Expand Down
6 changes: 3 additions & 3 deletions src/imports.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ impl UseSegment {
if name.is_empty() || name == "{{root}}" {
return None;
}
let kind = match name {
let kind = match &*name {
"self" => UseSegmentKind::Slf(None),
"super" => UseSegmentKind::Super(None),
"crate" => UseSegmentKind::Crate(None),
Expand Down Expand Up @@ -498,7 +498,7 @@ impl UseTree {
let name = if a.prefix.segments.len() == 2 && leading_modsep {
context.snippet(a.prefix.span).to_owned()
} else {
rewrite_ident(context, path_to_imported_ident(&a.prefix)).to_owned()
rewrite_ident(context, path_to_imported_ident(&a.prefix)).into_owned()
};
let alias = rename.and_then(|ident| {
if ident.name == sym::underscore_imports {
Expand All @@ -507,7 +507,7 @@ impl UseTree {
} else if ident == path_to_imported_ident(&a.prefix) {
None
} else {
Some(rewrite_ident(context, ident).to_owned())
Some(rewrite_ident(context, ident).into_owned())
}
});
let kind = match name.as_ref() {
Expand Down
22 changes: 13 additions & 9 deletions src/items.rs
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ impl<'a> FmtVisitor<'a> {
self.block_indent,
Some(one_line_width),
)?,
ast::VariantData::Unit(..) => rewrite_ident(&context, field.ident).to_owned(),
ast::VariantData::Unit(..) => rewrite_ident(&context, field.ident).into_owned(),
};

let variant_body = if let Some(ref expr) = field.disr_expr {
Expand Down Expand Up @@ -1160,8 +1160,12 @@ pub(crate) fn format_trait(
let body_lo = context.snippet_provider.span_after(item.span, "{");

let shape = Shape::indented(offset, context.config).offset_left(result.len())?;
let generics_str =
rewrite_generics(context, rewrite_ident(context, item.ident), generics, shape)?;
let generics_str = rewrite_generics(
context,
&rewrite_ident(context, item.ident),
generics,
shape,
)?;
result.push_str(&generics_str);

// FIXME(#2055): rustfmt fails to format when there are comments between trait bounds.
Expand Down Expand Up @@ -1356,7 +1360,7 @@ pub(crate) fn format_trait_alias(
let alias = rewrite_ident(context, ident);
// 6 = "trait ", 2 = " ="
let g_shape = shape.offset_left(6)?.sub_width(2)?;
let generics_str = rewrite_generics(context, alias, generics, g_shape)?;
let generics_str = rewrite_generics(context, &alias, generics, g_shape)?;
let vis_str = format_visibility(context, vis);
let lhs = format!("{vis_str}trait {generics_str} =");
// 1 = ";"
Expand Down Expand Up @@ -1749,13 +1753,13 @@ fn rewrite_ty<R: Rewrite>(
let ident_str = rewrite_ident(context, ident);

if generics.params.is_empty() {
result.push_str(ident_str)
result.push_str(&ident_str)
} else {
// 2 = `= `
let g_shape = Shape::indented(indent, context.config)
.offset_left(result.len())?
.sub_width(2)?;
let generics_str = rewrite_generics(context, ident_str, generics, g_shape)?;
let generics_str = rewrite_generics(context, &ident_str, generics, g_shape)?;
result.push_str(&generics_str);
}

Expand Down Expand Up @@ -2362,7 +2366,7 @@ fn rewrite_fn_base(
let fd = fn_sig.decl;
let generics_str = rewrite_generics(
context,
rewrite_ident(context, ident),
&rewrite_ident(context, ident),
&fn_sig.generics,
shape,
)?;
Expand Down Expand Up @@ -3188,7 +3192,7 @@ fn format_header(
}
}

result.push_str(rewrite_ident(context, ident));
result.push_str(&rewrite_ident(context, ident));

result
}
Expand Down Expand Up @@ -3438,7 +3442,7 @@ pub(crate) fn rewrite_mod(
let mut result = String::with_capacity(32);
result.push_str(&*format_visibility(context, &item.vis));
result.push_str("mod ");
result.push_str(rewrite_ident(context, item.ident));
result.push_str(&rewrite_ident(context, item.ident));
result.push(';');
rewrite_attrs(context, item, &result, attrs_shape)
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ extern crate rustc_builtin_macros;
extern crate rustc_data_structures;
extern crate rustc_errors;
extern crate rustc_expand;
extern crate rustc_lexer;
extern crate rustc_parse;
extern crate rustc_session;
extern crate rustc_span;
Expand Down
30 changes: 20 additions & 10 deletions src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use crate::shape::{Indent, Shape};
use crate::source_map::SpanUtils;
use crate::spanned::Spanned;
use crate::utils::{
filtered_str_fits, format_visibility, indent_next_line, is_empty_line, mk_sp,
filtered_str_fits, format_visibility, indent_next_line, is_empty_line, mk_sp, nfc_normalize,
remove_trailing_white_spaces, rewrite_ident, trim_left_preserve_layout, NodeIdExt,
};
use crate::visitor::FmtVisitor;
Expand Down Expand Up @@ -284,7 +284,7 @@ fn rewrite_macro_inner(
},
)
.map(|rw| match position {
MacroPosition::Item => format!("{};", rw),
MacroPosition::Item => format!("{rw};"),
_ => rw,
})
}
Expand Down Expand Up @@ -425,7 +425,7 @@ pub(crate) fn rewrite_macro_def(
};

result += " ";
result += rewrite_ident(context, ident);
result += &rewrite_ident(context, ident);

let multi_branch_style = def.macro_rules || parsed_def.branches.len() != 1;

Expand Down Expand Up @@ -490,6 +490,7 @@ pub(crate) fn rewrite_macro_def(
}

fn register_metavariable(
context: &RewriteContext<'_>,
map: &mut HashMap<String, String>,
result: &mut String,
name: &str,
Expand All @@ -502,14 +503,21 @@ fn register_metavariable(
new_name.push_str(name);
old_name.push_str(name);

// `$` is `NFC_Inert`, so won't get mangled
let new_name = nfc_normalize(context, &new_name).into_owned();
let old_name = nfc_normalize(context, &old_name).into_owned();

result.push_str(&new_name);
map.insert(old_name, new_name);
}

// Replaces `$foo` with `zfoo`. We must check for name overlap to ensure we
// aren't causing problems.
// This should also work for escaped `$` variables, where we leave earlier `$`s.
fn replace_names(input: &str) -> Option<(String, HashMap<String, String>)> {
fn replace_names(
context: &RewriteContext<'_>,
input: &str,
) -> Option<(String, HashMap<String, String>)> {
// Each substitution will require five or six extra bytes.
let mut result = String::with_capacity(input.len() + 64);
let mut substs = HashMap::new();
Expand All @@ -523,23 +531,23 @@ fn replace_names(input: &str) -> Option<(String, HashMap<String, String>)> {
dollar_count += 1;
} else if dollar_count == 0 {
result.push(c);
} else if !c.is_alphanumeric() && !cur_name.is_empty() {
} else if !rustc_lexer::is_id_continue(c) && !cur_name.is_empty() {
// Terminates a name following one or more dollars.
register_metavariable(&mut substs, &mut result, &cur_name, dollar_count);
register_metavariable(context, &mut substs, &mut result, &cur_name, dollar_count);

result.push(c);
dollar_count = 0;
cur_name.clear();
} else if c == '(' && cur_name.is_empty() {
// FIXME: Support macro def with repeat.
return None;
} else if c.is_alphanumeric() || c == '_' {
} else if rustc_lexer::is_id_continue(c) {
cur_name.push(c);
}
}

if !cur_name.is_empty() {
register_metavariable(&mut substs, &mut result, &cur_name, dollar_count);
register_metavariable(context, &mut substs, &mut result, &cur_name, dollar_count);
}

debug!("replace_names `{}` {:?}", result, substs);
Expand Down Expand Up @@ -655,7 +663,9 @@ impl MacroArgKind {
};

match *self {
MacroArgKind::MetaVariable(ty, ref name) => Some(format!("${name}:{ty}")),
MacroArgKind::MetaVariable(ty, ref name) => {
Some(format!("${}:{ty}", nfc_normalize(context, name)))
}
MacroArgKind::Repeat(delim_tok, ref args, ref another, ref tok) => {
let (lhs, inner, rhs) = rewrite_delimited_inner(delim_tok, args)?;
let another = another
Expand Down Expand Up @@ -1273,7 +1283,7 @@ impl MacroBranch {
// `$$`). We'll try and format like an AST node, but we'll substitute
// variables for new names with the same length first.

let (body_str, substs) = replace_names(old_body)?;
let (body_str, substs) = replace_names(context, old_body)?;

let mut config = context.config.clone();
config.set().show_parse_errors(false);
Expand Down
4 changes: 2 additions & 2 deletions src/patterns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,14 +162,14 @@ impl Rewrite for Pat {
let hi = context.snippet_provider.span_before(self.span, "@");
combine_strs_with_missing_comments(
context,
id_str,
&id_str,
&sub_pat,
mk_sp(ident.span.hi(), hi),
shape,
true,
)?
} else {
id_str.to_owned()
id_str.into_owned()
};

combine_strs_with_missing_comments(
Expand Down
Loading
Loading