Skip to content

Commit 6fcd589

Browse files
Rollup merge of #79000 - sivadeilra:user/ardavis/lev_distance, r=wesleywiser
Move lev_distance to rustc_ast, make non-generic rustc_ast currently has a few dependencies on rustc_lexer. Ideally, an AST would not have any dependency its lexer, for minimizing design-time dependencies. Breaking this dependency would also have practical benefits, since modifying rustc_lexer would not trigger a rebuild of rustc_ast. This commit does not remove the rustc_ast --> rustc_lexer dependency, but it does remove one of the sources of this dependency, which is the code that handles fuzzy matching between symbol names for making suggestions in diagnostics. Since that code depends only on Symbol, it is easy to move it to rustc_span. It might even be best to move it to a separate crate, since other tools such as Cargo use the same algorithm, and have simply contain a duplicate of the code. This changes the signature of find_best_match_for_name so that it is no longer generic over its input. I checked the optimized binaries, and this function was duplicated for nearly every call site, because most call sites used short-lived iterator chains, generic over Map and such. But there's no good reason for a function like this to be generic, since all it does is immediately convert the generic input (the Iterator impl) to a concrete Vec<Symbol>. This has all of the costs of generics (duplicated method bodies) with no benefit. Changing find_best_match_for_name to be non-generic removed about 10KB of code from the optimized binary. I know it's a drop in the bucket, but we have to start reducing binary size, and beginning to tame over-use of generics is part of that.
2 parents 9b2117d + 5481c1b commit 6fcd589

File tree

16 files changed

+96
-84
lines changed

16 files changed

+96
-84
lines changed

compiler/rustc_ast/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ macro_rules! unwrap_or {
3434
pub mod util {
3535
pub mod classify;
3636
pub mod comments;
37-
pub mod lev_distance;
3837
pub mod literal;
3938
pub mod parser;
4039
}

compiler/rustc_interface/src/util.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
use rustc_ast::mut_visit::{visit_clobber, MutVisitor, *};
22
use rustc_ast::ptr::P;
3-
use rustc_ast::util::lev_distance::find_best_match_for_name;
43
use rustc_ast::{self as ast, AttrVec, BlockCheckMode};
54
use rustc_codegen_ssa::traits::CodegenBackend;
65
use rustc_data_structures::fingerprint::Fingerprint;
@@ -20,6 +19,7 @@ use rustc_session::parse::CrateConfig;
2019
use rustc_session::CrateDisambiguator;
2120
use rustc_session::{early_error, filesearch, output, DiagnosticOutput, Session};
2221
use rustc_span::edition::Edition;
22+
use rustc_span::lev_distance::find_best_match_for_name;
2323
use rustc_span::source_map::FileLoader;
2424
use rustc_span::symbol::{sym, Symbol};
2525
use smallvec::SmallVec;
@@ -512,8 +512,11 @@ pub(crate) fn check_attr_crate_type(
512512

513513
if let ast::MetaItemKind::NameValue(spanned) = a.meta().unwrap().kind {
514514
let span = spanned.span;
515-
let lev_candidate =
516-
find_best_match_for_name(CRATE_TYPES.iter().map(|(k, _)| k), n, None);
515+
let lev_candidate = find_best_match_for_name(
516+
&CRATE_TYPES.iter().map(|(k, _)| *k).collect::<Vec<_>>(),
517+
n,
518+
None,
519+
);
517520
if let Some(candidate) = lev_candidate {
518521
lint_buffer.buffer_lint_with_diagnostic(
519522
lint::builtin::UNKNOWN_CRATE_TYPES,

compiler/rustc_lint/src/context.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ use self::TargetLint::*;
1919
use crate::levels::LintLevelsBuilder;
2020
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
2121
use rustc_ast as ast;
22-
use rustc_ast::util::lev_distance::find_best_match_for_name;
2322
use rustc_data_structures::fx::FxHashMap;
2423
use rustc_data_structures::sync;
2524
use rustc_errors::{add_elided_lifetime_in_path_suggestion, struct_span_err, Applicability};
@@ -37,6 +36,7 @@ use rustc_session::lint::BuiltinLintDiagnostics;
3736
use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId};
3837
use rustc_session::Session;
3938
use rustc_session::SessionLintStore;
39+
use rustc_span::lev_distance::find_best_match_for_name;
4040
use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP};
4141
use rustc_target::abi::LayoutOf;
4242

@@ -411,7 +411,7 @@ impl LintStore {
411411
self.by_name.keys().map(|name| Symbol::intern(&name)).collect::<Vec<_>>();
412412

413413
let suggestion = find_best_match_for_name(
414-
symbols.iter(),
414+
&symbols,
415415
Symbol::intern(&lint_name.to_lowercase()),
416416
None,
417417
);

compiler/rustc_resolve/src/diagnostics.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use std::cmp::Reverse;
22
use std::ptr;
33

4-
use rustc_ast::util::lev_distance::find_best_match_for_name;
54
use rustc_ast::{self as ast, Path};
65
use rustc_ast_pretty::pprust;
76
use rustc_data_structures::fx::FxHashSet;
@@ -14,6 +13,7 @@ use rustc_middle::bug;
1413
use rustc_middle::ty::{self, DefIdTree};
1514
use rustc_session::Session;
1615
use rustc_span::hygiene::MacroKind;
16+
use rustc_span::lev_distance::find_best_match_for_name;
1717
use rustc_span::source_map::SourceMap;
1818
use rustc_span::symbol::{kw, sym, Ident, Symbol};
1919
use rustc_span::{BytePos, MultiSpan, Span};
@@ -716,7 +716,7 @@ impl<'a> Resolver<'a> {
716716
suggestions.sort_by_cached_key(|suggestion| suggestion.candidate.as_str());
717717

718718
match find_best_match_for_name(
719-
suggestions.iter().map(|suggestion| &suggestion.candidate),
719+
&suggestions.iter().map(|suggestion| suggestion.candidate).collect::<Vec<Symbol>>(),
720720
ident.name,
721721
None,
722722
) {

compiler/rustc_resolve/src/imports.rs

+26-22
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use crate::{CrateLint, Module, ModuleOrUniformRoot, ParentScope, PerNS, ScopeSet
1010
use crate::{NameBinding, NameBindingKind, PathResult, PrivacyError, ToNameBinding};
1111

1212
use rustc_ast::unwrap_or;
13-
use rustc_ast::util::lev_distance::find_best_match_for_name;
1413
use rustc_ast::NodeId;
1514
use rustc_ast_lowering::ResolverAstLowering;
1615
use rustc_data_structures::fx::FxHashSet;
@@ -25,6 +24,7 @@ use rustc_session::lint::builtin::{PUB_USE_OF_PRIVATE_EXTERN_CRATE, UNUSED_IMPOR
2524
use rustc_session::lint::BuiltinLintDiagnostics;
2625
use rustc_session::DiagnosticMessageId;
2726
use rustc_span::hygiene::ExpnId;
27+
use rustc_span::lev_distance::find_best_match_for_name;
2828
use rustc_span::symbol::{kw, Ident, Symbol};
2929
use rustc_span::{MultiSpan, Span};
3030

@@ -1096,33 +1096,37 @@ impl<'a, 'b> ImportResolver<'a, 'b> {
10961096
_ => None,
10971097
};
10981098
let resolutions = resolutions.as_ref().into_iter().flat_map(|r| r.iter());
1099-
let names = resolutions.filter_map(|(BindingKey { ident: i, .. }, resolution)| {
1100-
if *i == ident {
1101-
return None;
1102-
} // Never suggest the same name
1103-
match *resolution.borrow() {
1104-
NameResolution { binding: Some(name_binding), .. } => {
1105-
match name_binding.kind {
1106-
NameBindingKind::Import { binding, .. } => {
1107-
match binding.kind {
1108-
// Never suggest the name that has binding error
1109-
// i.e., the name that cannot be previously resolved
1110-
NameBindingKind::Res(Res::Err, _) => None,
1111-
_ => Some(&i.name),
1099+
let names = resolutions
1100+
.filter_map(|(BindingKey { ident: i, .. }, resolution)| {
1101+
if *i == ident {
1102+
return None;
1103+
} // Never suggest the same name
1104+
match *resolution.borrow() {
1105+
NameResolution { binding: Some(name_binding), .. } => {
1106+
match name_binding.kind {
1107+
NameBindingKind::Import { binding, .. } => {
1108+
match binding.kind {
1109+
// Never suggest the name that has binding error
1110+
// i.e., the name that cannot be previously resolved
1111+
NameBindingKind::Res(Res::Err, _) => None,
1112+
_ => Some(i.name),
1113+
}
11121114
}
1115+
_ => Some(i.name),
11131116
}
1114-
_ => Some(&i.name),
11151117
}
1118+
NameResolution { ref single_imports, .. }
1119+
if single_imports.is_empty() =>
1120+
{
1121+
None
1122+
}
1123+
_ => Some(i.name),
11161124
}
1117-
NameResolution { ref single_imports, .. } if single_imports.is_empty() => {
1118-
None
1119-
}
1120-
_ => Some(&i.name),
1121-
}
1122-
});
1125+
})
1126+
.collect::<Vec<Symbol>>();
11231127

11241128
let lev_suggestion =
1125-
find_best_match_for_name(names, ident.name, None).map(|suggestion| {
1129+
find_best_match_for_name(&names, ident.name, None).map(|suggestion| {
11261130
(
11271131
vec![(ident.span, suggestion.to_string())],
11281132
String::from("a similar name exists in the module"),

compiler/rustc_resolve/src/late/diagnostics.rs

+5-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use crate::path_names_to_string;
55
use crate::{CrateLint, Module, ModuleKind, ModuleOrUniformRoot};
66
use crate::{PathResult, PathSource, Segment};
77

8-
use rustc_ast::util::lev_distance::find_best_match_for_name;
98
use rustc_ast::visit::FnKind;
109
use rustc_ast::{self as ast, Expr, ExprKind, Item, ItemKind, NodeId, Path, Ty, TyKind};
1110
use rustc_ast_pretty::pprust::path_segment_to_string;
@@ -18,6 +17,7 @@ use rustc_hir::def_id::{DefId, CRATE_DEF_INDEX, LOCAL_CRATE};
1817
use rustc_hir::PrimTy;
1918
use rustc_session::parse::feature_err;
2019
use rustc_span::hygiene::MacroKind;
20+
use rustc_span::lev_distance::find_best_match_for_name;
2121
use rustc_span::symbol::{kw, sym, Ident, Symbol};
2222
use rustc_span::{BytePos, MultiSpan, Span, DUMMY_SP};
2323

@@ -1206,7 +1206,7 @@ impl<'a: 'ast, 'ast> LateResolutionVisitor<'a, '_, 'ast> {
12061206
names.sort_by_cached_key(|suggestion| suggestion.candidate.as_str());
12071207

12081208
match find_best_match_for_name(
1209-
names.iter().map(|suggestion| &suggestion.candidate),
1209+
&names.iter().map(|suggestion| suggestion.candidate).collect::<Vec<Symbol>>(),
12101210
name,
12111211
None,
12121212
) {
@@ -1592,9 +1592,10 @@ impl<'a: 'ast, 'ast> LateResolutionVisitor<'a, '_, 'ast> {
15921592
.bindings
15931593
.iter()
15941594
.filter(|(id, _)| id.span.ctxt() == label.span.ctxt())
1595-
.map(|(id, _)| &id.name);
1595+
.map(|(id, _)| id.name)
1596+
.collect::<Vec<Symbol>>();
15961597

1597-
find_best_match_for_name(names, label.name, None).map(|symbol| {
1598+
find_best_match_for_name(&names, label.name, None).map(|symbol| {
15981599
// Upon finding a similar name, get the ident that it was from - the span
15991600
// contained within helps make a useful diagnostic. In addition, determine
16001601
// whether this candidate is within scope.

compiler/rustc_ast/src/util/lev_distance.rs compiler/rustc_span/src/lev_distance.rs

+9-14
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
// FIXME(Centril): Move to rustc_span?
2-
3-
use rustc_span::symbol::Symbol;
1+
use crate::symbol::Symbol;
42
use std::cmp;
53

64
#[cfg(test)]
@@ -45,17 +43,14 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
4543
///
4644
/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
4745
/// a lower(upper)case letters mismatch.
48-
pub fn find_best_match_for_name<'a, T>(
49-
iter_names: T,
46+
#[cold]
47+
pub fn find_best_match_for_name(
48+
name_vec: &[Symbol],
5049
lookup: Symbol,
5150
dist: Option<usize>,
52-
) -> Option<Symbol>
53-
where
54-
T: Iterator<Item = &'a Symbol>,
55-
{
51+
) -> Option<Symbol> {
5652
let lookup = &lookup.as_str();
5753
let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
58-
let name_vec: Vec<&Symbol> = iter_names.collect();
5954

6055
let (case_insensitive_match, levenshtein_match) = name_vec
6156
.iter()
@@ -83,18 +78,18 @@ where
8378
// 2. Levenshtein distance match
8479
// 3. Sorted word match
8580
if let Some(candidate) = case_insensitive_match {
86-
Some(*candidate)
81+
Some(candidate)
8782
} else if levenshtein_match.is_some() {
88-
levenshtein_match.map(|(candidate, _)| *candidate)
83+
levenshtein_match.map(|(candidate, _)| candidate)
8984
} else {
9085
find_match_by_sorted_words(name_vec, lookup)
9186
}
9287
}
9388

94-
fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> {
89+
fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {
9590
iter_names.iter().fold(None, |result, candidate| {
9691
if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
97-
Some(**candidate)
92+
Some(*candidate)
9893
} else {
9994
result
10095
}

compiler/rustc_ast/src/util/lev_distance/tests.rs compiler/rustc_span/src/lev_distance/tests.rs

+7-10
Original file line numberDiff line numberDiff line change
@@ -21,38 +21,35 @@ fn test_lev_distance() {
2121

2222
#[test]
2323
fn test_find_best_match_for_name() {
24-
use rustc_span::with_default_session_globals;
24+
use crate::with_default_session_globals;
2525
with_default_session_globals(|| {
2626
let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")];
2727
assert_eq!(
28-
find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), None),
28+
find_best_match_for_name(&input, Symbol::intern("aaaa"), None),
2929
Some(Symbol::intern("aaab"))
3030
);
3131

32-
assert_eq!(
33-
find_best_match_for_name(input.iter(), Symbol::intern("1111111111"), None),
34-
None
35-
);
32+
assert_eq!(find_best_match_for_name(&input, Symbol::intern("1111111111"), None), None);
3633

3734
let input = vec![Symbol::intern("aAAA")];
3835
assert_eq!(
39-
find_best_match_for_name(input.iter(), Symbol::intern("AAAA"), None),
36+
find_best_match_for_name(&input, Symbol::intern("AAAA"), None),
4037
Some(Symbol::intern("aAAA"))
4138
);
4239

4340
let input = vec![Symbol::intern("AAAA")];
4441
// Returns None because `lev_distance > max_dist / 3`
45-
assert_eq!(find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), None), None);
42+
assert_eq!(find_best_match_for_name(&input, Symbol::intern("aaaa"), None), None);
4643

4744
let input = vec![Symbol::intern("AAAA")];
4845
assert_eq!(
49-
find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), Some(4)),
46+
find_best_match_for_name(&input, Symbol::intern("aaaa"), Some(4)),
5047
Some(Symbol::intern("AAAA"))
5148
);
5249

5350
let input = vec![Symbol::intern("a_longer_variable_name")];
5451
assert_eq!(
55-
find_best_match_for_name(input.iter(), Symbol::intern("a_variable_longer_name"), None),
52+
find_best_match_for_name(&input, Symbol::intern("a_variable_longer_name"), None),
5653
Some(Symbol::intern("a_longer_variable_name"))
5754
);
5855
})

compiler/rustc_span/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ use hygiene::Transparency;
3434
pub use hygiene::{DesugaringKind, ExpnData, ExpnId, ExpnKind, ForLoopLoc, MacroKind};
3535
pub mod def_id;
3636
use def_id::{CrateNum, DefId, LOCAL_CRATE};
37+
pub mod lev_distance;
3738
mod span_encoding;
3839
pub use span_encoding::{Span, DUMMY_SP};
3940

compiler/rustc_typeck/src/astconv/errors.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
use crate::astconv::AstConv;
2-
use rustc_ast::util::lev_distance::find_best_match_for_name;
32
use rustc_data_structures::fx::FxHashMap;
43
use rustc_errors::{pluralize, struct_span_err, Applicability};
54
use rustc_hir as hir;
65
use rustc_hir::def_id::DefId;
76
use rustc_middle::ty;
87
use rustc_session::parse::feature_err;
8+
use rustc_span::lev_distance::find_best_match_for_name;
99
use rustc_span::symbol::{sym, Ident};
1010
use rustc_span::{Span, DUMMY_SP};
1111

@@ -180,7 +180,7 @@ impl<'o, 'tcx> dyn AstConv<'tcx> + 'o {
180180
.collect();
181181

182182
if let (Some(suggested_name), true) = (
183-
find_best_match_for_name(all_candidate_names.iter(), assoc_name.name, None),
183+
find_best_match_for_name(&all_candidate_names, assoc_name.name, None),
184184
assoc_name.span != DUMMY_SP,
185185
) {
186186
err.span_suggestion(

compiler/rustc_typeck/src/astconv/mod.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ use crate::errors::{
1313
};
1414
use crate::middle::resolve_lifetime as rl;
1515
use crate::require_c_abi_if_c_variadic;
16-
use rustc_ast::util::lev_distance::find_best_match_for_name;
1716
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
1817
use rustc_errors::{struct_span_err, Applicability, ErrorReported, FatalError};
1918
use rustc_hir as hir;
@@ -26,6 +25,7 @@ use rustc_middle::ty::subst::{self, InternalSubsts, Subst, SubstsRef};
2625
use rustc_middle::ty::GenericParamDefKind;
2726
use rustc_middle::ty::{self, Const, DefIdTree, Ty, TyCtxt, TypeFoldable};
2827
use rustc_session::lint::builtin::AMBIGUOUS_ASSOCIATED_ITEMS;
28+
use rustc_span::lev_distance::find_best_match_for_name;
2929
use rustc_span::symbol::{Ident, Symbol};
3030
use rustc_span::{Span, DUMMY_SP};
3131
use rustc_target::spec::abi;
@@ -1579,7 +1579,11 @@ impl<'o, 'tcx> dyn AstConv<'tcx> + 'o {
15791579

15801580
let adt_def = qself_ty.ty_adt_def().expect("enum is not an ADT");
15811581
if let Some(suggested_name) = find_best_match_for_name(
1582-
adt_def.variants.iter().map(|variant| &variant.ident.name),
1582+
&adt_def
1583+
.variants
1584+
.iter()
1585+
.map(|variant| variant.ident.name)
1586+
.collect::<Vec<Symbol>>(),
15831587
assoc_ident.name,
15841588
None,
15851589
) {

0 commit comments

Comments
 (0)