Skip to content

Commit 5c0907b

Browse files
committed
Auto merge of #119331 - notriddle:notriddle/maxpatheditdistance, r=GuillaumeGomez
rustdoc-search: count path edits with separate edit limit

Avoids strange-looking results like this one, where the path component seems to be ignored:

![image](https://github.com/rust-lang/rust/assets/1593513/f0ef077a-6e09-4d67-a29d-8cabc1495f66)

Since the two are counted separately elsewhere, they should get their own limits, too. The biggest problem with combining them is that paths are loosely checked by not requiring every component to match, which means that if they are short and matched loosely, they can easily find "drunk typist" matches that make no sense, like this old result:

    std::collections::btree_map::itermut matching slice::itermut
    maxEditDistance = ("slice::itermut".length) / 3 = 14 / 3 = 4
    editDistance("std", "slice") = 4
    editDistance("itermut", "itermut") = 0
    4 + 0 <= 4 PASS

Of course, `slice::itermut` should not match stuff from btreemap. `slice` should not match `std`.

The new result counts them separately:

    maxPathEditDistance = "slice".length / 3 = 5 / 3 = 1
    maxEditDistance = "itermut".length / 3 = 7 / 3 = 2
    editDistance("std", "slice") = 4
    4 <= 1 FAIL

Effectively, this makes path queries less "typo-resistant". It's not zero, but it means `vec` won't match the `v1` prelude.

This commit also adds substring matching to paths. It's stricter than the substring matching in the main part, but loose enough that what I expect to match does.

Queries without parent paths are unchanged.
2 parents f4d794e + 0ea58e2 commit 5c0907b

10 files changed

+152
-44
lines changed

src/librustdoc/html/static/js/search.js

+30-17
Original file line numberDiff line numberDiff line change
@@ -1805,11 +1805,20 @@ function initSearch(rawSearchIndex) {
18051805
return unifyFunctionTypes([row], [elem], whereClause, mgens);
18061806
}
18071807

1808-
function checkPath(contains, ty, maxEditDistance) {
1808+
/**
1809+
* Compute an "edit distance" that ignores missing path elements.
1810+
* @param {string[]} contains search query path
1811+
* @param {Row} ty indexed item
1812+
* @returns {null|number} edit distance
1813+
*/
1814+
function checkPath(contains, ty) {
18091815
if (contains.length === 0) {
18101816
return 0;
18111817
}
1812-
let ret_dist = maxEditDistance + 1;
1818+
const maxPathEditDistance = Math.floor(
1819+
contains.reduce((acc, next) => acc + next.length, 0) / 3
1820+
);
1821+
let ret_dist = maxPathEditDistance + 1;
18131822
const path = ty.path.split("::");
18141823

18151824
if (ty.parent && ty.parent.name) {
@@ -1821,15 +1830,23 @@ function initSearch(rawSearchIndex) {
18211830
pathiter: for (let i = length - clength; i >= 0; i -= 1) {
18221831
let dist_total = 0;
18231832
for (let x = 0; x < clength; ++x) {
1824-
const dist = editDistance(path[i + x], contains[x], maxEditDistance);
1825-
if (dist > maxEditDistance) {
1826-
continue pathiter;
1833+
const [p, c] = [path[i + x], contains[x]];
1834+
if (Math.floor((p.length - c.length) / 3) <= maxPathEditDistance &&
1835+
p.indexOf(c) !== -1
1836+
) {
1837+
// discount distance on substring match
1838+
dist_total += Math.floor((p.length - c.length) / 3);
1839+
} else {
1840+
const dist = editDistance(p, c, maxPathEditDistance);
1841+
if (dist > maxPathEditDistance) {
1842+
continue pathiter;
1843+
}
1844+
dist_total += dist;
18271845
}
1828-
dist_total += dist;
18291846
}
18301847
ret_dist = Math.min(ret_dist, Math.round(dist_total / clength));
18311848
}
1832-
return ret_dist;
1849+
return ret_dist > maxPathEditDistance ? null : ret_dist;
18331850
}
18341851

18351852
function typePassesFilter(filter, type) {
@@ -2030,8 +2047,8 @@ function initSearch(rawSearchIndex) {
20302047
}
20312048

20322049
if (elem.fullPath.length > 1) {
2033-
path_dist = checkPath(elem.pathWithoutLast, row, maxEditDistance);
2034-
if (path_dist > maxEditDistance) {
2050+
path_dist = checkPath(elem.pathWithoutLast, row);
2051+
if (path_dist === null) {
20352052
return;
20362053
}
20372054
}
@@ -2045,7 +2062,7 @@ function initSearch(rawSearchIndex) {
20452062

20462063
const dist = editDistance(row.normalizedName, elem.normalizedPathLast, maxEditDistance);
20472064

2048-
if (index === -1 && dist + path_dist > maxEditDistance) {
2065+
if (index === -1 && dist > maxEditDistance) {
20492066
return;
20502067
}
20512068

@@ -2100,13 +2117,9 @@ function initSearch(rawSearchIndex) {
21002117
}
21012118

21022119
function innerRunQuery() {
2103-
let queryLen = 0;
2104-
for (const elem of parsedQuery.elems) {
2105-
queryLen += elem.name.length;
2106-
}
2107-
for (const elem of parsedQuery.returned) {
2108-
queryLen += elem.name.length;
2109-
}
2120+
const queryLen =
2121+
parsedQuery.elems.reduce((acc, next) => acc + next.pathLast.length, 0) +
2122+
parsedQuery.returned.reduce((acc, next) => acc + next.pathLast.length, 0);
21102123
const maxEditDistance = Math.floor(queryLen / 3);
21112124

21122125
/**

tests/rustdoc-js-std/asrawfd.js

-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ const EXPECTED = {
77
// Validate that type alias methods get the correct path.
88
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' },
99
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' },
10-
{ 'path': 'std::os::linux::process::PidFd', 'name': 'as_raw_fd' },
1110
{ 'path': 'std::os::fd::RawFd', 'name': 'as_raw_fd' },
1211
],
1312
};
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// exact-check
2+
const FILTER_CRATE = "std";
3+
const EXPECTED = [
4+
{
5+
query: 'vec::intoiterator',
6+
others: [
7+
// trait std::iter::IntoIterator is not the first result
8+
{ 'path': 'std::vec', 'name': 'IntoIter' },
9+
{ 'path': 'std::vec::Vec', 'name': 'into_iter' },
10+
{ 'path': 'std::vec::Drain', 'name': 'into_iter' },
11+
{ 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
12+
{ 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
13+
{ 'path': 'std::vec::Splice', 'name': 'into_iter' },
14+
{ 'path': 'std::collections::VecDeque', 'name': 'into_iter' },
15+
],
16+
},
17+
{
18+
query: 'vec::iter',
19+
others: [
20+
// std::net::ToSocketAddrs::iter should not show up here
21+
{ 'path': 'std::vec', 'name': 'IntoIter' },
22+
{ 'path': 'std::vec::Vec', 'name': 'from_iter' },
23+
{ 'path': 'std::vec::Vec', 'name': 'into_iter' },
24+
{ 'path': 'std::vec::Drain', 'name': 'into_iter' },
25+
{ 'path': 'std::vec::IntoIter', 'name': 'into_iter' },
26+
{ 'path': 'std::vec::ExtractIf', 'name': 'into_iter' },
27+
{ 'path': 'std::vec::Splice', 'name': 'into_iter' },
28+
{ 'path': 'std::collections::VecDeque', 'name': 'iter' },
29+
{ 'path': 'std::collections::VecDeque', 'name': 'iter_mut' },
30+
{ 'path': 'std::collections::VecDeque', 'name': 'from_iter' },
31+
{ 'path': 'std::collections::VecDeque', 'name': 'into_iter' },
32+
],
33+
},
34+
{
35+
query: 'slice::itermut',
36+
others: [
37+
// std::collections::btree_map::itermut should not show up here
38+
{ 'path': 'std::slice', 'name': 'IterMut' },
39+
{ 'path': 'std::slice', 'name': 'iter_mut' },
40+
],
41+
},
42+
];

tests/rustdoc-js-std/path-ordering.js

+20-11
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,20 @@
1-
const EXPECTED = {
2-
query: 'hashset::insert',
3-
others: [
4-
// ensure hashset::insert comes first
5-
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
6-
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' },
7-
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' },
8-
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' },
9-
{ 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' },
10-
],
11-
};
1+
const EXPECTED = [
2+
{
3+
query: 'hashset::insert',
4+
others: [
5+
// ensure hashset::insert comes first
6+
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
7+
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' },
8+
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' },
9+
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' },
10+
],
11+
},
12+
{
13+
query: 'hash::insert',
14+
others: [
15+
// ensure hashset/hashmap::insert come first
16+
{ 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' },
17+
{ 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' },
18+
],
19+
},
20+
];

tests/rustdoc-js/exact-match.js

-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,5 @@ const EXPECTED = {
33
'others': [
44
{ 'path': 'exact_match::Si', 'name': 'pc' },
55
{ 'path': 'exact_match::Psi', 'name': 'pc' },
6-
{ 'path': 'exact_match::Si', 'name': 'pa' },
76
],
87
};

tests/rustdoc-js/module-substring.js

+15-7
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
1-
const EXPECTED = {
2-
'query': 'ig::pc',
3-
'others': [
4-
{ 'path': 'module_substring::Sig', 'name': 'pc' },
5-
{ 'path': 'module_substring::Si', 'name': 'pc' },
6-
],
7-
};
1+
const EXPECTED = [
2+
{
3+
'query': 'ig::pc',
4+
'others': [
5+
{ 'path': 'module_substring::Sig', 'name': 'pc' },
6+
],
7+
},
8+
{
9+
'query': 'si::pc',
10+
'others': [
11+
{ 'path': 'module_substring::Si', 'name': 'pc' },
12+
{ 'path': 'module_substring::Sig', 'name': 'pc' },
13+
],
14+
},
15+
];
+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// exact-check
2+
3+
const EXPECTED = [
4+
{
5+
'query': 'xxxxxxxxxxx::hocuspocusprestidigitation',
6+
// do not match abracadabra::hocuspocusprestidigitation
7+
'others': [],
8+
},
9+
{
10+
// exact match
11+
'query': 'abracadabra::hocuspocusprestidigitation',
12+
'others': [
13+
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
14+
],
15+
},
16+
{
17+
// swap br/rb; that's edit distance 2, where maxPathEditDistance = 3 (11 / 3)
18+
'query': 'arbacadarba::hocuspocusprestidigitation',
19+
'others': [
20+
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
21+
],
22+
},
23+
{
24+
// truncate 5 chars, where maxEditDistance = 7 (21 / 3)
25+
'query': 'abracadarba::hocusprestidigitation',
26+
'others': [
27+
{ 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' },
28+
],
29+
},
30+
{
31+
// truncate 9 chars, where maxEditDistance = 5 (17 / 3)
32+
'query': 'abracadarba::hprestidigitation',
33+
'others': [],
34+
},
35+
];
+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#![crate_name="abracadabra"]
2+
3+
pub struct HocusPocusPrestidigitation;

tests/rustdoc-js/path-ordering.js

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
// exact-check
22

33
const EXPECTED = {
4-
'query': 'b::ccccccc',
4+
'query': 'bbbbbb::ccccccc',
55
'others': [
66
// `ccccccc` is an exact match for all three of these.
77
// However `bbbbbb` is a closer match for `bbbbbb` than for any
88
// of the others, so it ought to go first.
9-
{ 'path': 'path_ordering::bb', 'name': 'Ccccccc' },
10-
{ 'path': 'path_ordering::aa', 'name': 'Ccccccc' },
11-
{ 'path': 'path_ordering::dd', 'name': 'Ccccccc' },
9+
{ 'path': 'path_ordering::bbbbbb', 'name': 'Ccccccc' },
10+
{ 'path': 'path_ordering::abbbbb', 'name': 'Ccccccc' },
11+
{ 'path': 'path_ordering::dbbbbb', 'name': 'Ccccccc' },
1212
],
1313
};

tests/rustdoc-js/path-ordering.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
pub mod dd {
1+
pub mod dbbbbb {
22
pub struct Ccccccc;
33
}
4-
pub mod aa {
4+
pub mod abbbbb {
55
pub struct Ccccccc;
66
}
7-
pub mod bb {
7+
pub mod bbbbbb {
88
pub struct Ccccccc;
99
}

0 commit comments

Comments
 (0)