From 0ea58e2346b905a7847d890db6e8e5154a390e2e Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Tue, 26 Dec 2023 16:15:51 -0700 Subject: [PATCH] rustdoc-search: count path edits with separate edit limit Since the two are counted separately elsewhere, they should get their own limits, too. The biggest problem with combining them is that paths are loosely checked by not requiring every component to match, which means that if they are short and matched loosely, they can easily find "drunk typist" matches that make no sense, like this old result: std::collections::btree_map::itermut matching slice::itermut maxEditDistance = ("slice::itermut".length) / 3 = 14 / 3 = 4 editDistance("std", "slice") = 4 editDistance("itermut", "itermut") = 0 4 + 0 <= 4 PASS Of course, `slice::itermut` should not match items from `btree_map`. `slice` should not match `std`. The new result counts them separately: maxPathEditDistance = "slice".length / 3 = 5 / 3 = 1 maxEditDistance = "itermut".length / 3 = 7 / 3 = 2 editDistance("std", "slice") = 4 4 <= 1 FAIL Effectively, this makes path queries less "typo-tolerant". The typo tolerance for paths is not zero, but it is now strict enough that `vec` won't match the `v1` prelude. Queries without parent paths are unchanged. 
--- src/librustdoc/html/static/js/search.js | 47 +++++++++++++------- tests/rustdoc-js-std/asrawfd.js | 1 - tests/rustdoc-js-std/path-maxeditdistance.js | 42 +++++++++++++++++ tests/rustdoc-js-std/path-ordering.js | 31 ++++++++----- tests/rustdoc-js/exact-match.js | 1 - tests/rustdoc-js/module-substring.js | 22 ++++++--- tests/rustdoc-js/path-maxeditdistance.js | 35 +++++++++++++++ tests/rustdoc-js/path-maxeditdistance.rs | 3 ++ tests/rustdoc-js/path-ordering.js | 8 ++-- tests/rustdoc-js/path-ordering.rs | 6 +-- 10 files changed, 152 insertions(+), 44 deletions(-) create mode 100644 tests/rustdoc-js-std/path-maxeditdistance.js create mode 100644 tests/rustdoc-js/path-maxeditdistance.js create mode 100644 tests/rustdoc-js/path-maxeditdistance.rs diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index e824a1fd4bda1..e6263db32835d 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1805,11 +1805,20 @@ function initSearch(rawSearchIndex) { return unifyFunctionTypes([row], [elem], whereClause, mgens); } - function checkPath(contains, ty, maxEditDistance) { + /** + * Compute an "edit distance" that ignores missing path elements. 
+ * @param {string[]} contains search query path + * @param {Row} ty indexed item + * @returns {null|number} edit distance + */ + function checkPath(contains, ty) { if (contains.length === 0) { return 0; } - let ret_dist = maxEditDistance + 1; + const maxPathEditDistance = Math.floor( + contains.reduce((acc, next) => acc + next.length, 0) / 3 + ); + let ret_dist = maxPathEditDistance + 1; const path = ty.path.split("::"); if (ty.parent && ty.parent.name) { @@ -1821,15 +1830,23 @@ function initSearch(rawSearchIndex) { pathiter: for (let i = length - clength; i >= 0; i -= 1) { let dist_total = 0; for (let x = 0; x < clength; ++x) { - const dist = editDistance(path[i + x], contains[x], maxEditDistance); - if (dist > maxEditDistance) { - continue pathiter; + const [p, c] = [path[i + x], contains[x]]; + if (Math.floor((p.length - c.length) / 3) <= maxPathEditDistance && + p.indexOf(c) !== -1 + ) { + // discount distance on substring match + dist_total += Math.floor((p.length - c.length) / 3); + } else { + const dist = editDistance(p, c, maxPathEditDistance); + if (dist > maxPathEditDistance) { + continue pathiter; + } + dist_total += dist; } - dist_total += dist; } ret_dist = Math.min(ret_dist, Math.round(dist_total / clength)); } - return ret_dist; + return ret_dist > maxPathEditDistance ? 
null : ret_dist; } function typePassesFilter(filter, type) { @@ -2030,8 +2047,8 @@ function initSearch(rawSearchIndex) { } if (elem.fullPath.length > 1) { - path_dist = checkPath(elem.pathWithoutLast, row, maxEditDistance); - if (path_dist > maxEditDistance) { + path_dist = checkPath(elem.pathWithoutLast, row); + if (path_dist === null) { return; } } @@ -2045,7 +2062,7 @@ function initSearch(rawSearchIndex) { const dist = editDistance(row.normalizedName, elem.normalizedPathLast, maxEditDistance); - if (index === -1 && dist + path_dist > maxEditDistance) { + if (index === -1 && dist > maxEditDistance) { return; } @@ -2100,13 +2117,9 @@ function initSearch(rawSearchIndex) { } function innerRunQuery() { - let queryLen = 0; - for (const elem of parsedQuery.elems) { - queryLen += elem.name.length; - } - for (const elem of parsedQuery.returned) { - queryLen += elem.name.length; - } + const queryLen = + parsedQuery.elems.reduce((acc, next) => acc + next.pathLast.length, 0) + + parsedQuery.returned.reduce((acc, next) => acc + next.pathLast.length, 0); const maxEditDistance = Math.floor(queryLen / 3); /** diff --git a/tests/rustdoc-js-std/asrawfd.js b/tests/rustdoc-js-std/asrawfd.js index 5b3cfeabbcdd2..5dbc4ba95d9a7 100644 --- a/tests/rustdoc-js-std/asrawfd.js +++ b/tests/rustdoc-js-std/asrawfd.js @@ -7,7 +7,6 @@ const EXPECTED = { // Validate that type alias methods get the correct path. 
{ 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' }, { 'path': 'std::os::fd::AsRawFd', 'name': 'as_raw_fd' }, - { 'path': 'std::os::linux::process::PidFd', 'name': 'as_raw_fd' }, { 'path': 'std::os::fd::RawFd', 'name': 'as_raw_fd' }, ], }; diff --git a/tests/rustdoc-js-std/path-maxeditdistance.js b/tests/rustdoc-js-std/path-maxeditdistance.js new file mode 100644 index 0000000000000..822389aaa4ffd --- /dev/null +++ b/tests/rustdoc-js-std/path-maxeditdistance.js @@ -0,0 +1,42 @@ +// exact-check +const FILTER_CRATE = "std"; +const EXPECTED = [ + { + query: 'vec::intoiterator', + others: [ + // trait std::iter::IntoIterator is not the first result + { 'path': 'std::vec', 'name': 'IntoIter' }, + { 'path': 'std::vec::Vec', 'name': 'into_iter' }, + { 'path': 'std::vec::Drain', 'name': 'into_iter' }, + { 'path': 'std::vec::IntoIter', 'name': 'into_iter' }, + { 'path': 'std::vec::ExtractIf', 'name': 'into_iter' }, + { 'path': 'std::vec::Splice', 'name': 'into_iter' }, + { 'path': 'std::collections::VecDeque', 'name': 'into_iter' }, + ], + }, + { + query: 'vec::iter', + others: [ + // std::net::ToSocketAttrs::iter should not show up here + { 'path': 'std::vec', 'name': 'IntoIter' }, + { 'path': 'std::vec::Vec', 'name': 'from_iter' }, + { 'path': 'std::vec::Vec', 'name': 'into_iter' }, + { 'path': 'std::vec::Drain', 'name': 'into_iter' }, + { 'path': 'std::vec::IntoIter', 'name': 'into_iter' }, + { 'path': 'std::vec::ExtractIf', 'name': 'into_iter' }, + { 'path': 'std::vec::Splice', 'name': 'into_iter' }, + { 'path': 'std::collections::VecDeque', 'name': 'iter' }, + { 'path': 'std::collections::VecDeque', 'name': 'iter_mut' }, + { 'path': 'std::collections::VecDeque', 'name': 'from_iter' }, + { 'path': 'std::collections::VecDeque', 'name': 'into_iter' }, + ], + }, + { + query: 'slice::itermut', + others: [ + // std::collections::btree_map::itermut should not show up here + { 'path': 'std::slice', 'name': 'IterMut' }, + { 'path': 'std::slice', 'name': 'iter_mut' }, + ], + 
}, +]; diff --git a/tests/rustdoc-js-std/path-ordering.js b/tests/rustdoc-js-std/path-ordering.js index c3d61d238cc35..e6b7bfab1e5f5 100644 --- a/tests/rustdoc-js-std/path-ordering.js +++ b/tests/rustdoc-js-std/path-ordering.js @@ -1,11 +1,20 @@ -const EXPECTED = { - query: 'hashset::insert', - others: [ - // ensure hashset::insert comes first - { 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' }, - { 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' }, - { 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' }, - { 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' }, - { 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' }, - ], -}; +const EXPECTED = [ + { + query: 'hashset::insert', + others: [ + // ensure hashset::insert comes first + { 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' }, + { 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert' }, + { 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_with' }, + { 'path': 'std::collections::hash_set::HashSet', 'name': 'get_or_insert_owned' }, + ], + }, + { + query: 'hash::insert', + others: [ + // ensure hashset/hashmap::insert come first + { 'path': 'std::collections::hash_map::HashMap', 'name': 'insert' }, + { 'path': 'std::collections::hash_set::HashSet', 'name': 'insert' }, + ], + }, +]; diff --git a/tests/rustdoc-js/exact-match.js b/tests/rustdoc-js/exact-match.js index ce3a76f9b7dd3..9e47d27490bf8 100644 --- a/tests/rustdoc-js/exact-match.js +++ b/tests/rustdoc-js/exact-match.js @@ -3,6 +3,5 @@ const EXPECTED = { 'others': [ { 'path': 'exact_match::Si', 'name': 'pc' }, { 'path': 'exact_match::Psi', 'name': 'pc' }, - { 'path': 'exact_match::Si', 'name': 'pa' }, ], }; diff --git a/tests/rustdoc-js/module-substring.js b/tests/rustdoc-js/module-substring.js index 7a10397ebc620..74c421d7f0b5a 100644 --- a/tests/rustdoc-js/module-substring.js +++ 
b/tests/rustdoc-js/module-substring.js @@ -1,7 +1,15 @@ -const EXPECTED = { - 'query': 'ig::pc', - 'others': [ - { 'path': 'module_substring::Sig', 'name': 'pc' }, - { 'path': 'module_substring::Si', 'name': 'pc' }, - ], -}; +const EXPECTED = [ + { + 'query': 'ig::pc', + 'others': [ + { 'path': 'module_substring::Sig', 'name': 'pc' }, + ], + }, + { + 'query': 'si::pc', + 'others': [ + { 'path': 'module_substring::Si', 'name': 'pc' }, + { 'path': 'module_substring::Sig', 'name': 'pc' }, + ], + }, +]; diff --git a/tests/rustdoc-js/path-maxeditdistance.js b/tests/rustdoc-js/path-maxeditdistance.js new file mode 100644 index 0000000000000..73b24a6dddf0a --- /dev/null +++ b/tests/rustdoc-js/path-maxeditdistance.js @@ -0,0 +1,35 @@ +// exact-check + +const EXPECTED = [ + { + 'query': 'xxxxxxxxxxx::hocuspocusprestidigitation', + // do not match abracadabra::hocuspocusprestidigitation + 'others': [], + }, + { + // exact match + 'query': 'abracadabra::hocuspocusprestidigitation', + 'others': [ + { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' }, + ], + }, + { + // swap br/rb; that's edit distance 2, where maxPathEditDistance = 3 (11 / 3) + 'query': 'arbacadarba::hocuspocusprestidigitation', + 'others': [ + { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' }, + ], + }, + { + // truncate 5 chars, where maxEditDistance = 7 (21 / 3) + 'query': 'abracadarba::hocusprestidigitation', + 'others': [ + { 'path': 'abracadabra', 'name': 'HocusPocusPrestidigitation' }, + ], + }, + { + // truncate 9 chars, where maxEditDistance = 5 (17 / 3) + 'query': 'abracadarba::hprestidigitation', + 'others': [], + }, +]; diff --git a/tests/rustdoc-js/path-maxeditdistance.rs b/tests/rustdoc-js/path-maxeditdistance.rs new file mode 100644 index 0000000000000..3861280d59bf4 --- /dev/null +++ b/tests/rustdoc-js/path-maxeditdistance.rs @@ -0,0 +1,3 @@ +#![crate_name="abracadabra"] + +pub struct HocusPocusPrestidigitation; diff --git a/tests/rustdoc-js/path-ordering.js 
b/tests/rustdoc-js/path-ordering.js index f2e6fe2fa61c4..73d3f4b275590 100644 --- a/tests/rustdoc-js/path-ordering.js +++ b/tests/rustdoc-js/path-ordering.js @@ -1,13 +1,13 @@ // exact-check const EXPECTED = { - 'query': 'b::ccccccc', + 'query': 'bbbbbb::ccccccc', 'others': [ // `ccccccc` is an exact match for all three of these. // However `b` is a closer match for `bb` than for any // of the others, so it ought to go first. - { 'path': 'path_ordering::bb', 'name': 'Ccccccc' }, - { 'path': 'path_ordering::aa', 'name': 'Ccccccc' }, - { 'path': 'path_ordering::dd', 'name': 'Ccccccc' }, + { 'path': 'path_ordering::bbbbbb', 'name': 'Ccccccc' }, + { 'path': 'path_ordering::abbbbb', 'name': 'Ccccccc' }, + { 'path': 'path_ordering::dbbbbb', 'name': 'Ccccccc' }, ], }; diff --git a/tests/rustdoc-js/path-ordering.rs b/tests/rustdoc-js/path-ordering.rs index 7843cf7f9dc48..71e24923ed118 100644 --- a/tests/rustdoc-js/path-ordering.rs +++ b/tests/rustdoc-js/path-ordering.rs @@ -1,9 +1,9 @@ -pub mod dd { +pub mod dbbbbb { pub struct Ccccccc; } -pub mod aa { +pub mod abbbbb { pub struct Ccccccc; } -pub mod bb { +pub mod bbbbbb { pub struct Ccccccc; }