Skip to content

Improved the doc search by including Levenshtein distance #15385

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 31, 2014
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 89 additions & 44 deletions src/librustdoc/html/static/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
}
$('#' + from)[0].scrollIntoView();
$('.line-numbers span').removeClass('line-highlighted');
for (i = from; i <= to; i += 1) {
for (i = from; i <= to; ++i) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feels a bit funny to see this in a diff since Rust doesn't have this.

$('#' + i).addClass('line-highlighted');
}
}
Expand Down Expand Up @@ -99,7 +99,7 @@
stripped = '',
len = rootPath.match(/\.\.\//g).length + 1;

for (i = 0; i < len; i += 1) {
for (i = 0; i < len; ++i) {
match = url.match(/\/[^\/]*$/);
if (i < len - 1) {
stripped = match[0] + stripped;
Expand All @@ -111,9 +111,47 @@

document.location.href = url;
});
/**
* A function to compute the Levenshtein distance between two strings
* Licensed under the Creative Commons Attribution-ShareAlike 3.0 Unported
* Full License can be found at http://creativecommons.org/licenses/by-sa/3.0/legalcode
* This code is an unmodified version of the code written by Marco de Wit
* and was found at http://stackoverflow.com/a/18514751/745719
*/
var levenshtein = (function() {
var row2 = [];
return function(s1, s2) {
if (s1 === s2) {
return 0;
} else {
var s1_len = s1.length, s2_len = s2.length;
if (s1_len && s2_len) {
var i1 = 0, i2 = 0, a, b, c, c2, row = row2;
while (i1 < s1_len)
row[i1] = ++i1;
while (i2 < s2_len) {
c2 = s2.charCodeAt(i2);
a = i2;
++i2;
b = i2;
for (i1 = 0; i1 < s1_len; ++i1) {
c = a + (s1.charCodeAt(i1) !== c2 ? 1 : 0);
a = row[i1];
b = b < a ? (b < c ? b + 1 : c) : (a < c ? a + 1 : c);
row[i1] = b;
}
}
return b;
} else {
return s1_len + s2_len;
}
}
};
})();

function initSearch(rawSearchIndex) {
var currentResults, index, searchIndex;
var MAX_LEV_DISTANCE = 3;
var params = getQueryStringParams();

// Populate search bar with query string search term when provided,
Expand All @@ -140,7 +178,7 @@
split = valLower.split("::");

//remove empty keywords
for (var j = 0; j < split.length; j++) {
for (var j = 0; j < split.length; ++j) {
split[j].toLowerCase();
if (split[j] === "") {
split.splice(j, 1);
Expand All @@ -153,7 +191,7 @@
val.charAt(val.length - 1) === val.charAt(0))
{
val = val.substr(1, val.length - 2);
for (var i = 0; i < nSearchWords; i += 1) {
for (var i = 0; i < nSearchWords; ++i) {
if (searchWords[i] === val) {
// filter type: ... queries
if (typeFilter < 0 || typeFilter === searchIndex[i].ty) {
Expand All @@ -167,15 +205,31 @@
} else {
// gather matching search results up to a certain maximum
val = val.replace(/\_/g, "");
for (var i = 0; i < split.length; i++) {
for (var j = 0; j < nSearchWords; j += 1) {
for (var i = 0; i < split.length; ++i) {
for (var j = 0; j < nSearchWords; ++j) {
var lev_distance;
if (searchWords[j].indexOf(split[i]) > -1 ||
searchWords[j].indexOf(val) > -1 ||
searchWords[j].replace(/_/g, "").indexOf(val) > -1)
{
// filter type: ... queries
if (typeFilter < 0 || typeFilter === searchIndex[j].ty) {
results.push({id: j, index: searchWords[j].replace(/_/g, "").indexOf(val)});
results.push({
id: j,
index: searchWords[j].replace(/_/g, "").indexOf(val),
lev: 0,
});
}
} else if (
(lev_distance = levenshtein(searchWords[j], val)) <=
MAX_LEV_DISTANCE) {
if (typeFilter < 0 || typeFilter === searchIndex[j].ty) {
results.push({
id: j,
index: 0,
// we want lev results to go lower than others
lev: lev_distance,
});
}
}
if (results.length === max) {
Expand All @@ -186,7 +240,7 @@
}

var nresults = results.length;
for (var i = 0; i < nresults; i += 1) {
for (var i = 0; i < nresults; ++i) {
results[i].word = searchWords[results[i].id];
results[i].item = searchIndex[results[i].id] || {};
}
Expand All @@ -198,6 +252,12 @@
results.sort(function(aaa, bbb) {
var a, b;

// Sort by non levenshtein results and then levenshtein results by the distance
// (less changes required to match means higher rankings)
a = (aaa.lev);
b = (bbb.lev);
if (a !== b) return a - b;

// sort by crate (non-current crate goes later)
a = (aaa.item.crate !== window.currentCrate);
b = (bbb.item.crate !== window.currentCrate);
Expand Down Expand Up @@ -255,7 +315,7 @@
results[i].id = -1;
}
}
for (var i = 0; i < results.length; i++) {
for (var i = 0; i < results.length; ++i) {
var result = results[i],
name = result.item.name.toLowerCase(),
path = result.item.path.toLowerCase(),
Expand Down Expand Up @@ -285,38 +345,23 @@
* @return {[boolean]} [Whether the result is valid or not]
*/
function validateResult(name, path, keys, parent) {
//initially valid
var validate = true;
//if there is a parent, then validate against parent
if (parent !== undefined) {
for (var i = 0; i < keys.length; i++) {
// if previous keys are valid and current key is in the
// path, name or parent
if ((validate) &&
(name.toLowerCase().indexOf(keys[i]) > -1 ||
path.toLowerCase().indexOf(keys[i]) > -1 ||
parent.name.toLowerCase().indexOf(keys[i]) > -1))
{
validate = true;
} else {
validate = false;
}
}
} else {
for (var i = 0; i < keys.length; i++) {
// if previous keys are valid and current key is in the
// path, name
if ((validate) &&
(name.toLowerCase().indexOf(keys[i]) > -1 ||
path.toLowerCase().indexOf(keys[i]) > -1))
{
validate = true;
} else {
validate = false;
}
for (var i=0; i < keys.length; ++i) {
// each check is for validation so we negate the conditions and invalidate
if (!(
// check for an exact name match
name.toLowerCase().indexOf(keys[i]) > -1 ||
// then an exact path match
path.toLowerCase().indexOf(keys[i]) > -1 ||
// next if there is a parent, check for exact parent match
(parent !== undefined &&
parent.name.toLowerCase().indexOf(keys[i]) > -1) ||
// lastly check to see if the name was a levenshtein match
levenshtein(name.toLowerCase(), keys[i]) <=
MAX_LEV_DISTANCE)) {
return false;
}
}
return validate;
return true;
}

function getQuery() {
Expand Down Expand Up @@ -496,7 +541,7 @@

resultIndex = execQuery(query, 20000, index);
len = resultIndex.length;
for (i = 0; i < len; i += 1) {
for (i = 0; i < len; ++i) {
if (resultIndex[i].id > -1) {
obj = searchIndex[resultIndex[i].id];
filterdata.push([obj.name, obj.ty, obj.path, obj.desc]);
Expand Down Expand Up @@ -568,7 +613,7 @@
// faster analysis operations
var len = items.length;
var lastPath = "";
for (var i = 0; i < len; i += 1) {
for (var i = 0; i < len; ++i) {
var rawRow = items[i];
var row = {crate: crate, ty: rawRow[0], name: rawRow[1],
path: rawRow[2] || lastPath, desc: rawRow[3],
Expand Down Expand Up @@ -640,7 +685,7 @@
crates.push(crate);
}
crates.sort();
for (var i = 0; i < crates.length; i++) {
for (var i = 0; i < crates.length; ++i) {
var klass = 'crate';
if (crates[i] == window.currentCrate) {
klass += ' current';
Expand All @@ -657,10 +702,10 @@
window.register_implementors = function(imp) {
var list = $('#implementors-list');
var libs = Object.getOwnPropertyNames(imp);
for (var i = 0; i < libs.length; i++) {
for (var i = 0; i < libs.length; ++i) {
if (libs[i] == currentCrate) continue;
var structs = imp[libs[i]];
for (var j = 0; j < structs.length; j++) {
for (var j = 0; j < structs.length; ++j) {
var code = $('<code>').append(structs[j]);
$.each(code.find('a'), function(idx, a) {
var href = $(a).attr('href');
Expand Down