Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use weighted algorithm for string matching when finding files in repo #21370

Merged
merged 6 commits into from
Oct 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion jest.config.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
// to run tests with ES6 module, node must run with "--experimental-vm-modules", or see Makefile's "test-frontend" for reference
export default {
rootDir: 'web_src',
setupFilesAfterEnv: ['jest-extended/all'],
Expand All @@ -7,6 +8,8 @@ export default {
transform: {
'\\.svg$': '<rootDir>/js/testUtils/jestRawLoader.js',
},
setupFiles: [
'./js/testUtils/jestSetup.js', // prepare global variables used by our code (eg: window.config)
],
verbose: false,
};

wxiaoguang marked this conversation as resolved.
Show resolved Hide resolved
87 changes: 69 additions & 18 deletions web_src/js/features/repo-findfile.js
Original file line number Diff line number Diff line change
@@ -1,45 +1,96 @@
import $ from 'jquery';

import {svg} from '../svg.js';
import {strSubMatch} from '../utils.js';
const {csrf} = window.config;

const threshold = 50;
let files = [];
let $repoFindFileInput, $repoFindFileTableBody, $repoFindFileNoResult;

function filterRepoFiles(filter) {
const treeLink = $repoFindFileInput.attr('data-url-tree-link');
$repoFindFileTableBody.empty();

const fileRes = [];
// return the case-insensitive sub-match result as an array: [unmatched, matched, unmatched, matched, ...]
// res[even] is unmatched, res[odd] is matched, see unit tests for examples
// argument subLower must be a lower-cased string.
export function strSubMatch(full, subLower) {
const res = [''];
wxiaoguang marked this conversation as resolved.
Show resolved Hide resolved
let i = 0, j = 0;
const fullLower = full.toLowerCase();
while (i < subLower.length && j < fullLower.length) {
if (subLower[i] === fullLower[j]) {
if (res.length % 2 !== 0) res.push('');
res[res.length - 1] += full[j];
j++;
i++;
} else {
if (res.length % 2 === 0) res.push('');
res[res.length - 1] += full[j];
j++;
}
wxiaoguang marked this conversation as resolved.
Show resolved Hide resolved
}
if (i !== subLower.length) {
// if the sub string doesn't match the full, only return the full as unmatched.
return [full];
}
if (j < full.length) {
// append remaining chars from full to result as unmatched
if (res.length % 2 === 0) res.push('');
res[res.length - 1] += full.substring(j);
}
return res;
}

export function calcMatchedWeight(matchResult) {
let weight = 0;
for (let i = 0; i < matchResult.length; i++) {
if (i % 2 === 1) { // matches are on odd indices, see strSubMatch
// use a function f(x+x) > f(x) + f(x) to make the longer matched string has higher weight.
weight += matchResult[i].length * matchResult[i].length;
}
}
return weight;
}

export function filterRepoFilesWeighted(files, filter) {
let filterResult = [];
if (filter) {
for (let i = 0; i < files.length && fileRes.length < threshold; i++) {
const subMatch = strSubMatch(files[i], filter);
if (subMatch.length > 1) {
fileRes.push(subMatch);
const filterLower = filter.toLowerCase();
// TODO: for large repo, this loop could be slow, maybe there could be one more limit:
// ... && filterResult.length < threshold * 20, wait for more feedbacks
for (let i = 0; i < files.length; i++) {
const res = strSubMatch(files[i], filterLower);
if (res.length > 1) { // length==1 means unmatched, >1 means having matched sub strings
filterResult.push({matchResult: res, matchWeight: calcMatchedWeight(res)});
}
}
filterResult.sort((a, b) => b.matchWeight - a.matchWeight);
filterResult = filterResult.slice(0, threshold);
} else {
for (let i = 0; i < files.length && i < threshold; i++) {
fileRes.push([files[i]]);
filterResult.push({matchResult: [files[i]], matchWeight: 0});
}
}
return filterResult;
}

function filterRepoFiles(filter) {
const treeLink = $repoFindFileInput.attr('data-url-tree-link');
$repoFindFileTableBody.empty();

const filterResult = filterRepoFilesWeighted(files, filter);
const tmplRow = `<tr><td><a></a></td></tr>`;
wxiaoguang marked this conversation as resolved.
Show resolved Hide resolved

$repoFindFileNoResult.toggle(fileRes.length === 0);
for (const matchRes of fileRes) {
$repoFindFileNoResult.toggle(filterResult.length === 0);
for (const r of filterResult) {
const $row = $(tmplRow);
const $a = $row.find('a');
$a.attr('href', `${treeLink}/${matchRes.join('')}`);
$a.attr('href', `${treeLink}/${r.matchResult.join('')}`);
const $octiconFile = $(svg('octicon-file')).addClass('mr-3');
$a.append($octiconFile);
// if the target file path is "abc/xyz", to search "bx", then the matchRes is ['a', 'b', 'c/', 'x', 'yz']
// the matchRes[odd] is matched and highlighted to red.
for (let j = 0; j < matchRes.length; j++) {
if (!matchRes[j]) continue;
const $span = $('<span>').text(matchRes[j]);
// if the target file path is "abc/xyz", to search "bx", then the matchResult is ['a', 'b', 'c/', 'x', 'yz']
// the matchResult[odd] is matched and highlighted to red.
for (let j = 0; j < r.matchResult.length; j++) {
if (!r.matchResult[j]) continue;
const $span = $('<span>').text(r.matchResult[j]);
if (j % 2 === 1) $span.addClass('ui text red');
$a.append($span);
}
Expand Down
34 changes: 34 additions & 0 deletions web_src/js/features/repo-findfile.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import {strSubMatch, calcMatchedWeight, filterRepoFilesWeighted} from './repo-findfile.js';

describe('Repo Find Files', () => {
test('strSubMatch', () => {
expect(strSubMatch('abc', '')).toEqual(['abc']);
expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']);
expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']);
expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']);
expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']);
expect(strSubMatch('abc', 'z')).toEqual(['abc']);
expect(strSubMatch('abc', 'az')).toEqual(['abc']);

expect(strSubMatch('ABc', 'ac')).toEqual(['', 'A', 'B', 'c']);
expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']);

expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']);
expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']);
});

test('calcMatchedWeight', () => {
expect(calcMatchedWeight(['a', 'b', 'c', 'd']) < calcMatchedWeight(['a', 'bc', 'c'])).toBeTruthy();
});

test('filterRepoFilesWeighted', () => {
// the first matched result should always be the "word.txt"
let res = filterRepoFilesWeighted(['word.txt', 'we-got-result.dat'], 'word');
expect(res).toHaveLength(2);
expect(res[0].matchResult).toEqual(['', 'word', '.txt']);

res = filterRepoFilesWeighted(['we-got-result.dat', 'word.txt'], 'word');
expect(res).toHaveLength(2);
expect(res[0].matchResult).toEqual(['', 'word', '.txt']);
});
});
5 changes: 5 additions & 0 deletions web_src/js/testUtils/jestSetup.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
window.config = {
csrfToken: 'jest-test-csrf-token-123456',
pageData: {},
i18n: {},
};
30 changes: 0 additions & 30 deletions web_src/js/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,36 +59,6 @@ export function parseIssueHref(href) {
return {owner, repo, type, index};
}

// return the sub-match result as an array: [unmatched, matched, unmatched, matched, ...]
// res[even] is unmatched, res[odd] is matched, see unit tests for examples
export function strSubMatch(full, sub) {
const res = [''];
let i = 0, j = 0;
const subLower = sub.toLowerCase(), fullLower = full.toLowerCase();
while (i < subLower.length && j < fullLower.length) {
if (subLower[i] === fullLower[j]) {
if (res.length % 2 !== 0) res.push('');
res[res.length - 1] += full[j];
j++;
i++;
} else {
if (res.length % 2 === 0) res.push('');
res[res.length - 1] += full[j];
j++;
}
}
if (i !== sub.length) {
// if the sub string doesn't match the full, only return the full as unmatched.
return [full];
}
if (j < full.length) {
// append remaining chars from full to result as unmatched
if (res.length % 2 === 0) res.push('');
res[res.length - 1] += full.substring(j);
}
return res;
}

// pretty-print a number using locale-specific separators, e.g. 1200 -> 1,200
export function prettyNumber(num, locale = 'en-US') {
if (typeof num !== 'number') return '';
Expand Down
18 changes: 1 addition & 17 deletions web_src/js/utils.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import {
basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref, strSubMatch,
basename, extname, isObject, uniq, stripTags, joinPaths, parseIssueHref,
prettyNumber, parseUrl,
} from './utils.js';

Expand Down Expand Up @@ -86,22 +86,6 @@ test('parseIssueHref', () => {
expect(parseIssueHref('')).toEqual({owner: undefined, repo: undefined, type: undefined, index: undefined});
});

test('strSubMatch', () => {
expect(strSubMatch('abc', '')).toEqual(['abc']);
expect(strSubMatch('abc', 'a')).toEqual(['', 'a', 'bc']);
expect(strSubMatch('abc', 'b')).toEqual(['a', 'b', 'c']);
expect(strSubMatch('abc', 'c')).toEqual(['ab', 'c']);
expect(strSubMatch('abc', 'ac')).toEqual(['', 'a', 'b', 'c']);
expect(strSubMatch('abc', 'z')).toEqual(['abc']);
expect(strSubMatch('abc', 'az')).toEqual(['abc']);

expect(strSubMatch('abc', 'aC')).toEqual(['', 'a', 'b', 'c']);
expect(strSubMatch('abC', 'ac')).toEqual(['', 'a', 'b', 'C']);

expect(strSubMatch('aabbcc', 'abc')).toEqual(['', 'a', 'a', 'b', 'b', 'c', 'c']);
expect(strSubMatch('the/directory', 'hedir')).toEqual(['t', 'he', '/', 'dir', 'ectory']);
});

test('prettyNumber', () => {
expect(prettyNumber()).toEqual('');
expect(prettyNumber(null)).toEqual('');
Expand Down