Skip to content
This repository has been archived by the owner on Sep 6, 2021. It is now read-only.

Add String Matching Boost for Case Matches #9512

Closed
wants to merge 14 commits into from
45 changes: 36 additions & 9 deletions src/utils/StringMatch.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ define(function (require, exports, module) {
// Constants for scoring
var SPECIAL_POINTS = 35;
var MATCH_POINTS = 10;
var MATCH_CASE_POINTS = 50;
var LAST_SEGMENT_BOOST = 1;
var BEGINNING_OF_NAME_POINTS = 10;
var DEDUCTION_FOR_LENGTH = 0.2;
Expand All @@ -134,6 +135,11 @@ define(function (require, exports, module) {
/**
 * Match-list entry that records the index of a matched character.
 * Instances are distinguished from other match kinds via `instanceof`.
 *
 * @constructor
 * @param {number} index index of the matched character
 */
function NormalMatch(index) {
this.index = index;
}

/**
 * Used in match lists to designate a matched character that is also a
 * case-sensitive match, i.e. the character in the original (non-lowercased)
 * query equals the character in the original string at the matched position.
 *
 * @constructor
 * @param {number} index index of the matched character
 */
function CaseMatch(index) {
this.index = index;
}

/*
* Finds the best matches between the query and the string. The query is
Expand Down Expand Up @@ -209,7 +215,7 @@ define(function (require, exports, module) {
* @param {int} startingSpecial index into specials array to start scanning with
* @return {Array.<SpecialMatch|NormalMatch>} matched indexes or null if no matches possible
*/
function _generateMatchList(query, str, specials, startingSpecial) {
function _generateMatchList(query, str, originalQuery, originalString, specials, startingSpecial) {
var result = [];

// used to keep track of which special character we're testing now
Expand Down Expand Up @@ -338,8 +344,13 @@ define(function (require, exports, module) {
// we look character by character for matches
if (query[queryCounter] === str[strCounter]) {
// got a match! record it, and switch back to searching specials
if (originalQuery[queryCounter] === originalString[strCounter]) {
result.push(new CaseMatch(strCounter++));
} else {
result.push(new NormalMatch(strCounter++));
}

queryCounter++;
result.push(new NormalMatch(strCounter++));
state = SPECIALS_MATCH;
} else {
// no match, keep looking
Expand Down Expand Up @@ -426,7 +437,7 @@ define(function (require, exports, module) {
* @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with
* @return {Array.<SpecialMatch|NormalMatch>} matched indexes or null if no matches possible
*/
function _wholeStringSearch(query, compareStr, specials, lastSegmentSpecialsIndex) {
function _wholeStringSearch(query, compareStr, originalQuery, originalString, specials, lastSegmentSpecialsIndex) {
var lastSegmentStart = specials[lastSegmentSpecialsIndex];
var result;
var matchList;
Expand Down Expand Up @@ -481,6 +492,7 @@ define(function (require, exports, module) {
scoreDebug = {
special: 0,
match: 0,
case: 0,
lastSegment: 0,
beginning: 0,
lengthDeduction: 0,
Expand Down Expand Up @@ -544,6 +556,13 @@ define(function (require, exports, module) {
}
newPoints += MATCH_POINTS;

if (match instanceof CaseMatch) {
if (DEBUG_SCORES) {
scoreDebug.case += MATCH_CASE_POINTS;
}
newPoints += MATCH_CASE_POINTS;
}

// A bonus is given for characters that match at the beginning
// of the filename
if (c === lastSegmentStart) {
Expand Down Expand Up @@ -664,7 +683,14 @@ define(function (require, exports, module) {
*/
function _prefixMatchResult(str, query) {
var result = new SearchResult(str);

result.matchGoodness = -Number.MAX_VALUE;

if (str.substr(0, query.length) !== query) {
// Penalize for not matching case
result.matchGoodness *= 0.5;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Compute a value that is large enough to push the non-case matches below the case matches.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: result.scoreDebug should probably reflect this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this below, but I'm not 100% certain what should be shown in the debug output here. Should it show just the starting value computed from whether or not the case matched, or should it show MAX_VALUE plus a separate deduction for case? Since it's just a really big number, showing how much was deducted doesn't give us much distinction, so showing only the starting value seemed sufficient to me, but I wanted to get your opinion.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was basically just thinking that scoreDebug should reflect the actual score, so you can get an idea of what you're looking at if you're trying to debug it.

}

if (DEBUG_SCORES) {
result.scoreDebug = {
beginning: Number.MAX_VALUE
Expand All @@ -684,7 +710,8 @@ define(function (require, exports, module) {
}
return result;
}



/*
* Match str against the query using the QuickOpen algorithm provided by
* the functions above. The general idea is to prefer matches of "special" characters and,
Expand Down Expand Up @@ -732,7 +759,7 @@ define(function (require, exports, module) {
}

// comparisons are case insensitive, so switch to lower case here
query = query.toLowerCase();
var queryStr = query.toLowerCase();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd recommend changing queryStr to queryLower or something to make it a bit clearer (also changing compareStr to strLower for consistency)

var compareStr = str.toLowerCase();

if (options.preferPrefixMatches) {
Expand All @@ -754,18 +781,18 @@ define(function (require, exports, module) {
// avoid some extra work
if (options.segmentedSearch) {
lastSegmentStart = special.specials[special.lastSegmentSpecialsIndex];
matchList = _wholeStringSearch(query, compareStr, special.specials,
matchList = _wholeStringSearch(queryStr, compareStr, query, str, special.specials,
special.lastSegmentSpecialsIndex);
} else {
lastSegmentStart = 0;
matchList = _generateMatchList(query, compareStr, special.specials,
matchList = _generateMatchList(queryStr, compareStr, query, str, special.specials,
0);
}

// If we get a match, turn this into a SearchResult as expected by the consumers
// of this API.
if (matchList) {
var compareData = _computeRangesAndScore(matchList, str, lastSegmentStart);
var compareData = _computeRangesAndScore(matchList, str, queryStr, lastSegmentStart);
result = new SearchResult(str);
result.stringRanges = compareData.ranges;
result.matchGoodness = -1 * compareData.matchGoodness;
Expand Down