This repository has been archived by the owner on Sep 6, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7.6k
Add String Matching Boost for Case Matches #9512
Closed
Closed
Changes from 5 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
8de2c26
Add Case Boost to StringMatch
JeffryBooher 450a3cb
rethink priorities
JeffryBooher 18887e6
touch of cleanup
JeffryBooher 04280f3
fix rte with quick open
JeffryBooher f05a14b
fix quick open
JeffryBooher 9aef4bd
Merge branch 'master' into jeff/CodeHIntsCaseBoost
JeffryBooher 753d622
Merge branch 'master' into jeff/CodeHintsCaseBoost
JeffryBooher 094e40f
down to just S-R-U test
JeffryBooher 86b945a
add test for non-case match
JeffryBooher 1916493
all tests passing
JeffryBooher 4b53f48
name change and jsdoc updates
JeffryBooher d441044
update debug data
JeffryBooher 959140d
a few other name changes
JeffryBooher 60fc868
Merge branch 'master' into jeff/CodeHintsCaseBoost
JeffryBooher File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -115,13 +115,15 @@ define(function (require, exports, module) { | |
DEBUG_SCORES = ds; | ||
} | ||
|
||
|
||
// Constants for scoring | ||
var SPECIAL_POINTS = 35; | ||
var MATCH_POINTS = 10; | ||
var LAST_SEGMENT_BOOST = 1; | ||
var MATCH_CASE_POINTS = 7; // Consecutive non-case matches have higher priority | ||
var CONSECUTIVE_MATCHES_POINTS = 8; | ||
var BEGINNING_OF_NAME_POINTS = 10; | ||
var LAST_SEGMENT_BOOST = 1; | ||
var DEDUCTION_FOR_LENGTH = 0.2; | ||
var CONSECUTIVE_MATCHES_POINTS = 7; | ||
var NOT_STARTING_ON_SPECIAL_PENALTY = 25; | ||
|
||
// Used in match lists to designate matches of "special" characters (see | ||
|
@@ -134,6 +136,11 @@ define(function (require, exports, module) { | |
function NormalMatch(index) { | ||
this.index = index; | ||
} | ||
|
||
// Used in match lists to designate any matched characters that are case-sensitive matches | ||
function CaseMatch(index) { | ||
this.index = index; | ||
} | ||
|
||
/* | ||
* Finds the best matches between the query and the string. The query is | ||
|
@@ -209,7 +216,7 @@ define(function (require, exports, module) { | |
* @param {int} startingSpecial index into specials array to start scanning with | ||
* @return {Array.<SpecialMatch|NormalMatch>} matched indexes or null if no matches possible | ||
*/ | ||
function _generateMatchList(query, str, specials, startingSpecial) { | ||
function _generateMatchList(query, str, originalQuery, originalString, specials, startingSpecial) { | ||
var result = []; | ||
|
||
// used to keep track of which special character we're testing now | ||
|
@@ -338,8 +345,13 @@ define(function (require, exports, module) { | |
// we look character by character for matches | ||
if (query[queryCounter] === str[strCounter]) { | ||
// got a match! record it, and switch back to searching specials | ||
if (originalQuery[queryCounter] === originalString[strCounter]) { | ||
result.push(new CaseMatch(strCounter++)); | ||
} else { | ||
result.push(new NormalMatch(strCounter++)); | ||
} | ||
|
||
queryCounter++; | ||
result.push(new NormalMatch(strCounter++)); | ||
state = SPECIALS_MATCH; | ||
} else { | ||
// no match, keep looking | ||
|
@@ -362,6 +374,7 @@ define(function (require, exports, module) { | |
return result; | ||
} | ||
|
||
|
||
/* | ||
* Seek out the best match in the last segment (generally the filename). | ||
* Matches in the filename are preferred, but the query entered could match | ||
|
@@ -381,22 +394,26 @@ define(function (require, exports, module) { | |
* @param {boolean} lastSegmentStart which character does the last segment start at | ||
* @return {{remainder:int, matchList:Array.<SpecialMatch|NormalMatch>}} matched indexes or null if no matches possible | ||
*/ | ||
function _lastSegmentSearch(query, str, specials, startingSpecial, lastSegmentStart) { | ||
function _lastSegmentSearch(query, str, originalQuery, originalString, specials, startingSpecial, lastSegmentStart) { | ||
var queryCounter, matchList; | ||
|
||
// It's possible that the query is longer than the last segment. | ||
// If so, we can chop off the bit that we know couldn't possibly be there. | ||
var remainder = ""; | ||
var extraCharacters = specials[startingSpecial] + query.length - str.length; | ||
var remainder = "", | ||
originalRemainder = "", | ||
extraCharacters = specials[startingSpecial] + query.length - str.length; | ||
|
||
if (extraCharacters > 0) { | ||
remainder = query.substring(0, extraCharacters); | ||
originalRemainder = originalQuery.substring(0, extraCharacters); | ||
query = query.substring(extraCharacters); | ||
originalQuery = originalQuery.substring(extraCharacters); | ||
} | ||
|
||
for (queryCounter = 0; queryCounter < query.length; queryCounter++) { | ||
matchList = _generateMatchList(query.substring(queryCounter), | ||
str, specials, startingSpecial); | ||
str, originalQuery.substring(queryCounter), | ||
originalString, specials, startingSpecial); | ||
|
||
// if we've got a match *or* there are no segments in this string, we're done | ||
if (matchList || startingSpecial === 0) { | ||
|
@@ -409,6 +426,7 @@ define(function (require, exports, module) { | |
} else { | ||
return { | ||
remainder: remainder + query.substring(0, queryCounter), | ||
originalRemainder: originalRemainder + originalQuery.substring(0, queryCounter), | ||
matchList: matchList | ||
}; | ||
} | ||
|
@@ -426,12 +444,12 @@ define(function (require, exports, module) { | |
* @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with | ||
* @return {Array.<SpecialMatch|NormalMatch>} matched indexes or null if no matches possible | ||
*/ | ||
function _wholeStringSearch(query, compareStr, specials, lastSegmentSpecialsIndex) { | ||
function _wholeStringSearch(query, compareStr, originalQuery, originalString, specials, lastSegmentSpecialsIndex) { | ||
var lastSegmentStart = specials[lastSegmentSpecialsIndex]; | ||
var result; | ||
var matchList; | ||
|
||
result = _lastSegmentSearch(query, compareStr, specials, lastSegmentSpecialsIndex, lastSegmentStart); | ||
result = _lastSegmentSearch(query, compareStr, originalQuery, originalString, specials, lastSegmentSpecialsIndex, lastSegmentStart); | ||
|
||
if (result) { | ||
matchList = result.matchList; | ||
|
@@ -441,6 +459,8 @@ define(function (require, exports, module) { | |
// Scan with the remainder only through the beginning of the last segment | ||
var remainderMatchList = _generateMatchList(result.remainder, | ||
compareStr.substring(0, lastSegmentStart), | ||
result.originalRemainder, | ||
originalString.substring(0, lastSegmentStart), | ||
specials.slice(0, lastSegmentSpecialsIndex), 0); | ||
|
||
if (remainderMatchList) { | ||
|
@@ -454,7 +474,7 @@ define(function (require, exports, module) { | |
} else { | ||
// No match in the last segment, so we start over searching the whole | ||
// string | ||
matchList = _generateMatchList(query, compareStr, specials, 0); | ||
matchList = _generateMatchList(query, compareStr, originalQuery, originalString, specials, 0); | ||
} | ||
|
||
return matchList; | ||
|
@@ -481,6 +501,7 @@ define(function (require, exports, module) { | |
scoreDebug = { | ||
special: 0, | ||
match: 0, | ||
case: 0, | ||
lastSegment: 0, | ||
beginning: 0, | ||
lengthDeduction: 0, | ||
|
@@ -544,6 +565,13 @@ define(function (require, exports, module) { | |
} | ||
newPoints += MATCH_POINTS; | ||
|
||
if (match instanceof CaseMatch) { | ||
if (DEBUG_SCORES) { | ||
scoreDebug.case += MATCH_CASE_POINTS; | ||
} | ||
newPoints += MATCH_CASE_POINTS; | ||
} | ||
|
||
// A bonus is given for characters that match at the beginning | ||
// of the filename | ||
if (c === lastSegmentStart) { | ||
|
@@ -664,7 +692,14 @@ define(function (require, exports, module) { | |
*/ | ||
function _prefixMatchResult(str, query) { | ||
var result = new SearchResult(str); | ||
|
||
result.matchGoodness = -Number.MAX_VALUE; | ||
|
||
if (str.substr(0, query.length) !== query) { | ||
// Penalize for not matching case | ||
result.matchGoodness *= 0.5; | ||
} | ||
|
||
if (DEBUG_SCORES) { | ||
result.scoreDebug = { | ||
beginning: Number.MAX_VALUE | ||
|
@@ -684,7 +719,8 @@ define(function (require, exports, module) { | |
} | ||
return result; | ||
} | ||
|
||
|
||
|
||
/* | ||
* Match str against the query using the QuickOpen algorithm provided by | ||
* the functions above. The general idea is to prefer matches of "special" characters and, | ||
|
@@ -732,14 +768,18 @@ define(function (require, exports, module) { | |
} | ||
|
||
// comparisons are case insensitive, so switch to lower case here | ||
query = query.toLowerCase(); | ||
var queryStr = query.toLowerCase(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd recommend changing |
||
var compareStr = str.toLowerCase(); | ||
|
||
if (options.preferPrefixMatches) { | ||
options.segmentedSearch = false; | ||
} | ||
|
||
if (options.preferPrefixMatches && compareStr.substr(0, query.length) === query) { | ||
if (options.preferPrefixMatches && compareStr.substr(0, queryStr.length) === queryStr) { | ||
// NOTE: we compare against the case insensitive match | ||
// above but we pass the case-sensitive version in | ||
// because we want to weight the match to give case-matches | ||
// a higher score | ||
return _prefixMatchResult(str, query); | ||
} | ||
|
||
|
@@ -754,14 +794,13 @@ define(function (require, exports, module) { | |
// avoid some extra work | ||
if (options.segmentedSearch) { | ||
lastSegmentStart = special.specials[special.lastSegmentSpecialsIndex]; | ||
matchList = _wholeStringSearch(query, compareStr, special.specials, | ||
matchList = _wholeStringSearch(queryStr, compareStr, query, str, special.specials, | ||
special.lastSegmentSpecialsIndex); | ||
} else { | ||
lastSegmentStart = 0; | ||
matchList = _generateMatchList(query, compareStr, special.specials, | ||
0); | ||
matchList = _generateMatchList(queryStr, compareStr, query, str, special.specials, 0); | ||
} | ||
|
||
// If we get a match, turn this into a SearchResult as expected by the consumers | ||
// of this API. | ||
if (matchList) { | ||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Compute a value that is enough to trickle the non-case matches below the case-matches
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
result.scoreDebug
should probably reflect this.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added this below but I'm not 100% certain what should be debugged here. Do we think it's important to just show the beginning value computed due to case match or not or have debug show the
MAX_VALUE
and a deduction for case. Since it's just a really big number it seems that we don't get much distinction from showing how much was deduced so just having the beginning value was enough for me in my thinking but I wanted to get your opinion.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was basically just thinking that
scoreDebug
should reflect the actual score, so you can get an idea of what you're looking at if you're trying to debug it.