Skip to content

Commit

Permalink
implement case-sensitive match sorting
Browse files Browse the repository at this point in the history
  • Loading branch information
leeoniya committed Nov 20, 2024
1 parent 8d8ec31 commit 832a73e
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 31 deletions.
43 changes: 36 additions & 7 deletions dist/uFuzzy.cjs.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ const OPTS = {
start,
intraIns,
interIns,
cases,
} = info;

return idx.map((v, i) => i).sort((ia, ib) => (
Expand All @@ -104,6 +105,8 @@ const OPTS = {
interIns[ia] - interIns[ib] ||
// earliest start of match
start[ia] - start[ib] ||
// case match
cases[ib] - cases[ia] ||
// alphabetic
cmp(haystack[idx[ia]], haystack[idx[ib]])
));
Expand Down Expand Up @@ -220,15 +223,18 @@ function uFuzzy(opts) {
let trimRe = new RegExp('^' + _interSplit + '|' + _interSplit + '$', 'g' + uFlag);
let contrsRe = new RegExp(intraContr, 'gi' + uFlag);

const split = needle => {
const split = (needle, keepCase = false) => {
let exacts = [];

needle = needle.replace(EXACTS_RE, m => {
exacts.push(m);
return EXACT_HERE;
});

needle = needle.replace(trimRe, '').toLocaleLowerCase();
needle = needle.replace(trimRe, '');

if (!keepCase)
needle = needle.toLocaleLowerCase();

if (withIntraSplit)
needle = needle.replace(intraSplit, m => m[0] + ' ' + m[1]);
Expand Down Expand Up @@ -414,9 +420,24 @@ function uFuzzy(opts) {
const info = (idxs, haystack, needle) => {

let [query, parts, contrs] = prepQuery(needle, 1);
let partsCased = split(needle, true);
let [queryR] = prepQuery(needle, 2);
let partsLen = parts.length;

let _terms = Array(partsLen);
let _termsCased = Array(partsLen);

for (let j = 0; j < partsLen; j++) {
let part = parts[j];
let partCased = partsCased[j];

let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j];
let termCased = partCased[0] == '"' ? partCased.slice(1, -1) : partCased + contrs[j];

_terms[j] = term;
_termsCased[j] = termCased;
}

let len = idxs.length;

let field = Array(len).fill(0);
Expand All @@ -433,6 +454,9 @@ function uFuzzy(opts) {
// contiguous chars matched
chars: field.slice(),

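// number of needle terms whose matched text equals the term exactly as typed, letter case included (m[k] == termCased)
// NOTE(review): this key is spelled `case`, but the match loop writes `info.cases[ii]` and the sorter reads `info.cases` - confirm it should be `cases`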
case: field.slice(),

// contiguous (no fuzz) and bounded terms (intra=0, lft2/1, rgt2/1)
// excludes terms that are contiguous but have < 2 bounds (substrings)
terms: field.slice(),
Expand Down Expand Up @@ -474,19 +498,23 @@ function uFuzzy(opts) {
let rgt1 = 0;
let chars = 0;
let terms = 0;
let cases = 0;
let inter = 0;
let intra = 0;

let refine = [];

for (let j = 0, k = 2; j < partsLen; j++, k+=2) {
let group = m[k].toLocaleLowerCase();
let part = parts[j];
let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j];
let termLen = term.length;
let groupLen = group.length;
let group = m[k].toLocaleLowerCase();
let term = _terms[j];
let termCased = _termsCased[j];
let termLen = term.length;
let groupLen = group.length;
let fullMatch = group == term;

if (m[k] == termCased)
cases++;

// this won't handle the case when an exact match exists across the boundary of the current group and the next junk
// e.g. blob,ob when searching for 'bob' but finding the earlier `blob` (with extra insertion)
if (!fullMatch && m[k+1].length >= termLen) {
Expand Down Expand Up @@ -636,6 +664,7 @@ function uFuzzy(opts) {
info.interRgt1[ii] = rgt1;
info.chars[ii] = chars;
info.terms[ii] = terms;
info.cases[ii] = cases;
info.interIns[ii] = inter;
info.intraIns[ii] = intra;

Expand Down
5 changes: 4 additions & 1 deletion dist/uFuzzy.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ declare class uFuzzy {
): uFuzzy.InfoIdxOrder;

/** utility for splitting needle into terms following defined interSplit/intraSplit opts. useful for out-of-order permutes */
split(needle: string): uFuzzy.Terms;
split(needle: string, keepCase?: boolean): uFuzzy.Terms;

/** util for creating out-of-order permutations of a needle terms array */
static permute(arr: unknown[]): unknown[][];
Expand Down Expand Up @@ -188,6 +188,9 @@ declare namespace uFuzzy {
/** number of exactly-matched terms (intra = 0) where both lft and rgt landed on a BoundMode.Loose or BoundMode.Strict boundary */
terms: number[];

/** number of needle terms whose matched haystack text has exactly the same letter case as typed in the needle */
cases: number[];

/** offset ranges within match for highlighting: [startIdx0, endIdx0, startIdx1, endIdx1,...] */
ranges: number[][];
}
Expand Down
43 changes: 36 additions & 7 deletions dist/uFuzzy.esm.js
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ const OPTS = {
start,
intraIns,
interIns,
cases,
} = info;

return idx.map((v, i) => i).sort((ia, ib) => (
Expand All @@ -102,6 +103,8 @@ const OPTS = {
interIns[ia] - interIns[ib] ||
// earliest start of match
start[ia] - start[ib] ||
// case match
cases[ib] - cases[ia] ||
// alphabetic
cmp(haystack[idx[ia]], haystack[idx[ib]])
));
Expand Down Expand Up @@ -218,15 +221,18 @@ function uFuzzy(opts) {
let trimRe = new RegExp('^' + _interSplit + '|' + _interSplit + '$', 'g' + uFlag);
let contrsRe = new RegExp(intraContr, 'gi' + uFlag);

const split = needle => {
const split = (needle, keepCase = false) => {
let exacts = [];

needle = needle.replace(EXACTS_RE, m => {
exacts.push(m);
return EXACT_HERE;
});

needle = needle.replace(trimRe, '').toLocaleLowerCase();
needle = needle.replace(trimRe, '');

if (!keepCase)
needle = needle.toLocaleLowerCase();

if (withIntraSplit)
needle = needle.replace(intraSplit, m => m[0] + ' ' + m[1]);
Expand Down Expand Up @@ -412,9 +418,24 @@ function uFuzzy(opts) {
const info = (idxs, haystack, needle) => {

let [query, parts, contrs] = prepQuery(needle, 1);
let partsCased = split(needle, true);
let [queryR] = prepQuery(needle, 2);
let partsLen = parts.length;

let _terms = Array(partsLen);
let _termsCased = Array(partsLen);

for (let j = 0; j < partsLen; j++) {
let part = parts[j];
let partCased = partsCased[j];

let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j];
let termCased = partCased[0] == '"' ? partCased.slice(1, -1) : partCased + contrs[j];

_terms[j] = term;
_termsCased[j] = termCased;
}

let len = idxs.length;

let field = Array(len).fill(0);
Expand All @@ -431,6 +452,9 @@ function uFuzzy(opts) {
// contiguous chars matched
chars: field.slice(),

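// number of needle terms whose matched text equals the term exactly as typed, letter case included (m[k] == termCased)
// NOTE(review): this key is spelled `case`, but the match loop writes `info.cases[ii]` and the sorter reads `info.cases` - confirm it should be `cases`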
case: field.slice(),

// contiguous (no fuzz) and bounded terms (intra=0, lft2/1, rgt2/1)
// excludes terms that are contiguous but have < 2 bounds (substrings)
terms: field.slice(),
Expand Down Expand Up @@ -472,19 +496,23 @@ function uFuzzy(opts) {
let rgt1 = 0;
let chars = 0;
let terms = 0;
let cases = 0;
let inter = 0;
let intra = 0;

let refine = [];

for (let j = 0, k = 2; j < partsLen; j++, k+=2) {
let group = m[k].toLocaleLowerCase();
let part = parts[j];
let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j];
let termLen = term.length;
let groupLen = group.length;
let group = m[k].toLocaleLowerCase();
let term = _terms[j];
let termCased = _termsCased[j];
let termLen = term.length;
let groupLen = group.length;
let fullMatch = group == term;

if (m[k] == termCased)
cases++;

// this won't handle the case when an exact match exists across the boundary of the current group and the next junk
// e.g. blob,ob when searching for 'bob' but finding the earlier `blob` (with extra insertion)
if (!fullMatch && m[k+1].length >= termLen) {
Expand Down Expand Up @@ -634,6 +662,7 @@ function uFuzzy(opts) {
info.interRgt1[ii] = rgt1;
info.chars[ii] = chars;
info.terms[ii] = terms;
info.cases[ii] = cases;
info.interIns[ii] = inter;
info.intraIns[ii] = intra;

Expand Down
43 changes: 36 additions & 7 deletions dist/uFuzzy.iife.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ var uFuzzy = (function () {
start,
intraIns,
interIns,
cases,
} = info;

return idx.map((v, i) => i).sort((ia, ib) => (
Expand All @@ -105,6 +106,8 @@ var uFuzzy = (function () {
interIns[ia] - interIns[ib] ||
// earliest start of match
start[ia] - start[ib] ||
// case match
cases[ib] - cases[ia] ||
// alphabetic
cmp(haystack[idx[ia]], haystack[idx[ib]])
));
Expand Down Expand Up @@ -221,15 +224,18 @@ var uFuzzy = (function () {
let trimRe = new RegExp('^' + _interSplit + '|' + _interSplit + '$', 'g' + uFlag);
let contrsRe = new RegExp(intraContr, 'gi' + uFlag);

const split = needle => {
const split = (needle, keepCase = false) => {
let exacts = [];

needle = needle.replace(EXACTS_RE, m => {
exacts.push(m);
return EXACT_HERE;
});

needle = needle.replace(trimRe, '').toLocaleLowerCase();
needle = needle.replace(trimRe, '');

if (!keepCase)
needle = needle.toLocaleLowerCase();

if (withIntraSplit)
needle = needle.replace(intraSplit, m => m[0] + ' ' + m[1]);
Expand Down Expand Up @@ -415,9 +421,24 @@ var uFuzzy = (function () {
const info = (idxs, haystack, needle) => {

let [query, parts, contrs] = prepQuery(needle, 1);
let partsCased = split(needle, true);
let [queryR] = prepQuery(needle, 2);
let partsLen = parts.length;

let _terms = Array(partsLen);
let _termsCased = Array(partsLen);

for (let j = 0; j < partsLen; j++) {
let part = parts[j];
let partCased = partsCased[j];

let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j];
let termCased = partCased[0] == '"' ? partCased.slice(1, -1) : partCased + contrs[j];

_terms[j] = term;
_termsCased[j] = termCased;
}

let len = idxs.length;

let field = Array(len).fill(0);
Expand All @@ -434,6 +455,9 @@ var uFuzzy = (function () {
// contiguous chars matched
chars: field.slice(),

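// number of needle terms whose matched text equals the term exactly as typed, letter case included (m[k] == termCased)
// NOTE(review): this key is spelled `case`, but the match loop writes `info.cases[ii]` and the sorter reads `info.cases` - confirm it should be `cases`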
case: field.slice(),

// contiguous (no fuzz) and bounded terms (intra=0, lft2/1, rgt2/1)
// excludes terms that are contiguous but have < 2 bounds (substrings)
terms: field.slice(),
Expand Down Expand Up @@ -475,19 +499,23 @@ var uFuzzy = (function () {
let rgt1 = 0;
let chars = 0;
let terms = 0;
let cases = 0;
let inter = 0;
let intra = 0;

let refine = [];

for (let j = 0, k = 2; j < partsLen; j++, k+=2) {
let group = m[k].toLocaleLowerCase();
let part = parts[j];
let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j];
let termLen = term.length;
let groupLen = group.length;
let group = m[k].toLocaleLowerCase();
let term = _terms[j];
let termCased = _termsCased[j];
let termLen = term.length;
let groupLen = group.length;
let fullMatch = group == term;

if (m[k] == termCased)
cases++;

// this won't handle the case when an exact match exists across the boundary of the current group and the next junk
// e.g. blob,ob when searching for 'bob' but finding the earlier `blob` (with extra insertion)
if (!fullMatch && m[k+1].length >= termLen) {
Expand Down Expand Up @@ -637,6 +665,7 @@ var uFuzzy = (function () {
info.interRgt1[ii] = rgt1;
info.chars[ii] = chars;
info.terms[ii] = terms;
info.cases[ii] = cases;
info.interIns[ii] = inter;
info.intraIns[ii] = intra;

Expand Down
Loading

0 comments on commit 832a73e

Please sign in to comment.