From 832a73edfef05138d77c1ac3c015e41ffc795655 Mon Sep 17 00:00:00 2001 From: Leon Sorokin Date: Tue, 19 Nov 2024 18:50:05 -0600 Subject: [PATCH] implement case-sensitive match sorting --- dist/uFuzzy.cjs.js | 43 ++++++++++++++++++++++++++++++++++------- dist/uFuzzy.d.ts | 5 ++++- dist/uFuzzy.esm.js | 43 ++++++++++++++++++++++++++++++++++------- dist/uFuzzy.iife.js | 43 ++++++++++++++++++++++++++++++++++------- dist/uFuzzy.iife.min.js | 2 +- package.json | 2 +- src/uFuzzy.mjs | 43 ++++++++++++++++++++++++++++++++++------- 7 files changed, 150 insertions(+), 31 deletions(-) diff --git a/dist/uFuzzy.cjs.js b/dist/uFuzzy.cjs.js index 7071e9c..4bb1085 100644 --- a/dist/uFuzzy.cjs.js +++ b/dist/uFuzzy.cjs.js @@ -86,6 +86,7 @@ const OPTS = { start, intraIns, interIns, + cases, } = info; return idx.map((v, i) => i).sort((ia, ib) => ( @@ -104,6 +105,8 @@ const OPTS = { interIns[ia] - interIns[ib] || // earliest start of match start[ia] - start[ib] || + // case match + cases[ib] - cases[ia] || // alphabetic cmp(haystack[idx[ia]], haystack[idx[ib]]) )); @@ -220,7 +223,7 @@ function uFuzzy(opts) { let trimRe = new RegExp('^' + _interSplit + '|' + _interSplit + '$', 'g' + uFlag); let contrsRe = new RegExp(intraContr, 'gi' + uFlag); - const split = needle => { + const split = (needle, keepCase = false) => { let exacts = []; needle = needle.replace(EXACTS_RE, m => { @@ -228,7 +231,10 @@ function uFuzzy(opts) { return EXACT_HERE; }); - needle = needle.replace(trimRe, '').toLocaleLowerCase(); + needle = needle.replace(trimRe, ''); + + if (!keepCase) + needle = needle.toLocaleLowerCase(); if (withIntraSplit) needle = needle.replace(intraSplit, m => m[0] + ' ' + m[1]); @@ -414,9 +420,24 @@ function uFuzzy(opts) { const info = (idxs, haystack, needle) => { let [query, parts, contrs] = prepQuery(needle, 1); + let partsCased = split(needle, true); let [queryR] = prepQuery(needle, 2); let partsLen = parts.length; + let _terms = Array(partsLen); + let _termsCased = Array(partsLen); + + 
for (let j = 0; j < partsLen; j++) { + let part = parts[j]; + let partCased = partsCased[j]; + + let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j]; + let termCased = partCased[0] == '"' ? partCased.slice(1, -1) : partCased + contrs[j]; + + _terms[j] = term; + _termsCased[j] = termCased; + } + let len = idxs.length; let field = Array(len).fill(0); @@ -433,6 +454,9 @@ function uFuzzy(opts) { // contiguous chars matched chars: field.slice(), + // number of terms whose matched text equals the needle term case-sensitively + cases: field.slice(), + // contiguous (no fuzz) and bounded terms (intra=0, lft2/1, rgt2/1) // excludes terms that are contiguous but have < 2 bounds (substrings) terms: field.slice(), @@ -474,19 +498,23 @@ function uFuzzy(opts) { let rgt1 = 0; let chars = 0; let terms = 0; + let cases = 0; let inter = 0; let intra = 0; let refine = []; for (let j = 0, k = 2; j < partsLen; j++, k+=2) { - let group = m[k].toLocaleLowerCase(); - let part = parts[j]; - let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j]; - let termLen = term.length; - let groupLen = group.length; + let group = m[k].toLocaleLowerCase(); + let term = _terms[j]; + let termCased = _termsCased[j]; + let termLen = term.length; + let groupLen = group.length; let fullMatch = group == term; + if (m[k] == termCased) + cases++; + // this won't handle the case when an exact match exists across the boundary of the current group and the next junk // e.g. 
blob,ob when searching for 'bob' but finding the earlier `blob` (with extra insertion) if (!fullMatch && m[k+1].length >= termLen) { @@ -636,6 +664,7 @@ function uFuzzy(opts) { info.interRgt1[ii] = rgt1; info.chars[ii] = chars; info.terms[ii] = terms; + info.cases[ii] = cases; info.interIns[ii] = inter; info.intraIns[ii] = intra; diff --git a/dist/uFuzzy.d.ts b/dist/uFuzzy.d.ts index 5b9b331..ab9b03d 100644 --- a/dist/uFuzzy.d.ts +++ b/dist/uFuzzy.d.ts @@ -34,7 +34,7 @@ declare class uFuzzy { ): uFuzzy.InfoIdxOrder; /** utility for splitting needle into terms following defined interSplit/intraSplit opts. useful for out-of-order permutes */ - split(needle: string): uFuzzy.Terms; + split(needle: string, keepCase?: boolean): uFuzzy.Terms; /** util for creating out-of-order permutations of a needle terms array */ static permute(arr: unknown[]): unknown[][]; @@ -188,6 +188,9 @@ declare namespace uFuzzy { /** number of exactly-matched terms (intra = 0) where both lft and rgt landed on a BoundMode.Loose or BoundMode.Strict boundary */ terms: number[]; + /** number of needle terms with case-sensitive partial matches */ + cases: number[]; + /** offset ranges within match for highlighting: [startIdx0, endIdx0, startIdx1, endIdx1,...] 
*/ ranges: number[][]; } diff --git a/dist/uFuzzy.esm.js b/dist/uFuzzy.esm.js index 484a4d5..a57156c 100644 --- a/dist/uFuzzy.esm.js +++ b/dist/uFuzzy.esm.js @@ -84,6 +84,7 @@ const OPTS = { start, intraIns, interIns, + cases, } = info; return idx.map((v, i) => i).sort((ia, ib) => ( @@ -102,6 +103,8 @@ const OPTS = { interIns[ia] - interIns[ib] || // earliest start of match start[ia] - start[ib] || + // case match + cases[ib] - cases[ia] || // alphabetic cmp(haystack[idx[ia]], haystack[idx[ib]]) )); @@ -218,7 +221,7 @@ function uFuzzy(opts) { let trimRe = new RegExp('^' + _interSplit + '|' + _interSplit + '$', 'g' + uFlag); let contrsRe = new RegExp(intraContr, 'gi' + uFlag); - const split = needle => { + const split = (needle, keepCase = false) => { let exacts = []; needle = needle.replace(EXACTS_RE, m => { @@ -226,7 +229,10 @@ function uFuzzy(opts) { return EXACT_HERE; }); - needle = needle.replace(trimRe, '').toLocaleLowerCase(); + needle = needle.replace(trimRe, ''); + + if (!keepCase) + needle = needle.toLocaleLowerCase(); if (withIntraSplit) needle = needle.replace(intraSplit, m => m[0] + ' ' + m[1]); @@ -412,9 +418,24 @@ function uFuzzy(opts) { const info = (idxs, haystack, needle) => { let [query, parts, contrs] = prepQuery(needle, 1); + let partsCased = split(needle, true); let [queryR] = prepQuery(needle, 2); let partsLen = parts.length; + let _terms = Array(partsLen); + let _termsCased = Array(partsLen); + + for (let j = 0; j < partsLen; j++) { + let part = parts[j]; + let partCased = partsCased[j]; + + let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j]; + let termCased = partCased[0] == '"' ? 
partCased.slice(1, -1) : partCased + contrs[j]; + + _terms[j] = term; + _termsCased[j] = termCased; + } + let len = idxs.length; let field = Array(len).fill(0); @@ -431,6 +452,9 @@ function uFuzzy(opts) { // contiguous chars matched chars: field.slice(), + // number of terms whose matched text equals the needle term case-sensitively + cases: field.slice(), + // contiguous (no fuzz) and bounded terms (intra=0, lft2/1, rgt2/1) // excludes terms that are contiguous but have < 2 bounds (substrings) terms: field.slice(), @@ -472,19 +496,23 @@ function uFuzzy(opts) { let rgt1 = 0; let chars = 0; let terms = 0; + let cases = 0; let inter = 0; let intra = 0; let refine = []; for (let j = 0, k = 2; j < partsLen; j++, k+=2) { - let group = m[k].toLocaleLowerCase(); - let part = parts[j]; - let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j]; - let termLen = term.length; - let groupLen = group.length; + let group = m[k].toLocaleLowerCase(); + let term = _terms[j]; + let termCased = _termsCased[j]; + let termLen = term.length; + let groupLen = group.length; let fullMatch = group == term; + if (m[k] == termCased) + cases++; + // this won't handle the case when an exact match exists across the boundary of the current group and the next junk // e.g. 
blob,ob when searching for 'bob' but finding the earlier `blob` (with extra insertion) if (!fullMatch && m[k+1].length >= termLen) { @@ -634,6 +662,7 @@ function uFuzzy(opts) { info.interRgt1[ii] = rgt1; info.chars[ii] = chars; info.terms[ii] = terms; + info.cases[ii] = cases; info.interIns[ii] = inter; info.intraIns[ii] = intra; diff --git a/dist/uFuzzy.iife.js b/dist/uFuzzy.iife.js index 5d46afc..9d175bc 100644 --- a/dist/uFuzzy.iife.js +++ b/dist/uFuzzy.iife.js @@ -87,6 +87,7 @@ var uFuzzy = (function () { start, intraIns, interIns, + cases, } = info; return idx.map((v, i) => i).sort((ia, ib) => ( @@ -105,6 +106,8 @@ var uFuzzy = (function () { interIns[ia] - interIns[ib] || // earliest start of match start[ia] - start[ib] || + // case match + cases[ib] - cases[ia] || // alphabetic cmp(haystack[idx[ia]], haystack[idx[ib]]) )); @@ -221,7 +224,7 @@ var uFuzzy = (function () { let trimRe = new RegExp('^' + _interSplit + '|' + _interSplit + '$', 'g' + uFlag); let contrsRe = new RegExp(intraContr, 'gi' + uFlag); - const split = needle => { + const split = (needle, keepCase = false) => { let exacts = []; needle = needle.replace(EXACTS_RE, m => { @@ -229,7 +232,10 @@ var uFuzzy = (function () { return EXACT_HERE; }); - needle = needle.replace(trimRe, '').toLocaleLowerCase(); + needle = needle.replace(trimRe, ''); + + if (!keepCase) + needle = needle.toLocaleLowerCase(); if (withIntraSplit) needle = needle.replace(intraSplit, m => m[0] + ' ' + m[1]); @@ -415,9 +421,24 @@ var uFuzzy = (function () { const info = (idxs, haystack, needle) => { let [query, parts, contrs] = prepQuery(needle, 1); + let partsCased = split(needle, true); let [queryR] = prepQuery(needle, 2); let partsLen = parts.length; + let _terms = Array(partsLen); + let _termsCased = Array(partsLen); + + for (let j = 0; j < partsLen; j++) { + let part = parts[j]; + let partCased = partsCased[j]; + + let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j]; + let termCased = partCased[0] == '"' ? 
partCased.slice(1, -1) : partCased + contrs[j]; + + _terms[j] = term; + _termsCased[j] = termCased; + } + let len = idxs.length; let field = Array(len).fill(0); @@ -434,6 +455,9 @@ var uFuzzy = (function () { // contiguous chars matched chars: field.slice(), + // number of terms whose matched text equals the needle term case-sensitively + cases: field.slice(), + // contiguous (no fuzz) and bounded terms (intra=0, lft2/1, rgt2/1) // excludes terms that are contiguous but have < 2 bounds (substrings) terms: field.slice(), @@ -475,19 +499,23 @@ var uFuzzy = (function () { let rgt1 = 0; let chars = 0; let terms = 0; + let cases = 0; let inter = 0; let intra = 0; let refine = []; for (let j = 0, k = 2; j < partsLen; j++, k+=2) { - let group = m[k].toLocaleLowerCase(); - let part = parts[j]; - let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j]; - let termLen = term.length; - let groupLen = group.length; + let group = m[k].toLocaleLowerCase(); + let term = _terms[j]; + let termCased = _termsCased[j]; + let termLen = term.length; + let groupLen = group.length; let fullMatch = group == term; + if (m[k] == termCased) + cases++; + // this won't handle the case when an exact match exists across the boundary of the current group and the next junk // e.g. blob,ob when searching for 'bob' but finding the earlier `blob` (with extra insertion) if (!fullMatch && m[k+1].length >= termLen) { @@ -637,6 +665,7 @@ var uFuzzy = (function () { info.interRgt1[ii] = rgt1; info.chars[ii] = chars; info.terms[ii] = terms; + info.cases[ii] = cases; info.interIns[ii] = inter; info.intraIns[ii] = intra; diff --git a/dist/uFuzzy.iife.min.js b/dist/uFuzzy.iife.min.js index 13b2dd1..aa30c3e 100644 --- a/dist/uFuzzy.iife.min.js +++ b/dist/uFuzzy.iife.min.js @@ -1,2 +1,2 @@ /*! 
https://github.com/leeoniya/uFuzzy (v1.0.14) */ -var uFuzzy=function(){"use strict";const e=new Intl.Collator("en",{numeric:!0,sensitivity:"base"}).compare,t=1/0,l=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),n="eexxaacctt",r=/\p{P}/gu,i=(e,t,l)=>e.replace("A-Z",t).replace("a-z",l),s={unicode:!1,alpha:null,interSplit:"[^A-Za-z\\d']+",intraSplit:"[a-z][A-Z]",interBound:"[^A-Za-z\\d]",intraBound:"[A-Za-z]\\d|\\d[A-Za-z]|[a-z][A-Z]",interLft:0,interRgt:0,interChars:".",interIns:t,intraChars:"[a-z\\d']",intraIns:null,intraContr:"'[a-z]{1,2}\\b",intraMode:0,intraSlice:[1,t],intraSub:null,intraTrn:null,intraDel:null,intraFilt:()=>!0,sort:(t,l)=>{let{idx:n,chars:r,terms:i,interLft2:s,interLft1:a,start:g,intraIns:u,interIns:f}=t;return n.map(((e,t)=>t)).sort(((t,h)=>r[h]-r[t]||u[t]-u[h]||i[h]+s[h]+.5*a[h]-(i[t]+s[t]+.5*a[t])||f[t]-f[h]||g[t]-g[h]||e(l[n[t]],l[n[h]])))}},a=(e,l)=>0==l?"":1==l?e+"??":l==t?e+"*?":e+`{0,${l}}?`,g="(?:\\b|_)";function u(e){e=Object.assign({},s,e);let{unicode:t,interLft:u,interRgt:f,intraMode:c,intraSlice:o,intraIns:p,intraSub:d,intraTrn:m,intraDel:x,intraContr:b,intraSplit:R,interSplit:L,intraBound:A,interBound:S,intraChars:z}=e;p??=c,d??=c,m??=c,x??=c;let E=e.letters??e.alpha;if(null!=E){let e=E.toLocaleUpperCase(),t=E.toLocaleLowerCase();L=i(L,e,t),R=i(R,e,t),S=i(S,e,t),A=i(A,e,t),z=i(z,e,t),b=i(b,e,t)}let I=t?"u":"";const C='".+?"',y=RegExp(C,"gi"+I),k=RegExp(`(?:\\s+|^)-(?:${z}+|${C})`,"gi"+I);let{intraRules:j}=e;null==j&&(j=e=>{let t=s.intraSlice,l=0,n=0,r=0,i=0;if(/[^\d]/.test(e)){let s=e.length;s>4?(t=o,l=p,n=d,r=m,i=x):3>s||(r=Math.min(m,1),4==s&&(l=Math.min(p,1)))}return{intraSlice:t,intraIns:l,intraSub:n,intraTrn:r,intraDel:i}});let Z=!!R,$=RegExp(R,"g"+I),w=RegExp(L,"g"+I),M=RegExp("^"+L+"|"+L+"$","g"+I),B=RegExp(b,"gi"+I);const D=e=>{let t=[];e=(e=e.replace(y,(e=>(t.push(e),n)))).replace(M,"").toLocaleLowerCase(),Z&&(e=e.replace($,(e=>e[0]+" "+e[1])));let l=0;return 
e.split(w).filter((e=>""!=e)).map((e=>e===n?t[l++]:e))},T=/[^\d]+|\d+/g,F=(t,n=0,r=!1)=>{let i=D(t);if(0==i.length)return[];let s,h=Array(i.length).fill("");if(i=i.map(((e,t)=>e.replace(B,(e=>(h[t]=e,""))))),1==c)s=i.map(((e,t)=>{if('"'===e[0])return l(e.slice(1,-1));let n="";for(let l of e.matchAll(T)){let e=l[0],{intraSlice:r,intraIns:i,intraSub:s,intraTrn:g,intraDel:u}=j(e);if(i+s+g+u==0)n+=e+h[t];else{let[l,f]=r,c=e.slice(0,l),o=e.slice(f),p=e.slice(l,f);1==i&&1==c.length&&c!=p[0]&&(c+="(?!"+c+")");let d=p.length,m=[e];if(s)for(let e=0;d>e;e++)m.push(c+p.slice(0,e)+z+p.slice(e+1)+o);if(g)for(let e=0;d-1>e;e++)p[e]!=p[e+1]&&m.push(c+p.slice(0,e)+p[e+1]+p[e]+p.slice(e+2)+o);if(u)for(let e=0;d>e;e++)m.push(c+p.slice(0,e+1)+"?"+p.slice(e+1)+o);if(i){let e=a(z,1);for(let t=0;d>t;t++)m.push(c+p.slice(0,t)+e+p.slice(t)+o)}n+="(?:"+m.join("|")+")"+h[t]}}return n}));else{let e=a(z,p);2==n&&p>0&&(e=")("+e+")("),s=i.map(((t,n)=>'"'===t[0]?l(t.slice(1,-1)):t.split("").map(((e,t,l)=>(1==p&&0==t&&l.length>1&&e!=l[t+1]&&(e+="(?!"+e+")"),e))).join(e)+h[n]))}let o=2==u?g:"",d=2==f?g:"",m=d+a(e.interChars,e.interIns)+o;return n>0?r?s=o+"("+s.join(")"+d+"|"+o+"(")+")"+d:(s="("+s.join(")("+m+")(")+")",s="(.??"+o+")"+s+"("+d+".*)"):(s=s.join(m),s=o+s+d),[RegExp(s,"i"+I),i,h]},O=(e,t,l)=>{let[n]=F(t);if(null==n)return null;let r=[];if(null!=l)for(let t=0;l.length>t;t++){let i=l[t];n.test(e[i])&&r.push(i)}else for(let t=0;e.length>t;t++)n.test(e[t])&&r.push(t);return r};let v=!!A,U=RegExp(S,I),N=RegExp(A,I);const P=(t,l,n)=>{let[r,i,s]=F(n,1),[a]=F(n,2),g=i.length,h=t.length,c=Array(h).fill(0),o={idx:Array(h),start:c.slice(),chars:c.slice(),terms:c.slice(),interIns:c.slice(),intraIns:c.slice(),interLft2:c.slice(),interRgt2:c.slice(),interLft1:c.slice(),interRgt1:c.slice(),ranges:Array(h)},p=1==u||1==f,d=0;for(let n=0;t.length>n;n++){let h=l[t[n]],c=h.match(r),m=c.index+c[1].length,x=m,b=!1,R=0,L=0,A=0,S=0,z=0,E=0,C=0,y=0,k=[];for(let t=0,l=2;g>t;t++,l+=2){let 
n=c[l].toLocaleLowerCase(),r=i[t],a='"'==r[0]?r.slice(1,-1):r+s[t],o=a.length,d=n.length,j=n==a;if(!j&&c[l+1].length>=o){let e=c[l+1].toLocaleLowerCase().indexOf(a);e>-1&&(k.push(x,d,e,o),x+=_(c,l,e,o),n=a,d=o,j=!0,0==t&&(m=x))}if(p||j){let e=x-1,r=x+d,i=!1,s=!1;if(-1==e||U.test(h[e]))j&&R++,i=!0;else{if(2==u){b=!0;break}if(v&&N.test(h[e]+h[e+1]))j&&L++,i=!0;else if(1==u){let e=c[l+1],r=x+d;if(e.length>=o){let s,g=0,u=!1,f=RegExp(a,"ig"+I);for(;s=f.exec(e);){g=s.index;let e=r+g,t=e-1;if(-1==t||U.test(h[t])){R++,u=!0;break}if(N.test(h[t]+h[e])){L++,u=!0;break}}u&&(i=!0,k.push(x,d,g,o),x+=_(c,l,g,o),n=a,d=o,j=!0,0==t&&(m=x))}if(!i){b=!0;break}}}if(r==h.length||U.test(h[r]))j&&A++,s=!0;else{if(2==f){b=!0;break}if(v&&N.test(h[r-1]+h[r]))j&&S++,s=!0;else if(1==f){b=!0;break}}j&&(z+=o,i&&s&&E++)}if(d>o&&(y+=d-o),t>0&&(C+=c[l-1].length),!e.intraFilt(a,n,x)){b=!0;break}g-1>t&&(x+=d+c[l+1].length)}if(!b){o.idx[d]=t[n],o.interLft2[d]=R,o.interLft1[d]=L,o.interRgt2[d]=A,o.interRgt1[d]=S,o.chars[d]=z,o.terms[d]=E,o.interIns[d]=C,o.intraIns[d]=y,o.start[d]=m;let e=h.match(a),l=e.index+e[1].length,r=k.length,i=r>0?0:1/0,s=r-4;for(let t=2;e.length>t;)if(i>s||k[i]!=l)l+=e[t].length,t++;else{let n=k[i+1],r=k[i+2],s=k[i+3],a=t,g="";for(let t=0;n>t;a++)g+=e[a],t+=e[a].length;e.splice(t,a-t,g),l+=_(e,t,r,s),i+=4}l=e.index+e[1].length;let g=o.ranges[d]=[],u=l,f=l;for(let t=2;e.length>t;t++){let n=e[t].length;l+=n,t%2==0?f=l:n>0&&(g.push(u,f),u=f=l)}f>u&&g.push(u,f),d++}}if(t.length>d)for(let e in o)o[e]=o[e].slice(0,d);return o},_=(e,t,l,n)=>{let r=e[t]+e[t+1].slice(0,l);return e[t-1]+=r,e[t]=e[t+1].slice(l,l+n),e[t+1]=e[t+1].slice(l+n),r.length};return{search:(...t)=>((t,n,i,s=1e3,a)=>{i=i?!0===i?5:i:0;let g=null,u=null,f=[];n=n.replace(k,(e=>{let t=e.trim().slice(1);return t='"'===t[0]?l(t.slice(1,-1)):t.replace(r,""),""!=t&&f.push(t),""}));let c,o=D(n);if(f.length>0){if(c=RegExp(f.join("|"),"i"+I),0==o.length){let e=[];for(let 
l=0;t.length>l;l++)c.test(t[l])||e.push(l);return[e,null,null]}}else if(0==o.length)return[null,null,null];if(i>0){let e=D(n);if(e.length>1){let l=e.slice().sort(((e,t)=>t.length-e.length));for(let e=0;l.length>e;e++){if(0==a?.length)return[[],null,null];a=O(t,l[e],a)}if(e.length>i)return[a,null,null];g=h(e).map((e=>e.join(" "))),u=[];let n=new Set;for(let e=0;g.length>e;e++)if(a.length>n.size){let l=a.filter((e=>!n.has(e))),r=O(t,g[e],l);for(let e=0;r.length>e;e++)n.add(r[e]);u.push(r)}else u.push([])}}null==g&&(g=[n],u=[a?.length>0?a:O(t,n)]);let p=null,d=null;if(f.length>0&&(u=u.map((e=>e.filter((e=>!c.test(t[e])))))),s>=u.reduce(((e,t)=>e+t.length),0)){p={},d=[];for(let l=0;u.length>l;l++){let n=u[l];if(null==n||0==n.length)continue;let r=g[l],i=P(n,t,r),s=e.sort(i,t,r);if(l>0)for(let e=0;s.length>e;e++)s[e]+=d.length;for(let e in i)p[e]=(p[e]??[]).concat(i[e]);d=d.concat(s)}}return[[].concat(...u),p,d]})(...t),split:D,filter:O,info:P,sort:e.sort}}const f=(()=>{let e={A:"ÁÀÃÂÄĄ",a:"áàãâäą",E:"ÉÈÊËĖ",e:"éèêëę",I:"ÍÌÎÏĮ",i:"íìîïį",O:"ÓÒÔÕÖ",o:"óòôõö",U:"ÚÙÛÜŪŲ",u:"úùûüūų",C:"ÇČĆ",c:"çčć",L:"Ł",l:"ł",N:"ÑŃ",n:"ñń",S:"ŠŚ",s:"šś",Z:"ŻŹ",z:"żź"},t=new Map,l="";for(let n in e)e[n].split("").forEach((e=>{l+=e,t.set(e,n)}));let n=RegExp(`[${l}]`,"g"),r=e=>t.get(e);return e=>{if("string"==typeof e)return e.replace(n,r);let t=Array(e.length);for(let l=0;e.length>l;l++)t[l]=e[l].replace(n,r);return t}})();function h(e){let t,l,n=(e=e.slice()).length,r=[e.slice()],i=Array(n).fill(0),s=1;for(;n>s;)s>i[s]?(t=s%2&&i[s],l=e[s],e[s]=e[t],e[t]=l,++i[s],s=1,r.push(e.slice())):(i[s]=0,++s);return r}const c=(e,t)=>t?`${e}`:e,o=(e,t)=>e+t;return u.latinize=f,u.permute=e=>h([...Array(e.length).keys()]).sort(((e,t)=>{for(let l=0;e.length>l;l++)if(e[l]!=t[l])return e[l]-t[l];return 0})).map((t=>t.map((t=>e[t])))),u.highlight=function(e,t,l=c,n="",r=o){n=r(n,l(e.substring(0,t[0]),!1))??n;for(let 
i=0;t.length>i;i+=2)n=r(n,l(e.substring(t[i],t[i+1]),!0))??n,t.length-3>i&&(n=r(n,l(e.substring(t[i+1],t[i+2]),!1))??n);return r(n,l(e.substring(t[t.length-1]),!1))??n},u}(); +var uFuzzy=function(){"use strict";const e=new Intl.Collator("en",{numeric:!0,sensitivity:"base"}).compare,t=1/0,l=e=>e.replace(/[.*+?^${}()|[\]\\]/g,"\\$&"),n="eexxaacctt",r=/\p{P}/gu,i=(e,t,l)=>e.replace("A-Z",t).replace("a-z",l),s={unicode:!1,alpha:null,interSplit:"[^A-Za-z\\d']+",intraSplit:"[a-z][A-Z]",interBound:"[^A-Za-z\\d]",intraBound:"[A-Za-z]\\d|\\d[A-Za-z]|[a-z][A-Z]",interLft:0,interRgt:0,interChars:".",interIns:t,intraChars:"[a-z\\d']",intraIns:null,intraContr:"'[a-z]{1,2}\\b",intraMode:0,intraSlice:[1,t],intraSub:null,intraTrn:null,intraDel:null,intraFilt:()=>!0,sort:(t,l)=>{let{idx:n,chars:r,terms:i,interLft2:s,interLft1:a,start:g,intraIns:c,interIns:u,cases:f}=t;return n.map(((e,t)=>t)).sort(((t,h)=>r[h]-r[t]||c[t]-c[h]||i[h]+s[h]+.5*a[h]-(i[t]+s[t]+.5*a[t])||u[t]-u[h]||g[t]-g[h]||f[h]-f[t]||e(l[n[t]],l[n[h]])))}},a=(e,l)=>0==l?"":1==l?e+"??":l==t?e+"*?":e+`{0,${l}}?`,g="(?:\\b|_)";function c(e){e=Object.assign({},s,e);let{unicode:t,interLft:c,interRgt:u,intraMode:h,intraSlice:o,intraIns:p,intraSub:d,intraTrn:m,intraDel:x,intraContr:b,intraSplit:R,interSplit:A,intraBound:L,interBound:S,intraChars:z}=e;p??=h,d??=h,m??=h,x??=h;let E=e.letters??e.alpha;if(null!=E){let e=E.toLocaleUpperCase(),t=E.toLocaleLowerCase();A=i(A,e,t),R=i(R,e,t),S=i(S,e,t),L=i(L,e,t),z=i(z,e,t),b=i(b,e,t)}let I=t?"u":"";const y='".+?"',C=RegExp(y,"gi"+I),k=RegExp(`(?:\\s+|^)-(?:${z}+|${y})`,"gi"+I);let{intraRules:j}=e;null==j&&(j=e=>{let t=s.intraSlice,l=0,n=0,r=0,i=0;if(/[^\d]/.test(e)){let s=e.length;s>4?(t=o,l=p,n=d,r=m,i=x):3>s||(r=Math.min(m,1),4==s&&(l=Math.min(p,1)))}return{intraSlice:t,intraIns:l,intraSub:n,intraTrn:r,intraDel:i}});let Z=!!R,$=RegExp(R,"g"+I),w=RegExp(A,"g"+I),M=RegExp("^"+A+"|"+A+"$","g"+I),B=RegExp(b,"gi"+I);const D=(e,t=!1)=>{let 
l=[];e=(e=e.replace(C,(e=>(l.push(e),n)))).replace(M,""),t||(e=e.toLocaleLowerCase()),Z&&(e=e.replace($,(e=>e[0]+" "+e[1])));let r=0;return e.split(w).filter((e=>""!=e)).map((e=>e===n?l[r++]:e))},T=/[^\d]+|\d+/g,F=(t,n=0,r=!1)=>{let i=D(t);if(0==i.length)return[];let s,f=Array(i.length).fill("");if(i=i.map(((e,t)=>e.replace(B,(e=>(f[t]=e,""))))),1==h)s=i.map(((e,t)=>{if('"'===e[0])return l(e.slice(1,-1));let n="";for(let l of e.matchAll(T)){let e=l[0],{intraSlice:r,intraIns:i,intraSub:s,intraTrn:g,intraDel:c}=j(e);if(i+s+g+c==0)n+=e+f[t];else{let[l,u]=r,h=e.slice(0,l),o=e.slice(u),p=e.slice(l,u);1==i&&1==h.length&&h!=p[0]&&(h+="(?!"+h+")");let d=p.length,m=[e];if(s)for(let e=0;d>e;e++)m.push(h+p.slice(0,e)+z+p.slice(e+1)+o);if(g)for(let e=0;d-1>e;e++)p[e]!=p[e+1]&&m.push(h+p.slice(0,e)+p[e+1]+p[e]+p.slice(e+2)+o);if(c)for(let e=0;d>e;e++)m.push(h+p.slice(0,e+1)+"?"+p.slice(e+1)+o);if(i){let e=a(z,1);for(let t=0;d>t;t++)m.push(h+p.slice(0,t)+e+p.slice(t)+o)}n+="(?:"+m.join("|")+")"+f[t]}}return n}));else{let e=a(z,p);2==n&&p>0&&(e=")("+e+")("),s=i.map(((t,n)=>'"'===t[0]?l(t.slice(1,-1)):t.split("").map(((e,t,l)=>(1==p&&0==t&&l.length>1&&e!=l[t+1]&&(e+="(?!"+e+")"),e))).join(e)+f[n]))}let o=2==c?g:"",d=2==u?g:"",m=d+a(e.interChars,e.interIns)+o;return n>0?r?s=o+"("+s.join(")"+d+"|"+o+"(")+")"+d:(s="("+s.join(")("+m+")(")+")",s="(.??"+o+")"+s+"("+d+".*)"):(s=s.join(m),s=o+s+d),[RegExp(s,"i"+I),i,f]},O=(e,t,l)=>{let[n]=F(t);if(null==n)return null;let r=[];if(null!=l)for(let t=0;l.length>t;t++){let i=l[t];n.test(e[i])&&r.push(i)}else for(let t=0;e.length>t;t++)n.test(e[t])&&r.push(t);return r};let v=!!L,U=RegExp(S,I),N=RegExp(L,I);const P=(t,l,n)=>{let[r,i,s]=F(n,1),a=D(n,!0),[g]=F(n,2),f=i.length,h=Array(f),o=Array(f);for(let e=0;f>e;e++){let t=i[e],l=a[e],n='"'==t[0]?t.slice(1,-1):t+s[e],r='"'==l[0]?l.slice(1,-1):l+s[e];h[e]=n,o[e]=r}let 
p=t.length,d=Array(p).fill(0),m={idx:Array(p),start:d.slice(),chars:d.slice(),cases:d.slice(),terms:d.slice(),interIns:d.slice(),intraIns:d.slice(),interLft2:d.slice(),interRgt2:d.slice(),interLft1:d.slice(),interRgt1:d.slice(),ranges:Array(p)},x=1==c||1==u,b=0;for(let n=0;t.length>n;n++){let i=l[t[n]],s=i.match(r),a=s.index+s[1].length,p=a,d=!1,R=0,A=0,L=0,S=0,z=0,E=0,y=0,C=0,k=0,j=[];for(let t=0,l=2;f>t;t++,l+=2){let n=s[l].toLocaleLowerCase(),r=h[t],g=r.length,m=n.length,b=n==r;if(s[l]==o[t]&&y++,!b&&s[l+1].length>=g){let e=s[l+1].toLocaleLowerCase().indexOf(r);e>-1&&(j.push(p,m,e,g),p+=_(s,l,e,g),n=r,m=g,b=!0,0==t&&(a=p))}if(x||b){let e=p-1,f=p+m,h=!1,o=!1;if(-1==e||U.test(i[e]))b&&R++,h=!0;else{if(2==c){d=!0;break}if(v&&N.test(i[e]+i[e+1]))b&&A++,h=!0;else if(1==c){let e=s[l+1],c=p+m;if(e.length>=g){let u,f=0,o=!1,d=RegExp(r,"ig"+I);for(;u=d.exec(e);){f=u.index;let e=c+f,t=e-1;if(-1==t||U.test(i[t])){R++,o=!0;break}if(N.test(i[t]+i[e])){A++,o=!0;break}}o&&(h=!0,j.push(p,m,f,g),p+=_(s,l,f,g),n=r,m=g,b=!0,0==t&&(a=p))}if(!h){d=!0;break}}}if(f==i.length||U.test(i[f]))b&&L++,o=!0;else{if(2==u){d=!0;break}if(v&&N.test(i[f-1]+i[f]))b&&S++,o=!0;else if(1==u){d=!0;break}}b&&(z+=g,h&&o&&E++)}if(m>g&&(k+=m-g),t>0&&(C+=s[l-1].length),!e.intraFilt(r,n,p)){d=!0;break}f-1>t&&(p+=m+s[l+1].length)}if(!d){m.idx[b]=t[n],m.interLft2[b]=R,m.interLft1[b]=A,m.interRgt2[b]=L,m.interRgt1[b]=S,m.chars[b]=z,m.terms[b]=E,m.cases[b]=y,m.interIns[b]=C,m.intraIns[b]=k,m.start[b]=a;let e=i.match(g),l=e.index+e[1].length,r=j.length,s=r>0?0:1/0,c=r-4;for(let t=2;e.length>t;)if(s>c||j[s]!=l)l+=e[t].length,t++;else{let n=j[s+1],r=j[s+2],i=j[s+3],a=t,g="";for(let t=0;n>t;a++)g+=e[a],t+=e[a].length;e.splice(t,a-t,g),l+=_(e,t,r,i),s+=4}l=e.index+e[1].length;let u=m.ranges[b]=[],f=l,h=l;for(let t=2;e.length>t;t++){let n=e[t].length;l+=n,t%2==0?h=l:n>0&&(u.push(f,h),f=h=l)}h>f&&u.push(f,h),b++}}if(t.length>b)for(let e in m)m[e]=m[e].slice(0,b);return m},_=(e,t,l,n)=>{let 
r=e[t]+e[t+1].slice(0,l);return e[t-1]+=r,e[t]=e[t+1].slice(l,l+n),e[t+1]=e[t+1].slice(l+n),r.length};return{search:(...t)=>((t,n,i,s=1e3,a)=>{i=i?!0===i?5:i:0;let g=null,c=null,u=[];n=n.replace(k,(e=>{let t=e.trim().slice(1);return t='"'===t[0]?l(t.slice(1,-1)):t.replace(r,""),""!=t&&u.push(t),""}));let h,o=D(n);if(u.length>0){if(h=RegExp(u.join("|"),"i"+I),0==o.length){let e=[];for(let l=0;t.length>l;l++)h.test(t[l])||e.push(l);return[e,null,null]}}else if(0==o.length)return[null,null,null];if(i>0){let e=D(n);if(e.length>1){let l=e.slice().sort(((e,t)=>t.length-e.length));for(let e=0;l.length>e;e++){if(0==a?.length)return[[],null,null];a=O(t,l[e],a)}if(e.length>i)return[a,null,null];g=f(e).map((e=>e.join(" "))),c=[];let n=new Set;for(let e=0;g.length>e;e++)if(a.length>n.size){let l=a.filter((e=>!n.has(e))),r=O(t,g[e],l);for(let e=0;r.length>e;e++)n.add(r[e]);c.push(r)}else c.push([])}}null==g&&(g=[n],c=[a?.length>0?a:O(t,n)]);let p=null,d=null;if(u.length>0&&(c=c.map((e=>e.filter((e=>!h.test(t[e])))))),s>=c.reduce(((e,t)=>e+t.length),0)){p={},d=[];for(let l=0;c.length>l;l++){let n=c[l];if(null==n||0==n.length)continue;let r=g[l],i=P(n,t,r),s=e.sort(i,t,r);if(l>0)for(let e=0;s.length>e;e++)s[e]+=d.length;for(let e in i)p[e]=(p[e]??[]).concat(i[e]);d=d.concat(s)}}return[[].concat(...c),p,d]})(...t),split:D,filter:O,info:P,sort:e.sort}}const u=(()=>{let e={A:"ÁÀÃÂÄĄ",a:"áàãâäą",E:"ÉÈÊËĖ",e:"éèêëę",I:"ÍÌÎÏĮ",i:"íìîïį",O:"ÓÒÔÕÖ",o:"óòôõö",U:"ÚÙÛÜŪŲ",u:"úùûüūų",C:"ÇČĆ",c:"çčć",L:"Ł",l:"ł",N:"ÑŃ",n:"ñń",S:"ŠŚ",s:"šś",Z:"ŻŹ",z:"żź"},t=new Map,l="";for(let n in e)e[n].split("").forEach((e=>{l+=e,t.set(e,n)}));let n=RegExp(`[${l}]`,"g"),r=e=>t.get(e);return e=>{if("string"==typeof e)return e.replace(n,r);let t=Array(e.length);for(let l=0;e.length>l;l++)t[l]=e[l].replace(n,r);return t}})();function f(e){let t,l,n=(e=e.slice()).length,r=[e.slice()],i=Array(n).fill(0),s=1;for(;n>s;)s>i[s]?(t=s%2&&i[s],l=e[s],e[s]=e[t],e[t]=l,++i[s],s=1,r.push(e.slice())):(i[s]=0,++s);return 
r}const h=(e,t)=>t?`${e}`:e,o=(e,t)=>e+t;return c.latinize=u,c.permute=e=>f([...Array(e.length).keys()]).sort(((e,t)=>{for(let l=0;e.length>l;l++)if(e[l]!=t[l])return e[l]-t[l];return 0})).map((t=>t.map((t=>e[t])))),c.highlight=function(e,t,l=h,n="",r=o){n=r(n,l(e.substring(0,t[0]),!1))??n;for(let i=0;t.length>i;i+=2)n=r(n,l(e.substring(t[i],t[i+1]),!0))??n,t.length-3>i&&(n=r(n,l(e.substring(t[i+1],t[i+2]),!1))??n);return r(n,l(e.substring(t[t.length-1]),!1))??n},c}(); diff --git a/package.json b/package.json index a218e1b..9a6055b 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,6 @@ "homepage": "https://github.com/leeoniya/uFuzzy#readme", "devDependencies": { "@rollup/plugin-terser": "^0.4.4", - "rollup": "^4.24.0" + "rollup": "^4.27.3" } } diff --git a/src/uFuzzy.mjs b/src/uFuzzy.mjs index c5d043c..8b4e798 100644 --- a/src/uFuzzy.mjs +++ b/src/uFuzzy.mjs @@ -77,6 +77,7 @@ const OPTS = { start, intraIns, interIns, + cases, } = info; return idx.map((v, i) => i).sort((ia, ib) => ( @@ -95,6 +96,8 @@ const OPTS = { interIns[ia] - interIns[ib] || // earliest start of match start[ia] - start[ib] || + // case match + cases[ib] - cases[ia] || // alphabetic cmp(haystack[idx[ia]], haystack[idx[ib]]) )); @@ -211,7 +214,7 @@ export default function uFuzzy(opts) { let trimRe = new RegExp('^' + _interSplit + '|' + _interSplit + '$', 'g' + uFlag); let contrsRe = new RegExp(intraContr, 'gi' + uFlag); - const split = needle => { + const split = (needle, keepCase = false) => { let exacts = []; needle = needle.replace(EXACTS_RE, m => { @@ -219,7 +222,10 @@ export default function uFuzzy(opts) { return EXACT_HERE; }); - needle = needle.replace(trimRe, '').toLocaleLowerCase(); + needle = needle.replace(trimRe, ''); + + if (!keepCase) + needle = needle.toLocaleLowerCase(); if (withIntraSplit) needle = needle.replace(intraSplit, m => m[0] + ' ' + m[1]); @@ -409,9 +415,24 @@ export default function uFuzzy(opts) { DEBUG && console.time('info'); let [query, parts, contrs] = 
prepQuery(needle, 1); + let partsCased = split(needle, true); let [queryR] = prepQuery(needle, 2); let partsLen = parts.length; + let _terms = Array(partsLen); + let _termsCased = Array(partsLen); + + for (let j = 0; j < partsLen; j++) { + let part = parts[j]; + let partCased = partsCased[j]; + + let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j]; + let termCased = partCased[0] == '"' ? partCased.slice(1, -1) : partCased + contrs[j]; + + _terms[j] = term; + _termsCased[j] = termCased; + } + let len = idxs.length; let field = Array(len).fill(0); @@ -428,6 +449,9 @@ export default function uFuzzy(opts) { // contiguous chars matched chars: field.slice(), + // number of terms whose matched text equals the needle term case-sensitively + cases: field.slice(), + // contiguous (no fuzz) and bounded terms (intra=0, lft2/1, rgt2/1) // excludes terms that are contiguous but have < 2 bounds (substrings) terms: field.slice(), @@ -469,19 +493,23 @@ export default function uFuzzy(opts) { let rgt1 = 0; let chars = 0; let terms = 0; + let cases = 0; let inter = 0; let intra = 0; let refine = []; for (let j = 0, k = 2; j < partsLen; j++, k+=2) { - let group = m[k].toLocaleLowerCase(); - let part = parts[j]; - let term = part[0] == '"' ? part.slice(1, -1) : part + contrs[j]; - let termLen = term.length; - let groupLen = group.length; + let group = m[k].toLocaleLowerCase(); + let term = _terms[j]; + let termCased = _termsCased[j]; + let termLen = term.length; + let groupLen = group.length; let fullMatch = group == term; + if (m[k] == termCased) + cases++; + // this won't handle the case when an exact match exists across the boundary of the current group and the next junk // e.g. 
blob,ob when searching for 'bob' but finding the earlier `blob` (with extra insertion) if (!fullMatch && m[k+1].length >= termLen) { @@ -631,6 +659,7 @@ export default function uFuzzy(opts) { info.interRgt1[ii] = rgt1; info.chars[ii] = chars; info.terms[ii] = terms; + info.cases[ii] = cases; info.interIns[ii] = inter; info.intraIns[ii] = intra;