forked from forrestthewoods/lib_fts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fts_fuzzy_match.fsx
157 lines (144 loc) · 6.47 KB
/
fts_fuzzy_match.fsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
type State =
{ Score : int
Index : int
PatternIdx : int
BestLetter : char option
BestLower : char option
BestLetterIdx : int option
BestLetterScore : int
MatchedIndices : int list
PrevMatched : bool
PrevLower : bool
PrevSeperator : bool }
with
static member Empty =
{ Score = 0
Index = 0
PatternIdx = 0
BestLetter = None
BestLower = None
BestLetterIdx = None
BestLetterScore = 0
MatchedIndices = []
PrevMatched = false
PrevLower = false
PrevSeperator = true }
let fuzzyMatch (pattern : string) (str : string) useLookup =
// Score consts
let adjacency_bonus = 5; // bonus for adjacent matches
let separator_bonus = 10; // bonus if match occurs after a separator
let camel_bonus = 10; // bonus if match is uppercase and prev is lower
let leading_letter_penalty = -3; // penalty applied for every letter in str before the first match
let max_leading_letter_penalty = -9; // maximum penalty for leading letters
let unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
let patternLength = pattern.Length
let state =
str
|> Seq.fold(fun (state : State) (strChar : char) ->
let patternChar =
if state.PatternIdx <> patternLength then
pattern.Chars(state.PatternIdx) |> Some
else
None
let patternLower = patternChar |> Option.bind(fun x -> System.Char.ToLower(x) |> Some)
let strLower = System.Char.ToLower(strChar)
let strUpper = System.Char.ToUpper(strChar)
let nextMatch =
match patternLower with
| None -> false
| Some x -> x = strLower
let rematch =
match state.BestLetter with
| None -> false
| Some x -> x = strLower
let advanced = nextMatch && state.BestLetter.IsSome
let patternRepeat =
if state.BestLetter.IsSome && patternChar.IsSome then
match state.BestLower with
| None -> false
| Some x -> Some x = patternLower
else
false
let state =
if advanced || patternRepeat then
{ state with
Score = state.Score + state.BestLetterScore
MatchedIndices = state.MatchedIndices @ [state.BestLetterIdx.Value]
BestLetter = None
BestLower = None
BestLetterIdx = None
BestLetterScore = 0 }
else
state
let state =
if nextMatch || rematch then
// Apply penalty for each letter before the first pattern match
// Note: System.Math because penalties are negative values. So max is smallest penalty.
let state =
if state.PatternIdx = 0 then
let penalty = System.Math.Max(state.Index * leading_letter_penalty, max_leading_letter_penalty)
{ state with Score = state.Score + penalty }
else
state
let newScore = 0
// Apply bonus for consecutive bonuses
let newScore =
if state.PrevMatched then
newScore + adjacency_bonus
else
newScore
// Apply bonus for matches after a separator
let newScore =
if state.PrevSeperator then
newScore + separator_bonus
else
newScore
// Apply bonus across camel case boundaries. Includes "clever" isLetter check.
let newScore =
if state.PrevLower && (strChar = strUpper) && strLower <> strUpper then
newScore + camel_bonus
else
newScore
// Update pattern index IFF the next pattern letter was matched
let state =
if nextMatch then
{ state with
PatternIdx = state.PatternIdx + 1 }
else
state
// Update best letter in str which may be for a "next" letter or a "rematch"
let state =
if newScore >= state.BestLetterScore then
// Apply penalty for now skipped letter
let score =
match state.BestLetter with
| None -> state.Score
| Some x -> state.Score + unmatched_letter_penalty
{ state with
Score = score
BestLetter = Some strChar
BestLower = Some strLower
BestLetterIdx = Some state.Index
BestLetterScore = newScore }
else
state
{ state with PrevMatched = true }
else
{ state with
Score = state.Score + unmatched_letter_penalty
PrevMatched = false }
let state =
{ state with
PrevLower = strChar = strLower && strLower <> strUpper
PrevSeperator = strChar = '_' || strChar = ' '
Index = state.Index + 1 }
state) State.Empty
let state =
if state.BestLetter.IsSome then
{ state with
Score = state.Score + state.BestLetterScore
MatchedIndices = state.MatchedIndices @ [state.BestLetterIdx.Value] }
else
state
let matched = patternLength = state.PatternIdx
matched, state.Score, state.MatchedIndices