-
Notifications
You must be signed in to change notification settings - Fork 0
/
lsd.go
167 lines (147 loc) · 4.1 KB
/
lsd.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
/*
Package lsdp provides Weighted Levenshtein distance and its extended interface
*/
package lsdp
// DistanceMeasurer provides measurement of the distance between 2 strings
type DistanceMeasurer interface {
Distance(string, string) float64
}
// Lsd returns standard Levenshtein distance
func Lsd(a, b string) int {
wd := &Weights{1, 1, 1}
return int(wd.Distance(a, b))
}
// Weights represents cost parameters for weighted Levenshtein distance
type Weights struct {
Insert float64
Delete float64
Replace float64
}
// Distance returns weighted Levenshtein distance
func (w Weights) Distance(a, b string) float64 {
result := accumulateCost(a, b, func(_, _ int, ar, br rune, diagonal, above, left float64) (float64, float64, float64) {
if ar != br {
diagonal += w.Replace
}
above += w.Insert
left += w.Delete
return diagonal, above, left
}, minCost)
return result
}
// ByRune returns weighted levenshtein distance by rune
func ByRune(w *Weights) *WeightsByRune {
return &WeightsByRune{
w: w,
insRune: make(map[rune]float64),
delRune: make(map[rune]float64),
repRune: make(map[[2]rune]float64),
}
}
// WeightsByRune represents weighted levenshtein distance by rune
type WeightsByRune struct {
w *Weights
insRune map[rune]float64
delRune map[rune]float64
repRune map[[2]rune]float64
}
// Distance returns weighted levenshtein distance by rune
func (wr *WeightsByRune) Distance(a, b string) float64 {
ret := accumulateCost(a, b, func(_, _ int, ar, br rune, diagonal, above, left float64) (float64, float64, float64) {
if rw, ok := wr.repRune[[2]rune{ar, br}]; ok {
diagonal += rw
} else if ar != br {
diagonal += wr.w.Replace
}
if rw, ok := wr.insRune[br]; ok {
above += rw
} else {
above += wr.w.Insert
}
if rw, ok := wr.delRune[ar]; ok {
left += rw
} else {
left += wr.w.Delete
}
return diagonal, above, left
}, minCost)
return ret
}
// Insert specify cost by insert rune
func (wr *WeightsByRune) Insert(runeGroup string, insCost float64) *WeightsByRune {
for _, r := range runeGroup {
wr.insRune[r] = insCost
}
return wr
}
// Delete specify cost by delete rune
func (wr *WeightsByRune) Delete(runeGroup string, delCost float64) *WeightsByRune {
for _, r := range runeGroup {
wr.delRune[r] = delCost
}
return wr
}
// Replace specify cost by replace rune
func (wr *WeightsByRune) Replace(runeGroupSrc, runeGroupDest string, repCost float64) *WeightsByRune {
for _, rs := range runeGroupSrc {
for _, rd := range runeGroupDest {
wr.repRune[[2]rune{rs, rd}] = repCost
}
}
return wr
}
// Normalized returns what wrapped the DistanceMeasurer with nomalize by string length
func Normalized(dm DistanceMeasurer) DistanceMeasurer {
return normalizedParam{wrapped: dm}
}
type normalizedParam struct {
wrapped DistanceMeasurer
}
func (p normalizedParam) Distance(a, b string) float64 {
d := p.wrapped.Distance(a, b)
l := len([]rune(a))
if lb := len([]rune(b)); l < lb {
l = lb
}
if l == 0 {
return d
}
return d / float64(l)
}
// DistanceFunc type is an adapter to allow the use of ordinary functions as DistanceMeasurer.
// Similar to http.HandlerFunc.
type DistanceFunc func(string, string) float64
// Distance calls f(a,b)
func (f DistanceFunc) Distance(a, b string) float64 {
return f(a, b)
}
type costFunc func(ai, bi int, ar, br rune, diagonal, above, left float64) (rep, ins, del float64)
type minFunc func(a, b, c float64) (min float64)
func accumulateCost(a, b string, costf costFunc, min minFunc) float64 {
ar, br := []rune(a), []rune(b)
costRow := make([]float64, len(ar)+1)
for i := 1; i < len(costRow); i++ {
_, _, costRow[i] = costf(i, 0, ar[i-1], 0, 0, 0, costRow[i-1])
}
var left float64
for bc := 1; bc < len(br)+1; bc++ {
_, left, _ = costf(0, bc, 0, br[bc-1], 0, costRow[0], 0)
for i := 1; i < len(costRow); i++ {
rep, ins, del := costf(i, bc, ar[i-1], br[bc-1], costRow[i-1], costRow[i], left)
costRow[i-1] = left
left = min(rep, ins, del)
}
costRow[len(costRow)-1] = left
}
return costRow[len(costRow)-1]
}
func minCost(a, b, c float64) (min float64) {
min = a
if b < min {
min = b
}
if c < min {
min = c
}
return
}