Skip to content

Commit 7773d83

Browse files
committed
Adding buckets
1 parent 97188c9 commit 7773d83

File tree

3 files changed

+106
-3
lines changed

3 files changed

+106
-3
lines changed

finder/find.go

+24-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@ import (
1111
// Finder holds a reference list of strings together with the lookup
// structures derived from it and the algorithm used to compare against it.
type Finder struct {
1212
// referenceMap is the set of all reference strings; it is consulted for exact matches.
referenceMap map[string]struct{}
1313
// reference is the full reference list as last passed to Refresh.
reference []string
14+
// referenceBucket groups the reference strings by their first byte
// (converted to a rune). Refresh only fills it when enableBuckets is set.
referenceBucket map[rune][]string
1415
// Alg is the comparison algorithm; New returns ErrNoAlgorithmDefined when it is nil.
Alg Algorithm
1516
LengthTolerance float64 // A number between 0.0-1.0 (percentage) to allow for length mismatch; anything outside this is considered not similar. Set to 0 to disable.
1617
// lock guards reference, referenceMap and referenceBucket.
lock sync.RWMutex
18+
// enableBuckets toggles building referenceBucket in Refresh; set through WithBuckets.
enableBuckets bool
1719
}
1820

1921
// Errors
@@ -33,11 +35,12 @@ const (
3335
func New(list []string, options ...Option) (*Finder, error) {
3436
i := &Finder{}
3537

36-
i.Refresh(list)
3738
for _, o := range options {
3839
o(i)
3940
}
4041

42+
i.Refresh(list)
43+
4144
if i.Alg == nil {
4245
return i, ErrNoAlgorithmDefined
4346
}
@@ -48,13 +51,23 @@ func New(list []string, options ...Option) (*Finder, error) {
4851
// Refresh replaces the internal reference list.
4952
func (t *Finder) Refresh(list []string) {
5053
rm := make(map[string]struct{}, len(list))
54+
rb := make(map[rune][]string, 26)
5155
for _, r := range list {
5256
rm[r] = struct{}{}
57+
58+
if t.enableBuckets {
59+
l := rune(r[0])
60+
if _, ok := rb[l]; !ok {
61+
rb[l] = make([]string, 0, 128)
62+
}
63+
rb[l] = append(rb[l], r)
64+
}
5365
}
5466

5567
t.lock.Lock()
5668
t.reference = list
5769
t.referenceMap = rm
70+
t.referenceBucket = rb
5871
t.lock.Unlock()
5972
}
6073

@@ -79,12 +92,20 @@ func (t *Finder) FindTopRankingCtx(ctx context.Context, input string) ([]string,
7992
defer t.lock.RUnlock()
8093

8194
// Exact matches
82-
if _, exists := t.referenceMap[input]; exists {
95+
if _, exists := t.referenceMap[input]; exists || len(input) == 0 {
8396
return []string{input}, BestScoreValue, true
8497
}
8598

99+
var list []string
100+
r := rune(input[0])
101+
if l, ok := t.referenceBucket[r]; ok {
102+
list = l
103+
} else {
104+
list = t.reference
105+
}
106+
86107
var sameScore = []string{input}
87-
for _, ref := range t.reference {
108+
for _, ref := range list {
88109
select {
89110
case <-ctx.Done():
90111
return []string{input}, WorstScoreValue, false

finder/find_test.go

+76
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package finder
22

33
import (
44
"context"
5+
"math"
6+
"math/rand"
57
"testing"
68
"time"
79
)
@@ -148,7 +150,29 @@ func TestMeetsLengthTolerance(t *testing.T) {
148150
t.Errorf("Expected the tolerance to be %t\n%+v", td.Expect, td)
149151
}
150152
}
153+
}
154+
155+
// Package-level sinks for the benchmark below. Storing the results here is
// intended to stop the compiler from optimising the benchmarked expressions away.
156+
var ceilA, ceilB int
157+
158+
// BenchmarkCeilOrNoCeil compares two ways of rounding inputLen*threshold up
// to an int: adding a constant before truncating ("No Ceil") and calling
// math.Ceil ("Ceil"). After both sub-benchmarks it reports an error if the
// two sinks hold different values.
func BenchmarkCeilOrNoCeil(b *testing.B) {
159+
inputLen := 64
160+
threshold := 0.195
161+
b.Run("No Ceil", func(b *testing.B) {
162+
for i := 0; i < b.N; i++ {
163+
// NOTE(review): adding 0.555 before truncating equals math.Ceil only when
// the fractional part is 0 or at least 0.445; it holds for 64*0.195 = 12.48.
ceilA = int((float64(inputLen) * threshold) + 0.555)
164+
}
165+
})
166+
167+
b.Run("Ceil", func(b *testing.B) {
168+
for i := 0; i < b.N; i++ {
169+
ceilB = int(math.Ceil(float64(inputLen) * threshold))
170+
}
171+
})
151172

173+
// Sanity check that both variants produced the same value.
// NOTE(review): if a -bench filter selects only one sub-benchmark, the other
// sink keeps its previous value and this check can fail — confirm acceptable.
if ceilA != ceilB {
174+
b.Errorf("Implementation failure, a:%d != b:%d", ceilA, ceilB)
175+
}
152176
}
153177

154178
func BenchmarkSliceOrMap(b *testing.B) {
@@ -175,3 +199,55 @@ func BenchmarkSliceOrMap(b *testing.B) {
175199
}
176200
})
177201
}
202+
203+
func BenchmarkFindWithBucket(b *testing.B) {
204+
refs := generateRefs(1000, 20)
205+
alg := NewJaroWinkler(.7, 4)
206+
207+
testRef := generateRef(20)
208+
b.ReportAllocs()
209+
b.Run("find with bucket", func(b *testing.B) {
210+
f, _ := New(refs,
211+
WithAlgorithm(alg),
212+
WithLengthTolerance(0),
213+
WithBuckets(false),
214+
)
215+
216+
b.ResetTimer()
217+
for i := 0; i < b.N; i++ {
218+
f.Find(testRef)
219+
}
220+
})
221+
222+
b.Run("find without bucket", func(b *testing.B) {
223+
f, _ := New(refs,
224+
WithAlgorithm(alg),
225+
WithLengthTolerance(0),
226+
WithBuckets(true),
227+
)
228+
229+
b.ResetTimer()
230+
for i := 0; i < b.N; i++ {
231+
f.Find(testRef)
232+
}
233+
})
234+
}
235+
236+
// generateRefs returns a slice of refNum strings, each produced by
// generateRef(length).
func generateRefs(refNum, length uint64) []string {
237+
refs := make([]string, refNum)
238+
for i := uint64(0); i < refNum; i++ {
239+
refs[i] = generateRef(length)
240+
}
241+
242+
return refs
243+
}
244+
245+
// generateRef returns a string of length bytes, each picked with rand.Intn
// from the ASCII letters and digits in alnum.
func generateRef(length uint64) string {
246+
const alnum = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
247+
248+
var b = make([]byte, length)
249+
for i := uint64(0); i < length; i++ {
250+
b[i] = alnum[rand.Intn(len(alnum))]
251+
}
252+
return string(b)
253+
}

finder/option.go

+6
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,9 @@ func WithLengthTolerance(t float64) Option {
1818
s.LengthTolerance = t
1919
}
2020
}
21+
22+
// WithBuckets returns an Option that sets the Finder's enableBuckets flag.
// Refresh reads that flag to decide whether to group references by their
// first byte.
func WithBuckets(enable bool) Option {
23+
return func(s *Finder) {
24+
s.enableBuckets = enable
25+
}
26+
}

0 commit comments

Comments
 (0)