-
Notifications
You must be signed in to change notification settings - Fork 1
/
ptrie.go
158 lines (137 loc) · 3.73 KB
/
ptrie.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
// Copyright 2023 Jean Niklas L'orange. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package ptrie implements a Pruning Radix Trie.
package ptrie
// Item is an element stored in a PTrie. If T is immutable, so is the item.
type Item[T any] struct {
// Value is the caller-supplied payload carried alongside the term.
Value T
// Term is the string associated with this item; prefix queries such as
// FindTopK are documented as matching against it.
Term string
// Rank is the item's score: query results are ordered from highest to lowest
// Rank. A trie node whose item has Rank 0 is treated as holding no item (see
// containsItem), so stored items presumably need a Rank of at least 1.
Rank uint
}
// PTrie is a Pruning Radix Trie. Every node carries one item slot, a list of
// child subtries and the highest rank found anywhere below it, which lets
// top-k queries skip subtries that cannot improve the current result.
type PTrie[T any] struct {
// item is the element stored at this node; a Rank of 0 means the node holds
// no item.
item Item[T]
// children are the subtries below this node. The walk code relies on them
// being sorted by descending maxRank so it can stop scanning early.
children []*PTrie[T]
maxRank uint // highest rank in this subtrie
// suffixLen is checked against the length of a prefix in hasPrefix;
// presumably the length of the key fragment owned by this node - TODO confirm.
suffixLen int
}
// FindTopK returns the k items with the highest rank in pt with the given
// prefix, where the highest ranked item is the first element etc. There may be
// fewer than k elements in the result.
//
// A k that is zero or negative asks for no items, so the result is nil.
func (pt *PTrie[T]) FindTopK(prefix string, k int) []Item[T] {
	if k <= 0 {
		// Bail out before make, which panics on a negative capacity, and
		// before the walk, which needs room for at least one result.
		return nil
	}
	return pt.FindTopKFast(prefix, make([]Item[T], 0, k))
}
// FindTopKFast works like [PTrie.FindTopK], but is able to reuse an already
// allocated result slice for the items. Any existing contents of result are
// discarded, and the slice is filled up to its capacity if there are enough
// elements that match the prefix.
//
// It returns nil when no subtrie of pt matches the prefix. When result has no
// capacity (including a nil slice) there is no room for any item, so an empty
// slice is returned instead of walking the trie.
func (pt *PTrie[T]) FindTopKFast(prefix string, result []Item[T]) []Item[T] {
	best := result[:0]

	// First, descend pt until the whole prefix has been consumed.
	lca := pt.lcaScan(prefix)
	if lca == nil {
		return nil
	}

	// The walk compares candidates against the worst collected item as soon
	// as the buffer is full; with zero capacity the buffer is "full" while
	// empty, and that lookup would index out of range.
	if cap(best) == 0 {
		return best
	}
	return lca.walk(best)
}
// lcaScan returns the lowest common ancestor subtrie containing all items with
// the given prefix, or nil if no item in pt can have that prefix.
//
// An empty prefix is satisfied by everything below pt, so pt itself is the
// answer. Otherwise each child is compared against the prefix:
//   - cmpEqual or cmpSubkey: the child is returned as the ancestor;
//   - cmpSuperkey: the first numCommon bytes of the prefix are consumed and
//     the search continues inside that child with the remainder;
//   - cmpSharedPrefix: the search stops with nil;
//   - cmpNoMatch: the next child is tried.
//
// If no child matches a non-empty prefix, nothing below pt has that prefix and
// nil is returned.
func (pt *PTrie[T]) lcaScan(prefix string) *PTrie[T] {
	if prefix == "" {
		return pt
	}
	for _, child := range pt.children {
		switch c, numCommon := child.compare(prefix); c {
		case cmpEqual, cmpSubkey:
			return child
		case cmpSuperkey:
			return child.lcaScan(prefix[numCommon:])
		case cmpSharedPrefix:
			return nil
		}
		// cmpNoMatch: check the other children.
	}
	// Part of the prefix is still unmatched and no child continues it, so
	// returning pt here would report items that lack the prefix.
	return nil
}
// hasPrefix reports whether the string returned by pt.term() starts with
// prefix, compared byte by byte. A prefix longer than pt.suffixLen is rejected
// up front without looking at the term.
func (pt *PTrie[T]) hasPrefix(prefix string) bool {
	n := len(prefix)
	if n > pt.suffixLen {
		return false
	}

	own := pt.term()
	for pos := 0; pos < n; pos++ {
		if own[pos] != prefix[pos] {
			return false
		}
	}
	return true
}
// walk collects the best-ranked items of the subtrie rooted at pt into result
// and returns the updated slice. The node's own item is offered first, then
// each child is visited in order for as long as mustWalk says the child could
// still contribute.
func (pt *PTrie[T]) walk(result Items[T]) []Item[T] {
	if pt.containsItem() {
		result = result.insert(pt)
	}

	// Children are sorted by max rank, so the first child that is not worth
	// walking ends the scan: every later child would be rejected as well.
	kids := pt.children
	for i := 0; i < len(kids) && result.mustWalk(kids[i]); i++ {
		result = kids[i].walk(result)
	}
	return result
}
// containsItem reports whether this node holds an item. Rank 0 is the marker
// for an empty slot, so any positive rank counts as an item.
func (pt *PTrie[T]) containsItem() bool {
	return pt.item.Rank > 0
}
// Items is a slice of Item used as the bounded result buffer of a top-k query:
// its capacity is the maximum number of items collected, and insert keeps its
// elements ordered from highest to lowest Rank.
type Items[T any] []Item[T]
// worst returns the last element of result, which is the lowest ranked one
// while the slice is kept in descending rank order. The slice must not be
// empty; indexing an empty slice panics.
func (result Items[T]) worst() Item[T] {
	last := len(result) - 1
	return result[last]
}
// insert offers the item stored in subTrie to result and returns the updated
// slice. Items rejected by shouldInsert leave result untouched. An accepted
// item is appended while there is spare capacity, otherwise it overwrites the
// current last (lowest ranked) element. It is then moved towards the front
// until the element before it no longer has a strictly lower rank, so items of
// equal rank keep their arrival order.
func (result Items[T]) insert(subTrie *PTrie[T]) Items[T] {
	if !result.shouldInsert(subTrie) {
		return result
	}

	if n := len(result); n == cap(result) {
		// Full: evict the worst element by overwriting it.
		result[n-1] = subTrie.item
	} else {
		result = append(result, subTrie.item)
	}

	// Sift the new tail element up; stop at the first neighbour that ranks at
	// least as high. A single-element slice never enters the loop.
	for pos := len(result) - 1; pos > 0 && result[pos-1].Rank < result[pos].Rank; pos-- {
		result[pos-1], result[pos] = result[pos], result[pos-1]
	}
	return result
}
// shouldInsert reports whether the item stored in subTrie belongs in result:
// either result still has spare capacity, or the item ranks strictly higher
// than the current worst element.
func (result Items[T]) shouldInsert(subTrie *PTrie[T]) bool {
	// The capacity check comes first so worst is only consulted on a full slice.
	return len(result) < cap(result) || subTrie.item.Rank > result.worst().Rank
}
// mustWalk reports whether subTrie has to be visited to complete result. That
// is the case while result has spare capacity, or when the highest rank inside
// subTrie is strictly above the worst element collected so far.
func (result Items[T]) mustWalk(subTrie *PTrie[T]) bool {
	switch {
	case len(result) < cap(result):
		// Fewer than k results so far: every subtrie may still contribute.
		return true
	default:
		// Full: only a subtrie holding something better than our worst helps.
		return subTrie.maxRank > result.worst().Rank
	}
}