-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuilder.go
126 lines (118 loc) · 2.69 KB
/
builder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
package dawg
import (
"sort"
"strconv"
"strings"
)
// Builder
type Builder struct {
nfa []map[rune]*[]int // 非決定性有限オートマトン
suffixMap map[string]int
}
func NewBuilder() *Builder {
nodes := make([]map[rune]*[]int, 0)
nodes = append(nodes, make(map[rune]*[]int))
return &Builder{
nfa: nodes,
suffixMap: make(map[string]int),
}
}
func (b *Builder) AddWord(word string) {
if len(word) == 0 {
return
}
var ok bool
var r rune
var nodeIdx = 0
var suffixIdx = 0
var i int = -1
runes := []rune(word)
for i, r = range runes {
suffix := string(runes[i+1:])
nodeCurr := b.nfa[nodeIdx]
if suffixIdx, ok = b.suffixMap[suffix]; ok {
if edges, ok := nodeCurr[r]; ok {
*edges = append(*edges, suffixIdx)
} else {
nodeCurr[r] = &[]int{suffixIdx}
}
break
} else {
suffixIdx = len(b.nfa)
b.nfa = append(b.nfa, make(map[rune]*[]int))
b.suffixMap[suffix] = suffixIdx
if edges, ok := nodeCurr[r]; ok {
*edges = append(*edges, suffixIdx)
} else {
nodeCurr[r] = &[]int{suffixIdx}
}
nodeIdx = suffixIdx
}
}
if i == len(runes)-1 {
b.nfa[suffixIdx][-1] = &[]int{-1}
}
}
func (b *Builder) Build() *DAWG {
// NFA をもとに DFA (DAWG) を構築する
nodes := make([]map[rune]int32, 0)
nodes = append(nodes, make(map[rune]int32))
dawg := &DAWG{
DFA: nodes,
}
closureMap := make(map[string]int32)
closureMap["0"] = 0
stack := []map[int]struct{}{
{0: struct{}{}},
}
for len(stack) > 0 {
fromClosure := stack[len(stack)-1]
stack = stack[:len(stack)-1]
fromIdx := closureMap[convertIntSetToStringKey(fromClosure)]
res := make(map[rune]map[int]struct{})
for nfaIdx := range fromClosure {
for r, edges := range b.nfa[nfaIdx] {
if cl, ok := res[r]; ok {
for _, e := range *edges {
cl[e] = struct{}{}
}
} else {
res[r] = make(map[int]struct{})
for _, e := range *edges {
res[r][e] = struct{}{}
}
}
}
}
for r, cl := range res {
toClosure := convertIntSetToStringKey(cl)
if r == -1 {
dawg.DFA[fromIdx][-1] = -1
continue
}
if toIdx, ok := closureMap[toClosure]; ok {
dawg.DFA[fromIdx][r] = toIdx
} else {
toIdx = int32(len(dawg.DFA))
dawg.DFA = append(dawg.DFA, make(map[rune]int32))
closureMap[toClosure] = toIdx
stack = append(stack, cl)
dawg.DFA[fromIdx][r] = toIdx
}
}
}
return dawg
}
// int の集合を "5,11,130" のような文字列に変換する
func convertIntSetToStringKey(s map[int]struct{}) string {
ss := []int{}
for v := range s {
ss = append(ss, v)
}
sort.Ints(ss)
sss := make([]string, 0, len(ss))
for _, v := range ss {
sss = append(sss, strconv.Itoa(v))
}
return strings.Join(sss, ",")
}