-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract.go
82 lines (74 loc) · 1.53 KB
/
extract.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package dawg
// ExtractKeywordsResult is one fragment of a document as produced by
// ExtractKeywords.
type ExtractKeywordsResult struct {
// Found is true when Fragment is a matched keyword; it is left false for
// the text that lies between matches.
Found bool
// Fragment is the text of this piece of the document.
Fragment string
}
// ExtractKeywords splits document into consecutive fragments, marking the
// ones that are keywords recognised by d.
//
// The document is scanned rune by rune. Every position at which the root
// state d.DFA[0] has a transition (and every position reached while an earlier
// candidate is still open) starts a candidate match. For each candidate the
// longest keyword beginning at its start is remembered; a state is accepting
// when its transition map contains the key -1. Once the earliest open
// candidate can no longer advance it is resolved: if it matched, the keyword
// is emitted with Found set and the runes it covers are skipped, otherwise its
// first rune joins the current run of unmatched text. Runs of unmatched text
// are emitted as single fragments with Found left false.
//
// The returned slice is nil when document is empty. Fragments are rebuilt from
// decoded runes, so invalid UTF-8 bytes in document appear as U+FFFD.
func ExtractKeywords(d *DAWG, document string) (result []ExtractKeywordsResult) {
	// candidate tracks one potential match starting at a given rune.
	type candidate struct {
		start   rune           // rune at the candidate's start position
		node    map[rune]int32 // current DFA state; nil once it cannot advance
		advance int            // length in runes of the longest keyword found so far
	}

	var (
		pending []candidate // runes not yet attributed to any fragment
		misses  []rune      // current run of unmatched runes
	)

	// flush resolves the leading run of dead candidates in pending, emitting
	// keyword fragments and collecting unmatched runes into misses.
	flush := func() {
		n := 0
		for n < len(pending) && pending[n].node == nil {
			c := pending[n]
			if c.advance == 0 {
				misses = append(misses, c.start)
				n++
				continue
			}
			// Unmatched text that precedes the keyword goes out first.
			if len(misses) > 0 {
				result = append(result, ExtractKeywordsResult{
					Fragment: string(misses),
				})
				misses = misses[:0]
			}
			word := make([]rune, c.advance)
			for k, p := range pending[n : n+c.advance] {
				word[k] = p.start
			}
			result = append(result, ExtractKeywordsResult{
				Found:    true,
				Fragment: string(word),
			})
			// Candidates that started inside the keyword are discarded
			// with it, so matches never overlap.
			n += c.advance
		}
		pending = pending[n:]
	}

	for _, r := range document {
		// Fast path: nothing is open and r cannot start a keyword.
		if len(pending) == 0 {
			if _, ok := d.DFA[0][r]; !ok {
				misses = append(misses, r)
				continue
			}
		}
		pending = append(pending, candidate{start: r, node: d.DFA[0]})
		for i := range pending {
			c := &pending[i]
			if c.node == nil {
				continue
			}
			next, ok := c.node[r]
			if !ok || next == -1 {
				c.node = nil
				continue
			}
			c.node = d.DFA[next]
			if _, accepting := c.node[-1]; accepting {
				c.advance = len(pending) - i
			}
		}
		flush()
	}

	// End of input: no candidate can advance any further. Retiring them
	// explicitly (rather than feeding a sentinel rune through the DFA)
	// guarantees the buffer is drained even when keywords contain U+FFFD,
	// which is what string(rune(-1)) actually evaluates to.
	for i := range pending {
		pending[i].node = nil
	}
	flush()

	if len(misses) > 0 {
		result = append(result, ExtractKeywordsResult{
			Fragment: string(misses),
		})
	}
	return result
}