-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathscanner.go
233 lines (202 loc) · 5 KB
/
scanner.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
package agency
import (
"bytes"
"errors"
"unicode"
"unicode/utf8"
)
// errEOF is an internal sentinel signalling that the input buffer is exhausted.
// read and readNgrams return it, and Scanner.ScanBytes treats it as the normal
// end of a scan instead of reporting it to the caller.
var errEOF = errors.New("eof")
// Scanner is a user agent tokenizer.
//
// It walks an input buffer word by word and records, per rank, the browser,
// device and OS entries whose tokens match. All scan state lives in these
// fields and is re-initialized by reset at the start of every ScanBytes call.
type Scanner struct {
// c is the rune most recently decoded by read.
c rune
// buf is the input currently being scanned.
buf []byte
// buflen is len(buf), stored when the buffer is assigned in ScanBytes.
buflen int
// idx is the byte offset in buf of the next rune to read.
idx int
// size is the byte width of the last rune read; unread sets it back to 0.
size int
// prevstart is the byte offset at which the previously returned word began;
// readNgrams uses it as the start of the bigram.
prevstart int
// mobile is set once a unigram matches one of the mobile tokens.
mobile bool
// browsers holds the matched browser entries, indexed by their rank.
browsers []*browser
// devices holds the matched device entries, indexed by their rank.
devices []*device
// oses holds the matched OS entries, indexed by their rank.
oses []*aos
}
// NewScanner creates a new user agent scanner.
//
// The browser, device and OS match tables are each allocated with maxRank
// empty slots.
func NewScanner() *Scanner {
	s := new(Scanner)
	s.browsers = make([]*browser, maxRank)
	s.devices = make([]*device, maxRank)
	s.oses = make([]*aos, maxRank)
	return s
}
// ScanBytes scans a user agent byte slice for device information.
//
// The scanner is pointed at b and reset, then every word of the input is
// matched against the mobile, browser, device and OS token tables. The result
// is built from the first non-nil entry, in rank order, of each match table.
// It returns a nil UserAgent and the error if tokenizing fails with anything
// other than the internal end-of-input marker.
func (s *Scanner) ScanBytes(b []byte) (*UserAgent, error) {
	s.buf, s.buflen = b, len(b)
	s.reset()

	// Feed each word, plus the bigram ending at that word, to the matchers.
	for {
		unigram, bigram, err := s.readNgrams()
		if err == errEOF {
			break
		}
		if err != nil {
			return nil, err
		}

		// Once the mobile flag is set there is no need to test for it again.
		if !s.mobile {
			s.matchMobile(unigram)
		}
		s.matchBrowser(unigram, bigram)
		s.matchDevice(unigram, bigram)
		s.matchOS(unigram, bigram)
	}

	ua := new(UserAgent)

	// Browser: first populated rank slot wins.
	for _, hit := range s.browsers {
		if hit == nil {
			continue
		}
		ua.Browser.Type = hit.typ
		ua.Browser.Name = hit.name
		break
	}

	// Device: first populated rank slot wins.
	for _, hit := range s.devices {
		if hit == nil {
			continue
		}
		ua.Device.Type = hit.typ
		break
	}

	// OS: first populated rank slot wins.
	for _, hit := range s.oses {
		if hit == nil {
			continue
		}
		ua.OS.Name = hit.name
		ua.OS.Version = hit.version
		break
	}

	if !s.mobile {
		return ua, nil
	}

	// Special mobile cases: some desktop browser names are reported under a
	// dedicated mobile name when a mobile token was seen.
	switch ua.Browser.Name {
	case "Firefox":
		ua.Browser.Type, ua.Browser.Name = "Mobile Browser", "Mobile Firefox"
	case "Safari":
		ua.Browser.Type, ua.Browser.Name = "Mobile Browser", "Mobile Safari"
	case "Opera":
		ua.Browser.Type, ua.Browser.Name = "Mobile Browser", "Opera Mobile"
	case "Yandex.Browser":
		ua.Browser.Type, ua.Browser.Name = "Mobile Browser", "Yandex.Browser mobile"
	}
	return ua, nil
}
// Scan scans a user agent string for device information.
//
// The string is converted to a byte slice and handed to ScanBytes.
func (s *Scanner) Scan(str string) (*UserAgent, error) {
	raw := []byte(str)
	return s.ScanBytes(raw)
}
// read decodes the next rune from the buffer into s.c, records its byte width
// in s.size and advances s.idx past it. It returns errEOF, leaving the state
// untouched, when the cursor is at or beyond the end of the buffer.
func (s *Scanner) read() error {
	if s.idx >= s.buflen {
		return errEOF
	}

	// Start from the single-byte interpretation and only run the UTF-8
	// decoder when the lead byte is outside the ASCII range.
	r, width := rune(s.buf[s.idx]), 1
	if r >= utf8.RuneSelf {
		r, width = utf8.DecodeRune(s.buf[s.idx:])
	}

	s.c, s.size = r, width
	s.idx += width
	return nil
}
// unread steps the cursor back over the rune consumed by the last read.
// The recorded width is zeroed afterwards, so a second consecutive call
// leaves the cursor where it is.
func (s *Scanner) unread() {
	s.idx, s.size = s.idx-s.size, 0
}
// readNgrams reads the next word from the buffer and returns it as the
// unigram, together with a bigram that runs from the start of the previously
// returned word to the end of this one (any separators in between included).
// For the first word after a reset the bigram starts at the beginning of the
// buffer. Both results are sub-slices of the scanner's buffer.
//
// A word is a run of letters, digits, '-' and '.'. It returns errEOF when no
// further word can be read.
func (s *Scanner) readNgrams() ([]byte, []byte, error) {
	begin := s.idx
	seen := 0

	for s.read() != errEOF {
		wordRune := unicode.IsLetter(s.c) || unicode.IsDigit(s.c) || s.c == '-' || s.c == '.'
		if wordRune {
			seen++
			continue
		}
		if seen > 0 {
			// First separator after the word: push it back and stop.
			s.unread()
			break
		}
		// Still in front of the word: skip the separator.
		begin = s.idx
	}

	// Nothing consumed beyond the separators means the input is exhausted.
	if begin == s.idx {
		return nil, nil, errEOF
	}

	word := s.buf[begin:s.idx]
	pair := s.buf[s.prevstart:s.idx]
	s.prevstart = begin
	return word, pair, nil
}
// matchMobile checks a unigram against the list of mobile tokens and sets the
// scanner's mobile flag when one of them is equal to it. The flag is never
// cleared here.
func (s *Scanner) matchMobile(unigram []byte) {
	for _, entry := range mobiles {
		if !bytes.Equal(unigram, entry.token) {
			continue
		}
		// One hit is enough; further hits would only set the same flag again.
		s.mobile = true
		return
	}
}
// matchBrowser checks a unigram and bigram against the list of browser tokens.
// Every entry whose token equals either n-gram is stored in the scanner's
// browser table at the index given by its rank; a later entry with the same
// rank replaces an earlier one.
func (s *Scanner) matchBrowser(unigram, bigram []byte) {
	for _, entry := range browsers {
		if !bytes.Equal(unigram, entry.token) && !bytes.Equal(bigram, entry.token) {
			continue
		}
		s.browsers[entry.rank] = entry
	}
}
// matchDevice checks a unigram and bigram against the list of device tokens.
// Every entry whose token equals either n-gram is stored in the scanner's
// device table at the index given by its rank; a later entry with the same
// rank replaces an earlier one.
func (s *Scanner) matchDevice(unigram, bigram []byte) {
	for _, entry := range devices {
		if !bytes.Equal(unigram, entry.token) && !bytes.Equal(bigram, entry.token) {
			continue
		}
		s.devices[entry.rank] = entry
	}
}
// matchOS checks a unigram and bigram against the list of OS tokens.
// Every entry whose token equals either n-gram is stored in the scanner's
// OS table at the index given by its rank; a later entry with the same rank
// replaces an earlier one.
func (s *Scanner) matchOS(unigram, bigram []byte) {
	for _, entry := range oses {
		if !bytes.Equal(unigram, entry.token) && !bytes.Equal(bigram, entry.token) {
			continue
		}
		s.oses[entry.rank] = entry
	}
}
// reset re-initializes the state of the scanner so a new scan can start.
//
// The read cursor, the last rune width, the previous word offset and the
// mobile flag are zeroed, and every slot of the browser, device and OS match
// tables is cleared. The input buffer itself is not touched.
//
// A match table that has never been allocated (the Scanner was created as a
// zero value instead of through NewScanner) is allocated here with maxRank
// slots, so that the match methods can store entries by rank without
// indexing into a nil slice.
func (s *Scanner) reset() {
	s.idx = 0
	s.size = 0
	s.prevstart = 0
	s.mobile = false

	if s.browsers == nil {
		s.browsers = make([]*browser, maxRank)
	} else {
		for i := range s.browsers {
			s.browsers[i] = nil
		}
	}

	if s.devices == nil {
		s.devices = make([]*device, maxRank)
	} else {
		for i := range s.devices {
			s.devices[i] = nil
		}
	}

	if s.oses == nil {
		s.oses = make([]*aos, maxRank)
	} else {
		for i := range s.oses {
			s.oses[i] = nil
		}
	}
}
// ScanBytes extracts properties from a user agent byte slice.
//
// It is a convenience wrapper that creates a new Scanner with NewScanner on
// every call and scans b with it.
func ScanBytes(b []byte) (*UserAgent, error) {
	scanner := NewScanner()
	return scanner.ScanBytes(b)
}
// Scan extracts properties from a user agent string.
//
// It is a convenience wrapper that creates a new Scanner with NewScanner on
// every call and scans str with it.
func Scan(str string) (*UserAgent, error) {
	scanner := NewScanner()
	return scanner.Scan(str)
}