Skip to content

Commit

Permalink
feat: add dailymotion useragents (#55)
Browse files Browse the repository at this point in the history
* feat: add dailymotion useragents

* fix: remove trie strings after square brackets

* fix: edge version check

* fix: improve matching by supporting nested parentheses

* style: switch case

* fix: remove some invalid agents

* fix: add whatsapp bot
  • Loading branch information
ayuhito authored Jan 19, 2025
1 parent 70d006a commit 805057f
Show file tree
Hide file tree
Showing 11 changed files with 75,660 additions and 63 deletions.
75,158 changes: 75,158 additions & 0 deletions agents/5.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions agents/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
2. [DeviceAtlas](https://deviceatlas.com/blog/list-of-user-agent-strings)
3. Random Test Cases
4. More Random Test Cases
5. [Dailymotion](https://github.com/ua-parser/uap-python/pull/163#issuecomment-1536412054)

## Update

Expand Down
502 changes: 457 additions & 45 deletions agents/final.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion internal/match.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ var matchMap = map[string][]string{
MobileDevice: {"ONEPLUS", "Huawei", "HTC", "Galaxy", "iPhone", "iPod", "Windows Phone", "WindowsPhone", "LG"},
Tablet: {Tablet, "Touch", "iPad", "Nintendo Switch", "NintendoSwitch", "Kindle"},
TV: {TV, "Large Screen", "LargeScreen", "Smart Display", "SmartDisplay", "PLAYSTATION", "PlayStation", "ADT-2", "ADT-1", "CrKey", "Roku", "AFT", "Web0S", "Nexus Player", "Xbox", "XBOX", "Nintendo WiiU", "NintendoWiiU"},
Bot: {Bot, "HeadlessChrome", "bot", "Slurp", "LinkCheck", "QuickLook", "Haosou", "Yahoo Ad", "YahooAd", "Google", "Mediapartners", "Headless", "facebookexternalhit", "facebookcatalog", "Baidu"},
Bot: {Bot, "HeadlessChrome", "bot", "Slurp", "LinkCheck", "QuickLook", "Haosou", "Yahoo Ad", "YahooAd", "Google", "Mediapartners", "Headless", "facebookexternalhit", "facebookcatalog", "Baidu", "Instagram", "Pinterest", "PageSpeedInsights", "WhatsApp"},

// Version
Version: {Version},
Expand Down
1 change: 1 addition & 0 deletions internal/match_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ var matchResults = [][]string{
{internal.Safari, internal.Mobile, internal.Version, internal.Android, internal.Linux},
{internal.Safari, internal.Mobile, internal.Version, internal.Android, internal.Linux},
{internal.Safari, internal.Mobile, internal.Chrome, internal.Version, internal.MobileDevice, internal.Android, internal.Linux},
{internal.Safari, internal.Mobile, internal.Chrome, internal.Android, internal.Linux},

// Bots (4)
{internal.Bot},
Expand Down
19 changes: 8 additions & 11 deletions internal/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,6 @@ func RemoveVersions(ua string) string {
continue
}

// Skip whitespace
switch r {
case ' ', ';', ')', '(', ',', '_', '-', '/':
indexesToReplace = append(indexesToReplace, i)
continue
}

// Replace all non-latin characters with a space. The trie function will automatically
// skip over any characters it can't find, so this is a safe operation.
if !IsLetter(r) {
Expand Down Expand Up @@ -107,25 +100,29 @@ func RemoveAndroidIdentifiers(ua string) string {

// Find mobile token.
for _, token := range tokens {
var skipUntilClosingParenthesis bool
var skipUntilClosingParenthesis int
var indexesToReplace []int

if token.Match == Android {
// Iterate over the user agent string and remove all characters
// after the Android token until we encounter a closing parenthesis
// to remove device identifiers.
for i, r := range ua {
if skipUntilClosingParenthesis {
if skipUntilClosingParenthesis > 0 {
if r == '(' {
skipUntilClosingParenthesis++
}

if r == ')' {
skipUntilClosingParenthesis = false
skipUntilClosingParenthesis--
} else {
indexesToReplace = append(indexesToReplace, i)
continue
}
}

if i == token.EndIndex-1 {
skipUntilClosingParenthesis = true
skipUntilClosingParenthesis++
}
}

Expand Down
1 change: 1 addition & 0 deletions internal/version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ var versionResults = []string{
"MozillaLinuxAndroidAppleWebKitKHTMLlikeGeckoVersionMobileSafari",
"MozillaLinuxUAndroidAppleWebKitKHTMLlikeGeckoVersionMobileSafari",
"MozillaLinuxAndroidAppleWebKitKHTMLlikeGeckoVersionChromeMobileSafari",
"MozillaLinuxAndroidAppleWebKitKHTMLlikeGeckoChromeMobileSafari",

// Bots (4)
"MozillacompatibleGooglebothttpwwwgooglecombothtml",
Expand Down
10 changes: 8 additions & 2 deletions scripts/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,16 @@ func CleanAgentsFile(filePath string) ([]string, error) {
continue
}

if strings.Contains(line, "javascript") || strings.Contains(line, "function") || strings.Contains(line, "quot") || strings.Contains(line, "parent") {
lineLower := strings.ToLower(line)
if strings.Contains(lineLower, "javascript") || strings.Contains(lineLower, "function") || strings.Contains(lineLower, "quot") || strings.Contains(lineLower, "parent") {
continue
}

// Truncate the line at the first "[", discarding it and everything after it.
if strings.Contains(line, "[") {
line = line[:strings.Index(line, "[")]
}

line = internal.RemoveMobileIdentifiers(line)
line = internal.RemoveAndroidIdentifiers(line)
line = internal.RemoveVersions(line)
Expand Down Expand Up @@ -63,7 +69,7 @@ func CleanAgentsFile(filePath string) ([]string, error) {

func main() {
var content []string
filenames := []string{"agents/1.txt", "agents/2.txt", "agents/3.txt", "agents/4.txt"}
filenames := []string{"agents/1.txt", "agents/2.txt", "agents/3.txt", "agents/4.txt", "agents/5.txt"}

for _, filename := range filenames {
// Read agents.txt file.
Expand Down
1 change: 1 addition & 0 deletions testdata/cases.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ var TestCases = []string{
"Mozilla/5.0 (Linux; Android 4.4.2; en-us; Z520 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
"Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; Z520 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
"Mozilla/5.0 (Linux; Android 5.0.1; LG-H440n Build/LRX21Y) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/38.0.2125.102 Mobile Safari/537.36",
"Mozilla/5.0 (Linux; Android 10; moto g(8) power lite) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36",

// Bots
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
Expand Down
27 changes: 23 additions & 4 deletions trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
)

// trieState is used to determine the current parsing state of the trie.
type trieState int
type trieState uint8

const (
// stateDefault is the default parsing state of the trie.
Expand Down Expand Up @@ -64,6 +64,8 @@ func (trie *RuneTrie) Get(key string) UserAgent {
// Number of runes to skip when iterating over the trie. This is used
// to skip over version numbers or language codes.
var skipCount uint8
// This is used to track how many levels of nested parentheses deep we are.
var closingParenthisisNestCount uint8

for i, r := range key {
if skipCount > 0 {
Expand All @@ -78,11 +80,25 @@ func (trie *RuneTrie) Get(key string) UserAgent {
}

case stateSkipClosingParenthesis:
if r == ')' {
state = stateDefault
switch r {
case '(':
closingParenthisisNestCount++
case ')':
if closingParenthisisNestCount == 0 {
state = stateDefault
} else {
closingParenthisisNestCount--
}
}

case stateVersion:
// In the case of Edg and Edge, skipCount = 1 might just put us on the slash.
// Ideally, we need to improve the matcher to choose Edge over Edg, but this is
// a quick fix for now.
if r == '/' {
continue
}

// If we encounter any unknown characters, we can assume the version number is over.
if !internal.IsDigit(r) && r != '.' {
state = stateDefault
Expand Down Expand Up @@ -125,7 +141,10 @@ func (trie *RuneTrie) Get(key string) UserAgent {
//
// We also reject any version numbers related to Safari since it has a
// separate key for its version number.
if (matched && result.Type == internal.MatchBrowser && result.Match != internal.Safari) || (result.Type == internal.MatchVersion && ua.versionIndex == 0) {
if (matched && result.Type == internal.MatchBrowser &&
result.Match != internal.Safari) ||
(result.Type == internal.MatchVersion &&
ua.versionIndex == 0) {
// Clear version buffer if it has old values.
if ua.versionIndex > 0 {
ua.version = [32]rune{}
Expand Down
1 change: 1 addition & 0 deletions ua_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ var resultCases = []ResultCase{
{Browser: internal.AndroidBrowser, OS: internal.Android, Mobile: true, Version: "4.0"},
{Browser: internal.AndroidBrowser, OS: internal.Android, Mobile: true, Version: "4.0"},
{Browser: internal.Chrome, OS: internal.Android, Mobile: true, Version: "38.0.2125.102"},
{Browser: internal.Chrome, OS: internal.Android, Mobile: true, Version: "112.0.0.0"},
// Bots (6) 29
{Bot: true},
{Bot: true},
Expand Down

0 comments on commit 805057f

Please sign in to comment.