Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add dailymotion useragents #55

Merged
merged 7 commits into from
Jan 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75,158 changes: 75,158 additions & 0 deletions agents/5.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions agents/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
2. [DeviceAtlas](https://deviceatlas.com/blog/list-of-user-agent-strings)
3. Random Test Cases
4. More Random Test Cases
5. [Dailymotion](https://github.com/ua-parser/uap-python/pull/163#issuecomment-1536412054)

## Update

Expand Down
502 changes: 457 additions & 45 deletions agents/final.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion internal/match.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ var matchMap = map[string][]string{
MobileDevice: {"ONEPLUS", "Huawei", "HTC", "Galaxy", "iPhone", "iPod", "Windows Phone", "WindowsPhone", "LG"},
Tablet: {Tablet, "Touch", "iPad", "Nintendo Switch", "NintendoSwitch", "Kindle"},
TV: {TV, "Large Screen", "LargeScreen", "Smart Display", "SmartDisplay", "PLAYSTATION", "PlayStation", "ADT-2", "ADT-1", "CrKey", "Roku", "AFT", "Web0S", "Nexus Player", "Xbox", "XBOX", "Nintendo WiiU", "NintendoWiiU"},
Bot: {Bot, "HeadlessChrome", "bot", "Slurp", "LinkCheck", "QuickLook", "Haosou", "Yahoo Ad", "YahooAd", "Google", "Mediapartners", "Headless", "facebookexternalhit", "facebookcatalog", "Baidu"},
Bot: {Bot, "HeadlessChrome", "bot", "Slurp", "LinkCheck", "QuickLook", "Haosou", "Yahoo Ad", "YahooAd", "Google", "Mediapartners", "Headless", "facebookexternalhit", "facebookcatalog", "Baidu", "Instagram", "Pinterest", "PageSpeedInsights", "WhatsApp"},

// Version
Version: {Version},
Expand Down
1 change: 1 addition & 0 deletions internal/match_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ var matchResults = [][]string{
{internal.Safari, internal.Mobile, internal.Version, internal.Android, internal.Linux},
{internal.Safari, internal.Mobile, internal.Version, internal.Android, internal.Linux},
{internal.Safari, internal.Mobile, internal.Chrome, internal.Version, internal.MobileDevice, internal.Android, internal.Linux},
{internal.Safari, internal.Mobile, internal.Chrome, internal.Android, internal.Linux},

// Bots (4)
{internal.Bot},
Expand Down
19 changes: 8 additions & 11 deletions internal/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,6 @@ func RemoveVersions(ua string) string {
continue
}

// Skip whitespace
switch r {
case ' ', ';', ')', '(', ',', '_', '-', '/':
indexesToReplace = append(indexesToReplace, i)
continue
}

// Replace all non-latin characters with a space. The trie function will automatically
// skip over any characters it can't find, so this is a safe operation.
if !IsLetter(r) {
Expand Down Expand Up @@ -107,25 +100,29 @@ func RemoveAndroidIdentifiers(ua string) string {

// Find mobile token.
for _, token := range tokens {
var skipUntilClosingParenthesis bool
var skipUntilClosingParenthesis int
var indexesToReplace []int

if token.Match == Android {
// Iterate over the user agent string and remove all characters
// after the Android token until we encounter a closing parenthesis
// to remove device identifiers.
for i, r := range ua {
if skipUntilClosingParenthesis {
if skipUntilClosingParenthesis > 0 {
if r == '(' {
skipUntilClosingParenthesis++
}

if r == ')' {
skipUntilClosingParenthesis = false
skipUntilClosingParenthesis--
} else {
indexesToReplace = append(indexesToReplace, i)
continue
}
}

if i == token.EndIndex-1 {
skipUntilClosingParenthesis = true
skipUntilClosingParenthesis++
}
}

Expand Down
1 change: 1 addition & 0 deletions internal/version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ var versionResults = []string{
"MozillaLinuxAndroidAppleWebKitKHTMLlikeGeckoVersionMobileSafari",
"MozillaLinuxUAndroidAppleWebKitKHTMLlikeGeckoVersionMobileSafari",
"MozillaLinuxAndroidAppleWebKitKHTMLlikeGeckoVersionChromeMobileSafari",
"MozillaLinuxAndroidAppleWebKitKHTMLlikeGeckoChromeMobileSafari",

// Bots (4)
"MozillacompatibleGooglebothttpwwwgooglecombothtml",
Expand Down
10 changes: 8 additions & 2 deletions scripts/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,16 @@ func CleanAgentsFile(filePath string) ([]string, error) {
continue
}

if strings.Contains(line, "javascript") || strings.Contains(line, "function") || strings.Contains(line, "quot") || strings.Contains(line, "parent") {
lineLower := strings.ToLower(line)
if strings.Contains(lineLower, "javascript") || strings.Contains(lineLower, "function") || strings.Contains(lineLower, "quot") || strings.Contains(lineLower, "parent") {
continue
}

// Truncate the line at the first "[" character, keeping only the text before it.
if strings.Contains(line, "[") {
line = line[:strings.Index(line, "[")]
}

line = internal.RemoveMobileIdentifiers(line)
line = internal.RemoveAndroidIdentifiers(line)
line = internal.RemoveVersions(line)
Expand Down Expand Up @@ -63,7 +69,7 @@ func CleanAgentsFile(filePath string) ([]string, error) {

func main() {
var content []string
filenames := []string{"agents/1.txt", "agents/2.txt", "agents/3.txt", "agents/4.txt"}
filenames := []string{"agents/1.txt", "agents/2.txt", "agents/3.txt", "agents/4.txt", "agents/5.txt"}

for _, filename := range filenames {
// Read agents.txt file.
Expand Down
1 change: 1 addition & 0 deletions testdata/cases.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ var TestCases = []string{
"Mozilla/5.0 (Linux; Android 4.4.2; en-us; Z520 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
"Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; Z520 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
"Mozilla/5.0 (Linux; Android 5.0.1; LG-H440n Build/LRX21Y) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/38.0.2125.102 Mobile Safari/537.36",
"Mozilla/5.0 (Linux; Android 10; moto g(8) power lite) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36",

// Bots
"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
Expand Down
27 changes: 23 additions & 4 deletions trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
)

// trieState is used to determine the current parsing state of the trie.
type trieState int
type trieState uint8

const (
// stateDefault is the default parsing state of the trie.
Expand Down Expand Up @@ -64,6 +64,8 @@ func (trie *RuneTrie) Get(key string) UserAgent {
// Number of runes to skip when iterating over the trie. This is used
// to skip over version numbers or language codes.
var skipCount uint8
// This tracks how many levels of nested parentheses deep we are.
var closingParenthisisNestCount uint8

for i, r := range key {
if skipCount > 0 {
Expand All @@ -78,11 +80,25 @@ func (trie *RuneTrie) Get(key string) UserAgent {
}

case stateSkipClosingParenthesis:
if r == ')' {
state = stateDefault
switch r {
case '(':
closingParenthisisNestCount++
case ')':
if closingParenthisisNestCount == 0 {
state = stateDefault
} else {
closingParenthisisNestCount--
}
}

case stateVersion:
// In the case of Edg and Edge, skipCount = 1 might just put us on the slash.
// Ideally, we need to improve the matcher to choose Edge over Edg, but this is
// a quick fix for now.
if r == '/' {
continue
}

// If we encounter any unknown characters, we can assume the version number is over.
if !internal.IsDigit(r) && r != '.' {
state = stateDefault
Expand Down Expand Up @@ -125,7 +141,10 @@ func (trie *RuneTrie) Get(key string) UserAgent {
//
// We also reject any version numbers related to Safari since it has a
// separate key for its version number.
if (matched && result.Type == internal.MatchBrowser && result.Match != internal.Safari) || (result.Type == internal.MatchVersion && ua.versionIndex == 0) {
if (matched && result.Type == internal.MatchBrowser &&
result.Match != internal.Safari) ||
(result.Type == internal.MatchVersion &&
ua.versionIndex == 0) {
// Clear version buffer if it has old values.
if ua.versionIndex > 0 {
ua.version = [32]rune{}
Expand Down
1 change: 1 addition & 0 deletions ua_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ var resultCases = []ResultCase{
{Browser: internal.AndroidBrowser, OS: internal.Android, Mobile: true, Version: "4.0"},
{Browser: internal.AndroidBrowser, OS: internal.Android, Mobile: true, Version: "4.0"},
{Browser: internal.Chrome, OS: internal.Android, Mobile: true, Version: "38.0.2125.102"},
{Browser: internal.Chrome, OS: internal.Android, Mobile: true, Version: "112.0.0.0"},
// Bots (6) 29
{Bot: true},
{Bot: true},
Expand Down
Loading