From 4570948e0cb4ebeef1f3a8cf382a5d6ff3925e59 Mon Sep 17 00:00:00 2001 From: megumiimai Date: Tue, 1 Oct 2019 08:14:55 +0900 Subject: [PATCH 1/4] [#18]fix:change it to bot from software like curl, java and python --- all_test.go | 15 +++++++++++++++ bot.go | 4 +++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/all_test.go b/all_test.go index 1913b8c..751da73 100644 --- a/all_test.go +++ b/all_test.go @@ -19,6 +19,21 @@ var uastrings = []struct { expectedOS *OSInfo }{ // Bots + { + title: "apache-httpclient", + ua: "Apache-HttpClient/4.2.3 (java 1.5)", + expected: "Browser:Apache-HttpClient Bot:true Mobile:false", + }, + { + title: "Java", + ua: "Java/1.7.0_65", + expected: "Browser:Java Bot:true Mobile:false", + }, + { + title: "Wget", + ua: "Wget/1.12 (linux-gnu)", + expected: "Browser:Wget Bot:true Mobile:false", + }, { title: "GoogleBot", ua: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", diff --git a/bot.go b/bot.go index 4bd70bc..84e2a69 100644 --- a/bot.go +++ b/bot.go @@ -80,6 +80,8 @@ func (p *UserAgent) fixOther(sections []section) { var botRegex = regexp.MustCompile("(?i)(bot|crawler|sp(i|y)der|search|worm|fetch|nutch)") +var botSoftwareRegex = regexp.MustCompile("(?i)(httpclient|curl|wget|python|java)") + // Check if we're dealing with a bot or with some weird browser. If that is the // case, the receiver will be modified accordingly. func (p *UserAgent) checkBot(sections []section) { @@ -89,7 +91,7 @@ func (p *UserAgent) checkBot(sections []section) { p.mozilla = "" // Check whether the name has some suspicious "bot" or "crawler" in his name. 
- if botRegex.Match([]byte(sections[0].name)) { + if botRegex.Match([]byte(sections[0].name)) || botSoftwareRegex.Match([]byte(sections[0].name)) { p.setSimple(sections[0].name, "", true) return } From 58722534c5601c028ebc5256b0c8f9574fc1f7e6 Mon Sep 17 00:00:00 2001 From: megumiimai Date: Tue, 1 Oct 2019 08:22:43 +0900 Subject: [PATCH 2/4] feat:Add detection for user-agent from slack req(LinkExpanding ImgProxy) --- all_test.go | 15 +++++++++++++++ bot.go | 4 ++++ 2 files changed, 19 insertions(+) diff --git a/all_test.go b/all_test.go index 751da73..339b22e 100644 --- a/all_test.go +++ b/all_test.go @@ -19,6 +19,21 @@ var uastrings = []struct { expectedOS *OSInfo }{ // Bots + { + title: "Slackbot-LinkExpanding", // see: https://api.slack.com/robots + ua: "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)", + expected: "Browser:Slackbot-LinkExpanding Engine:1.0 Bot:true Mobile:false", + }, + { + title: "Slackbot", + ua: "Slackbot 1.0 (+https://api.slack.com/robots)", + expected: "Browser:Slackbot Engine:1.0 Bot:true Mobile:false", + }, + { + title: "Slack-ImgProxy", + ua: "Slack-ImgProxy 0.19 (+https://api.slack.com/robots)", + expected: "Browser:Slack-ImgProxy Engine:0.19 Bot:true Mobile:false", + }, { title: "apache-httpclient", ua: "Apache-HttpClient/4.2.3 (java 1.5)", diff --git a/bot.go b/bot.go index 84e2a69..3fc5f58 100644 --- a/bot.go +++ b/bot.go @@ -82,6 +82,8 @@ var botRegex = regexp.MustCompile("(?i)(bot|crawler|sp(i|y)der|search|worm|fetch var botSoftwareRegex = regexp.MustCompile("(?i)(httpclient|curl|wget|python|java)") +var botNameRegex = regexp.MustCompile("(?i)(slackbot|slack-imgproxy)") + // Check if we're dealing with a bot or with some weird browser. If that is the // case, the receiver will be modified accordingly. 
func (p *UserAgent) checkBot(sections []section) { @@ -119,6 +121,8 @@ func (p *UserAgent) checkBot(sections []section) { } p.setSimple(results[0], version, true) return + } else if botNameRegex.Match([]byte(v.name)) { + p.bot = true } } From 5b2896bc6d4c4017ce9e97f1331a6a852140840e Mon Sep 17 00:00:00 2001 From: megumiimai Date: Tue, 1 Oct 2019 14:55:22 +0900 Subject: [PATCH 3/4] [#18]refactor: change it to bot from software like curl, java and python --- all_test.go | 10 +++++----- bot.go | 6 +++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/all_test.go b/all_test.go index 339b22e..cdf0227 100644 --- a/all_test.go +++ b/all_test.go @@ -37,17 +37,17 @@ var uastrings = []struct { { title: "apache-httpclient", ua: "Apache-HttpClient/4.2.3 (java 1.5)", - expected: "Browser:Apache-HttpClient Bot:true Mobile:false", + expected: "Browser:Apache-HttpClient-4.2.3 Bot:true Mobile:false", }, { title: "Java", ua: "Java/1.7.0_65", - expected: "Browser:Java Bot:true Mobile:false", + expected: "Browser:Java-1.7.0_65 Bot:true Mobile:false", }, { title: "Wget", ua: "Wget/1.12 (linux-gnu)", - expected: "Browser:Wget Bot:true Mobile:false", + expected: "Browser:Wget-1.12 Bot:true Mobile:false", }, { title: "GoogleBot", @@ -427,12 +427,12 @@ var uastrings = []struct { { title: "Python", ua: "Python-urllib/2.7", - expected: "Browser:Python-urllib-2.7 Bot:false Mobile:false", + expected: "Browser:Python-urllib-2.7 Bot:true Mobile:false", }, { title: "Curl", ua: "curl/7.28.1", - expected: "Browser:curl-7.28.1 Bot:false Mobile:false", + expected: "Browser:curl-7.28.1 Bot:true Mobile:false", }, // WebKit diff --git a/bot.go b/bot.go index 3fc5f58..d869b47 100644 --- a/bot.go +++ b/bot.go @@ -93,10 +93,14 @@ func (p *UserAgent) checkBot(sections []section) { p.mozilla = "" // Check whether the name has some suspicious "bot" or "crawler" in his name. 
- if botRegex.Match([]byte(sections[0].name)) || botSoftwareRegex.Match([]byte(sections[0].name)) { + if botRegex.Match([]byte(sections[0].name)) { p.setSimple(sections[0].name, "", true) return } + if botSoftwareRegex.Match([]byte(sections[0].name)) { + p.setSimple(sections[0].name, sections[0].version, true) + return + } // Tough luck, let's try to see if it has a website in his comment. if name := getFromSite(sections[0].comment); name != "" { From 90b6ebef89406e3a0daf791201c44db94b497767 Mon Sep 17 00:00:00 2001 From: megumiimai Date: Wed, 2 Oct 2019 21:10:41 +0900 Subject: [PATCH 4/4] add comment for bot(software) detection --- bot.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bot.go b/bot.go index d869b47..0f0ac8f 100644 --- a/bot.go +++ b/bot.go @@ -97,6 +97,8 @@ func (p *UserAgent) checkBot(sections []section) { p.setSimple(sections[0].name, "", true) return } + + // Check whether the name has some software like "httpclient" or "curl" in its name. if botSoftwareRegex.Match([]byte(sections[0].name)) { p.setSimple(sections[0].name, sections[0].version, true) return