diff --git a/all_test.go b/all_test.go index 1913b8c..cdf0227 100644 --- a/all_test.go +++ b/all_test.go @@ -19,6 +19,36 @@ var uastrings = []struct { expectedOS *OSInfo }{ // Bots + { + title: "Slackbot-LinkExpanding", // see: https://api.slack.com/robots + ua: "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)", + expected: "Browser:Slackbot-LinkExpanding Engine:1.0 Bot:true Mobile:false", + }, + { + title: "Slackbot", + ua: "Slackbot 1.0 (+https://api.slack.com/robots)", + expected: "Browser:Slackbot Engine:1.0 Bot:true Mobile:false", + }, + { + title: "Slack-ImgProxy", + ua: "Slack-ImgProxy 0.19 (+https://api.slack.com/robots)", + expected: "Browser:Slack-ImgProxy Engine:0.19 Bot:true Mobile:false", + }, + { + title: "apache-httpclient", + ua: "Apache-HttpClient/4.2.3 (java 1.5)", + expected: "Browser:Apache-HttpClient-4.2.3 Bot:true Mobile:false", + }, + { + title: "Java", + ua: "Java/1.7.0_65", + expected: "Browser:Java-1.7.0_65 Bot:true Mobile:false", + }, + { + title: "Wget", + ua: "Wget/1.12 (linux-gnu)", + expected: "Browser:Wget-1.12 Bot:true Mobile:false", + }, { title: "GoogleBot", ua: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", @@ -397,12 +427,12 @@ var uastrings = []struct { { title: "Python", ua: "Python-urllib/2.7", - expected: "Browser:Python-urllib-2.7 Bot:false Mobile:false", + expected: "Browser:Python-urllib-2.7 Bot:true Mobile:false", }, { title: "Curl", ua: "curl/7.28.1", - expected: "Browser:curl-7.28.1 Bot:false Mobile:false", + expected: "Browser:curl-7.28.1 Bot:true Mobile:false", }, // WebKit diff --git a/bot.go b/bot.go index 4bd70bc..0f0ac8f 100644 --- a/bot.go +++ b/bot.go @@ -80,6 +80,10 @@ func (p *UserAgent) fixOther(sections []section) { var botRegex = regexp.MustCompile("(?i)(bot|crawler|sp(i|y)der|search|worm|fetch|nutch)") +var botSoftwareRegex = regexp.MustCompile("(?i)(httpclient|curl|wget|python|java)") + +var botNameRegex = regexp.MustCompile("(?i)(slackbot|slack-imgproxy)") + // 
Check if we're dealing with a bot or with some weird browser. If that is the // case, the receiver will be modified accordingly. func (p *UserAgent) checkBot(sections []section) { @@ -94,6 +98,12 @@ func (p *UserAgent) checkBot(sections []section) { return } + // Check whether the name contains a known client software token such as "httpclient" or "curl" (see botSoftwareRegex). + if botSoftwareRegex.Match([]byte(sections[0].name)) { + p.setSimple(sections[0].name, sections[0].version, true) + return + } + // Tough luck, let's try to see if it has a website in his comment. if name := getFromSite(sections[0].comment); name != "" { // First of all, this is a bot. Moreover, since it doesn't have the @@ -117,6 +127,8 @@ func (p *UserAgent) checkBot(sections []section) { } p.setSimple(results[0], version, true) return + } else if botNameRegex.Match([]byte(v.name)) { + p.bot = true } }