diff --git a/all_test.go b/all_test.go index 1913b8c..751da73 100644 --- a/all_test.go +++ b/all_test.go @@ -19,6 +19,21 @@ var uastrings = []struct { expectedOS *OSInfo }{ // Bots + { + title: "apache-httpclient", + ua: "Apache-HttpClient/4.2.3 (java 1.5)", + expected: "Browser:Apache-HttpClient Bot:true Mobile:false", + }, + { + title: "Java", + ua: "Java/1.7.0_65", + expected: "Browser:Java Bot:true Mobile:false", + }, + { + title: "Wget", + ua: "Wget/1.12 (linux-gnu)", + expected: "Browser:Wget Bot:true Mobile:false", + }, { title: "GoogleBot", ua: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", diff --git a/bot.go b/bot.go index 4bd70bc..84e2a69 100644 --- a/bot.go +++ b/bot.go @@ -80,6 +80,8 @@ func (p *UserAgent) fixOther(sections []section) { var botRegex = regexp.MustCompile("(?i)(bot|crawler|sp(i|y)der|search|worm|fetch|nutch)") +var botSoftwareRegex = regexp.MustCompile("(?i)(httpclient|curl|wget|python|java)") + // Check if we're dealing with a bot or with some weird browser. If that is the // case, the receiver will be modified accordingly. func (p *UserAgent) checkBot(sections []section) { @@ -89,7 +91,7 @@ func (p *UserAgent) checkBot(sections []section) { p.mozilla = "" // Check whether the name has some suspicious "bot" or "crawler" in his name. - if botRegex.Match([]byte(sections[0].name)) { + if botRegex.MatchString(sections[0].name) || botSoftwareRegex.MatchString(sections[0].name) { p.setSimple(sections[0].name, "", true) return }