Skip to content

Commit

Permalink
[mssola#18] fix: treat software clients like curl, Java and Python as bots
Browse files Browse the repository at this point in the history
  • Loading branch information
megumiimai committed Sep 30, 2019
1 parent 98a7664 commit 4570948
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
15 changes: 15 additions & 0 deletions all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ var uastrings = []struct {
expectedOS *OSInfo
}{
// Bots
{
title: "apache-httpclient",
ua: "Apache-HttpClient/4.2.3 (java 1.5)",
expected: "Browser:Apache-HttpClient Bot:true Mobile:false",
},
{
title: "Java",
ua: "Java/1.7.0_65",
expected: "Browser:Java Bot:true Mobile:false",
},
{
title: "Wget",
ua: "Wget/1.12 (linux-gnu)",
expected: "Browser:Wget Bot:true Mobile:false",
},
{
title: "GoogleBot",
ua: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
Expand Down
4 changes: 3 additions & 1 deletion bot.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ func (p *UserAgent) fixOther(sections []section) {

// botRegex matches, case-insensitively, any name that contains one of the
// tokens bot, crawler, spider/spyder, search, worm, fetch or nutch. checkBot
// uses it on the name of the first section to decide whether to flag a bot.
var botRegex = regexp.MustCompile(`(?i)(bot|crawler|sp(i|y)der|search|worm|fetch|nutch)`)

// botSoftwareRegex matches, case-insensitively, any name that contains one of
// the tokens httpclient, curl, wget, python or java. checkBot treats a first
// section whose name matches as a bot.
var botSoftwareRegex = regexp.MustCompile(`(?i)(httpclient|curl|wget|python|java)`)

// Check if we're dealing with a bot or with some weird browser. If that is the
// case, the receiver will be modified accordingly.
func (p *UserAgent) checkBot(sections []section) {
Expand All @@ -89,7 +91,7 @@ func (p *UserAgent) checkBot(sections []section) {
p.mozilla = ""

// Check whether the name contains a suspicious "bot" or "crawler".
if botRegex.Match([]byte(sections[0].name)) {
if botRegex.Match([]byte(sections[0].name)) || botSoftwareRegex.Match([]byte(sections[0].name)) {
p.setSimple(sections[0].name, "", true)
return
}
Expand Down

0 comments on commit 4570948

Please sign in to comment.