Skip to content

Commit

Permalink
Merge pull request #21 from gplumb/dev
Browse files (browse the repository at this point in the history)
Bot additions and removal of false positives
  • Loading branch information
gplumb authored Apr 30, 2019
2 parents 0c1a5be + b121120 commit 363fe15
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<TargetFramework>netcoreapp2.0</TargetFramework>

<IsPackable>false</IsPackable>
<ReleaseVersion>0.1.9</ReleaseVersion>
<ReleaseVersion>0.2.0</ReleaseVersion>
</PropertyGroup>

<ItemGroup>
Expand Down
35 changes: 34 additions & 1 deletion NetCrawlerDetect/NetCrawlerDetect.Tests/crawlers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3410,4 +3410,37 @@ Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0 ; BacklinkHttpStatus)
SilverReader/1.0; http://silverreader.com
ExtractorPro
WebsiteExtractor
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1 google_partner_monitoring FWSzVTDDBz14547302713138T
Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1 google_partner_monitoring FWSzVTDDBz14547302713138T
Outlook-iOS/696.1188109.prod.iphone (2.102.0)
Polymail/1.47 (ctrlla.Polymail; build:64; OS X 10.12.6) Alamofire/4.5.1
nyawc/1.8.1 CPython/3.6.5 Linux/3.10.0-862.9.1.el7.x86_64
commonscan.org cralwer v1.01
AdminLabs
WebGazer/1.0 (+https://www.webgazer.io)
HappyApps-WebCheck/1.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6; +feeder.co) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36
Mozilla/5.0 (compatible; TestURI; +http://testuri.org/)
WebSniffer/1.0 (+http://websniffer.cc/)
Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1 SeoSiteCheckup (https://seositecheckup.co
Adstxtaggregator.com/1.0
Discourse Forum Onebox v2.0.0.beta10
faviconarchive.org
GreatNews/1.0
MemGator:1.0-rc2 <@WebSciDL>
URLTester/1 CFNetwork/974.1 Darwin/18.0.0 (x86_64)
URLTester/1 CFNetwork/975.0.3 Darwin/18.2.0 (x86_64)
Urlcheckr/2.0
SimpleChecker
reqwest/0.9.9
ReactorNetty/0.7.10.RELEASE
Mozilla/5.0 (compatible; woorankreview/2.0; +https://www.woorank.com/)
https://www.nominet.uk/privacy-notice
<a href='http://www.unchaos.com/'> UnChaos </a> From Chaos To Order Hybrid Web Search Engine.(vadim_gonchar@unchaos.com)
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/69.0.3464.0 Safari/537.36 Chrome-Lighthouse
Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36(KHTML, like Gecko) Chrome/61.0.3116.0 Mobile Safari/537.36 Chrome-Lighthouse
Trackuity (+http://trackuity.com)
deeris/1.0 (+http://www.deeris.com)
Mediametric
TextRazor Downloader (https://www.textrazor.com)
ddline.cn rank history
Web spyder
6 changes: 2 additions & 4 deletions NetCrawlerDetect/NetCrawlerDetect.Tests/devices.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30260,7 +30260,6 @@ Mozilla/5.0 (Linux; Android 4.4.4; GT-I9300 Build/KTU84Q) AppleWebKit/537.36 (KH
Mozilla/5.0 (Linux; Android 4.4.4; G7-L01 Build/HuaweiG7-L01) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.89 Mobile Safari/537.36
Mozilla/5.0 (iPhone; CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Mobile/14C92 (5298492416)
Mozilla/5.0 (Linux; Android 6.0.1; Nexus 6 Build/MMB29V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.91 Mobile Safari/537.36
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36 Mediametric
Mozilla/5.0 (Linux; U; Android 5.1; en-US; SLIDE2 Build/LMY47I) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 UCBrowser/10.9.0.731 U3/0.8.0 Mobile Safari/534.30
Mozilla/5.0 (Linux; U; Android 4.0.3; en-us; Sony Tablet S Build/TISU0143) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30
Opera/9.80 (SpreadTrum; Opera Mini/4.4.31492/37.8678; U; en) Presto/2.12.423 Version/12.16
Expand Down Expand Up @@ -122332,7 +122331,6 @@ Mozilla/5.0 (iPhone; CPU iPhone OS 11_1_2 like Mac OS X) AppleWebKit/604.3.5 (KH
Mozilla/5.0 (iPhone; CPU iPhone OS 11_1 like Mac OS X) AppleWebKit/604.3.5 (KHTML, like Gecko) Mobile/15B93 (4347460352)
Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Mobile/15A402 (4310753168)
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_2 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C202 (4695841712)
Polymail/1.47 (ctrlla.Polymail; build:64; OS X 10.12.6) Alamofire/4.5.1
Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Mobile/14G60 (4300266224)
Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_3 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Mobile/15A432 (4761665200)
Mozilla/5.0 (Linux; Android 6.0.1; SM-G900V Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.98 Mobile Safari/537.36
Expand Down Expand Up @@ -156257,7 +156255,6 @@ Mozilla/5.0 (X11; CrOS x86_64 10032.86.0) AppleWebKit/537.36 (KHTML, like Gecko)
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Mobile/15D100 [FBAN/FBIOS;FBAV/170.1.0.80.91;FBBV/106613464;FBDV/iPhone10,3;FBMD/iPhone;FBSN/iOS;FBSV/11.2.6;FBSS/3;FBCR/giffgaff;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/107080238]
Mozilla/5.0 (Linux; Android 7.1.2; Swift 2 X Build/N2G47Z) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.109 Mobile Safari/537.36
Mozilla/5.0 (Linux; Android 6.0; XT1700 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.109 Mobile Safari/537.36
Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36(KHTML, like Gecko) Chrome/61.0.3116.0 Mobile Safari/537.36 Chrome-Lighthouse
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3278.0 Safari/537.36
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_1 like Mac OS X) AppleWebKit/604.4.7 (KHTML, like Gecko) Mobile/15C153 [FBAN/FBIOS;FBAV/164.0.0.56.96;FBBV/98434650;FBDV/iPhone9,3;FBMD/iPhone;FBSN/iOS;FBSV/11.2.1;FBSS/2;FBCR/O2;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/100018292]
Mozilla/5.0 (X11; Linux i686) AppleWebKit/538.1 (KHTML, like Gecko) QupZilla/1.8.6 Safari/538.1
Expand Down Expand Up @@ -161339,7 +161336,6 @@ Mozilla/5.0 (Linux; Android 5.0.2; A0001 Build/LRX22G) AppleWebKit/537.36 (KHTML
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Mobile/15D100 [FBAN/FBIOS;FBAV/162.0.0.47.94;FBBV/95649710;FBDV/iPhone8,4;FBMD/iPhone;FBSN/iOS;FBSV/11.2.6;FBSS/2;FBCR/Virgin;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/97201607]
Mozilla/5.0 (iPhone; CPU iPhone OS 11_2_6 like Mac OS X) AppleWebKit/604.5.6 (KHTML, like Gecko) Mobile/15D100 [FBAN/FBIOS;FBAV/164.0.0.56.96;FBBV/98434650;FBDV/iPhone8,4;FBMD/iPhone;FBSN/iOS;FBSV/11.2.6;FBSS/2;FBCR/giffgaff;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/100018292]
Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.9.1b3) Gecko/20090305 Firefox/3.1b3
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/61.0.3116.0 Safari/537.36 Chrome-Lighthouse
Mozilla/5.0 (Linux; Android 7.1.1; ONEPLUS A3003 Build/NMF26F; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/65.0.3325.109 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/164.0.0.44.95;]
Mozilla/5.0 (Linux; Android 5.1.1; SM-G361F Build/LMY48B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Mobile Safari/537.36
Mozilla/5.0 (Linux; Android 8.0.0; F8331 Build/41.3.A.0.401; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/64.0.3282.137 Mobile Safari/537.36
Expand Down Expand Up @@ -165981,3 +165977,5 @@ Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/604.4.7 (KHTML, like
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36
Mozilla/5.0 (Linux; Android 8.1.0; Pixel Build/OPM4.171019.016.B1; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/66.0.3359.158 Mobile Safari/537.36 [FB_IAB/FB4A;FBAV/171.0.0.49.92;]
Mozilla/5.0 (iPhone; CPU iPhone OS 11_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E302 [FBAN/FBIOS;FBAV/171.0.0.49.95;FBBV/107251038;FBDV/iPhone7,2;FBMD/iPhone;FBSN/iOS;FBSV/11.3.1;FBSS/2;FBCR/EE;FBID/phone;FBLC/en_GB;FBOP/5;FBRV/108659124]
Mozilla/5.0 (Linux; Android 5.1; CRONO 54 Build/LMY47I) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.111 Mobile Safari/537.36
Mozilla/5.0 (Linux; Android 4.2.1; MAJESTIC Zeus21 Build/JOP40D) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.72 Mobile Safari/537.36
5 changes: 4 additions & 1 deletion NetCrawlerDetect/NetCrawlerDetect.sln
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ Global
EndGlobalSection
GlobalSection(MonoDevelopProperties) = preSolution
description = A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect).
version = 0.1.9
version = 0.2.0
Policies = $0
$0.VersionControlPolicy = $1
$0.DotNetNamingPolicy = $2
$2.DirectoryNamespaceAssociation = PrefixedHierarchical
$0.StandardHeader = $3
EndGlobalSection
EndGlobal
37 changes: 33 additions & 4 deletions NetCrawlerDetect/NetCrawlerDetect/Fixtures/Crawlers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public Crawlers()
// Collection of user-agent regex snippets
_data = new List<string>()
{
@"outbrain",
@".*Java.*outbrain",
@" YLT",
@"^b0t$",
@"^bluefish ",
Expand Down Expand Up @@ -64,8 +64,10 @@ public Crawlers()
@"adbeat",
@"AddThis",
@"ADmantX",
@"AdminLabs",
@"adressendeutschland",
@"adscanner",
@"Adstxtaggregator",
@"agentslug",
@"AHC",
@"aihit",
Expand Down Expand Up @@ -214,6 +216,7 @@ public Crawlers()
@"colly -",
@"CommaFeed",
@"Commons-HttpClient",
@"commonscan",
@"contactbigdatafr",
@"contentkingapp",
@"convera",
Expand Down Expand Up @@ -242,6 +245,8 @@ public Crawlers()
@"dataprovider",
@"DataXu",
@"Daum(oa)?[ \/][0-9]",
@"ddline",
@"deeris",
@"Demon",
@"DeuSu",
@"developers\.google\.com\/\+\/web\/snippet\/",
Expand All @@ -250,6 +255,7 @@ public Crawlers()
@"Digincore",
@"DigitalPebble",
@"Dirbuster",
@"Discourse Forum Onebox",
@"Disqus\/",
@"Dispatch\/",
@"DittoSpyder",
Expand Down Expand Up @@ -314,6 +320,7 @@ public Crawlers()
@"Faveeo",
@"Favicon downloader",
@"faviconkit",
@"faviconarchive",
@"FavOrg",
@"Feed Wrangler",
@"Feedable\/",
Expand All @@ -322,6 +329,7 @@ public Crawlers()
@"FeedBucket",
@"FeedBunch\/",
@"FeedBurner",
@"feeder",
@"Feedly",
@"FeedshowOnline",
@"Feedspot",
Expand All @@ -346,7 +354,7 @@ public Crawlers()
@"forensiq",
@"FoundSeoTool",

// 'Francis [Bot]'
//'Francis [Bot]'
@"http:\/\/www.neomo.de\/",
@"free thumbnails",
@"Freeuploader",
Expand Down Expand Up @@ -416,6 +424,7 @@ public Crawlers()
@"Grafula",
@"Grammarly",
@"GrapeFX",
@"GreatNews",
@"Gregarius",
@"GRequests",
@"grokkit",
Expand All @@ -430,6 +439,7 @@ public Crawlers()
@"Haansoft",
@"hackney\/",
@"Hadi Agent",
@"HappyApps-WebCheck",
@"Hatena",
@"Havij",
@"HeadlessChrome",
Expand Down Expand Up @@ -476,7 +486,6 @@ public Crawlers()
@"huaweisymantec",
@"HubSpot ",
@"Humanlinks",
@"HyperZbozi\.cz Feeder",
@"i2kconnect\/",
@"Iblog",
@"ichiro",
Expand Down Expand Up @@ -577,6 +586,7 @@ public Crawlers()
@"Licorne",
@"Liferea\/",
@"Lightspeedsystems",
@"Lighthouse",
@"Likse",
@"Link Valet",
@"link_thumbnailer",
Expand Down Expand Up @@ -620,11 +630,13 @@ public Crawlers()
@"Mass Downloader",
@"masscan\/",
@"Mata Hari",
@"Mediametric",
@"Mediapartners-Google",
@"mediawords",
@"MegaIndex\.ru",
@"MeltwaterNews",
@"Melvil Rawi",
@"MemGator",
@"Metaspinner",
@"MetaURI",
@"MFC_Tear_Sample",
Expand Down Expand Up @@ -708,13 +720,15 @@ public Crawlers()
@"Nodemeter",
@"NodePing",
@"nominet\.org\.uk",
@"nominet\.uk",
@"Norton-Safeweb",
@"Notifixious",
@"notifyninja",
@"nuhk",
@"nutch",
@"Nuzzel",
@"nWormFeedFinder",
@"nyawc\/",
@"Nymesis",
@"NYU",
@"Ocelli\/",
Expand All @@ -737,6 +751,7 @@ public Crawlers()
@"OrgProbe\/",
@"orion-semantics",
@"Outlook-Express",
@"Outlook-iOS",
@"ow\.ly",
@"Owler",
@"ownCloud News",
Expand Down Expand Up @@ -789,6 +804,7 @@ public Crawlers()
@"PocketParser",
@"Pockey",
@"POE-Component-Client-HTTP",
@"Polymail\/",
@"Pompos",
@"Porkbun",
@"Port Monitor",
Expand Down Expand Up @@ -830,6 +846,7 @@ public Crawlers()
@"RankFlex",
@"RankSonicSiteAuditor",
@"Re-re Studio",
@"ReactorNetty",
@"Readability",
@"RealDownload",
@"RealPlayer%20Downloader",
Expand All @@ -841,6 +858,7 @@ public Crawlers()
@"ReGet",
@"RepoMonkey",
@"request\.js",
@"reqwest\/",
@"ResponseCodeTest",
@"RestSharp",
@"Riddler",
Expand Down Expand Up @@ -894,6 +912,7 @@ public Crawlers()
@"SEOprofiler",
@"SEOsearch",
@"seoscanners",
@"seositecheckup",
@"SEOstats",
@"servernfo",
@"sexsearcher",
Expand Down Expand Up @@ -1001,7 +1020,9 @@ public Crawlers()
@"teoma",
@"terrainformatica",
@"Test Certificate Info",
@"testuri",
@"Tetrahedron",
@"TextRazor Downloader",
@"The Drop Reaper",
@"The Expert HTML Source Viewer",
@"The Knowledge AI",
Expand All @@ -1020,6 +1041,7 @@ public Crawlers()
@"touche\.com",
@"Traackr\.com",
@"tracemyfile",
@"Trackuity",
@"TrapitAgent",
@"Trendiction",
@"Trendsmap",
Expand All @@ -1041,6 +1063,7 @@ public Crawlers()
@"ubermetrics-technologies",
@"uclassify",
@"UdmSearch",
@"unchaos",
@"unirest-java",
@"UniversalFeedParser",
@"Unshorten\.It",
Expand All @@ -1050,10 +1073,12 @@ public Crawlers()
@"updown\.io daemon",
@"Upflow",
@"Uptimia",
@"Urlcheckr",
@"URL Verifier",
@"URLitor",
@"urlresolver",
@"Urlstat",
@"URLTester",
@"UrlTrends Ranking Updater",
@"URLy Warning",
@"URLy\.Warning",
Expand Down Expand Up @@ -1096,6 +1121,7 @@ public Crawlers()
@"Web Fuck",
@"Web Pix",
@"Web Sauger",
@"Web spyder",
@"Web Sucker",
@"Webalta",
@"Webauskunft",
Expand All @@ -1111,6 +1137,7 @@ public Crawlers()
@"WebEnhancer",
@"WebFetch",
@"WebFuck",
@"WebGazer",
@"WebGo IS",
@"WebImageCollector",
@"WebImages",
Expand All @@ -1129,6 +1156,7 @@ public Crawlers()
@"websitepulse agent",
@"WebsiteQuester",
@"Websnapr",
@"WebSniffer",
@"Webster",
@"WebStripper",
@"WebSucker",
Expand All @@ -1154,6 +1182,7 @@ public Crawlers()
@"wkhtmlto",
@"wmtips",
@"Woko",
@"woorankreview",
@"Word\/",
@"WordPress\/",
@"WordupinfoSearch",
Expand Down Expand Up @@ -1203,7 +1232,7 @@ public Crawlers()
@"Zend_Http_Client",
@"Zend\\\\Http\\\\Client",
@"Zermelo",
@"Zeus",
@"Zeus ",
@"zgrab",
@"ZnajdzFoto",
@"Zombie\.js",
Expand Down
1 change: 1 addition & 0 deletions NetCrawlerDetect/NetCrawlerDetect/Fixtures/Exclusions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public Exclusions()
@" \.NET[\d\.]*",
@"cubot",
@"; M bot",
@"; CRONO",
@"; B bot",
@"; IDbot",
@"; ID bot",
Expand Down
6 changes: 3 additions & 3 deletions NetCrawlerDetect/NetCrawlerDetect/NetCrawlerDetect.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<ReleaseVersion>0.1.9</ReleaseVersion>
<PackageVersion>0.1.9</PackageVersion>
<ReleaseVersion>0.2.0</ReleaseVersion>
<PackageVersion>0.2.0</PackageVersion>
<Authors>Graham "Gee" Plumb</Authors>
<PackageLicenseUrl>https://github.com/gplumb/NetCrawlerDetect/blob/master/LICENSE</PackageLicenseUrl>
<Owners>Graham "Gee" Plumb</Owners>
Expand All @@ -12,7 +12,7 @@
<Description>A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect).</Description>
<PackOnBuild>true</PackOnBuild>
<PackageId>NetCrawlerDetect</PackageId>
<PackageReleaseNotes>Performance and general clean up (https://github.com/JayBizzle/Crawler-Detect/pull/312)</PackageReleaseNotes>
<PackageReleaseNotes>Bot additions and removal of false positives (up to https://github.com/JayBizzle/Crawler-Detect/pull/325)</PackageReleaseNotes>
<Summary>A .net standard port of JayBizzle's CrawlerDetect project (https://github.com/JayBizzle/Crawler-Detect).</Summary>
</PropertyGroup>

Expand Down

0 comments on commit 363fe15

Please sign in to comment.