-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
30 changed files
with
612 additions
and
240 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
AllCops: | ||
Include: | ||
- '**/Gemfile' | ||
- '**/Rakefile' | ||
- 'lib/**/*.rb' | ||
- 'test/**/*.rb' | ||
Exclude: | ||
- 'pkg/**' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
# frozen_string_literal: true | ||
|
||
source 'https://rubygems.org' | ||
gemspec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
# frozen_string_literal: true | ||
|
||
require_relative 'legitbot/legitbot' | ||
require_relative 'legitbot/botmatch' | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,18 @@ | ||
module Legitbot | ||
# frozen_string_literal: true | ||
|
||
module Legitbot # :nodoc: | ||
# https://ahrefs.com/robot | ||
class Ahrefs < BotMatch | ||
Ranges = %w(54.36.148.0/24 54.36.149.0/24 54.36.150.0/24 195.154.122.0/24 195.154.123.0/24 195.154.126.0/24 195.154.127.0/24) | ||
|
||
def valid? | ||
ip = IPAddr.new @ip | ||
Ranges.any? { |range| IPAddr.new(range).include? ip } | ||
end | ||
ip_ranges %w[ | ||
54.36.148.0/24 | ||
54.36.149.0/24 | ||
54.36.150.0/24 | ||
195.154.122.0/24 | ||
195.154.123.0/24 | ||
195.154.126.0/24 | ||
195.154.127.0/24 | ||
] | ||
end | ||
|
||
rule Legitbot::Ahrefs, %w(AhrefsBot) | ||
rule Legitbot::Ahrefs, %w[AhrefsBot] | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,20 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'ipaddr' | ||
|
||
module Legitbot | ||
module Legitbot # :nodoc: | ||
# https://support.apple.com/en-us/HT204683 | ||
|
||
class Apple < BotMatch | ||
Range = IPAddr.new('17.0.0.0/8') | ||
|
||
def valid? | ||
ip = IPAddr.new @ip | ||
Range.include? ip | ||
end | ||
ip_ranges '17.0.0.0/8' | ||
end | ||
|
||
class Apple_as_Google < Apple | ||
# https://support.apple.com/en-us/HT204683 | ||
# rubocop:disable Naming/ClassAndModuleCamelCase | ||
class Apple_as_Google < BotMatch | ||
ip_ranges '17.0.0.0/8' | ||
end | ||
# rubocop:enable Naming/ClassAndModuleCamelCase | ||
|
||
rule Legitbot::Apple, %w(Applebot) | ||
rule Legitbot::Apple_as_Google, %w(Googlebot) | ||
rule Legitbot::Apple, %w[Applebot] | ||
rule Legitbot::Apple_as_Google, %w[Googlebot] | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,10 @@ | ||
module Legitbot | ||
# frozen_string_literal: true | ||
|
||
module Legitbot # :nodoc: | ||
# http://help.baidu.com/question?prod_en=master&class=498&id=1000973 | ||
class Baidu < BotMatch | ||
ValidDomains = ["baidu.com.", "baidu.jp."] | ||
|
||
def valid? | ||
subdomain_of?(*Baidu::ValidDomains) | ||
end | ||
domains 'baidu.com.', 'baidu.jp.', reverse: false | ||
end | ||
|
||
rule Legitbot::Baidu, %w(Baiduspider) | ||
rule Legitbot::Baidu, %w[Baiduspider] | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,10 @@ | ||
module Legitbot | ||
# frozen_string_literal: true | ||
|
||
module Legitbot # :nodoc: | ||
# https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/ | ||
class Bing < BotMatch | ||
ValidDomains = ["search.msn.com."] | ||
|
||
def valid? | ||
subdomain_of?(*Bing::ValidDomains) && reverse_resolves? | ||
end | ||
domains 'search.msn.com.' | ||
end | ||
|
||
rule Legitbot::Bing, %w(Bingbot bingbot) | ||
rule Legitbot::Bing, %w[Bingbot bingbot] | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,67 +1,40 @@ | ||
require 'resolv' | ||
require 'ipaddr' | ||
# frozen_string_literal: true | ||
|
||
require_relative 'config/resolver' | ||
require_relative 'validators/domains' | ||
require_relative 'validators/ip_ranges' | ||
|
||
module Legitbot | ||
## | ||
# Represents a bot instance match. Typical methods are | ||
# +valid?+, +fake?+ and +detected_as+ | ||
# | ||
class BotMatch | ||
def initialize(ip, resolver_config = nil) | ||
@dns = Resolv::DNS.new(resolver_config) | ||
@ip = ip | ||
end | ||
|
||
## | ||
# Returns a Resolv::DNS::Name instance with | ||
# the reverse name | ||
def reverse_domain | ||
@reverse_domain ||= @dns.getname(@ip) | ||
rescue Resolv::ResolvError | ||
@reverse_domain ||= nil | ||
end | ||
|
||
## | ||
# Returns a String with the reverse name | ||
def reverse_name | ||
reverse_domain&.to_s | ||
end | ||
|
||
## | ||
# Returns a String with IP created from the reverse name | ||
def reversed_ip | ||
return nil if reverse_name.nil? | ||
include Legitbot::Validators::IpRanges | ||
include Legitbot::Validators::Domains | ||
|
||
@reverse_ip ||= @dns.getaddress(reverse_name) | ||
@reverse_ip.to_s | ||
end | ||
|
||
def reverse_resolves? | ||
@ip == reversed_ip | ||
end | ||
|
||
def subdomain_of?(*domains) | ||
return false if reverse_name.nil? | ||
|
||
domains.any? { |d| | ||
reverse_domain.subdomain_of? Resolv::DNS::Name.create(d) | ||
} | ||
def initialize(ip) | ||
@ip = ip | ||
end | ||
|
||
def detected_as | ||
self.class.name.split('::').last.downcase.to_sym | ||
end | ||
|
||
def valid? | ||
valid_ip? && valid_domain? | ||
end | ||
|
||
def fake? | ||
!valid? | ||
end | ||
|
||
def self.valid?(ip, resolver_config = nil) | ||
self.new(ip, resolver_config).valid? | ||
def self.valid?(ip) | ||
new(ip).valid? | ||
end | ||
|
||
def self.fake?(ip, resolver_config = nil) | ||
self.new(ip, resolver_config).fake? | ||
def self.fake?(ip) | ||
new(ip).fake? | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# frozen_string_literal: true | ||
|
||
require 'resolv' | ||
|
||
module Legitbot
  module Config
    # Mixin that gives its receiver a configurable, memoized DNS resolver.
    # State is kept in the receiver's own instance variables
    # (+@resolver_config+ and +@resolver+).
    # NOTE(review): presumably mixed in with +extend+ so the state is
    # per-class; confirm against the including code.
    module Resolver # :nodoc:
      ##
      # Stores the options that #resolver later hands to Resolv::DNS.new.
      #
      # Any resolver built earlier is dropped, so the next call to
      # #resolver honours the new options instead of returning an
      # instance created from the previous configuration.
      #
      # options:: value accepted by Resolv::DNS.new; +nil+ (the default)
      #           makes the next #resolver call fall back to
      #           Legitbot.resolver_config
      #
      # Returns +options+.
      def resolver_config(options = nil)
        @resolver = nil
        @resolver_config = options
      end

      ##
      # Returns the memoized Resolv::DNS instance, building it on first use.
      #
      # When no configuration has been stored on the receiver,
      # Legitbot.resolver_config is used (and remembered) instead.
      def resolver
        @resolver_config ||= Legitbot.resolver_config
        @resolver ||= Resolv::DNS.new @resolver_config
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,20 @@ | ||
module Legitbot | ||
# frozen_string_literal: true | ||
|
||
module Legitbot # :nodoc: | ||
# https://duckduckgo.com/duckduckbot | ||
class DuckDuckGo < BotMatch | ||
ValidIPs = %w(50.16.241.113 50.16.241.114 50.16.241.117 50.16.247.234 52.204.97.54 52.5.190.19 54.197.234.188 54.208.100.253 23.21.227.69) | ||
|
||
def valid? | ||
DuckDuckGo::ValidIPs.include? @ip | ||
end | ||
ip_ranges %w[ | ||
50.16.241.113 | ||
50.16.241.114 | ||
50.16.241.117 | ||
50.16.247.234 | ||
52.204.97.54 | ||
52.5.190.19 | ||
54.197.234.188 | ||
54.208.100.253 | ||
23.21.227.69 | ||
] | ||
end | ||
|
||
rule Legitbot::DuckDuckGo, %w(DuckDuckBot) | ||
rule Legitbot::DuckDuckGo, %w[DuckDuckBot] | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,48 +1,22 @@ | ||
require 'ipaddr' | ||
# frozen_string_literal: true | ||
|
||
require 'irrc' | ||
require 'interval_tree' | ||
|
||
module Legitbot | ||
module Legitbot # :nodoc: | ||
# https://developers.facebook.com/docs/sharing/webmasters/crawler | ||
|
||
class Facebook < BotMatch | ||
AS = 'AS32934' | ||
|
||
def valid? | ||
ip = IPAddr.new(@ip) | ||
Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0 | ||
end | ||
|
||
@mutex = Mutex.new | ||
|
||
def self.valid_ips | ||
@mutex.synchronize { @ips ||= load_ips } | ||
end | ||
|
||
def self.reload! | ||
@mutex.synchronize { @ips = load_ips } | ||
end | ||
|
||
def self.load_ips | ||
whois.map do |(family, records)| | ||
ranges = records.map do |cidr| | ||
range = IPAddr.new(cidr).to_range | ||
(range.begin.to_i..range.end.to_i) | ||
end | ||
[family, IntervalTree::Tree.new(ranges)] | ||
end.to_h | ||
end | ||
|
||
def self.whois | ||
ip_ranges do | ||
client = Irrc::Client.new | ||
client.query :radb, AS | ||
results = client.perform | ||
|
||
%i(ipv4 ipv6).map do |family| | ||
[family, results[AS][family][AS]] | ||
end.to_h | ||
%i[ipv4 ipv6].map do |family| | ||
results[AS][family][AS] | ||
end.flatten | ||
end | ||
end | ||
|
||
rule Legitbot::Facebook, %w(Facebot facebookexternalhit/1.1) | ||
rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1] | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,11 @@ | ||
module Legitbot | ||
# frozen_string_literal: true | ||
|
||
module Legitbot # :nodoc: | ||
# https://support.google.com/webmasters/answer/1061943 | ||
# https://support.google.com/webmasters/answer/80553 | ||
|
||
class Google < BotMatch | ||
ValidDomains = ["google.com.", "googlebot.com."] | ||
|
||
def valid? | ||
subdomain_of?(*Google::ValidDomains) && reverse_resolves? | ||
end | ||
domains 'google.com.', 'googlebot.com.' | ||
end | ||
|
||
rule Legitbot::Google, %w(Googlebot Mediapartners-Google AdsBot-Google) | ||
rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google] | ||
end |
Oops, something went wrong.