Skip to content

Commit

Permalink
Merge branch 'declarative'
Browse files Browse the repository at this point in the history
  • Loading branch information
alaz committed Sep 18, 2019
2 parents d2f729a + ff2f23c commit 6fd6eea
Show file tree
Hide file tree
Showing 30 changed files with 612 additions and 240 deletions.
8 changes: 8 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
AllCops:
Include:
- '**/Gemfile'
- '**/Rakefile'
- 'lib/**/*.rb'
- 'test/**/*.rb'
Exclude:
- 'pkg/**'
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
# frozen_string_literal: true

source 'https://rubygems.org'
gemspec
8 changes: 5 additions & 3 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# frozen_string_literal: true

require 'rubygems'
require 'bundler'
require 'bump/tasks'
require "rake/testtask"
require 'rake/testtask'
Bundler::GemHelper.install_tasks

Bump.tag_by_default = true

Rake::TestTask.new do |t|
t.libs << "test"
t.test_files = FileList['test/*_test.rb']
t.libs << 'test'
t.test_files = FileList['test/**/*_test.rb']
t.warning = true
t.verbose = true
end
Expand Down
7 changes: 4 additions & 3 deletions legitbot.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ Gem::Specification.new do |spec|
spec.required_ruby_version = '>= 2.3.0'
spec.add_dependency "irrc", ">= 0.2.1"
spec.add_dependency "augmented_interval_tree", ">= 0.1.1"
spec.add_development_dependency "bump"
spec.add_development_dependency "rake"
spec.add_development_dependency "minitest"
spec.add_development_dependency "bump", '>= 0.8.0'
spec.add_development_dependency "rake", '>= 12.3.0'
spec.add_development_dependency "rubocop", '>= 0.74.0'
spec.add_development_dependency "minitest", '>= 5.1.0'

spec.files = `git ls-files`.split($/)
spec.rdoc_options = ["--charset=UTF-8"]
Expand Down
2 changes: 2 additions & 0 deletions lib/legitbot.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

require_relative 'legitbot/legitbot'
require_relative 'legitbot/botmatch'

Expand Down
21 changes: 13 additions & 8 deletions lib/legitbot/ahrefs.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
module Legitbot
# frozen_string_literal: true

module Legitbot # :nodoc:
# https://ahrefs.com/robot
class Ahrefs < BotMatch
Ranges = %w(54.36.148.0/24 54.36.149.0/24 54.36.150.0/24 195.154.122.0/24 195.154.123.0/24 195.154.126.0/24 195.154.127.0/24)

def valid?
ip = IPAddr.new @ip
Ranges.any? { |range| IPAddr.new(range).include? ip }
end
ip_ranges %w[
54.36.148.0/24
54.36.149.0/24
54.36.150.0/24
195.154.122.0/24
195.154.123.0/24
195.154.126.0/24
195.154.127.0/24
]
end

rule Legitbot::Ahrefs, %w(AhrefsBot)
rule Legitbot::Ahrefs, %w[AhrefsBot]
end
22 changes: 11 additions & 11 deletions lib/legitbot/apple.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
# frozen_string_literal: true

require 'ipaddr'

module Legitbot
module Legitbot # :nodoc:
# https://support.apple.com/en-us/HT204683

class Apple < BotMatch
Range = IPAddr.new('17.0.0.0/8')

def valid?
ip = IPAddr.new @ip
Range.include? ip
end
ip_ranges '17.0.0.0/8'
end

class Apple_as_Google < Apple
# https://support.apple.com/en-us/HT204683
# rubocop:disable Naming/ClassAndModuleCamelCase
class Apple_as_Google < BotMatch
ip_ranges '17.0.0.0/8'
end
# rubocop:enable Naming/ClassAndModuleCamelCase

rule Legitbot::Apple, %w(Applebot)
rule Legitbot::Apple_as_Google, %w(Googlebot)
rule Legitbot::Apple, %w[Applebot]
rule Legitbot::Apple_as_Google, %w[Googlebot]
end
12 changes: 5 additions & 7 deletions lib/legitbot/baidu.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
module Legitbot
# frozen_string_literal: true

module Legitbot # :nodoc:
# http://help.baidu.com/question?prod_en=master&class=498&id=1000973
class Baidu < BotMatch
ValidDomains = ["baidu.com.", "baidu.jp."]

def valid?
subdomain_of?(*Baidu::ValidDomains)
end
domains 'baidu.com.', 'baidu.jp.', reverse: false
end

rule Legitbot::Baidu, %w(Baiduspider)
rule Legitbot::Baidu, %w[Baiduspider]
end
12 changes: 5 additions & 7 deletions lib/legitbot/bing.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
module Legitbot
# frozen_string_literal: true

module Legitbot # :nodoc:
# https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/
class Bing < BotMatch
ValidDomains = ["search.msn.com."]

def valid?
subdomain_of?(*Bing::ValidDomains) && reverse_resolves?
end
domains 'search.msn.com.'
end

rule Legitbot::Bing, %w(Bingbot bingbot)
rule Legitbot::Bing, %w[Bingbot bingbot]
end
61 changes: 17 additions & 44 deletions lib/legitbot/botmatch.rb
Original file line number Diff line number Diff line change
@@ -1,67 +1,40 @@
require 'resolv'
require 'ipaddr'
# frozen_string_literal: true

require_relative 'config/resolver'
require_relative 'validators/domains'
require_relative 'validators/ip_ranges'

module Legitbot
##
# Represents a bot instance match. Typical methods are
# +valid?+, +fake?+ and +detected_as+
#
class BotMatch
def initialize(ip, resolver_config = nil)
@dns = Resolv::DNS.new(resolver_config)
@ip = ip
end

##
# Returns a Resolv::DNS::Name instance with
# the reverse name
def reverse_domain
@reverse_domain ||= @dns.getname(@ip)
rescue Resolv::ResolvError
@reverse_domain ||= nil
end

##
# Returns a String with the reverse name
def reverse_name
reverse_domain&.to_s
end

##
# Returns a String with IP created from the reverse name
def reversed_ip
return nil if reverse_name.nil?
include Legitbot::Validators::IpRanges
include Legitbot::Validators::Domains

@reverse_ip ||= @dns.getaddress(reverse_name)
@reverse_ip.to_s
end

def reverse_resolves?
@ip == reversed_ip
end

def subdomain_of?(*domains)
return false if reverse_name.nil?

domains.any? { |d|
reverse_domain.subdomain_of? Resolv::DNS::Name.create(d)
}
def initialize(ip)
@ip = ip
end

def detected_as
self.class.name.split('::').last.downcase.to_sym
end

def valid?
valid_ip? && valid_domain?
end

def fake?
!valid?
end

def self.valid?(ip, resolver_config = nil)
self.new(ip, resolver_config).valid?
def self.valid?(ip)
new(ip).valid?
end

def self.fake?(ip, resolver_config = nil)
self.new(ip, resolver_config).fake?
def self.fake?(ip)
new(ip).fake?
end
end
end
18 changes: 18 additions & 0 deletions lib/legitbot/config/resolver.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# frozen_string_literal: true

require 'resolv'

module Legitbot
  module Config
    # Mixin that stores a DNS resolver configuration on its host object and
    # hands out a memoized Resolv::DNS instance built from it.
    module Resolver # :nodoc:
      # Stores +options+ as the resolver configuration for this object.
      #
      # The memoized resolver is discarded as well, so the next call to
      # #resolver builds a Resolv::DNS from the new configuration instead of
      # silently reusing one created from the previous configuration.
      #
      # options:: value later handed to Resolv::DNS.new; +nil+ (the default)
      #           makes #resolver fall back to Legitbot.resolver_config.
      #
      # Returns +options+.
      def resolver_config(options = nil)
        @resolver = nil
        @resolver_config = options
      end

      # Returns the Resolv::DNS instance for this object, creating it on
      # first use and reusing it until #resolver_config is called again.
      #
      # When no configuration has been stored on this object, the value of
      # Legitbot.resolver_config is used (and remembered) instead.
      def resolver
        @resolver_config ||= Legitbot.resolver_config
        @resolver ||= Resolv::DNS.new @resolver_config
      end
    end
  end
end
22 changes: 15 additions & 7 deletions lib/legitbot/duckduckgo.rb
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
module Legitbot
# frozen_string_literal: true

module Legitbot # :nodoc:
# https://duckduckgo.com/duckduckbot
class DuckDuckGo < BotMatch
ValidIPs = %w(50.16.241.113 50.16.241.114 50.16.241.117 50.16.247.234 52.204.97.54 52.5.190.19 54.197.234.188 54.208.100.253 23.21.227.69)

def valid?
DuckDuckGo::ValidIPs.include? @ip
end
ip_ranges %w[
50.16.241.113
50.16.241.114
50.16.241.117
50.16.247.234
52.204.97.54
52.5.190.19
54.197.234.188
54.208.100.253
23.21.227.69
]
end

rule Legitbot::DuckDuckGo, %w(DuckDuckBot)
rule Legitbot::DuckDuckGo, %w[DuckDuckBot]
end
42 changes: 8 additions & 34 deletions lib/legitbot/facebook.rb
Original file line number Diff line number Diff line change
@@ -1,48 +1,22 @@
require 'ipaddr'
# frozen_string_literal: true

require 'irrc'
require 'interval_tree'

module Legitbot
module Legitbot # :nodoc:
# https://developers.facebook.com/docs/sharing/webmasters/crawler

class Facebook < BotMatch
AS = 'AS32934'

def valid?
ip = IPAddr.new(@ip)
Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0
end

@mutex = Mutex.new

def self.valid_ips
@mutex.synchronize { @ips ||= load_ips }
end

def self.reload!
@mutex.synchronize { @ips = load_ips }
end

def self.load_ips
whois.map do |(family, records)|
ranges = records.map do |cidr|
range = IPAddr.new(cidr).to_range
(range.begin.to_i..range.end.to_i)
end
[family, IntervalTree::Tree.new(ranges)]
end.to_h
end

def self.whois
ip_ranges do
client = Irrc::Client.new
client.query :radb, AS
results = client.perform

%i(ipv4 ipv6).map do |family|
[family, results[AS][family][AS]]
end.to_h
%i[ipv4 ipv6].map do |family|
results[AS][family][AS]
end.flatten
end
end

rule Legitbot::Facebook, %w(Facebot facebookexternalhit/1.1)
rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
end
13 changes: 5 additions & 8 deletions lib/legitbot/google.rb
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
module Legitbot
# frozen_string_literal: true

module Legitbot # :nodoc:
# https://support.google.com/webmasters/answer/1061943
# https://support.google.com/webmasters/answer/80553

class Google < BotMatch
ValidDomains = ["google.com.", "googlebot.com."]

def valid?
subdomain_of?(*Google::ValidDomains) && reverse_resolves?
end
domains 'google.com.', 'googlebot.com.'
end

rule Legitbot::Google, %w(Googlebot Mediapartners-Google AdsBot-Google)
rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
end
Loading

0 comments on commit 6fd6eea

Please sign in to comment.