diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 0000000..7b498a9 --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,8 @@ +AllCops: + Include: + - '**/Gemfile' + - '**/Rakefile' + - 'lib/**/*.rb' + - 'test/**/*.rb' + Exclude: + - 'pkg/**' diff --git a/Gemfile b/Gemfile index 851fabc..5f10ba8 100644 --- a/Gemfile +++ b/Gemfile @@ -1,2 +1,4 @@ +# frozen_string_literal: true + source 'https://rubygems.org' gemspec diff --git a/Rakefile b/Rakefile index 056f078..63c42c4 100644 --- a/Rakefile +++ b/Rakefile @@ -1,14 +1,16 @@ +# frozen_string_literal: true + require 'rubygems' require 'bundler' require 'bump/tasks' -require "rake/testtask" +require 'rake/testtask' Bundler::GemHelper.install_tasks Bump.tag_by_default = true Rake::TestTask.new do |t| - t.libs << "test" - t.test_files = FileList['test/*_test.rb'] + t.libs << 'test' + t.test_files = FileList['test/**/*_test.rb'] t.warning = true t.verbose = true end diff --git a/legitbot.gemspec b/legitbot.gemspec index a10d0e2..c9ecfea 100644 --- a/legitbot.gemspec +++ b/legitbot.gemspec @@ -17,9 +17,10 @@ Gem::Specification.new do |spec| spec.required_ruby_version = '>= 2.3.0' spec.add_dependency "irrc", ">= 0.2.1" spec.add_dependency "augmented_interval_tree", ">= 0.1.1" - spec.add_development_dependency "bump" - spec.add_development_dependency "rake" - spec.add_development_dependency "minitest" + spec.add_development_dependency "bump", '>= 0.8.0' + spec.add_development_dependency "rake", '>= 12.3.0' + spec.add_development_dependency "rubocop", '>= 0.74.0' + spec.add_development_dependency "minitest", '>= 5.1.0' spec.files = `git ls-files`.split($/) spec.rdoc_options = ["--charset=UTF-8"] diff --git a/lib/legitbot.rb b/lib/legitbot.rb index 7cea037..9a0a9ad 100644 --- a/lib/legitbot.rb +++ b/lib/legitbot.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require_relative 'legitbot/legitbot' require_relative 'legitbot/botmatch' diff --git a/lib/legitbot/ahrefs.rb b/lib/legitbot/ahrefs.rb index a6884a8..e083d9f 
100644 --- a/lib/legitbot/ahrefs.rb +++ b/lib/legitbot/ahrefs.rb @@ -1,13 +1,18 @@ -module Legitbot +# frozen_string_literal: true + +module Legitbot # :nodoc: # https://ahrefs.com/robot class Ahrefs < BotMatch - Ranges = %w(54.36.148.0/24 54.36.149.0/24 54.36.150.0/24 195.154.122.0/24 195.154.123.0/24 195.154.126.0/24 195.154.127.0/24) - - def valid? - ip = IPAddr.new @ip - Ranges.any? { |range| IPAddr.new(range).include? ip } - end + ip_ranges %w[ + 54.36.148.0/24 + 54.36.149.0/24 + 54.36.150.0/24 + 195.154.122.0/24 + 195.154.123.0/24 + 195.154.126.0/24 + 195.154.127.0/24 + ] end - rule Legitbot::Ahrefs, %w(AhrefsBot) + rule Legitbot::Ahrefs, %w[AhrefsBot] end diff --git a/lib/legitbot/apple.rb b/lib/legitbot/apple.rb index 6c55723..a6a0627 100644 --- a/lib/legitbot/apple.rb +++ b/lib/legitbot/apple.rb @@ -1,20 +1,20 @@ +# frozen_string_literal: true + require 'ipaddr' -module Legitbot +module Legitbot # :nodoc: # https://support.apple.com/en-us/HT204683 - class Apple < BotMatch - Range = IPAddr.new('17.0.0.0/8') - - def valid? - ip = IPAddr.new @ip - Range.include? ip - end + ip_ranges '17.0.0.0/8' end - class Apple_as_Google < Apple + # https://support.apple.com/en-us/HT204683 + # rubocop:disable Naming/ClassAndModuleCamelCase + class Apple_as_Google < BotMatch + ip_ranges '17.0.0.0/8' end + # rubocop:enable Naming/ClassAndModuleCamelCase - rule Legitbot::Apple, %w(Applebot) - rule Legitbot::Apple_as_Google, %w(Googlebot) + rule Legitbot::Apple, %w[Applebot] + rule Legitbot::Apple_as_Google, %w[Googlebot] end diff --git a/lib/legitbot/baidu.rb b/lib/legitbot/baidu.rb index d16eec1..91ddc47 100644 --- a/lib/legitbot/baidu.rb +++ b/lib/legitbot/baidu.rb @@ -1,12 +1,10 @@ -module Legitbot +# frozen_string_literal: true + +module Legitbot # :nodoc: # http://help.baidu.com/question?prod_en=master&class=498&id=1000973 class Baidu < BotMatch - ValidDomains = ["baidu.com.", "baidu.jp."] - - def valid? 
- subdomain_of?(*Baidu::ValidDomains) - end + domains 'baidu.com.', 'baidu.jp.', reverse: false end - rule Legitbot::Baidu, %w(Baiduspider) + rule Legitbot::Baidu, %w[Baiduspider] end diff --git a/lib/legitbot/bing.rb b/lib/legitbot/bing.rb index 753d2e6..275491a 100644 --- a/lib/legitbot/bing.rb +++ b/lib/legitbot/bing.rb @@ -1,12 +1,10 @@ -module Legitbot +# frozen_string_literal: true + +module Legitbot # :nodoc: # https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/ class Bing < BotMatch - ValidDomains = ["search.msn.com."] - - def valid? - subdomain_of?(*Bing::ValidDomains) && reverse_resolves? - end + domains 'search.msn.com.' end - rule Legitbot::Bing, %w(Bingbot bingbot) + rule Legitbot::Bing, %w[Bingbot bingbot] end diff --git a/lib/legitbot/botmatch.rb b/lib/legitbot/botmatch.rb index 3e44ba2..2a17bdc 100644 --- a/lib/legitbot/botmatch.rb +++ b/lib/legitbot/botmatch.rb @@ -1,5 +1,8 @@ -require 'resolv' -require 'ipaddr' +# frozen_string_literal: true + +require_relative 'config/resolver' +require_relative 'validators/domains' +require_relative 'validators/ip_ranges' module Legitbot ## @@ -7,61 +10,31 @@ module Legitbot # +valid?+, +fake?+ and +detected_as+ # class BotMatch - def initialize(ip, resolver_config = nil) - @dns = Resolv::DNS.new(resolver_config) - @ip = ip - end - - ## - # Returns a Resolv::DNS::Name instance with - # the reverse name - def reverse_domain - @reverse_domain ||= @dns.getname(@ip) - rescue Resolv::ResolvError - @reverse_domain ||= nil - end - - ## - # Returns a String with the reverse name - def reverse_name - reverse_domain&.to_s - end - - ## - # Returns a String with IP created from the reverse name - def reversed_ip - return nil if reverse_name.nil? + include Legitbot::Validators::IpRanges + include Legitbot::Validators::Domains - @reverse_ip ||= @dns.getaddress(reverse_name) - @reverse_ip.to_s - end - - def reverse_resolves? 
- @ip == reversed_ip - end - - def subdomain_of?(*domains) - return false if reverse_name.nil? - - domains.any? { |d| - reverse_domain.subdomain_of? Resolv::DNS::Name.create(d) - } + def initialize(ip) + @ip = ip end def detected_as self.class.name.split('::').last.downcase.to_sym end + def valid? + valid_ip? && valid_domain? + end + def fake? !valid? end - def self.valid?(ip, resolver_config = nil) - self.new(ip, resolver_config).valid? + def self.valid?(ip) + new(ip).valid? end - def self.fake?(ip, resolver_config = nil) - self.new(ip, resolver_config).fake? + def self.fake?(ip) + new(ip).fake? end end end diff --git a/lib/legitbot/config/resolver.rb b/lib/legitbot/config/resolver.rb new file mode 100644 index 0000000..d12d9ec --- /dev/null +++ b/lib/legitbot/config/resolver.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require 'resolv' + +module Legitbot + module Config + module Resolver # :nodoc: + def resolver_config(options = nil) + @resolver_config = options + end + + def resolver + @resolver_config ||= Legitbot.resolver_config + @resolver ||= Resolv::DNS.new @resolver_config + end + end + end +end diff --git a/lib/legitbot/duckduckgo.rb b/lib/legitbot/duckduckgo.rb index 132a008..96f80e9 100644 --- a/lib/legitbot/duckduckgo.rb +++ b/lib/legitbot/duckduckgo.rb @@ -1,12 +1,20 @@ -module Legitbot +# frozen_string_literal: true + +module Legitbot # :nodoc: # https://duckduckgo.com/duckduckbot class DuckDuckGo < BotMatch - ValidIPs = %w(50.16.241.113 50.16.241.114 50.16.241.117 50.16.247.234 52.204.97.54 52.5.190.19 54.197.234.188 54.208.100.253 23.21.227.69) - - def valid? - DuckDuckGo::ValidIPs.include? 
@ip - end + ip_ranges %w[ + 50.16.241.113 + 50.16.241.114 + 50.16.241.117 + 50.16.247.234 + 52.204.97.54 + 52.5.190.19 + 54.197.234.188 + 54.208.100.253 + 23.21.227.69 + ] end - rule Legitbot::DuckDuckGo, %w(DuckDuckBot) + rule Legitbot::DuckDuckGo, %w[DuckDuckBot] end diff --git a/lib/legitbot/facebook.rb b/lib/legitbot/facebook.rb index c8f7b33..8cc4ccc 100644 --- a/lib/legitbot/facebook.rb +++ b/lib/legitbot/facebook.rb @@ -1,48 +1,22 @@ -require 'ipaddr' +# frozen_string_literal: true + require 'irrc' -require 'interval_tree' -module Legitbot +module Legitbot # :nodoc: # https://developers.facebook.com/docs/sharing/webmasters/crawler - class Facebook < BotMatch AS = 'AS32934' - def valid? - ip = IPAddr.new(@ip) - Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0 - end - - @mutex = Mutex.new - - def self.valid_ips - @mutex.synchronize { @ips ||= load_ips } - end - - def self.reload! - @mutex.synchronize { @ips = load_ips } - end - - def self.load_ips - whois.map do |(family, records)| - ranges = records.map do |cidr| - range = IPAddr.new(cidr).to_range - (range.begin.to_i..range.end.to_i) - end - [family, IntervalTree::Tree.new(ranges)] - end.to_h - end - - def self.whois + ip_ranges do client = Irrc::Client.new client.query :radb, AS results = client.perform - %i(ipv4 ipv6).map do |family| - [family, results[AS][family][AS]] - end.to_h + %i[ipv4 ipv6].map do |family| + results[AS][family][AS] + end.flatten end end - rule Legitbot::Facebook, %w(Facebot facebookexternalhit/1.1) + rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1] end diff --git a/lib/legitbot/google.rb b/lib/legitbot/google.rb index 8b7ed38..0458fe6 100644 --- a/lib/legitbot/google.rb +++ b/lib/legitbot/google.rb @@ -1,14 +1,11 @@ -module Legitbot +# frozen_string_literal: true + +module Legitbot # :nodoc: # https://support.google.com/webmasters/answer/1061943 # https://support.google.com/webmasters/answer/80553 - class Google < BotMatch - ValidDomains = 
["google.com.", "googlebot.com."] - - def valid? - subdomain_of?(*Google::ValidDomains) && reverse_resolves? - end + domains 'google.com.', 'googlebot.com.' end - rule Legitbot::Google, %w(Googlebot Mediapartners-Google AdsBot-Google) + rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google] end diff --git a/lib/legitbot/legitbot.rb b/lib/legitbot/legitbot.rb index 7e6311e..eb61b02 100644 --- a/lib/legitbot/legitbot.rb +++ b/lib/legitbot/legitbot.rb @@ -1,6 +1,14 @@ +# frozen_string_literal: true + +## +# Bot lookup based on user agent module Legitbot @rules = [] + class << self + attr_accessor :resolver_config + end + ## # Lookup a bot based on its signature from +User-Agent+ header. # @@ -10,15 +18,12 @@ module Legitbot # otherwise. # :yields: a found bot # - def self.bot(userAgent, ip, resolver_config = nil) - bots = - @rules.select { |rule| - rule[:fragments].any? {|f| userAgent.index f} - }.map { |rule| - rule[:class].new(ip, resolver_config) - } + def self.bot(user_agent, ip) + bots = @rules + .select { |rule| rule[:fragments].any? { |f| user_agent.index f } } + .map { |rule| rule[:class].new(ip) } - selected = bots.select { |b| b.valid? }.first if bots.size > 1 + selected = bots.select(&:valid?).first if bots.size > 1 selected = bots.last if selected.nil? if selected && block_given? 
@@ -29,6 +34,6 @@ def self.bot(userAgent, ip, resolver_config = nil) end def self.rule(clazz, fragments) - @rules << {:class => clazz, :fragments => fragments} + @rules << { class: clazz, fragments: fragments } end end diff --git a/lib/legitbot/pinterest.rb b/lib/legitbot/pinterest.rb index 94bdb94..e1248cb 100644 --- a/lib/legitbot/pinterest.rb +++ b/lib/legitbot/pinterest.rb @@ -1,13 +1,10 @@ -module Legitbot - # https://help.pinterest.com/en/articles/about-pinterest-crawler-0 +# frozen_string_literal: true +module Legitbot # :nodoc: + # https://help.pinterest.com/en/articles/about-pinterest-crawler-0 class Pinterest < BotMatch - ValidDomains = ["pinterest.com."] - - def valid? - subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves? - end + domains 'pinterest.com.' end - rule Legitbot::Pinterest, %w(Pinterestbot Pinterest/0.2) + rule Legitbot::Pinterest, %w[Pinterestbot Pinterest/0.2] end diff --git a/lib/legitbot/validators/domains.rb b/lib/legitbot/validators/domains.rb new file mode 100644 index 0000000..be1e25b --- /dev/null +++ b/lib/legitbot/validators/domains.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +require 'resolv' +require 'ipaddr' + +module Legitbot + module Validators + # + # In a bot matcher: + # `domains 'search.msn.com', ...` + # `domains 'googlebot.com', reverse: false` + # + # `reverse` is true by default. + module Domains + class << self + def included(base) + base.extend ClassMethods + end + end + + def valid_domain? + self.class.valid_domain?(@ip) + end + + module ClassMethods # :nodoc: + include Legitbot::Config::Resolver + + def domains(*list, reverse: true) + @valid_domains = list.flatten.map { |d| Resolv::DNS::Name.create(d) } + @validate_reverse_record = reverse + end + + def check_domains? + instance_variable_defined?(:@valid_domains) + end + + def valid_domain?(ip) + return true unless check_domains? + return true if @valid_domains.empty? + + domains = reverse_domains(ip) + return false if domains.empty? 
+ + record = find_subdomain_record(domains) + return false unless record + return true unless @validate_reverse_record + + ip == reverse_ip(record) + end + + def reverse_domains(ip) + resolver.getnames(ip) + rescue Resolv::ResolvError + nil + end + + def find_subdomain_record(domains) + domains.find do |d| + @valid_domains.any? { |vd| d.subdomain_of?(vd) } + end + end + + def reverse_ip(record) + return nil if record.nil? + + resolver.getaddress(record.to_s).to_s + end + end + end + end +end diff --git a/lib/legitbot/validators/ip_ranges.rb b/lib/legitbot/validators/ip_ranges.rb new file mode 100644 index 0000000..4f9f156 --- /dev/null +++ b/lib/legitbot/validators/ip_ranges.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +require 'ipaddr' +require 'interval_tree' + +module Legitbot + module Validators + # + # In a bot matcher: + # `ip_ranges ip, range, ip, ...` + # `ip_ranges do [ip, range, ...]; end` + module IpRanges + class << self + def included(base) + base.extend ClassMethods + end + end + + def valid_ip? + self.class.valid_ip?(@ip) + end + + module ClassMethods # :nodoc: + FAMILIES = %i[ipv4 ipv6].freeze + EMPTY_GENERATOR = proc { [] } + + def ip_ranges(*ips, &block) + @ip_ranges = partition_ips(ips.flatten) unless ips.empty? + @ip_ranges_loader = block_given? ? block : EMPTY_GENERATOR + @ip_loader_mutex = Mutex.new + end + + def check_ranges? + instance_variable_defined?(:@ip_ranges_loader) + end + + def valid_ip?(ip) + return true unless check_ranges? + return true if valid_ips.empty? + + obj = IPAddr.new(ip) + ranges = valid_ips[obj.ipv4? ? :ipv4 : :ipv6].search(obj.to_i) + !ranges.empty? + end + + def valid_ips + @ip_loader_mutex.synchronize do + @ip_ranges ||= load_ips + end + end + + def reload_ips + @ip_loader_mutex.synchronize do + @ip_ranges = load_ips + end + end + + def load_ips + partition_ips(@ip_ranges_loader.call) + end + + # rubocop:disable Metrics/AbcSize, Metrics/MethodLength + def partition_ips(ips) + return [] if ips.empty? 
+ + ips + .map { |cidr| IPAddr.new(cidr) } + .partition(&:ipv4?) + .each_with_index + .map do |list, index| + ranges = list.map(&:to_range).map do |r| + (r.begin.to_i..r.end.to_i) + end + [FAMILIES[index], IntervalTree::Tree.new(ranges)] + end.to_h + end + # rubocop:enable Metrics/AbcSize, Metrics/MethodLength + end + end + end +end diff --git a/lib/legitbot/version.rb b/lib/legitbot/version.rb index cd503ff..8a81e67 100644 --- a/lib/legitbot/version.rb +++ b/lib/legitbot/version.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + module Legitbot VERSION = '0.3.2' end diff --git a/lib/legitbot/yandex.rb b/lib/legitbot/yandex.rb index 8fc5a33..6eb20ee 100644 --- a/lib/legitbot/yandex.rb +++ b/lib/legitbot/yandex.rb @@ -1,17 +1,33 @@ -module Legitbot - # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html +# frozen_string_literal: true +module Legitbot # :nodoc: + # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html class Yandex < BotMatch - ValidDomains = ["yandex.ru.", "yandex.net.", "yandex.com."] - - def valid? - subdomain_of?(*Yandex::ValidDomains) && reverse_resolves? - end + domains 'yandex.ru.', 'yandex.net.', 'yandex.com.' 
end - rule Legitbot::Yandex, %w(YandexBot YandexAccessibilityBot YandexMobileBot - YandexDirectDyn YandexScreenshotBot YandexImages YandexVideo YandexVideoParser - YandexMedia YandexBlogs YandexFavicons YandexWebmaster YandexPagechecker - YandexImageResizer YaDirectFetcher YandexCalendar YandexSitelinks YandexMetrika - YandexNews YandexVertis YandexSearchShop YandexVerticals) + rule Legitbot::Yandex, %w[ + YandexBot + YandexAccessibilityBot + YandexMobileBot + YandexDirectDyn + YandexScreenshotBot + YandexImages + YandexVideo + YandexVideoParser + YandexMedia + YandexBlogs + YandexFavicons + YandexWebmaster + YandexPagechecker + YandexImageResizer + YaDirectFetcher + YandexCalendar + YandexSitelinks + YandexMetrika + YandexNews + YandexVertis + YandexSearchShop + YandexVerticals + ] end diff --git a/test/ahrefs_test.rb b/test/ahrefs_test.rb index e487f0d..f0313a5 100644 --- a/test/ahrefs_test.rb +++ b/test/ahrefs_test.rb @@ -1,28 +1,36 @@ +# frozen_string_literal: true + require 'minitest/autorun' require 'legitbot' class AhrefsTest < Minitest::Test def test_malicious_ip - ip = "149.210.164.47" + ip = '149.210.164.47' match = Legitbot::Ahrefs.new ip assert !match.valid?, msg: "#{ip} is not a real Ahrefs IP" end def test_valid_ip - ip = "54.36.148.0" + ip = '54.36.148.0' match = Legitbot::Ahrefs.new ip assert match.valid?, msg: "#{ip} is a valid Ahrefs IP" end def test_malicious_ua - bot = Legitbot.bot("Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)", "149.210.164.47") - assert bot, msg: "Ahrefs detected from User-Agent" - assert !bot.valid?, msg: "Not a valid Ahrefs" + bot = Legitbot.bot( + 'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)', + '149.210.164.47' + ) + assert bot, msg: 'Ahrefs detected from User-Agent' + assert !bot.valid?, msg: 'Not a valid Ahrefs' end def test_valid_ua - bot = Legitbot.bot("Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)", "54.36.148.0") - assert bot, msg: "Ahrefs detected 
from User-Agent" - assert bot.valid?, msg: "Valid Ahrefs" + bot = Legitbot.bot( + 'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)', + '54.36.148.0' + ) + assert bot, msg: 'Ahrefs detected from User-Agent' + assert bot.valid?, msg: 'Valid Ahrefs' end end diff --git a/test/apple_as_google_test.rb b/test/apple_as_google_test.rb index 30f7f1b..ed0d1ab 100644 --- a/test/apple_as_google_test.rb +++ b/test/apple_as_google_test.rb @@ -1,22 +1,27 @@ +# frozen_string_literal: true + require 'minitest/autorun' require 'legitbot' class AppleAsGoogleTest < Minitest::Test def test_valid_ip - ip = "17.58.98.60" + ip = '17.58.98.60' match = Legitbot::Apple_as_Google.new(ip) assert match.valid?, msg: "#{ip} is a valid Applebot IP" end def test_invalid_ip - ip = "127.0.0.1" + ip = '127.0.0.1' match = Legitbot::Apple_as_Google.new(ip) assert match.fake?, msg: "#{ip} is a fake Applebot IP" end def test_user_agent - bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "17.58.98.60") + bot = Legitbot.bot( + 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', + '17.58.98.60' + ) assert_equal :apple_as_google, bot.detected_as - assert bot.valid?, msg: "A valid Applebot User-agent and IP" + assert bot.valid?, msg: 'A valid Applebot User-agent and IP' end end diff --git a/test/apple_test.rb b/test/apple_test.rb index 05f07b5..5234aba 100644 --- a/test/apple_test.rb +++ b/test/apple_test.rb @@ -1,22 +1,29 @@ +# frozen_string_literal: true + require 'minitest/autorun' require 'legitbot' class AppleTest < Minitest::Test def test_valid_ip - ip = "17.58.98.60" + ip = '17.58.98.60' match = Legitbot::Apple.new(ip) assert match.valid?, msg: "#{ip} is a valid Applebot IP" end def test_invalid_ip - ip = "127.0.0.1" + ip = '127.0.0.1' match = Legitbot::Apple.new(ip) assert match.fake?, msg: "#{ip} is a fake Applebot IP" end + # rubocop:disable Metrics/LineLength def test_user_agent - bot = Legitbot.bot("Mozilla/5.0 
(Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)", "17.58.98.60") + bot = Legitbot.bot( + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)', + '17.58.98.60' + ) assert_equal :apple, bot.detected_as - assert bot.valid?, msg: "A valid Applebot User-agent and IP" + assert bot.valid?, msg: 'A valid Applebot User-agent and IP' end + # rubocop:enable Metrics/LineLength end diff --git a/test/botmatch_test.rb b/test/botmatch_test.rb index 1ac1788..17b05e3 100644 --- a/test/botmatch_test.rb +++ b/test/botmatch_test.rb @@ -1,29 +1,11 @@ +# frozen_string_literal: true + require 'minitest/autorun' require 'legitbot' class BotMatchTest < Minitest::Test - def test_reverse_name - match = Legitbot::BotMatch.new "66.249.64.141" - assert_equal "crawl-66-249-64-141.googlebot.com", match.reverse_name - end - - def test_reverse_ip - match = Legitbot::BotMatch.new "66.249.64.141" - assert_equal "66.249.64.141", match.reversed_ip - end - - def test_reverse_resolves - match = Legitbot::BotMatch.new "66.249.64.141" - assert_equal true, match.reverse_resolves? - end - - def test_reverse_doesnt_resolve - match = Legitbot::BotMatch.new "5.140.70.64" - assert !match.reverse_resolves? 
- end - def test_valid_class_syntax - assert Legitbot::Google.valid?("66.249.64.141"), msg: "Valid Googlebot" - assert Legitbot::Google.fake?("149.210.164.47"), msg: "Fake Googlebot" + assert Legitbot::Google.valid?('66.249.64.141'), msg: 'Valid Googlebot' + assert Legitbot::Google.fake?('149.210.164.47'), msg: 'Fake Googlebot' end end diff --git a/test/facebook_test.rb b/test/facebook_test.rb index c6b3426..5cc02c5 100644 --- a/test/facebook_test.rb +++ b/test/facebook_test.rb @@ -1,20 +1,24 @@ +# frozen_string_literal: true + require 'minitest/autorun' require 'legitbot' module Legitbot class Facebook + # rubocop:disable Metrics/LineLength def self.whois { - ipv4: ["69.63.176.0/20", "66.220.144.0/20", "66.220.144.0/21", "69.63.184.0/21", "69.63.176.0/21", "74.119.76.0/22", "69.171.255.0/24", "173.252.64.0/18", "69.171.224.0/19", "69.171.224.0/20", "103.4.96.0/22", "69.63.176.0/24", "173.252.64.0/19", "173.252.70.0/24", "31.13.64.0/18", "31.13.24.0/21", "66.220.152.0/21", "66.220.159.0/24", "69.171.239.0/24", "69.171.240.0/20", "31.13.64.0/19", "31.13.64.0/24", "31.13.65.0/24", "31.13.67.0/24", "31.13.68.0/24", "31.13.69.0/24", "31.13.70.0/24", "31.13.71.0/24", "31.13.72.0/24", "31.13.73.0/24", "31.13.74.0/24", "31.13.75.0/24", "31.13.76.0/24", "31.13.77.0/24", "31.13.96.0/19", "31.13.66.0/24", "173.252.96.0/19", "69.63.178.0/24", "31.13.78.0/24", "31.13.79.0/24", "31.13.80.0/24", "31.13.82.0/24", "31.13.83.0/24", "31.13.84.0/24", "31.13.85.0/24", "31.13.86.0/24", "31.13.87.0/24", "31.13.88.0/24", "31.13.89.0/24", "31.13.90.0/24", "31.13.91.0/24", "31.13.92.0/24", "31.13.93.0/24", "31.13.94.0/24", "31.13.95.0/24", "69.171.253.0/24", "69.63.186.0/24", "31.13.81.0/24", "179.60.192.0/22", "179.60.192.0/24", "179.60.193.0/24", "179.60.194.0/24", "179.60.195.0/24", "185.60.216.0/22", "45.64.40.0/22", "185.60.216.0/24", "185.60.217.0/24", "185.60.218.0/24", "185.60.219.0/24", "129.134.0.0/16", "157.240.0.0/16", "157.240.8.0/24", "157.240.0.0/24", "157.240.1.0/24", 
"157.240.2.0/24", "157.240.3.0/24", "157.240.4.0/24", "157.240.5.0/24", "157.240.6.0/24", "157.240.7.0/24", "157.240.9.0/24", "157.240.10.0/24", "157.240.16.0/24", "157.240.19.0/24", "157.240.11.0/24", "157.240.12.0/24", "157.240.13.0/24", "157.240.14.0/24", "157.240.15.0/24", "157.240.17.0/24", "157.240.18.0/24", "157.240.20.0/24", "157.240.21.0/24", "157.240.22.0/24", "157.240.23.0/24", "157.240.0.0/17", "69.171.250.0/24", "157.240.24.0/24", "157.240.25.0/24", "199.201.64.0/24", "199.201.65.0/24", "199.201.64.0/22", "204.15.20.0/22", "157.240.192.0/24", "129.134.0.0/17", "157.240.198.0/24"], + ipv4: ['69.63.176.0/20', '66.220.144.0/20', '66.220.144.0/21', '69.63.184.0/21', '69.63.176.0/21', '74.119.76.0/22', '69.171.255.0/24', '173.252.64.0/18', '69.171.224.0/19', '69.171.224.0/20', '103.4.96.0/22', '69.63.176.0/24', '173.252.64.0/19', '173.252.70.0/24', '31.13.64.0/18', '31.13.24.0/21', '66.220.152.0/21', '66.220.159.0/24', '69.171.239.0/24', '69.171.240.0/20', '31.13.64.0/19', '31.13.64.0/24', '31.13.65.0/24', '31.13.67.0/24', '31.13.68.0/24', '31.13.69.0/24', '31.13.70.0/24', '31.13.71.0/24', '31.13.72.0/24', '31.13.73.0/24', '31.13.74.0/24', '31.13.75.0/24', '31.13.76.0/24', '31.13.77.0/24', '31.13.96.0/19', '31.13.66.0/24', '173.252.96.0/19', '69.63.178.0/24', '31.13.78.0/24', '31.13.79.0/24', '31.13.80.0/24', '31.13.82.0/24', '31.13.83.0/24', '31.13.84.0/24', '31.13.85.0/24', '31.13.86.0/24', '31.13.87.0/24', '31.13.88.0/24', '31.13.89.0/24', '31.13.90.0/24', '31.13.91.0/24', '31.13.92.0/24', '31.13.93.0/24', '31.13.94.0/24', '31.13.95.0/24', '69.171.253.0/24', '69.63.186.0/24', '31.13.81.0/24', '179.60.192.0/22', '179.60.192.0/24', '179.60.193.0/24', '179.60.194.0/24', '179.60.195.0/24', '185.60.216.0/22', '45.64.40.0/22', '185.60.216.0/24', '185.60.217.0/24', '185.60.218.0/24', '185.60.219.0/24', '129.134.0.0/16', '157.240.0.0/16', '157.240.8.0/24', '157.240.0.0/24', '157.240.1.0/24', '157.240.2.0/24', '157.240.3.0/24', '157.240.4.0/24', '157.240.5.0/24', 
'157.240.6.0/24', '157.240.7.0/24', '157.240.9.0/24', '157.240.10.0/24', '157.240.16.0/24', '157.240.19.0/24', '157.240.11.0/24', '157.240.12.0/24', '157.240.13.0/24', '157.240.14.0/24', '157.240.15.0/24', '157.240.17.0/24', '157.240.18.0/24', '157.240.20.0/24', '157.240.21.0/24', '157.240.22.0/24', '157.240.23.0/24', '157.240.0.0/17', '69.171.250.0/24', '157.240.24.0/24', '157.240.25.0/24', '199.201.64.0/24', '199.201.65.0/24', '199.201.64.0/22', '204.15.20.0/22', '157.240.192.0/24', '129.134.0.0/17', '157.240.198.0/24'], ipv6: [] } end + # rubocop:enable Metrics/LineLength end end class FacebookTest < Minitest::Test def test_valid_ip - ip = "69.63.186.89" + ip = '69.63.186.89' match = Legitbot::Facebook.new(ip) assert match.valid?, msg: "#{ip} is a valid Facebook IP" @@ -24,25 +28,36 @@ def test_valid_ip end def test_invalid_ip - ip = "127.0.0.1" + ip = '127.0.0.1' match = Legitbot::Facebook.new(ip) assert match.fake?, msg: "#{ip} is a fake Facebook IP" end + # rubocop:disable Metrics/LineLength, Metrics/MethodLength def test_user_agent - Legitbot.bot("facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", "31.13.76.56") do |bot| + Legitbot.bot( + 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)', + '31.13.76.56' + ) do |bot| assert_equal :facebook, bot.detected_as - assert bot.valid?, msg: "true Facebook" + assert bot.valid?, msg: 'true Facebook' end - Legitbot.bot("facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)", "173.252.87.8") do |bot| + Legitbot.bot( + 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)', + '173.252.87.8' + ) do |bot| assert_equal :facebook, bot.detected_as - assert bot.valid?, msg: "true Facebook" + assert bot.valid?, msg: 'true Facebook' end - Legitbot.bot("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0", "92.243.181.7") do |bot| 
+ Legitbot.bot( + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0', + '92.243.181.7' + ) do |bot| assert_equal :facebook, bot.detected_as - assert bot.fake?, msg: "fake Facebook" + assert bot.fake?, msg: 'fake Facebook' end end + # rubocop:enable Metrics/LineLength, Metrics/MethodLength end diff --git a/test/google_test.rb b/test/google_test.rb index 0919d36..509a19f 100644 --- a/test/google_test.rb +++ b/test/google_test.rb @@ -1,42 +1,52 @@ +# frozen_string_literal: true + require 'minitest/autorun' require 'legitbot' class GoogleTest < Minitest::Test def test_malicious_ip - ip = "149.210.164.47" + ip = '149.210.164.47' match = Legitbot::Google.new ip - reverse_name = match.reverse_name - assert !match.subdomain_of?("googlebot.com."), msg: "#{reverse_name} is not a subdomain of googlebot.com" assert !match.valid?, msg: "#{ip} is not a real Googlebot IP" end def test_valid_ip - ip = "66.249.64.141" + ip = '66.249.64.141' match = Legitbot::Google.new ip - reverse_name = match.reverse_name - assert match.subdomain_of?("googlebot.com."), msg: "#{reverse_name} is a subdomain of googlebot.com" assert match.valid?, msg: "#{ip} is a valid Googlebot IP" end def test_malicious_ua - bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "149.210.164.47") - assert bot, msg: "Googlebot detected from User-Agent" - assert !bot.valid?, msg: "Not a valid Googlebot" + bot = Legitbot.bot( + 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', + '149.210.164.47' + ) + assert bot, msg: 'Googlebot detected from User-Agent' + assert !bot.valid?, msg: 'Not a valid Googlebot' end def test_valid_ua - bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.64.141") - assert bot, msg: "Googlebot detected from User-Agent" - assert bot.valid?, msg: "Valid Googlebot" + 
bot = Legitbot.bot(
+      'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+      '66.249.64.141'
+    )
+    assert bot, msg: 'Googlebot detected from User-Agent'
+    assert bot.valid?, msg: 'Valid Googlebot'
   end
 
   def test_valid_name
-    bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "66.249.64.141")
+    bot = Legitbot.bot(
+      'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+      '66.249.64.141'
+    )
     assert_equal :google, bot.detected_as
   end
 
   def test_fake_name
-    bot = Legitbot.bot("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", "81.1.172.108")
+    bot = Legitbot.bot(
+      'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+      '81.1.172.108'
+    )
     assert_equal :google, bot.detected_as
   end
 end
# New file: tests for the Domains mixin under Legitbot::Validators. DomainMatch queues two
# getnames results on a Minitest::Mock resolver; ReverseMatch stubs getnames and getaddress.
diff --git a/test/legitbot/validators/domains_test.rb b/test/legitbot/validators/domains_test.rb
new file mode 100644
index 0000000..c937bb4
--- /dev/null
+++ b/test/legitbot/validators/domains_test.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+require 'minitest/autorun'
+require 'legitbot'
+
+module Legitbot
+  module Validators
+    class NoDomains
+      include Domains
+    end
+
+    class DomainMatch
+      include Domains
+      domains 'search.msn.com', reverse: false
+
+      @resolver = Minitest::Mock.new
+      @resolver.expect(:getnames,
+                       ['po18-218.co2-6nf-srch-2b.ntwk.msn.net',
+                        'msnbot-157-55-39-132.search.msn.com'].map do |d|
+                         Resolv::DNS::Name.create(d)
+                       end, [String])
+      @resolver.expect(:getnames,
+                       ['crawl-66-249-64-141.googlebot.com'].map do |d|
+                         Resolv::DNS::Name.create(d)
+                       end, [String])
+    end
+
+    class ReverseMatch
+      include Domains
+      domains 'search.msn.com'
+    end
+
+    class DomainsTest < Minitest::Test
+      def test_no_domains
+        assert NoDomains.valid_domain?('127.0.0.1')
+      end
+
+      def test_reverse_domain
+        assert DomainMatch.valid_domain?('127.0.0.1')
+        refute DomainMatch.valid_domain?('127.0.0.1')
+      end
+
+      def test_reverse_ip
+        dlist = ['po18-218.co2-6nf-srch-2b.ntwk.msn.net',
+                 'msnbot-157-55-39-132.search.msn.com'].map do |d|
+          Resolv::DNS::Name.create(d)
+        end
+
+        ReverseMatch.resolver.stub :getnames, dlist do
+          ReverseMatch.resolver.stub :getaddress, '127.0.0.1' do
+            assert ReverseMatch.valid_domain?('127.0.0.1')
+            refute ReverseMatch.valid_domain?('127.0.0.2')
+          end
+        end
+      end
+    end
+  end
+end
# New file: tests for the IpRanges mixin under Legitbot::Validators: list, %w array, empty and
# block-based ip_ranges declarations, plus partition_ips and reload_ips.
diff --git a/test/legitbot/validators/ip_ranges_test.rb b/test/legitbot/validators/ip_ranges_test.rb
new file mode 100644
index 0000000..3ac145d
--- /dev/null
+++ b/test/legitbot/validators/ip_ranges_test.rb
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+require 'minitest/autorun'
+require 'legitbot'
+
+module Legitbot
+  module Validators
+    class NoRanges
+      include IpRanges
+    end
+
+    class ArrayRanges
+      include IpRanges
+      ip_ranges '66.220.144.0/21', '2a03:2880:f234::/48'
+    end
+
+    class FlattenRanges
+      include IpRanges
+      ip_ranges %w[66.220.144.0/21 2a03:2880:f234::/48]
+    end
+
+    class EmptyRanges
+      include IpRanges
+      ip_ranges
+
+      def initialize(ip)
+        @ip = ip
+      end
+    end
+
+    class LoadRanges
+      include IpRanges
+
+      @i = 0
+      ip_ranges do
+        @i += 1
+        [@i.odd? ? '127.0.0.0/8' : '192.168.0.0/16']
+      end
+
+      def self.counter
+        @i
+      end
+
+      def initialize(ip)
+        @ip = ip
+      end
+    end
+
+    class IpRangesTest < Minitest::Test
+      def test_partition_method
+        empty = NoRanges.partition_ips([])
+        assert_empty empty
+      end
+
+      def test_ipv6_partition
+        ipv6 = NoRanges.partition_ips(['2a03:2880:f234::/48'])
+        assert_nil ipv6[:ipv4].top_node
+        refute_nil ipv6[:ipv6].top_node
+      end
+
+      def test_ipv4_partition
+        ipv4 = NoRanges.partition_ips(['66.220.144.0/21'])
+        refute_nil ipv4[:ipv4].top_node
+        assert_nil ipv4[:ipv6].top_node
+      end
+
+      def test_no_ranges
+        assert NoRanges.valid_ip?('127.0.0.1')
+      end
+
+      def test_empty_matcher
+        assert_empty EmptyRanges.valid_ips
+        assert_empty EmptyRanges.load_ips
+        assert EmptyRanges.valid_ip?('127.0.0.0')
+        assert EmptyRanges.valid_ip?('66.220.144.1')
+        assert EmptyRanges.valid_ip?('2a03:2880:f234:0:0:0:0:1')
+
+        matcher = EmptyRanges.new '127.0.0.0'
+        assert matcher.valid_ip?
+      end
+
+      def test_matcher_array
+        assert ArrayRanges.valid_ip?('66.220.144.1')
+        assert ArrayRanges.valid_ip?('2a03:2880:f234:0:0:0:0:1')
+        refute ArrayRanges.valid_ip?('66.220.143.1')
+        refute ArrayRanges.valid_ip?('2a03:2880:f233:0:0:0:0:1')
+      end
+
+      def test_flatten
+        assert FlattenRanges.valid_ip?('66.220.144.1')
+        assert FlattenRanges.valid_ip?('2a03:2880:f234:0:0:0:0:1')
+        refute FlattenRanges.valid_ip?('66.220.143.1')
+        refute FlattenRanges.valid_ip?('2a03:2880:f233:0:0:0:0:1')
+      end
+
+      # rubocop:disable Metrics/AbcSize
+      def test_matcher_loader
+        assert_equal 0, LoadRanges.counter
+        assert LoadRanges.new('127.127.127.127').valid_ip?
+        refute LoadRanges.new('10.10.10.10').valid_ip?
+        refute LoadRanges.new('192.168.127.254').valid_ip?
+        assert_equal 1, LoadRanges.counter
+
+        LoadRanges.reload_ips
+        refute LoadRanges.new('127.127.127.127').valid_ip?
+        refute LoadRanges.new('10.10.10.10').valid_ip?
+        assert LoadRanges.new('192.168.127.254').valid_ip?
+        assert_equal 2, LoadRanges.counter
+      end
+      # rubocop:enable Metrics/AbcSize
+    end
+  end
+end
# test/legitbot_test.rb: adds the frozen_string_literal magic comment, switches to single quotes,
# wraps the long assert lines and renames the unused block parameter to _bot.
diff --git a/test/legitbot_test.rb b/test/legitbot_test.rb
index 40dee5f..830d14f 100644
--- a/test/legitbot_test.rb
+++ b/test/legitbot_test.rb
@@ -1,13 +1,17 @@
+# frozen_string_literal: true
+
 require 'minitest/autorun'
 require 'legitbot'
 
 class LegitbotTest < Minitest::Test
   def test_rules
-    assert !Legitbot.bot("Firefox", "127.0.0.1"), msg: "Not a bot"
-    assert Legitbot.bot("Googlebot", "5.140.70.64"), msg: "No reverse resolve, bot"
+    assert !Legitbot.bot('Firefox', '127.0.0.1'),
+           msg: 'Not a bot'
+    assert Legitbot.bot('Googlebot', '5.140.70.64'),
+           msg: 'No reverse resolve, bot'
 
-    Legitbot.bot("Firefox", "127.0.0.1") do |bot|
-      flunk "No bot Firefox is possible"
+    Legitbot.bot('Firefox', '127.0.0.1') do |_bot|
+      flunk 'No bot Firefox is possible'
     end
   end
 end
# test/pinterest_test.rb: removes the reverse_name / subdomain_of? assertions, switches to single
# quotes, splits the Legitbot.bot calls across lines and adds rubocop Metrics/LineLength guards.
diff --git a/test/pinterest_test.rb b/test/pinterest_test.rb
index 9565457..e632711 100644
--- a/test/pinterest_test.rb
+++ b/test/pinterest_test.rb
@@ -1,42 +1,54 @@
+# frozen_string_literal: true
+
 require 'minitest/autorun'
 require 'legitbot'
 
 class PinterestTest < Minitest::Test
   def test_malicious_ip
-    ip = "149.210.164.47"
+    ip = '149.210.164.47'
     match = Legitbot::Pinterest.new ip
-    reverse_name = match.reverse_name
-    assert !match.subdomain_of?("pinterest.com."), msg: "#{reverse_name} is not a subdomain of pinterest.com"
     assert !match.valid?, msg: "#{ip} is not a real Pinterest IP"
   end
 
   def test_valid_ip
-    ip = "54.236.1.11"
+    ip = '54.236.1.11'
     match = Legitbot::Pinterest.new ip
-    reverse_name = match.reverse_name
-    assert match.subdomain_of?("pinterest.com."), msg: "#{reverse_name} is a subdomain of pinterest.com"
     assert match.valid?, msg: "#{ip} is a valid Pinterest IP"
   end
 
   def test_malicious_ua
-    bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "149.210.164.47")
-    assert bot, msg: "Pinterest detected from User-Agent"
-    assert !bot.valid?, msg: "Not a valid Pinterest"
+    bot = Legitbot.bot(
+      'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
+      '149.210.164.47'
+    )
+    assert bot, msg: 'Pinterest detected from User-Agent'
+    assert !bot.valid?, msg: 'Not a valid Pinterest'
   end
 
   def test_valid_ua
-    bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "54.236.1.11")
-    assert bot, msg: "Pinterest detected from User-Agent"
-    assert bot.valid?, msg: "Valid Pinterest"
+    bot = Legitbot.bot(
+      'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
+      '54.236.1.11'
+    )
+    assert bot, msg: 'Pinterest detected from User-Agent'
+    assert bot.valid?, msg: 'Valid Pinterest'
   end
 
+  # rubocop:disable Metrics/LineLength
   def test_android_not_bot
-    bot = Legitbot.bot("Mozilla/5.0 (Linux; Android 8.0.0; SM-G965F Build/R16NW; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.64 Mobile Safari/537.36 [Pinterest/Android]", "85.117.106.133")
+    bot = Legitbot.bot(
+      'Mozilla/5.0 (Linux; Android 8.0.0; SM-G965F Build/R16NW; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.64 Mobile Safari/537.36 [Pinterest/Android]',
+      '85.117.106.133'
+    )
     assert_nil bot
   end
+  # rubocop:enable Metrics/LineLength
 
   def test_engine_name
-    bot = Legitbot.bot("Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)", "54.236.1.11")
+    bot = Legitbot.bot(
+      'Mozilla/5.0 (compatible; Pinterestbot/1.0; +https://www.pinterest.com/bot.html)',
+      '54.236.1.11'
+    )
     assert_equal :pinterest, bot.detected_as
   end
 end