From 72115ec1b866e54b5a4d530d7eaeb7e52a3c8e98 Mon Sep 17 00:00:00 2001 From: Ian Bayne Date: Sun, 3 Nov 2024 21:57:59 +0900 Subject: [PATCH] feat: Cache DNS lookups (#256) --- lib/valid_email2/address.rb | 37 +++- lib/valid_email2/dns_records_cache.rb | 37 ++++ spec/address_spec.rb | 279 ++++++++++++++++++++++++++ spec/spec_helper.rb | 1 + 4 files changed, 344 insertions(+), 10 deletions(-) create mode 100644 lib/valid_email2/dns_records_cache.rb diff --git a/lib/valid_email2/address.rb b/lib/valid_email2/address.rb index aec285b..36facb3 100644 --- a/lib/valid_email2/address.rb +++ b/lib/valid_email2/address.rb @@ -4,6 +4,7 @@ require "resolv" require "mail" require "unicode/emoji" +require "valid_email2/dns_records_cache" module ValidEmail2 class Address @@ -25,9 +26,7 @@ def initialize(address, dns_timeout = 5, dns_nameserver = nil) @parse_error = false @raw_address = address @dns_timeout = dns_timeout - - @resolv_config = Resolv::DNS::Config.default_config_hash - @resolv_config[:nameserver] = dns_nameserver if dns_nameserver + @dns_nameserver = dns_nameserver begin @address = Mail::Address.new(address) @@ -137,10 +136,24 @@ def address_contain_emoticons? @raw_address.scan(Unicode::Emoji::REGEX).length >= 1 end + def resolv_config + @resolv_config ||= begin + config = Resolv::DNS::Config.default_config_hash + config[:nameserver] = @dns_nameserver if @dns_nameserver + config + end + + @resolv_config + end + def mx_servers - @mx_servers ||= Resolv::DNS.open(@resolv_config) do |dns| - dns.timeouts = @dns_timeout - dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX) + @mx_servers_cache ||= ValidEmail2::DnsRecordsCache.new + + @mx_servers_cache.fetch(address.domain.downcase) do + Resolv::DNS.open(resolv_config) do |dns| + dns.timeouts = @dns_timeout + dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX) + end end end @@ -149,10 +162,14 @@ def null_mx? end def mx_or_a_servers - @mx_or_a_servers ||= Resolv::DNS.open(@resolv_config) do |dns| - dns.timeouts = @dns_timeout - (mx_servers.any? && mx_servers) || - dns.getresources(address.domain, Resolv::DNS::Resource::IN::A) + @mx_or_a_servers_cache ||= ValidEmail2::DnsRecordsCache.new + + @mx_or_a_servers_cache.fetch(address.domain.downcase) do + Resolv::DNS.open(resolv_config) do |dns| + dns.timeouts = @dns_timeout + (mx_servers.any? && mx_servers) || + dns.getresources(address.domain, Resolv::DNS::Resource::IN::A) + end end end end diff --git a/lib/valid_email2/dns_records_cache.rb b/lib/valid_email2/dns_records_cache.rb new file mode 100644 index 0000000..72daa0d --- /dev/null +++ b/lib/valid_email2/dns_records_cache.rb @@ -0,0 +1,37 @@ +module ValidEmail2 + class DnsRecordsCache + MAX_CACHE_SIZE = 1_000 + + def initialize + # Cache structure: { domain (String): { records: [], cached_at: Time, ttl: Integer } } + @cache = {} + end + + def fetch(domain, &block) + prune_cache if @cache.size > MAX_CACHE_SIZE + + cache_entry = @cache[domain] + + if cache_entry && (Time.now - cache_entry[:cached_at]) < cache_entry[:ttl] + return cache_entry[:records] + else + @cache.delete(domain) + end + + records = block.call + + if records.any? + ttl = records.map(&:ttl).min + @cache[domain] = { records: records, cached_at: Time.now, ttl: ttl } + end + + records + end + + def prune_cache + entries_sorted_by_cached_at_asc = (@cache.sort_by { |_domain, data| data[:cached_at] }).flatten + entries_to_remove = entries_sorted_by_cached_at_asc.first(@cache.size - MAX_CACHE_SIZE) + entries_to_remove.each { |domain| @cache.delete(domain) } + end + end +end \ No newline at end of file diff --git a/spec/address_spec.rb b/spec/address_spec.rb index 0f1165e..0861857 100644 --- a/spec/address_spec.rb +++ b/spec/address_spec.rb @@ -39,4 +39,283 @@ expect(address.valid?).to eq true end end + + describe "caching" do + let(:email_address) { "example@ymail.com" } + let(:email_instance) { described_class.new(email_address) } + let(:dns_records_cache_instance) { ValidEmail2::DnsRecordsCache.new } + let(:ttl) { 1_000 } + let(:mock_resolv_dns) { instance_double(Resolv::DNS) } + let(:mock_mx_records) { [double("MX", exchange: "mx.ymail.com", preference: 10, ttl: ttl)] } + + before do + allow(email_instance).to receive(:null_mx?).and_return(false) + allow(Resolv::DNS).to receive(:open).and_yield(mock_resolv_dns) + allow(mock_resolv_dns).to receive(:timeouts=) + end + + describe "#valid_strict_mx?" do + let(:cached_at) { Time.now } + let(:mock_cache_data) { { email_instance.address.domain => { records: mock_mx_records, cached_at: cached_at, ttl: ttl } } } + + before do + allow(mock_resolv_dns).to receive(:getresources) + .with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX) + .and_return(mock_mx_records) + end + + it "calls the MX servers lookup when the email is not cached" do + result = email_instance.valid_strict_mx? + + expect(Resolv::DNS).to have_received(:open).once + expect(result).to be true + end + + it "does not call the MX servers lookup when the email is cached" do + email_instance.valid_strict_mx? + email_instance.valid_strict_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + + it "returns the cached result for subsequent calls" do + first_result = email_instance.valid_strict_mx? + expect(first_result).to be true + + allow(mock_resolv_dns).to receive(:getresources) + .with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX) + .and_return([]) + + second_result = email_instance.valid_strict_mx? + expect(second_result).to be true + end + + describe "ttl" do + before do + dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data) + allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance) + allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original + end + + context "when the time since last lookup is less than the cached ttl entry" do + let(:cached_at) { Time.now } + + it "does not call the MX servers lookup" do + email_instance.valid_strict_mx? + + expect(Resolv::DNS).not_to have_received(:open) + end + end + + context "when the time since last lookup is greater than the cached ttl entry" do + let(:cached_at) { Time.now - ttl } + + it "calls the MX servers lookup" do + email_instance.valid_strict_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + end + end + + describe "cache size" do + before do + dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data) + allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance) + allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original + end + + context "when the cache size is less than or equal to the max cache size" do + before do + stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 1) + end + + it "does not prune the cache" do + expect(dns_records_cache_instance).not_to receive(:prune_cache) + + email_instance.valid_strict_mx? + end + + it "does not call the MX servers lookup" do + email_instance.valid_strict_mx? + + expect(Resolv::DNS).not_to have_received(:open) + end + + context "and there are older cached entries" do + let(:mock_cache_data) { { "another_domain.com" => { records: mock_mx_records, cached_at: cached_at - 100, ttl: ttl } } } + + it "does not prune those entries" do + email_instance.valid_strict_mx? + + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 2 + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain, "another_domain.com"]) + end + end + end + + context "when the cache size is greater than the max cache size" do + before do + stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 0) + end + + it "prunes the cache" do + expect(dns_records_cache_instance).to receive(:prune_cache).once + + email_instance.valid_strict_mx? + end + + it "calls the the MX servers lookup" do + email_instance.valid_strict_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + + context "and there are older cached entries" do + let(:mock_cache_data) { { "another_domain.com" => { records: mock_mx_records, cached_at: cached_at - 100, ttl: ttl } } } + + it "prunes those entries" do + email_instance.valid_strict_mx? + + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 1 + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain]) + end + end + end + end + end + + describe "#valid_mx?" do + let(:cached_at) { Time.now } + let(:mock_cache_data) { { email_instance.address.domain => { records: mock_a_records, cached_at: cached_at, ttl: ttl } } } + let(:mock_a_records) { [double("A", address: "192.168.1.1", ttl: ttl)] } + + before do + allow(email_instance).to receive(:mx_servers).and_return(mock_mx_records) + allow(mock_resolv_dns).to receive(:getresources) + .with(email_instance.address.domain, Resolv::DNS::Resource::IN::A) + .and_return(mock_a_records) + end + + it "calls the MX or A servers lookup when the email is not cached" do + result = email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + expect(result).to be true + end + + it "does not call the MX or A servers lookup when the email is cached" do + email_instance.valid_mx? + email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + + it "returns the cached result for subsequent calls" do + first_result = email_instance.valid_mx? + expect(first_result).to be true + + allow(mock_resolv_dns).to receive(:getresources) + .with(email_instance.address.domain, Resolv::DNS::Resource::IN::A) + .and_return([]) + + second_result = email_instance.valid_mx? + expect(second_result).to be true + end + + describe "ttl" do + before do + dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data) + allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance) + allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original + end + + context "when the time since last lookup is less than the cached ttl entry" do + let(:cached_at) { Time.now } + + it "does not call the MX or A servers lookup" do + email_instance.valid_mx? + + expect(Resolv::DNS).not_to have_received(:open) + end + end + + context "when the time since last lookup is greater than the cached ttl entry" do + let(:cached_at) { Time.now - ttl } + + it "calls the MX or A servers lookup " do + email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + end + end + + describe "cache size" do + before do + dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data) + allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance) + allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original + end + + context "when the cache size is less than or equal to the max cache size" do + before do + stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 1) + end + + it "does not prune the cache" do + expect(email_instance).not_to receive(:prune_cache) + + email_instance.valid_mx? + end + + it "does not call the MX or A servers lookup" do + email_instance.valid_mx? + + expect(Resolv::DNS).not_to have_received(:open) + end + + context "and there are older cached entries" do + let(:mock_cache_data) { { "another_domain.com" => { records: mock_a_records, cached_at: cached_at - 100, ttl: ttl } } } + + it "does not prune those entries" do + email_instance.valid_mx? + + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 2 + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain, "another_domain.com"]) + end + end + end + + context "when the cache size is greater than the max cache size" do + before do + stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 0) + end + + it "prunes the cache" do + expect(dns_records_cache_instance).to receive(:prune_cache).once + + email_instance.valid_mx? + end + + it "calls the MX or A servers lookup" do + email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + + context "and there are older cached entries" do + let(:mock_cache_data) { { "another_domain.com" => { records: mock_a_records, cached_at: cached_at - 100, ttl: ttl } } } + + it "prunes those entries" do + email_instance.valid_mx? + + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 1 + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain]) + end + end + end + end + end + end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 3a2a1ec..d5e02cc 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -5,6 +5,7 @@ require 'rspec-benchmark' RSpec.configure do |config| config.include RSpec::Benchmark::Matchers + config.default_formatter = 'doc' end RSpec::Benchmark.configure do |config| config.disable_gc = true