Skip to content

Commit

Permalink
feat: Cache DNS lookups (#256)
Browse files Browse the repository at this point in the history
  • Loading branch information
ianbayne authored Nov 3, 2024
1 parent 1643323 commit 72115ec
Show file tree
Hide file tree
Showing 4 changed files with 344 additions and 10 deletions.
37 changes: 27 additions & 10 deletions lib/valid_email2/address.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
require "resolv"
require "mail"
require "unicode/emoji"
require "valid_email2/dns_records_cache"

module ValidEmail2
class Address
Expand All @@ -25,9 +26,7 @@ def initialize(address, dns_timeout = 5, dns_nameserver = nil)
@parse_error = false
@raw_address = address
@dns_timeout = dns_timeout

@resolv_config = Resolv::DNS::Config.default_config_hash
@resolv_config[:nameserver] = dns_nameserver if dns_nameserver
@dns_nameserver = dns_nameserver

begin
@address = Mail::Address.new(address)
Expand Down Expand Up @@ -137,10 +136,24 @@ def address_contain_emoticons?
@raw_address.scan(Unicode::Emoji::REGEX).length >= 1
end

def resolv_config
@resolv_config ||= begin
config = Resolv::DNS::Config.default_config_hash
config[:nameserver] = @dns_nameserver if @dns_nameserver
config
end

@resolv_config
end

# Returns the MX resource records for the address's domain.
# The lookup goes through @mx_servers_cache (created on first use) and is keyed
# by the downcased domain; the block handed to the cache opens a resolver with
# resolv_config, applies @dns_timeout and queries for MX records.
#
# NOTE(review): this span appears to be a rendered diff without +/- markers.
# The three lines directly after `def` look like the removed pre-cache body
# (memoized in @mx_servers, reading @resolv_config directly) -- confirm against
# the repository before treating this text as runnable code.
def mx_servers
@mx_servers ||= Resolv::DNS.open(@resolv_config) do |dns|
dns.timeouts = @dns_timeout
dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX)
@mx_servers_cache ||= ValidEmail2::DnsRecordsCache.new

@mx_servers_cache.fetch(address.domain.downcase) do
Resolv::DNS.open(resolv_config) do |dns|
dns.timeouts = @dns_timeout
dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX)
end
end
end

Expand All @@ -149,10 +162,14 @@ def null_mx?
end

# Returns the domain's MX records when there are any, otherwise its A records.
# The result is fetched through @mx_or_a_servers_cache (created on first use),
# keyed by the downcased domain; on a miss the block opens a resolver with
# resolv_config, applies @dns_timeout, and only issues the A query when
# mx_servers is empty.
#
# NOTE(review): this span appears to be a rendered diff without +/- markers.
# The four lines directly after `def` look like the removed pre-cache body
# (memoized in @mx_or_a_servers, reading @resolv_config directly) -- confirm
# against the repository before treating this text as runnable code.
def mx_or_a_servers
@mx_or_a_servers ||= Resolv::DNS.open(@resolv_config) do |dns|
dns.timeouts = @dns_timeout
(mx_servers.any? && mx_servers) ||
dns.getresources(address.domain, Resolv::DNS::Resource::IN::A)
@mx_or_a_servers_cache ||= ValidEmail2::DnsRecordsCache.new

@mx_or_a_servers_cache.fetch(address.domain.downcase) do
Resolv::DNS.open(resolv_config) do |dns|
dns.timeouts = @dns_timeout
(mx_servers.any? && mx_servers) ||
dns.getresources(address.domain, Resolv::DNS::Resource::IN::A)
end
end
end
end
Expand Down
37 changes: 37 additions & 0 deletions lib/valid_email2/dns_records_cache.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module ValidEmail2
  # Small in-memory cache of DNS resource records, keyed by domain.
  #
  # Each entry remembers when it was stored and the smallest TTL among its
  # records; an entry is served only while its age is below that TTL.
  # Empty lookup results are never cached.
  class DnsRecordsCache
    # Size above which the oldest entries are evicted on the next #fetch.
    MAX_CACHE_SIZE = 1_000

    def initialize
      # Cache structure: { domain (String): { records: [], cached_at: Time, ttl: Integer } }
      @cache = {}
    end

    # Returns the cached records for +domain+ while they are still fresh;
    # otherwise runs the block, stores its (non-empty) result and returns it.
    #
    # @param domain [String] cache key
    # @yieldreturn [Array] records responding to #ttl
    # @return [Array] cached or freshly looked-up records
    def fetch(domain, &block)
      prune_cache if @cache.size > MAX_CACHE_SIZE

      cache_entry = @cache[domain]
      if cache_entry && (Time.now - cache_entry[:cached_at]) < cache_entry[:ttl]
        return cache_entry[:records]
      end

      # Missing or expired: drop any stale entry before looking up again.
      @cache.delete(domain)
      records = block.call

      if records.any?
        # The entry may live only as long as its shortest-lived record.
        @cache[domain] = { records: records, cached_at: Time.now, ttl: records.map(&:ttl).min }
      end

      records
    end

    # Evicts the oldest entries (by :cached_at) until the cache holds at most
    # MAX_CACHE_SIZE entries. Does nothing when the cache is within the limit.
    def prune_cache
      excess = @cache.size - MAX_CACHE_SIZE
      return if excess <= 0

      # Select whole [domain, data] pairs; flattening them would mix the data
      # hashes in with the keys and evict only about half the intended entries.
      oldest = @cache.min_by(excess) { |_domain, data| data[:cached_at] }
      oldest.each { |domain, _data| @cache.delete(domain) }
    end
  end
end
279 changes: 279 additions & 0 deletions spec/address_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,283 @@
expect(address.valid?).to eq true
end
end

# Specs for the DNS record caching used by #valid_strict_mx? and #valid_mx?.
# Resolv::DNS.open is stubbed throughout, so no example performs a real lookup;
# the number of calls to it is used as the signal for "a lookup happened".
describe "caching" do
  let(:email_address) { "example@ymail.com" }
  let(:email_instance) { described_class.new(email_address) }
  let(:dns_records_cache_instance) { ValidEmail2::DnsRecordsCache.new }
  let(:ttl) { 1_000 }
  let(:mock_resolv_dns) { instance_double(Resolv::DNS) }
  let(:mock_mx_records) { [double("MX", exchange: "mx.ymail.com", preference: 10, ttl: ttl)] }

  before do
    allow(email_instance).to receive(:null_mx?).and_return(false)
    allow(Resolv::DNS).to receive(:open).and_yield(mock_resolv_dns)
    allow(mock_resolv_dns).to receive(:timeouts=)
  end

  describe "#valid_strict_mx?" do
    let(:cached_at) { Time.now }
    let(:mock_cache_data) { { email_instance.address.domain => { records: mock_mx_records, cached_at: cached_at, ttl: ttl } } }

    before do
      allow(mock_resolv_dns).to receive(:getresources)
        .with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX)
        .and_return(mock_mx_records)
    end

    it "calls the MX servers lookup when the email is not cached" do
      result = email_instance.valid_strict_mx?

      expect(Resolv::DNS).to have_received(:open).once
      expect(result).to be true
    end

    it "does not call the MX servers lookup when the email is cached" do
      email_instance.valid_strict_mx?
      email_instance.valid_strict_mx?

      expect(Resolv::DNS).to have_received(:open).once
    end

    it "returns the cached result for subsequent calls" do
      first_result = email_instance.valid_strict_mx?
      expect(first_result).to be true

      # A fresh lookup would now come back empty; a true result proves the cache was used.
      allow(mock_resolv_dns).to receive(:getresources)
        .with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX)
        .and_return([])

      second_result = email_instance.valid_strict_mx?
      expect(second_result).to be true
    end

    describe "ttl" do
      before do
        # Pre-seed a cache instance and make the address pick it up.
        dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data)
        allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance)
        allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original
      end

      context "when the time since last lookup is less than the cached ttl entry" do
        let(:cached_at) { Time.now }

        it "does not call the MX servers lookup" do
          email_instance.valid_strict_mx?

          expect(Resolv::DNS).not_to have_received(:open)
        end
      end

      context "when the time since last lookup is greater than the cached ttl entry" do
        let(:cached_at) { Time.now - ttl }

        it "calls the MX servers lookup" do
          email_instance.valid_strict_mx?

          expect(Resolv::DNS).to have_received(:open).once
        end
      end
    end

    describe "cache size" do
      before do
        dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data)
        allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance)
        allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original
      end

      context "when the cache size is less than or equal to the max cache size" do
        before do
          stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 1)
        end

        it "does not prune the cache" do
          expect(dns_records_cache_instance).not_to receive(:prune_cache)

          email_instance.valid_strict_mx?
        end

        it "does not call the MX servers lookup" do
          email_instance.valid_strict_mx?

          expect(Resolv::DNS).not_to have_received(:open)
        end

        context "and there are older cached entries" do
          let(:mock_cache_data) { { "another_domain.com" => { records: mock_mx_records, cached_at: cached_at - 100, ttl: ttl } } }

          it "does not prune those entries" do
            email_instance.valid_strict_mx?

            expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 2
            expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain, "another_domain.com"])
          end
        end
      end

      context "when the cache size is greater than the max cache size" do
        before do
          stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 0)
        end

        it "prunes the cache" do
          expect(dns_records_cache_instance).to receive(:prune_cache).once

          email_instance.valid_strict_mx?
        end

        it "calls the MX servers lookup" do
          email_instance.valid_strict_mx?

          expect(Resolv::DNS).to have_received(:open).once
        end

        context "and there are older cached entries" do
          let(:mock_cache_data) { { "another_domain.com" => { records: mock_mx_records, cached_at: cached_at - 100, ttl: ttl } } }

          it "prunes those entries" do
            email_instance.valid_strict_mx?

            expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 1
            expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain])
          end
        end
      end
    end
  end

  describe "#valid_mx?" do
    let(:cached_at) { Time.now }
    let(:mock_cache_data) { { email_instance.address.domain => { records: mock_a_records, cached_at: cached_at, ttl: ttl } } }
    let(:mock_a_records) { [double("A", address: "192.168.1.1", ttl: ttl)] }

    before do
      # mx_servers is stubbed so only the MX-or-A lookup path opens a resolver.
      allow(email_instance).to receive(:mx_servers).and_return(mock_mx_records)
      allow(mock_resolv_dns).to receive(:getresources)
        .with(email_instance.address.domain, Resolv::DNS::Resource::IN::A)
        .and_return(mock_a_records)
    end

    it "calls the MX or A servers lookup when the email is not cached" do
      result = email_instance.valid_mx?

      expect(Resolv::DNS).to have_received(:open).once
      expect(result).to be true
    end

    it "does not call the MX or A servers lookup when the email is cached" do
      email_instance.valid_mx?
      email_instance.valid_mx?

      expect(Resolv::DNS).to have_received(:open).once
    end

    it "returns the cached result for subsequent calls" do
      first_result = email_instance.valid_mx?
      expect(first_result).to be true

      allow(mock_resolv_dns).to receive(:getresources)
        .with(email_instance.address.domain, Resolv::DNS::Resource::IN::A)
        .and_return([])

      second_result = email_instance.valid_mx?
      expect(second_result).to be true
    end

    describe "ttl" do
      before do
        dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data)
        allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance)
        allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original
      end

      context "when the time since last lookup is less than the cached ttl entry" do
        let(:cached_at) { Time.now }

        it "does not call the MX or A servers lookup" do
          email_instance.valid_mx?

          expect(Resolv::DNS).not_to have_received(:open)
        end
      end

      context "when the time since last lookup is greater than the cached ttl entry" do
        let(:cached_at) { Time.now - ttl }

        it "calls the MX or A servers lookup" do
          email_instance.valid_mx?

          expect(Resolv::DNS).to have_received(:open).once
        end
      end
    end

    describe "cache size" do
      before do
        dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data)
        allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance)
        allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original
      end

      context "when the cache size is less than or equal to the max cache size" do
        before do
          stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 1)
        end

        it "does not prune the cache" do
          # prune_cache lives on the cache object, not on the address; asserting on
          # the address could never fail.
          expect(dns_records_cache_instance).not_to receive(:prune_cache)

          email_instance.valid_mx?
        end

        it "does not call the MX or A servers lookup" do
          email_instance.valid_mx?

          expect(Resolv::DNS).not_to have_received(:open)
        end

        context "and there are older cached entries" do
          let(:mock_cache_data) { { "another_domain.com" => { records: mock_a_records, cached_at: cached_at - 100, ttl: ttl } } }

          it "does not prune those entries" do
            email_instance.valid_mx?

            expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 2
            expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain, "another_domain.com"])
          end
        end
      end

      context "when the cache size is greater than the max cache size" do
        before do
          stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 0)
        end

        it "prunes the cache" do
          expect(dns_records_cache_instance).to receive(:prune_cache).once

          email_instance.valid_mx?
        end

        it "calls the MX or A servers lookup" do
          email_instance.valid_mx?

          expect(Resolv::DNS).to have_received(:open).once
        end

        context "and there are older cached entries" do
          let(:mock_cache_data) { { "another_domain.com" => { records: mock_a_records, cached_at: cached_at - 100, ttl: ttl } } }

          it "prunes those entries" do
            email_instance.valid_mx?

            expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 1
            expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain])
          end
        end
      end
    end
  end
end
end
1 change: 1 addition & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
require 'rspec-benchmark'
# Global RSpec setup: mixes in the rspec-benchmark matchers and selects the
# 'doc' formatter as the default output format.
RSpec.configure do |rspec|
  rspec.include(RSpec::Benchmark::Matchers)
  rspec.default_formatter = 'doc'
end
RSpec::Benchmark.configure do |config|
config.disable_gc = true
Expand Down

0 comments on commit 72115ec

Please sign in to comment.