Specifically, when we call Array#include? to check if we've already
visited a node, Set is much faster especially as the number of nodes
visited increases.
It's probably neglible for most use cases but there is a difference.
Benchmark results: Array is between 1.42x and 3.57x slower than Set
in DeepDup with ruby 2.3.4.
```
$ ruby benchmark.rb
Warming up --------------------------------------
Small hash - Array way
33.169k i/100ms
Small hash - Set way 25.883k i/100ms
Calculating -------------------------------------
Small hash - Array way
379.253k (± 3.3%) i/s - 1.924M in 5.078544s
Small hash - Set way 266.919k (± 7.9%) i/s - 1.346M in 5.077567s
Comparison:
Small hash - Array way: 379253.4 i/s
Small hash - Set way: 266919.4 i/s - 1.42x slower
Warming up --------------------------------------
Large hash - Array way
20.000 i/100ms
Large hash - Set way 67.000 i/100ms
Calculating -------------------------------------
Large hash - Array way
218.159 (± 4.6%) i/s - 1.100k in 5.052188s
Large hash - Set way 779.445 (± 8.0%) i/s - 3.886k in 5.019390s
Comparison:
Large hash - Set way: 779.4 i/s
Large hash - Array way: 218.2 i/s - 3.57x slower
```
Benchmark script:
```
require 'yaml'
require "net/http"
require "uri"
uri = URI.parse("https://raw.githubusercontent.com/ManageIQ/manageiq/95130f3360a4abd82128ad2c23550d837799bb45/locale/en.yml")
response = Net::HTTP.get_response(uri)
large_hash = YAML.load(response.body)
small_hash = { :a => { :b => 'c' } }
require "benchmark/ips"
require 'set'
class DeepDupOriginal
def initialize(opts={})
@recurse_over_arrays = opts.fetch(:recurse_over_arrays, false)
@preserve_original_keys = opts.fetch(:preserve_original_keys, false)
end
def call(obj)
deep_dup(obj)
end
private
def deep_dup(obj, visited=[])
if obj.is_a?(Hash)
obj.each_with_object({}) do |(key, value), h|
h[@preserve_original_keys ? key : key.to_sym] = value_or_deep_dup(value, visited)
end
elsif obj.is_a?(Array) && @recurse_over_arrays
obj.each_with_object([]) do |value, arr|
value = value.is_a?(RecursiveOpenStruct) ? value.to_h : value
arr << value_or_deep_dup(value, visited)
end
else
obj
end
end
def value_or_deep_dup(value, visited)
obj_id = value.object_id
visited.include?(obj_id) ? value : deep_dup(value, visited << obj_id)
end
end
class DeepDupSet
def initialize(opts={})
@recurse_over_arrays = opts.fetch(:recurse_over_arrays, false)
@preserve_original_keys = opts.fetch(:preserve_original_keys, false)
end
def call(obj)
deep_dup(obj)
end
private
def deep_dup(obj, visited=Set.new)
if obj.is_a?(Hash)
obj.each_with_object({}) do |(key, value), h|
h[@preserve_original_keys ? key : key.to_sym] = value_or_deep_dup(value, visited)
end
elsif obj.is_a?(Array) && @recurse_over_arrays
obj.each_with_object([]) do |value, arr|
value = value.is_a?(RecursiveOpenStruct) ? value.to_h : value
arr << value_or_deep_dup(value, visited)
end
else
obj
end
end
def value_or_deep_dup(value, visited)
obj_id = value.object_id
visited.include?(obj_id) ? value : deep_dup(value, visited << obj_id)
end
end
Benchmark.ips do |x|
x.report("Small hash - Array way") { DeepDupOriginal.new.call(small_hash)}
x.report("Small hash - Set way") { DeepDupSet.new.call(small_hash)}
x.compare!
end
Benchmark.ips do |x|
x.report("Large hash - Array way") { DeepDupOriginal.new.call(large_hash)}
x.report("Large hash - Set way") { DeepDupSet.new.call(large_hash)}
x.compare!
end
```