From e5ad1bbacbc1760fce9297596dc4efd31138e4a1 Mon Sep 17 00:00:00 2001 From: Joe Rafaniello Date: Thu, 13 Apr 2017 17:13:48 -0400 Subject: [PATCH] Set is much faster than array for checking uniqueness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Specifically, when we call Array#include? to check if we've already visited a node, Set is much faster especially as the number of nodes visited increases. It's probably neglible for most use cases but there is a difference. Benchmark results: Array is between 1.42x and 3.57x slower than Set in DeepDup with ruby 2.3.4. ``` $ ruby benchmark.rb Warming up -------------------------------------- Small hash - Array way 33.169k i/100ms Small hash - Set way 25.883k i/100ms Calculating ------------------------------------- Small hash - Array way 379.253k (± 3.3%) i/s - 1.924M in 5.078544s Small hash - Set way 266.919k (± 7.9%) i/s - 1.346M in 5.077567s Comparison: Small hash - Array way: 379253.4 i/s Small hash - Set way: 266919.4 i/s - 1.42x slower Warming up -------------------------------------- Large hash - Array way 20.000 i/100ms Large hash - Set way 67.000 i/100ms Calculating ------------------------------------- Large hash - Array way 218.159 (± 4.6%) i/s - 1.100k in 5.052188s Large hash - Set way 779.445 (± 8.0%) i/s - 3.886k in 5.019390s Comparison: Large hash - Set way: 779.4 i/s Large hash - Array way: 218.2 i/s - 3.57x slower ``` Benchmark script: ``` require 'yaml' require "net/http" require "uri" uri = URI.parse("https://raw.githubusercontent.com/ManageIQ/manageiq/95130f3360a4abd82128ad2c23550d837799bb45/locale/en.yml") response = Net::HTTP.get_response(uri) large_hash = YAML.load(response.body) small_hash = { :a => { :b => 'c' } } require "benchmark/ips" require 'set' class DeepDupOriginal def initialize(opts={}) @recurse_over_arrays = opts.fetch(:recurse_over_arrays, false) @preserve_original_keys = opts.fetch(:preserve_original_keys, false) end def call(obj) deep_dup(obj) end private def deep_dup(obj, visited=[]) if obj.is_a?(Hash) obj.each_with_object({}) do |(key, value), h| h[@preserve_original_keys ? key : key.to_sym] = value_or_deep_dup(value, visited) end elsif obj.is_a?(Array) && @recurse_over_arrays obj.each_with_object([]) do |value, arr| value = value.is_a?(RecursiveOpenStruct) ? value.to_h : value arr << value_or_deep_dup(value, visited) end else obj end end def value_or_deep_dup(value, visited) obj_id = value.object_id visited.include?(obj_id) ? value : deep_dup(value, visited << obj_id) end end class DeepDupSet def initialize(opts={}) @recurse_over_arrays = opts.fetch(:recurse_over_arrays, false) @preserve_original_keys = opts.fetch(:preserve_original_keys, false) end def call(obj) deep_dup(obj) end private def deep_dup(obj, visited=Set.new) if obj.is_a?(Hash) obj.each_with_object({}) do |(key, value), h| h[@preserve_original_keys ? key : key.to_sym] = value_or_deep_dup(value, visited) end elsif obj.is_a?(Array) && @recurse_over_arrays obj.each_with_object([]) do |value, arr| value = value.is_a?(RecursiveOpenStruct) ? value.to_h : value arr << value_or_deep_dup(value, visited) end else obj end end def value_or_deep_dup(value, visited) obj_id = value.object_id visited.include?(obj_id) ? value : deep_dup(value, visited << obj_id) end end Benchmark.ips do |x| x.report("Small hash - Array way") { DeepDupOriginal.new.call(small_hash)} x.report("Small hash - Set way") { DeepDupSet.new.call(small_hash)} x.compare! end Benchmark.ips do |x| x.report("Large hash - Array way") { DeepDupOriginal.new.call(large_hash)} x.report("Large hash - Set way") { DeepDupSet.new.call(large_hash)} x.compare! end ``` --- lib/recursive_open_struct/deep_dup.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/recursive_open_struct/deep_dup.rb b/lib/recursive_open_struct/deep_dup.rb index b679017..8af870b 100644 --- a/lib/recursive_open_struct/deep_dup.rb +++ b/lib/recursive_open_struct/deep_dup.rb @@ -1,3 +1,4 @@ +require 'set' class RecursiveOpenStruct::DeepDup def initialize(opts={}) @recurse_over_arrays = opts.fetch(:recurse_over_arrays, false) @@ -10,7 +11,7 @@ def call(obj) private - def deep_dup(obj, visited=[]) + def deep_dup(obj, visited=Set.new) if obj.is_a?(Hash) obj.each_with_object({}) do |(key, value), h| h[@preserve_original_keys ? key : key.to_sym] = value_or_deep_dup(value, visited)