From 0dae38c385cd59068f31264aeb4960725b19a3e5 Mon Sep 17 00:00:00 2001 From: Benjamin Clayman Date: Wed, 6 Oct 2021 18:02:22 -0400 Subject: [PATCH 1/5] Proof of concept for improving ContainExactly matcher speed when elements obey transitivity This is a proof of concept approach for addressing issue #1161. The current implementation for ContainExactly runs in O(n!). In practice, it runs in O(n log n) when the elements are comparable and sorting result in a match. The crux of the problem is that some elements don't obey transitivity. As a result, knowing that sorting actual and expected doesn't result in a match *doesn't* guarantee that expected and actual don't match. This proof of concept provides a way for the user to indicate that the elements in a particular example's expected and actual obey transitivity. That looks like this: expect(a).to contain_exactly(*b).transitive And runs in O(n log n) time. More practically, this means that common use cases for contains_exactly will enjoy a massive speedup. Previously, users have examples where comparing arrays of 30 integers "never finishes." Using `.transitive` here with arrays of 10,000 integers runs in < 0.1s on my machine. --- .../matchers/built_in/contain_exactly.rb | 63 +++++++++++++- .../matchers/built_in/contain_exactly_spec.rb | 86 +++++++++++++++++++ 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/lib/rspec/matchers/built_in/contain_exactly.rb b/lib/rspec/matchers/built_in/contain_exactly.rb index 1bf7f9831..fe827f676 100644 --- a/lib/rspec/matchers/built_in/contain_exactly.rb +++ b/lib/rspec/matchers/built_in/contain_exactly.rb @@ -6,6 +6,19 @@ module BuiltIn # Provides the implementation for `contain_exactly` and `match_array`. # Not intended to be instantiated directly. class ContainExactly < BaseMatcher + def initialize(expected=nil) + super + @transitive = false + end + + # @api public + # Specifies that elements contained in actual and expected + # obey transitivity. This lets match run much faster. + def transitive + @transitive = true + self + end + # @api private # @return [String] def failure_message @@ -36,6 +49,7 @@ def description def generate_failure_message message = expected_collection_line message += actual_collection_line + @extra_items, @missing_items = fast_calculate_extra_missing if @transitive message += missing_elements_line unless missing_items.empty? message += extra_elements_line unless extra_items.empty? message @@ -72,7 +86,9 @@ def message_line(prefix, collection, surface_descriptions=false) def match(_expected, _actual) return false unless convert_actual_to_an_array - match_when_sorted? || (extra_items.empty? && missing_items.empty?) + matched_when_sorted = match_when_sorted? + return matched_when_sorted if matched_when_sorted || @transitive + (extra_items.empty? && missing_items.empty?) end # This cannot always work (e.g. when dealing with unsortable items, @@ -80,7 +96,8 @@ def match(_expected, _actual) # the slowness of the full matching algorithm, and in common cases this # works, so it's worth a try. def match_when_sorted? - values_match?(safe_sort(expected), safe_sort(actual)) + @sorted_expected, @sorted_actual = safe_sort(expected), safe_sort(actual) + values_match?(@sorted_expected, @sorted_actual) end def convert_actual_to_an_array @@ -96,6 +113,7 @@ def convert_actual_to_an_array def safe_sort(array) array.sort rescue Support::AllExceptionsExceptOnesWeMustNotRescue + raise "Invalid use of `.transitive` with unsortable array #{array}" if @transitive array end @@ -124,6 +142,47 @@ def extra_items end end + # We use this to determine extra and missing items between expected + # and actual arrays. This runs in O(n) time which is a big improvement + # over the O(n!) work incurred by PairingsMaximizer to evaluate all possible + # matchings between arrays + # rubocop:disable MethodLength + # rubocop:disable Metrics/AbcSize + def fast_calculate_extra_missing + extra, missing = [], [] + i, j = 0, 0 + + # Use 2-pointer approach to find elements in sorted_actual + # that aren't in sorted_expected and vice versa + while i < @sorted_actual.size && j < @sorted_expected.size + current_actual, current_expected = @sorted_actual[i], @sorted_expected[j] + + if current_actual < current_expected + extra << current_actual + i += 1 + elsif current_actual > current_expected + missing << current_expected + j += 1 + else + i += 1 + j += 1 + end + end + + while i < @sorted_actual.size + extra << current_actual + i += 1 + end + while j < @sorted_expected.size + missing << current_expected + j += 1 + end + + [extra, missing] + end + # rubocop:enable MethodLength + # rubocop:enable Metrics/AbcSize + def best_solution @best_solution ||= pairings_maximizer.find_best_solution end diff --git a/spec/rspec/matchers/built_in/contain_exactly_spec.rb b/spec/rspec/matchers/built_in/contain_exactly_spec.rb index ac1996ae8..815ad3d0b 100644 --- a/spec/rspec/matchers/built_in/contain_exactly_spec.rb +++ b/spec/rspec/matchers/built_in/contain_exactly_spec.rb @@ -95,6 +95,92 @@ def array.send; :sent; end end RSpec.describe "using contain_exactly with expect" do + # users have reported using contains_exactly with 50 elements + # never finishing! + context "with transitive enabled" do + require 'benchmark' + context "with actual and expected containing unsortable elements" do + it "raises" do + expect { + expect([be_positive, be_negative]).to contain_exactly(be_positive, be_negative).transitive + }.to raise_error(/Invalid use of/) + end + end + + context "with expected containing unsortable elements" do + it "raises" do + expect { + expect([1, -1]).to contain_exactly(be_positive, be_negative).transitive + }.to raise_error(/Invalid use of/) + end + end + + context "with actual and expected containing sortable elements" do + shared_examples "runs very fast" do + it do + time = Benchmark.realtime do + subject + end + # this is in seconds + expect(time).to be < 0.3 + end + end + + let(:a) { Array.new(10_000) { rand(10) } } + + context "with a positive expectation" do + subject { expect(a).to contain_exactly(*b).transitive } + + context "that is valid" do + let(:b) { a.shuffle } + + it "matches" do + subject + end + + include_examples "runs very fast" + end + + context "that is not valid" do + let(:b) { Array.new(10_000) { rand(10) } } + + it "fails quickly" do + time = Benchmark.realtime do + expect { subject }.to fail_with(/expected collection contained/) + end + expect(time).to be < 1 + end + + end + end + + context "with a negative expectation" do + subject { expect(a).not_to contain_exactly(*b).transitive } + + context "that is valid" do + let(:b) { Array.new(10_000) { rand(10) } } + + it "does not match" do + subject + end + + include_examples "runs very fast" + end + + context "that is not valid" do + let(:b) { a.shuffle } + + it "fails quickly" do + time = Benchmark.realtime do + expect { expect(a).not_to contain_exactly(*b).transitive }.to fail_with(/not to contain exactly/) + end + expect(time).to be < 1 + end + end + end + end + end + it "passes a valid positive expectation" do expect([1, 2]).to contain_exactly(2, 1) end From 3bd66a75b72b1474f3a8ee7d5fd94c9c08789769 Mon Sep 17 00:00:00 2001 From: Phil Pirozhkov Date: Sun, 10 Oct 2021 12:26:42 +0300 Subject: [PATCH 2/5] fixup! Split statement --- lib/rspec/matchers/built_in/contain_exactly.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/rspec/matchers/built_in/contain_exactly.rb b/lib/rspec/matchers/built_in/contain_exactly.rb index fe827f676..0b29430bc 100644 --- a/lib/rspec/matchers/built_in/contain_exactly.rb +++ b/lib/rspec/matchers/built_in/contain_exactly.rb @@ -86,8 +86,12 @@ def message_line(prefix, collection, surface_descriptions=false) def match(_expected, _actual) return false unless convert_actual_to_an_array + matched_when_sorted = match_when_sorted? - return matched_when_sorted if matched_when_sorted || @transitive + + return true if matched_when_sorted + return matched_when_sorted if @transitive + (extra_items.empty? && missing_items.empty?) end From fefb6da1730042999901679c7a6ebb13c556d7ec Mon Sep 17 00:00:00 2001 From: Phil Pirozhkov Date: Sun, 10 Oct 2021 12:39:56 +0300 Subject: [PATCH 3/5] fixup! Auto-detect transitivity --- .../matchers/built_in/contain_exactly.rb | 16 ++------------ .../matchers/built_in/contain_exactly_spec.rb | 21 +++---------------- 2 files changed, 5 insertions(+), 32 deletions(-) diff --git a/lib/rspec/matchers/built_in/contain_exactly.rb b/lib/rspec/matchers/built_in/contain_exactly.rb index 0b29430bc..b0cbf8c25 100644 --- a/lib/rspec/matchers/built_in/contain_exactly.rb +++ b/lib/rspec/matchers/built_in/contain_exactly.rb @@ -6,19 +6,6 @@ module BuiltIn # Provides the implementation for `contain_exactly` and `match_array`. # Not intended to be instantiated directly. class ContainExactly < BaseMatcher - def initialize(expected=nil) - super - @transitive = false - end - - # @api public - # Specifies that elements contained in actual and expected - # obey transitivity. This lets match run much faster. - def transitive - @transitive = true - self - end - # @api private # @return [String] def failure_message @@ -100,6 +87,7 @@ def match(_expected, _actual) # the slowness of the full matching algorithm, and in common cases this # works, so it's worth a try. def match_when_sorted? + @transitive = true # Be optimistic. Let `safe_sort` reveal non-transitivity. @sorted_expected, @sorted_actual = safe_sort(expected), safe_sort(actual) values_match?(@sorted_expected, @sorted_actual) end @@ -117,7 +105,7 @@ def convert_actual_to_an_array def safe_sort(array) array.sort rescue Support::AllExceptionsExceptOnesWeMustNotRescue - raise "Invalid use of `.transitive` with unsortable array #{array}" if @transitive + @transitive = false array end diff --git a/spec/rspec/matchers/built_in/contain_exactly_spec.rb b/spec/rspec/matchers/built_in/contain_exactly_spec.rb index 815ad3d0b..5e78e1f70 100644 --- a/spec/rspec/matchers/built_in/contain_exactly_spec.rb +++ b/spec/rspec/matchers/built_in/contain_exactly_spec.rb @@ -99,21 +99,6 @@ def array.send; :sent; end # never finishing! context "with transitive enabled" do require 'benchmark' - context "with actual and expected containing unsortable elements" do - it "raises" do - expect { - expect([be_positive, be_negative]).to contain_exactly(be_positive, be_negative).transitive - }.to raise_error(/Invalid use of/) - end - end - - context "with expected containing unsortable elements" do - it "raises" do - expect { - expect([1, -1]).to contain_exactly(be_positive, be_negative).transitive - }.to raise_error(/Invalid use of/) - end - end context "with actual and expected containing sortable elements" do shared_examples "runs very fast" do @@ -129,7 +114,7 @@ def array.send; :sent; end let(:a) { Array.new(10_000) { rand(10) } } context "with a positive expectation" do - subject { expect(a).to contain_exactly(*b).transitive } + subject { expect(a).to contain_exactly(*b) } context "that is valid" do let(:b) { a.shuffle } @@ -155,7 +140,7 @@ def array.send; :sent; end end context "with a negative expectation" do - subject { expect(a).not_to contain_exactly(*b).transitive } + subject { expect(a).not_to contain_exactly(*b) } context "that is valid" do let(:b) { Array.new(10_000) { rand(10) } } @@ -172,7 +157,7 @@ def array.send; :sent; end it "fails quickly" do time = Benchmark.realtime do - expect { expect(a).not_to contain_exactly(*b).transitive }.to fail_with(/not to contain exactly/) + expect { expect(a).not_to contain_exactly(*b) }.to fail_with(/not to contain exactly/) end expect(time).to be < 1 end From 6fd105e59260229b31ecc8a91322821bf9e38f43 Mon Sep 17 00:00:00 2001 From: Phil Pirozhkov Date: Sun, 10 Oct 2021 12:47:05 +0300 Subject: [PATCH 4/5] fixup! Fix fast calculate extra/missing --- lib/rspec/matchers/built_in/contain_exactly.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rspec/matchers/built_in/contain_exactly.rb b/lib/rspec/matchers/built_in/contain_exactly.rb index b0cbf8c25..86d74e2ff 100644 --- a/lib/rspec/matchers/built_in/contain_exactly.rb +++ b/lib/rspec/matchers/built_in/contain_exactly.rb @@ -162,11 +162,11 @@ def fast_calculate_extra_missing end while i < @sorted_actual.size - extra << current_actual + extra << @sorted_actual[i] i += 1 end while j < @sorted_expected.size - missing << current_expected + missing << @sorted_expected[j] j += 1 end From f0dee47ccb69e367fdb5c03e4b3963db1d34c164 Mon Sep 17 00:00:00 2001 From: Benjamin Clayman Date: Sun, 10 Oct 2021 12:39:48 -0400 Subject: [PATCH 5/5] Group tests verifying speed together; use timeout_if_not_debugging --- .../matchers/built_in/contain_exactly_spec.rb | 165 +++++++++--------- 1 file changed, 81 insertions(+), 84 deletions(-) diff --git a/spec/rspec/matchers/built_in/contain_exactly_spec.rb b/spec/rspec/matchers/built_in/contain_exactly_spec.rb index 5e78e1f70..d487fceb1 100644 --- a/spec/rspec/matchers/built_in/contain_exactly_spec.rb +++ b/spec/rspec/matchers/built_in/contain_exactly_spec.rb @@ -95,77 +95,6 @@ def array.send; :sent; end end RSpec.describe "using contain_exactly with expect" do - # users have reported using contains_exactly with 50 elements - # never finishing! - context "with transitive enabled" do - require 'benchmark' - - context "with actual and expected containing sortable elements" do - shared_examples "runs very fast" do - it do - time = Benchmark.realtime do - subject - end - # this is in seconds - expect(time).to be < 0.3 - end - end - - let(:a) { Array.new(10_000) { rand(10) } } - - context "with a positive expectation" do - subject { expect(a).to contain_exactly(*b) } - - context "that is valid" do - let(:b) { a.shuffle } - - it "matches" do - subject - end - - include_examples "runs very fast" - end - - context "that is not valid" do - let(:b) { Array.new(10_000) { rand(10) } } - - it "fails quickly" do - time = Benchmark.realtime do - expect { subject }.to fail_with(/expected collection contained/) - end - expect(time).to be < 1 - end - - end - end - - context "with a negative expectation" do - subject { expect(a).not_to contain_exactly(*b) } - - context "that is valid" do - let(:b) { Array.new(10_000) { rand(10) } } - - it "does not match" do - subject - end - - include_examples "runs very fast" - end - - context "that is not valid" do - let(:b) { a.shuffle } - - it "fails quickly" do - time = Benchmark.realtime do - expect { expect(a).not_to contain_exactly(*b) }.to fail_with(/not to contain exactly/) - end - expect(time).to be < 1 - end - end - end - end - end - it "passes a valid positive expectation" do expect([1, 2]).to contain_exactly(2, 1) end @@ -255,22 +184,90 @@ def array.send; :sent; end MESSAGE end - def timeout_if_not_debugging(time) - in_sub_process_if_possible do - require 'timeout' - return yield if defined?(::Debugger) - Timeout.timeout(time) { yield } + # users have reported using contains_exactly with 50 elements + # never finishing! + context 'with comparable elements' do + def timeout_if_not_debugging(time) + in_sub_process_if_possible do + require 'timeout' + return yield if defined?(::Debugger) + Timeout.timeout(time) { yield } + end end - end - it 'fails a match of 11 items with duplicates in a reasonable amount of time' do - timeout_if_not_debugging(0.1) do - expected = [0, 1, 1, 3, 3, 3, 4, 4, 8, 8, 9 ] - actual = [ 1, 2, 3, 3, 3, 3, 7, 8, 8, 9, 9] + it 'fails a match of 11 items with duplicates in a reasonable amount of time' do + timeout_if_not_debugging(0.1) do + expected = [0, 1, 1, 3, 3, 3, 4, 4, 8, 8, 9 ] + actual = [ 1, 2, 3, 3, 3, 3, 7, 8, 8, 9, 9] - expect { - expect(actual).to contain_exactly(*expected) - }.to fail_including("the missing elements were: [0, 1, 4, 4]") + expect { + expect(actual).to contain_exactly(*expected) + }.to fail_including("the missing elements were: [0, 1, 4, 4]") + end + end + + context "with actual and expected containing sortable elements" do + let(:max_runtime) { 1 } + + shared_examples "succeeds fast" do + it do + timeout_if_not_debugging(max_runtime) do + subject + end + end + end + + shared_examples "fails fast" do |failure_msg| + it do + timeout_if_not_debugging(max_runtime) do + expect { + subject + }.to fail_with(/#{Regexp.quote(failure_msg)}/) + end + end + end + + let(:actual) { Array.new(10_000) { rand(10) } } + + context "with a positive expectation" do + subject { expect(actual).to contain_exactly(*expected) } + + context "that is valid" do + let(:expected) { actual.shuffle } + + it "matches" do + subject + end + + include_examples "succeeds fast" + end + + context "that is not valid" do + let(:expected) { Array.new(10_000) { rand(10) } } + + include_examples "fails fast", "expected collection contained" + end + end + + context "with a negative expectation" do + subject { expect(actual).not_to contain_exactly(*expected) } + + context "that is valid" do + let(:expected) { Array.new(10_000) { rand(10) } } + + it "does not match" do + subject + end + + include_examples "succeeds fast" + end + + context "that is not valid" do + let(:expected) { actual.shuffle } + + include_examples "fails fast", "not to contain exactly" + end + end end end