Skip to content

Commit

Permalink
Merge pull request #213 from DataDog/anmarchenko/auto_test_retries_rspec
Browse files Browse the repository at this point in the history
[SDTEST-437] Auto test retries for RSpec
  • Loading branch information
anmarchenko authored Aug 8, 2024
2 parents 2eb656d + 9e6f62e commit b891aaf
Show file tree
Hide file tree
Showing 19 changed files with 589 additions and 51 deletions.
3 changes: 2 additions & 1 deletion lib/datadog/ci/configuration/components.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
require_relative "../test_optimisation/coverage/transport"
require_relative "../test_optimisation/coverage/writer"
require_relative "../test_retries/component"
require_relative "../test_retries/null_component"
require_relative "../test_visibility/component"
require_relative "../test_visibility/flush"
require_relative "../test_visibility/null_component"
Expand All @@ -35,7 +36,7 @@ def initialize(settings)
@test_visibility = TestVisibility::NullComponent.new
@git_tree_upload_worker = DummyWorker.new
@ci_remote = nil
@test_retries = nil
@test_retries = TestRetries::NullComponent.new

# Activate CI mode if enabled
if settings.ci.enabled
Expand Down
103 changes: 64 additions & 39 deletions lib/datadog/ci/contrib/rspec/example.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,50 +34,71 @@ def run(*args)

if ci_queue?
suite_name = "#{suite_name} (ci-queue running example [#{test_name}])"
test_suite_span = test_visibility_component.start_test_suite(suite_name)
ci_queue_test_span = test_visibility_component.start_test_suite(suite_name)
end

test_visibility_component.trace_test(
test_name,
suite_name,
tags: {
CI::Ext::Test::TAG_FRAMEWORK => Ext::FRAMEWORK,
CI::Ext::Test::TAG_FRAMEWORK_VERSION => CI::Contrib::RSpec::Integration.version.to_s,
CI::Ext::Test::TAG_SOURCE_FILE => Git::LocalRepository.relative_to_root(metadata[:file_path]),
CI::Ext::Test::TAG_SOURCE_START => metadata[:line_number].to_s,
CI::Ext::Test::TAG_PARAMETERS => Utils::TestRun.test_parameters(
metadata: {"scoped_id" => metadata[:scoped_id]}
)
},
service: datadog_configuration[:service_name]
) do |test_span|
test_span&.itr_unskippable! if metadata[CI::Ext::Test::ITR_UNSKIPPABLE_OPTION]

metadata[:skip] = CI::Ext::Test::ITR_TEST_SKIP_REASON if test_span&.skipped_by_itr?

result = super

case execution_result.status
when :passed
test_span&.passed!
test_suite_span&.passed!
when :failed
test_span&.failed!(exception: execution_result.exception)
test_suite_span&.failed!
else
# :pending or nil
test_span&.skipped!(
reason: execution_result.pending_message,
exception: execution_result.pending_exception
)

test_suite_span&.skipped!
# don't report test to RSpec::Core::Reporter until retries are done
@skip_reporting = true

test_retries_component.with_retries do |retry_callback|
test_visibility_component.trace_test(
test_name,
suite_name,
tags: {
CI::Ext::Test::TAG_FRAMEWORK => Ext::FRAMEWORK,
CI::Ext::Test::TAG_FRAMEWORK_VERSION => CI::Contrib::RSpec::Integration.version.to_s,
CI::Ext::Test::TAG_SOURCE_FILE => Git::LocalRepository.relative_to_root(metadata[:file_path]),
CI::Ext::Test::TAG_SOURCE_START => metadata[:line_number].to_s,
CI::Ext::Test::TAG_PARAMETERS => Utils::TestRun.test_parameters(
metadata: {"scoped_id" => metadata[:scoped_id]}
)
},
service: datadog_configuration[:service_name]
) do |test_span|
test_span&.itr_unskippable! if metadata[CI::Ext::Test::ITR_UNSKIPPABLE_OPTION]

metadata[:skip] = CI::Ext::Test::ITR_TEST_SKIP_REASON if test_span&.skipped_by_itr?

# before each run remove any previous exception
@exception = nil

super

case execution_result.status
when :passed
test_span&.passed!
when :failed
test_span&.failed!(exception: execution_result.exception)
else
# :pending or nil
test_span&.skipped!(
reason: execution_result.pending_message,
exception: execution_result.pending_exception
)
end

retry_callback.call(test_span)
end
end

test_suite_span&.finish
# after retries are done, we can report the test to RSpec
@skip_reporting = false

result
end
# this is a special case for ci-queue, we need to finish the test suite span
ci_queue_test_span&.finish

# Finish spec with latest retry's result
# TODO: when implementing new test retries make sure to clean @exception before calling this method
# if test passed at least once
finish(reporter)
end

def finish(reporter)
# by default finish test but do not report it to RSpec::Core::Reporter
# it is going to be reported once after retries are done
return super unless @skip_reporting

super(::RSpec::Core::NullReporter)
end

private
Expand All @@ -103,6 +124,10 @@ def test_visibility_component
Datadog.send(:components).test_visibility
end

def test_retries_component
Datadog.send(:components).test_retries
end

def ci_queue?
!!defined?(::RSpec::Queue::ExampleExtension) &&
self.class.ancestors.include?(::RSpec::Queue::ExampleExtension)
Expand Down
50 changes: 49 additions & 1 deletion lib/datadog/ci/test_retries/component.rb
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
# frozen_string_literal: true

require_relative "strategy/no_retry"
require_relative "strategy/retry_failed"

module Datadog
module CI
module TestRetries
# Encapsulates the logic to enable test retries, including:
# - retrying failed tests - improve success rate of CI pipelines
# - retrying new tests - detect flaky tests as early as possible to prevent them from being merged
class Component
attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts, :retry_failed_tests_total_limit
attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts,
:retry_failed_tests_total_limit, :retry_failed_tests_count

def initialize(
retry_failed_tests_max_attempts:,
Expand All @@ -17,11 +21,55 @@ def initialize(
@retry_failed_tests_enabled = false
@retry_failed_tests_max_attempts = retry_failed_tests_max_attempts
@retry_failed_tests_total_limit = retry_failed_tests_total_limit
# counter that store the current number of failed tests retried
@retry_failed_tests_count = 0

@mutex = Mutex.new
end

def configure(library_settings)
@retry_failed_tests_enabled = library_settings.flaky_test_retries_enabled?
end

def with_retries(&block)
# @type var retry_strategy: Strategy::Base
retry_strategy = nil

test_finished_callback = lambda do |test_span|
if retry_strategy.nil?
# we always run test at least once and after first pass create a correct retry strategy
retry_strategy = build_strategy(test_span)
else
# after each retry we record the result, strategy will decide if we should retry again
retry_strategy&.record_retry(test_span)
end
end

loop do
yield test_finished_callback

break unless retry_strategy&.should_retry?
end
end

def build_strategy(test_span)
@mutex.synchronize do
if should_retry_failed_test?(test_span)
Datadog.logger.debug("Failed test retry starts")
@retry_failed_tests_count += 1

Strategy::RetryFailed.new(max_attempts: @retry_failed_tests_max_attempts)
else
Strategy::NoRetry.new
end
end
end

private

def should_retry_failed_test?(test_span)
@retry_failed_tests_enabled && !!test_span&.failed? && @retry_failed_tests_count < @retry_failed_tests_total_limit
end
end
end
end
Expand Down
28 changes: 28 additions & 0 deletions lib/datadog/ci/test_retries/null_component.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# frozen_string_literal: true

require_relative "component"

module Datadog
module CI
module TestRetries
class NullComponent < Component
attr_reader :retry_failed_tests_enabled, :retry_failed_tests_max_attempts, :retry_failed_tests_total_limit

def initialize
# enabled only by remote settings
@retry_failed_tests_enabled = false
@retry_failed_tests_max_attempts = 0
@retry_failed_tests_total_limit = 0
end

def configure(library_settings)
end

def with_retries(&block)
no_action = proc {}
yield no_action
end
end
end
end
end
19 changes: 19 additions & 0 deletions lib/datadog/ci/test_retries/strategy/base.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# frozen_string_literal: true

module Datadog
module CI
module TestRetries
module Strategy
class Base
def should_retry?
false
end

def record_retry(test_span)
test_span&.set_tag(Ext::Test::TAG_IS_RETRY, "true")
end
end
end
end
end
end
16 changes: 16 additions & 0 deletions lib/datadog/ci/test_retries/strategy/no_retry.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# frozen_string_literal: true

require_relative "base"

module Datadog
module CI
module TestRetries
module Strategy
class NoRetry < Base
def record_retry(test_span)
end
end
end
end
end
end
37 changes: 37 additions & 0 deletions lib/datadog/ci/test_retries/strategy/retry_failed.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# frozen_string_literal: true

require_relative "base"

require_relative "../../ext/test"

module Datadog
module CI
module TestRetries
module Strategy
class RetryFailed < Base
attr_reader :max_attempts

def initialize(max_attempts:)
@max_attempts = max_attempts

@attempts = 0
@passed_once = false
end

def should_retry?
@attempts < @max_attempts && !@passed_once
end

def record_retry(test_span)
super

@attempts += 1
@passed_once = true if test_span&.passed?

Datadog.logger.debug { "Retry Attempts [#{@attempts} / #{@max_attempts}], Passed: [#{@passed_once}]" }
end
end
end
end
end
end
5 changes: 3 additions & 2 deletions sig/datadog/ci/contrib/rspec/example.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ module Datadog
module RSpec
module Example
def self.included: (untyped base) -> untyped
module InstanceMethods
include ::RSpec::Core::Example
module InstanceMethods : ::RSpec::Core::Example
@skip_reporting: bool

def run: (untyped example_group_instance, untyped reporter) -> untyped

Expand All @@ -14,6 +14,7 @@ module Datadog
def fetch_top_level_example_group: () -> Hash[Symbol, untyped]
def datadog_configuration: () -> untyped
def test_visibility_component: () -> Datadog::CI::TestVisibility::Component
def test_retries_component: () -> Datadog::CI::TestRetries::Component
def ci_queue?: () -> bool
end
end
Expand Down
12 changes: 12 additions & 0 deletions sig/datadog/ci/test_retries/component.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,21 @@ module Datadog

attr_reader retry_failed_tests_total_limit: Integer

attr_reader retry_failed_tests_count: Integer

@mutex: Thread::Mutex

def initialize: (retry_failed_tests_max_attempts: Integer, retry_failed_tests_total_limit: Integer) -> void

def configure: (Datadog::CI::Remote::LibrarySettings library_settings) -> void

def with_retries: () { (untyped) -> void } -> void

def build_strategy: (Datadog::CI::Test test) -> Datadog::CI::TestRetries::Strategy::Base

private

def should_retry_failed_test?: (Datadog::CI::Test test) -> bool
end
end
end
Expand Down
25 changes: 25 additions & 0 deletions sig/datadog/ci/test_retries/null_component.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module Datadog
module CI
module TestRetries
class NullComponent < Component
@retry_failed_tests_enabled: untyped

@retry_failed_tests_max_attempts: untyped

@retry_failed_tests_total_limit: untyped

attr_reader retry_failed_tests_enabled: untyped

attr_reader retry_failed_tests_max_attempts: untyped

attr_reader retry_failed_tests_total_limit: untyped

def initialize: () -> void

def configure: (untyped library_settings) -> nil

def with_retries: () { (untyped) -> untyped } -> untyped
end
end
end
end
13 changes: 13 additions & 0 deletions sig/datadog/ci/test_retries/strategy/base.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module Datadog
module CI
module TestRetries
module Strategy
class Base
def should_retry?: () -> bool

def record_retry: (Datadog::CI::Test test_span) -> void
end
end
end
end
end
Loading

0 comments on commit b891aaf

Please sign in to comment.