diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..f7c4118a37 --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,4 @@ +RELEASE_TYPE: patch + +This patch improves certain corner cases for reporting of flaky errors +(:issue:`4183` and :issue:`4228`). diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py index dad5eebd8c..c0175b817b 100644 --- a/hypothesis-python/src/hypothesis/core.py +++ b/hypothesis-python/src/hypothesis/core.py @@ -1056,15 +1056,19 @@ def run(data): def _flaky_replay_to_failure( self, err: FlakyReplay, context: BaseException ) -> FlakyFailure: + # Note that in the mark_interesting case, _context_ itself + # is part of err._interesting_origins - but it's not in + # _runner.interesting_examples - this is fine, as the context + # (i.e., immediate exception) is appended. interesting_examples = [ self._runner.interesting_examples[io] for io in err._interesting_origins - if io + if io in self._runner.interesting_examples ] exceptions = [ ie.extra_information._expected_exception for ie in interesting_examples ] - exceptions.append(context) # the offending assume (or whatever) + exceptions.append(context) # the immediate exception return FlakyFailure(err.reason, exceptions) def _execute_once_for_engine(self, data: ConjectureData) -> None: @@ -1119,7 +1123,19 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None: # If an unhandled (i.e., non-Hypothesis) error was raised by # Hypothesis-internal code, re-raise it as a fatal error instead # of treating it as a test failure. - filepath = traceback.extract_tb(e.__traceback__)[-1][0] + if isinstance(e, BaseExceptionGroup) and len(e.exceptions) == 1: + # When a naked exception is implicitly wrapped in an ExceptionGroup + # due to a re-raising "except*", the ExceptionGroup is constructed in + # the caller's stack frame (see #4183). 
This workaround is specifically + # for implicit wrapping of naked exceptions by "except*", since explicit + # raising of ExceptionGroup gets the proper traceback in the first place + # - there's no need to handle hierarchical groups here, at least if no + # such implicit wrapping happens inside hypothesis code (we only care + # about the hypothesis-or-not distinction). + tb = e.exceptions[0].__traceback__ or e.__traceback__ + else: + tb = e.__traceback__ + filepath = traceback.extract_tb(tb)[-1][0] if is_hypothesis_file(filepath) and not isinstance(e, HypothesisException): raise @@ -1147,7 +1163,11 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None: if interesting_origin[0] == DeadlineExceeded: self.failed_due_to_deadline = True self.explain_traces.clear() - data.mark_interesting(interesting_origin) + try: + data.mark_interesting(interesting_origin) + except FlakyReplay as err: + raise self._flaky_replay_to_failure(err, e) from None + finally: # Conditional here so we can save some time constructing the payload; in # other cases (without coverage) it's cheap enough to do that regardless. diff --git a/hypothesis-python/tests/cover/test_flakiness.py b/hypothesis-python/tests/cover/test_flakiness.py index 967cdc6e13..32b9f91ebe 100644 --- a/hypothesis-python/tests/cover/test_flakiness.py +++ b/hypothesis-python/tests/cover/test_flakiness.py @@ -8,11 +8,14 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
+import sys + import pytest from hypothesis import HealthCheck, Verbosity, assume, example, given, reject, settings from hypothesis.core import StateForActualGivenExecution from hypothesis.errors import Flaky, FlakyFailure, Unsatisfiable, UnsatisfiedAssumption +from hypothesis.internal.compat import ExceptionGroup from hypothesis.internal.conjecture.engine import MIN_TEST_CALLS from hypothesis.internal.scrutineer import Tracer from hypothesis.strategies import booleans, composite, integers, lists, random_module @@ -41,6 +44,55 @@ def rude(x): assert isinstance(exceptions[0], Nope) +def test_fails_differently_is_flaky(): + call_count = 0 + + class DifferentNope(Exception): + pass + + @given(integers()) + @settings(database=None) + def rude(x): + nonlocal call_count + if x == 0: + call_count += 1 + if call_count > 1: + raise Nope + else: + raise DifferentNope + + with pytest.raises(FlakyFailure, match="Inconsistent results from replaying") as e: + rude() + exceptions = e.value.exceptions + assert len(exceptions) == 2 + assert set(map(type, exceptions)) == {Nope, DifferentNope} + + +@pytest.mark.skipif(sys.version_info < (3, 11), reason="except* syntax") +def test_exceptiongroup_wrapped_naked_exception_is_flaky(): + + # Defer parsing until runtime, as "except*" is a syntax error pre 3.11 + rude_def = """ +first_call = True +def rude_fn(x): + global first_call + if first_call: + first_call = False + try: + raise Nope + except* Nope: + raise + """ + exec(rude_def, globals()) + rude = given(integers())(rude_fn) # noqa: F821 # defined by exec() + + with pytest.raises(FlakyFailure, match="Falsified on the first call but") as e: + rude() + exceptions = e.value.exceptions + assert list(map(type, exceptions)) == [ExceptionGroup] + assert list(map(type, exceptions[0].exceptions)) == [Nope] + + def test_gives_flaky_error_if_assumption_is_flaky(): seen = set()