Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch from MultipleFailures to PEP-654 ExceptionGroup #3308

Merged
merged 3 commits into from
Aug 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
RELEASE_TYPE: minor

Reporting of :obj:`multiple failing examples <hypothesis.settings.report_multiple_bugs>`
now uses the :pep:`654` `ExceptionGroup <https://docs.python.org/3.11/library/exceptions.html#ExceptionGroup>`__ type, which is provided by the
:pypi:`exceptiongroup` backport on Python 3.10 and earlier (:issue:`3175`).
``hypothesis.errors.MultipleFailures`` is therefore deprecated.

Failing examples and other reports are now stored as :pep:`678` exception notes, which
ensures that they will always appear together with the traceback and other information
about their respective error.
7 changes: 7 additions & 0 deletions hypothesis-python/src/_hypothesis_pytestplugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,13 @@ def pytest_configure(config):
pass
core.global_force_seed = seed

core.pytest_shows_exceptiongroups = (
sys.version_info[:2] >= (3, 11)
## See https://github.com/pytest-dev/pytest/issues/9159
# or pytest_version >= (7, 2) # TODO: fill in correct version here
or config.getoption("tbstyle", "auto") == "native"
)

@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_call(item):
__tracebackhide__ = True
Expand Down
148 changes: 69 additions & 79 deletions hypothesis-python/src/hypothesis/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
HypothesisDeprecationWarning,
HypothesisWarning,
InvalidArgument,
MultipleFailures,
NoSuchExample,
StopTest,
Unsatisfiable,
Expand All @@ -69,6 +68,7 @@
from hypothesis.executors import default_new_style_executor, new_style_executor
from hypothesis.internal.compat import (
PYPY,
BaseExceptionGroup,
bad_django_TestCase,
get_type_hints,
int_from_bytes,
Expand Down Expand Up @@ -126,6 +126,7 @@


running_under_pytest = False
pytest_shows_exceptiongroups = True
global_force_seed = None
_hypothesis_global_random = None

Expand Down Expand Up @@ -436,7 +437,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s
err = new

yield (fragments_reported, err)
if state.settings.report_multiple_bugs:
if state.settings.report_multiple_bugs and pytest_shows_exceptiongroups:
continue
break
finally:
Expand Down Expand Up @@ -575,7 +576,6 @@ def __init__(
self.settings = settings
self.last_exception = None
self.falsifying_examples = ()
self.__was_flaky = False
self.random = random
self.__test_runtime = None
self.ever_executed = False
Expand Down Expand Up @@ -710,11 +710,10 @@ def run(data):
)
else:
report("Failed to reproduce exception. Expected: \n" + traceback)
self.__flaky(
f"Hypothesis {text_repr} produces unreliable results: Falsified"
" on the first call but did not on a subsequent one",
cause=exception,
)
raise Flaky(
f"Hypothesis {text_repr} produces unreliable results: "
"Falsified on the first call but did not on a subsequent one"
) from exception
return result

def _execute_once_for_engine(self, data):
Expand Down Expand Up @@ -842,64 +841,57 @@ def run_engine(self):

if not self.falsifying_examples:
return
elif not self.settings.report_multiple_bugs:
elif not (self.settings.report_multiple_bugs and pytest_shows_exceptiongroups):
# Pretend that we only found one failure, by discarding the others.
del self.falsifying_examples[:-1]

# The engine found one or more failures, so we need to reproduce and
# report them.

flaky = 0
errors_to_report = []

if runner.best_observed_targets:
for line in describe_targets(runner.best_observed_targets):
report(line)
report("")
report_lines = describe_targets(runner.best_observed_targets)
if report_lines:
report_lines.append("")

explanations = explanatory_lines(self.explain_traces, self.settings)
for falsifying_example in self.falsifying_examples:
info = falsifying_example.extra_information
fragments = []

ran_example = ConjectureData.for_buffer(falsifying_example.buffer)
self.__was_flaky = False
assert info.__expected_exception is not None
try:
self.execute_once(
ran_example,
print_example=not self.is_find,
is_final=True,
expected_failure=(
info.__expected_exception,
info.__expected_traceback,
),
)
with with_reporter(fragments.append):
self.execute_once(
ran_example,
print_example=not self.is_find,
is_final=True,
expected_failure=(
info.__expected_exception,
info.__expected_traceback,
),
)
except (UnsatisfiedAssumption, StopTest) as e:
report(format_exception(e, e.__traceback__))
self.__flaky(
err = Flaky(
"Unreliable assumption: An example which satisfied "
"assumptions on the first run now fails it.",
cause=e,
)
err.__cause__ = err.__context__ = e
errors_to_report.append((fragments, err))
except BaseException as e:
# If we have anything for explain-mode, this is the time to report.
for line in explanations[falsifying_example.interesting_origin]:
report(line)

if len(self.falsifying_examples) <= 1:
# There is only one failure, so we can report it by raising
# it directly.
raise

# We are reporting multiple failures, so we need to manually
# print each exception's stack trace and information.
tb = get_trimmed_traceback()
report(format_exception(e, tb))
fragments.append(line)
errors_to_report.append(
(fragments, e.with_traceback(get_trimmed_traceback()))
)

finally:
# Whether or not replay actually raised the exception again, we want
# to print the reproduce_failure decorator for the failing example.
if self.settings.print_blob:
report(
fragments.append(
"\nYou can reproduce this example by temporarily adding "
"@reproduce_failure(%r, %r) as a decorator on your test case"
% (__version__, encode_failure(falsifying_example.buffer))
Expand All @@ -908,30 +900,38 @@ def run_engine(self):
# hold on to a reference to ``data`` know that it's now been
# finished and they can't draw more data from it.
ran_example.freeze()
_raise_to_user(errors_to_report, self.settings, report_lines)

if self.__was_flaky:
flaky += 1

# If we only have one example then we should have raised an error or
# flaky prior to this point.
assert len(self.falsifying_examples) > 1

if flaky > 0:
raise Flaky(
f"Hypothesis found {len(self.falsifying_examples)} distinct failures, "
f"but {flaky} of them exhibited some sort of flaky behaviour."
)
else:
raise MultipleFailures(
f"Hypothesis found {len(self.falsifying_examples)} distinct failures."
)
def add_note(exc, note):
    """Attach ``note`` to ``exc`` as an exception note.

    Calls ``exc.add_note(note)`` when that works; if the call raises
    ``AttributeError`` (for example because the object has no ``add_note``
    method), the note is appended to ``exc.__notes__`` instead, creating
    that list first when it is absent.
    """
    try:
        exc.add_note(note)
    except AttributeError:
        # Manual fallback: maintain the ``__notes__`` list ourselves.
        try:
            existing_notes = exc.__notes__
        except AttributeError:
            existing_notes = exc.__notes__ = []
        existing_notes.append(note)


def _raise_to_user(errors_to_report, settings, target_lines, trailer=""):
"""Helper function for attaching notes and grouping multiple errors."""
if settings.verbosity >= Verbosity.normal:
for fragments, err in errors_to_report:
for note in fragments:
add_note(err, note)

if len(errors_to_report) == 1:
_, the_error_hypothesis_found = errors_to_report[0]
else:
assert errors_to_report
the_error_hypothesis_found = BaseExceptionGroup(
f"Hypothesis found {len(errors_to_report)} distinct failures{trailer}.",
[e for _, e in errors_to_report],
)

def __flaky(self, message, *, cause):
if len(self.falsifying_examples) <= 1:
raise Flaky(message) from cause
else:
self.__was_flaky = True
report("Flaky example! " + message)
if settings.verbosity >= Verbosity.normal:
for line in target_lines:
add_note(the_error_hypothesis_found, line)
raise the_error_hypothesis_found


@contextlib.contextmanager
Expand Down Expand Up @@ -1189,23 +1189,11 @@ def wrapped_test(*arguments, **kwargs):
state, wrapped_test, arguments, kwargs, original_sig
)
)
with local_settings(state.settings):
if len(errors) > 1:
# If we're not going to report multiple bugs, we would have
# stopped running explicit examples at the first failure.
assert state.settings.report_multiple_bugs
for fragments, err in errors:
for f in fragments:
report(f)
report(format_exception(err, err.__traceback__))
raise MultipleFailures(
f"Hypothesis found {len(errors)} failures in explicit examples."
)
elif errors:
fragments, the_error_hypothesis_found = errors[0]
for f in fragments:
report(f)
raise the_error_hypothesis_found
if errors:
# If we're not going to report multiple bugs, we would have
# stopped running explicit examples at the first failure.
assert len(errors) == 1 or state.settings.report_multiple_bugs
_raise_to_user(errors, state.settings, [], " in explicit examples")

# If there were any explicit examples, they all ran successfully.
# The next step is to use the Conjecture engine to run the test on
Expand Down Expand Up @@ -1236,7 +1224,7 @@ def wrapped_test(*arguments, **kwargs):
state.run_engine()
except BaseException as e:
# The exception caught here should either be an actual test
# failure (or MultipleFailures), or some kind of fatal error
# failure (or BaseExceptionGroup), or some kind of fatal error
# that caused the engine to stop.

generated_seed = wrapped_test._hypothesis_internal_use_generated_seed
Expand All @@ -1262,7 +1250,9 @@ def wrapped_test(*arguments, **kwargs):
# which will actually appear in tracebacks is as clear as
# possible - "raise the_error_hypothesis_found".
the_error_hypothesis_found = e.with_traceback(
get_trimmed_traceback()
None
if isinstance(e, BaseExceptionGroup)
else get_trimmed_traceback()
)
raise the_error_hypothesis_found

Expand Down
15 changes: 12 additions & 3 deletions hypothesis-python/src/hypothesis/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,18 @@ class Frozen(HypothesisException):
after freeze() has been called."""


class MultipleFailures(_Trimmable):
"""Indicates that Hypothesis found more than one distinct bug when testing
your code."""
def __getattr__(name):
    """Lazily resolve deprecated attributes of this module.

    Looking up ``MultipleFailures`` emits a deprecation notice through
    ``note_deprecation`` and returns ``BaseExceptionGroup`` in its place.

    Raises:
        AttributeError: for any other name. Without this, the function would
            implicitly return ``None``, so every misspelled attribute or
            import from this module would silently evaluate to ``None`` and
            ``hasattr`` would always report ``True``.
    """
    if name == "MultipleFailures":
        # Imported on demand rather than at module import time
        # (presumably to avoid an import cycle -- TODO confirm).
        from hypothesis._settings import note_deprecation
        from hypothesis.internal.compat import BaseExceptionGroup

        note_deprecation(
            "MultipleFailures is deprecated; use the builtin `BaseExceptionGroup` type "
            "instead, or `exceptiongroup.BaseExceptionGroup` before Python 3.11",
            since="RELEASEDAY",
            has_codemod=False,  # This would be a great PR though!
        )
        return BaseExceptionGroup

    # Unknown names must fail the normal way so that hasattr(), getattr()
    # with a default, and ``from ... import`` behave as for any other module.
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


class DeadlineExceeded(_Trimmable):
Expand Down
2 changes: 1 addition & 1 deletion hypothesis-python/src/hypothesis/internal/escalation.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def get_trimmed_traceback(exception=None):
else:
tb = exception.__traceback__
# Avoid trimming the traceback if we're in verbose mode, or the error
# was raised inside Hypothesis (and is not a MultipleFailures)
# was raised inside Hypothesis
if hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug or (
is_hypothesis_file(traceback.extract_tb(tb)[-1][0])
and not isinstance(exception, _Trimmable)
Expand Down
4 changes: 0 additions & 4 deletions hypothesis-python/src/hypothesis/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@
from hypothesis.utils.dynamicvariables import DynamicVariable


def silent(value):
pass


def default(value):
try:
print(value)
Expand Down
37 changes: 13 additions & 24 deletions hypothesis-python/tests/cover/test_arbitrary_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@
import pytest
from pytest import raises

from hypothesis import find, given, reporting, strategies as st
from hypothesis import find, given, strategies as st
from hypothesis.errors import InvalidArgument

from tests.common.utils import capture_out


@given(st.integers(), st.data())
def test_conditional_draw(x, data):
Expand All @@ -32,13 +30,10 @@ def test(data):
if y in x:
raise ValueError()

with raises(ValueError):
with capture_out() as out:
with reporting.with_reporter(reporting.default):
test()
result = out.getvalue()
assert "Draw 1: [0, 0]" in result
assert "Draw 2: 0" in result
with raises(ValueError) as err:
test()
assert "Draw 1: [0, 0]" in err.value.__notes__
assert "Draw 2: 0" in err.value.__notes__


def test_prints_labels_if_given_on_failure():
Expand All @@ -50,13 +45,10 @@ def test(data):
x.remove(y)
assert y not in x

with raises(AssertionError):
with capture_out() as out:
with reporting.with_reporter(reporting.default):
test()
result = out.getvalue()
assert "Draw 1 (Some numbers): [0, 0]" in result
assert "Draw 2 (A number): 0" in result
with raises(AssertionError) as err:
test()
assert "Draw 1 (Some numbers): [0, 0]" in err.value.__notes__
assert "Draw 2 (A number): 0" in err.value.__notes__


def test_given_twice_is_same():
Expand All @@ -66,13 +58,10 @@ def test(data1, data2):
data2.draw(st.integers())
raise ValueError()

with raises(ValueError):
with capture_out() as out:
with reporting.with_reporter(reporting.default):
test()
result = out.getvalue()
assert "Draw 1: 0" in result
assert "Draw 2: 0" in result
with raises(ValueError) as err:
test()
assert "Draw 1: 0" in err.value.__notes__
assert "Draw 2: 0" in err.value.__notes__


def test_errors_when_used_in_find():
Expand Down
Loading