add option to treat exceptions as equivalent

add test; encapsulate ExceptionEquivalence in enum; reduce duplication in tests; update incorrect comment; lint files; ignore flake8 check for bare except; lint; return IgnoreAttempt instead of returning the exception directly; unpack IgnoreAttempt while displaying; lint; revert wrap inside IgnoreAttempt; Update diff_behavior.py; add type information for mypy; run pre-commit; add myself to the contributor list 👀; fix (some) failing tests; lint; swap order of clauses to fix nan issue; undo lint
pschanely · Dec 21, 2024 · 8ed4356 · 8ed4356
1 parent 2dd837c
commit 8ed4356
Show file tree

Hide file tree

Showing 6 changed files with 205 additions and 20 deletions.
diff --git a/crosshair/diff_behavior.py b/crosshair/diff_behavior.py
@@ -1,11 +1,13 @@
 import copy
 import dataclasses
 import dis
+import enum
 import inspect
 import sys
 import time
-from typing import Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
+from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
 
+from crosshair import IgnoreAttempt
 from crosshair.condition_parser import condition_parser
 from crosshair.core import ExceptionFilter, Patched, deep_realize, gen_args, realize
 from crosshair.fnutil import FunctionInfo
@@ -26,7 +28,13 @@
     PushedModule,
     ResumedTracing,
 )
-from crosshair.util import IgnoreAttempt, UnexploredPath, debug
+from crosshair.util import CrosshairUnsupported, IgnoreAttempt, UnexploredPath, debug
+
+
+class ExceptionEquivalenceType(enum.Enum):
+    """How strictly two raised exceptions must match to count as equivalent."""
+
+    # Any two exceptions are considered equivalent.
+    ALL = "ALL"
+    # Exceptions are equivalent when they have the same type.
+    SAME_TYPE = "SAME_TYPE"
+    # Exceptions are equivalent when their repr() strings are equal.
+    TYPE_AND_MESSAGE = "TYPE_AND_MESSAGE"
 
 
 @dataclasses.dataclass
@@ -67,16 +75,16 @@ def describe(self, args_to_show: Set[str]) -> str:
 
 def describe_behavior(
     fn: Callable, args: inspect.BoundArguments
-) -> Tuple[object, Optional[str]]:
+) -> Tuple[Any, Optional[BaseException]]:
+    """
+    Call `fn` with the bound `args` and summarize what happened.
+
+    Returns `(return_value, None)` when the call completes, `(None, exc)`
+    when the ExceptionFilter recorded a user-level exception, and
+    `(None, IgnoreAttempt())` when the filter marked the attempt as ignored.
+    """
     with ExceptionFilter() as efilter:
         ret = fn(*args.args, **args.kwargs)
         return (ret, None)
     if efilter.user_exc is not None:
         exc = efilter.user_exc[0]
         debug("user-level exception found", repr(exc), *efilter.user_exc[1])
-        return (None, repr(exc))
+        return (None, exc)
     if efilter.ignore:
-        return (None, "IgnoreAttempt")
+        return (None, IgnoreAttempt())
     assert False
 
 
@@ -120,7 +128,10 @@ def scorer(diff: BehaviorDiff) -> Tuple[float, float]:
 
 
 def diff_behavior(
-    ctxfn1: FunctionInfo, ctxfn2: FunctionInfo, options: AnalysisOptions
+    ctxfn1: FunctionInfo,
+    ctxfn2: FunctionInfo,
+    options: AnalysisOptions,
+    exception_equivalence: ExceptionEquivalenceType = ExceptionEquivalenceType.TYPE_AND_MESSAGE,
 ) -> Union[str, List[BehaviorDiff]]:
     fn1, sig1 = ctxfn1.callable()
     fn2, sig2 = ctxfn2.callable()
@@ -133,10 +144,14 @@ def diff_behavior(
         # We attempt both orderings of functions. This helps by:
         # (1) avoiding code path explosions in one of the functions
         # (2) using both signatures (in case they differ)
-        all_diffs.extend(diff_behavior_with_signature(fn1, fn2, sig1, half1))
+        all_diffs.extend(
+            diff_behavior_with_signature(fn1, fn2, sig1, half1, exception_equivalence)
+        )
         all_diffs.extend(
             diff.reverse()
-            for diff in diff_behavior_with_signature(fn2, fn1, sig2, half2)
+            for diff in diff_behavior_with_signature(
+                fn2, fn1, sig2, half2, exception_equivalence
+            )
         )
     debug("diff candidates:", all_diffs)
     # greedily pick results:
@@ -160,7 +175,11 @@ def diff_behavior(
 
 
 def diff_behavior_with_signature(
-    fn1: Callable, fn2: Callable, sig: inspect.Signature, options: AnalysisOptions
+    fn1: Callable,
+    fn2: Callable,
+    sig: inspect.Signature,
+    options: AnalysisOptions,
+    exception_equivalence: ExceptionEquivalenceType,
 ) -> Iterable[BehaviorDiff]:
     search_root = RootNode()
     condition_start = time.monotonic()
@@ -185,7 +204,9 @@ def diff_behavior_with_signature(
             output = None
             try:
                 with ResumedTracing():
-                    (verification_status, output) = run_iteration(fn1, fn2, sig, space)
+                    (verification_status, output) = run_iteration(
+                        fn1, fn2, sig, space, exception_equivalence
+                    )
             except IgnoreAttempt:
                 verification_status = None
             except UnexploredPath:
@@ -214,8 +235,27 @@ def diff_behavior_with_signature(
                     break
 
 
+def check_exception_equivalence(
+    exception_equivalence_type: ExceptionEquivalenceType,
+    exc1: Optional[BaseException],
+    exc2: Optional[BaseException],
+) -> bool:
+    """
+    Decide whether two exception outcomes count as equivalent.
+
+    `exc1` / `exc2` are the exceptions raised by the two functions, or None
+    when the corresponding function did not raise. If only one side raised,
+    the outcomes are never equivalent; if neither raised, they always are.
+    When both raised, `exception_equivalence_type` selects the comparison:
+    ALL accepts any pair, SAME_TYPE compares the exception types, and
+    TYPE_AND_MESSAGE compares the `repr()` of the two exceptions.
+
+    Raises CrosshairUnsupported for an unrecognized equivalence type.
+
+    NOTE(review): run_iteration only calls this when both sides raised, so a
+    raise-vs-return mismatch with equal return values is not examined there;
+    passing both exception slots unconditionally would close that gap.
+    """
+    # Validate the mode first so an invalid value fails regardless of inputs.
+    if not isinstance(exception_equivalence_type, ExceptionEquivalenceType):
+        raise CrosshairUnsupported("Invalid exception_equivalence type")
+    if exc1 is None or exc2 is None:
+        # A raise on only one side is a behavioral difference in every mode.
+        return exc1 is None and exc2 is None
+    if exception_equivalence_type == ExceptionEquivalenceType.ALL:
+        return True
+    elif exception_equivalence_type == ExceptionEquivalenceType.SAME_TYPE:
+        return type(exc1) == type(exc2)
+    elif exception_equivalence_type == ExceptionEquivalenceType.TYPE_AND_MESSAGE:
+        return repr(exc1) == repr(exc2)
+    else:
+        raise CrosshairUnsupported("Invalid exception_equivalence type")
+
+
 def run_iteration(
-    fn1: Callable, fn2: Callable, sig: inspect.Signature, space: StateSpace
+    fn1: Callable,
+    fn2: Callable,
+    sig: inspect.Signature,
+    space: StateSpace,
+    exception_equivalence: ExceptionEquivalenceType,
 ) -> Tuple[Optional[VerificationStatus], Optional[BehaviorDiff]]:
     with NoTracing():
         original_args = gen_args(sig)
@@ -228,9 +268,19 @@ def run_iteration(
         result1 = describe_behavior(fn1, args1)
         result2 = describe_behavior(fn2, args2)
         space.detach_path()
-        if flexible_equal(result1, result2) and flexible_equal(args1, args2):
-            debug("Functions equivalent")
-            return (VerificationStatus.CONFIRMED, None)
+        if flexible_equal(  # Compare the output.
+            result1[0], result2[0]
+        ) and flexible_equal(args1, args2):
+            if not (
+                isinstance(result1[1], BaseException)
+                and isinstance(result2[1], BaseException)
+            ) or check_exception_equivalence(
+                exception_equivalence, result1[1], result2[1]
+            ):
+                # Functions are equivalent if both have the same result,
+                # and deemed to have the same kind of error.
+                debug("Functions equivalent")
+                return (VerificationStatus.CONFIRMED, None)
         debug("Functions differ")
         realized_args = {
             k: repr(deep_realize(v)) for (k, v) in original_args.arguments.items()
@@ -245,12 +295,12 @@ def run_iteration(
             realized_args,
             Result(
                 repr(deep_realize(result1[0])),
-                realize(result1[1]),
+                realize(repr(result1[1]) if result1[1] else None),
                 post_execution_args1,
             ),
             Result(
                 repr(deep_realize(result2[0])),
-                realize(result2[1]),
+                realize(repr(result2[1]) if result2[1] else None),
                 post_execution_args2,
             ),
             coverage_manager.get_results(fn1),

diff --git a/crosshair/diff_behavior_test.py b/crosshair/diff_behavior_test.py
@@ -2,8 +2,13 @@
 import unittest
 from typing import Callable, List, Optional
 
-from crosshair.diff_behavior import BehaviorDiff, diff_behavior
+from crosshair.diff_behavior import (
+    BehaviorDiff,
+    ExceptionEquivalenceType,
+    diff_behavior,
+)
 from crosshair.fnutil import FunctionInfo, walk_qualname
+from crosshair.main import unwalled_main
 from crosshair.options import DEFAULT_OPTIONS
 from crosshair.util import IgnoreAttempt, debug, set_debug
 
@@ -50,6 +55,33 @@ def foo(self):
         return 11
 
 
+def _sum_list_original(int_list):
+    """Sum the items of `int_list` by iterating over it directly (test fixture)."""
+    count = 0
+    for i in int_list:
+        count += i
+    return count
+
+
+def _sum_list_rewrite(int_list):
+    """
+    Sum the items of `int_list` via `len()` and index access (test fixture).
+
+    Keep this form as-is: the exception tests expect it to fail with a
+    TypeError whose text differs from the one `_sum_list_original` produces.
+    """
+    count = 0
+    for i in range(len(int_list)):
+        count += int_list[i]
+    return count
+
+
+def _sum_list_rewrite_2(int_list):
+    """
+    Index-based sum that replaces any failure with a local CustomException
+    (test fixture for comparing exceptions of different types).
+    """
+
+    class CustomException(Exception):
+        pass
+
+    try:
+        count = 0
+        for i in range(len(int_list)):
+            count += int_list[i]
+    # Deliberately bare: whatever is raised while summing is re-raised as
+    # CustomException, so flake8's E722 is silenced here.
+    except:  # noqa E722
+        raise CustomException()
+    return count
+
+
 class BehaviorDiffTest(unittest.TestCase):
     def test_diff_method(self):
         diffs = diff_behavior(
@@ -146,6 +178,81 @@ def f(a: Optional[Callable[[int], int]]):
     assert diffs == []
 
 
+def test_diffbehavior_exceptions_default() -> None:
+    """
+    Default behavior of `diffbehavior` (TYPE_AND_MESSAGE): exceptions of the
+    same type but with different messages are reported as a difference.
+    """
+
+    diffs = diff_behavior(
+        FunctionInfo.from_fn(_sum_list_original),
+        FunctionInfo.from_fn(_sum_list_rewrite),
+        DEFAULT_OPTIONS,
+    )
+    debug("diffs=", diffs)
+    assert len(diffs) == 1  # finds a counter-example
+    assert isinstance(diffs[0], BehaviorDiff)
+    assert diffs[0].result1
+    assert isinstance(diffs[0].result1.error, str)
+    assert isinstance(diffs[0].result2.error, str)
+    assert diffs[0].result1.error.startswith("TypeError")
+    assert diffs[0].result2.error.startswith("TypeError")
+    assert (
+        diffs[0].result1.error != diffs[0].result2.error
+    )  # Both functions raise TypeError, but the error text differs
+
+
+def test_diffbehavior_exceptions_same_type() -> None:
+    """
+    Treat exceptions of the same type as equivalent.
+    """
+
+    diffs = diff_behavior(
+        FunctionInfo.from_fn(_sum_list_original),
+        FunctionInfo.from_fn(_sum_list_rewrite),
+        DEFAULT_OPTIONS,
+        exception_equivalence=ExceptionEquivalenceType.SAME_TYPE,
+    )
+    debug("diffs=", diffs)
+    # No counter-example: under SAME_TYPE, two TypeErrors are equivalent
+    # even when their messages differ.
+    assert len(diffs) == 0
+
+
+def test_diffbehavior_exceptions_all() -> None:
+    """
+    Treat exceptions of all types as equivalent.
+    """
+
+    diffs = diff_behavior(
+        FunctionInfo.from_fn(_sum_list_original),
+        FunctionInfo.from_fn(_sum_list_rewrite_2),
+        DEFAULT_OPTIONS,
+        exception_equivalence=ExceptionEquivalenceType.ALL,
+    )
+    debug("diffs=", diffs)
+    # No counter-example: under ALL, any two exceptions are equivalent,
+    # regardless of their types.
+    assert len(diffs) == 0
+
+
+def test_diffbehavior_exceptions_same_type_different() -> None:
+    """
+    Under SAME_TYPE, find a counter-example when the two functions raise
+    exceptions of different types.
+    """
+
+    diffs = diff_behavior(
+        FunctionInfo.from_fn(_sum_list_original),
+        FunctionInfo.from_fn(_sum_list_rewrite_2),
+        DEFAULT_OPTIONS,
+        exception_equivalence=ExceptionEquivalenceType.SAME_TYPE,
+    )
+    debug("diffs=", diffs)
+    assert (
+        len(diffs) == 1
+    )  # finds a counter-example, because TypeError != CustomException
+    assert isinstance(diffs[0], BehaviorDiff)
+    assert isinstance(diffs[0].result1.error, str)
+    assert isinstance(diffs[0].result2.error, str)
+    assert diffs[0].result1.error.startswith("TypeError")
+    assert diffs[0].result2.error.startswith("CustomException")
+
+
 def test_diff_behavior_nan() -> None:
     def f(x: float):
         return x

diff --git a/crosshair/main.py b/crosshair/main.py
@@ -36,7 +36,7 @@
     installed_plugins,
     run_checkables,
 )
-from crosshair.diff_behavior import diff_behavior
+from crosshair.diff_behavior import ExceptionEquivalenceType, diff_behavior
 from crosshair.fnutil import (
     FUNCTIONINFO_DESCRIPTOR_TYPES,
     FunctionInfo,
@@ -265,6 +265,21 @@ def command_line_parser() -> argparse.ArgumentParser:
         type=str,
         help="second fully-qualified function to compare",
     )
+    diffbehavior_parser.add_argument(
+        "--exception_equivalence",
+        metavar="EXCEPTION_EQUIVALENCE",
+        type=ExceptionEquivalenceType,
+        default=ExceptionEquivalenceType.TYPE_AND_MESSAGE,
+        choices=ExceptionEquivalenceType.__members__.values(),
+        help=textwrap.dedent(
+            """\
+            Decide how to treat exceptions, while searching for a counter-example.
+            `ALL` treats all exceptions as equivalent,
+            `SAME_TYPE`, considers matches on the type.
+            `TYPE_AND_MESSAGE` matches for the same type and message.
+            """
+        ),
+    )
     cover_parser = subparsers.add_parser(
         "cover",
         formatter_class=argparse.RawTextHelpFormatter,
@@ -679,10 +694,11 @@ def diffbehavior(
     (fn_name1, fn_name2) = (args.fn1, args.fn2)
     fn1 = checked_fn_load(fn_name1, stderr)
     fn2 = checked_fn_load(fn_name2, stderr)
+    exception_equivalence = args.exception_equivalence
     if fn1 is None or fn2 is None:
         return 2
     options.stats = Counter()
-    diffs = diff_behavior(fn1, fn2, options)
+    diffs = diff_behavior(fn1, fn2, options, exception_equivalence)
     debug("stats", options.stats)
     if isinstance(diffs, str):
         print(diffs, file=stderr)

diff --git a/crosshair/main_test.py b/crosshair/main_test.py
@@ -61,7 +61,12 @@ def call_check(
 def call_diffbehavior(fn1: str, fn2: str) -> Tuple[int, List[str]]:
     buf: io.StringIO = io.StringIO()
     errbuf: io.StringIO = io.StringIO()
-    retcode = diffbehavior(Namespace(fn1=fn1, fn2=fn2), DEFAULT_OPTIONS, buf, errbuf)
+    retcode = diffbehavior(
+        Namespace(fn1=fn1, fn2=fn2, exception_equivalence="type_and_message"),
+        DEFAULT_OPTIONS,
+        buf,
+        errbuf,
+    )
     lines = [
         ls for ls in buf.getvalue().split("\n") + errbuf.getvalue().split("\n") if ls
     ]

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
@@ -121,3 +121,4 @@ In order of initial commit. Many thanks!
 * `Alec Delaney <https://github.com/tekktrik>`_
 * `Zac Hatfield-Dodds <https://github.com/Zac-HD>`_
 * `Tomasz Kosiński <https://github.com/azewiusz>`_
+* `Abhiram Bellur <https://github.com/Abhiram98>`_
diff --git a/doc/source/diff_behavior.rst b/doc/source/diff_behavior.rst
@@ -46,6 +46,7 @@ How do I try it?
 
     usage: crosshair diffbehavior [-h] [--verbose]
                                   [--extra_plugin EXTRA_PLUGIN [EXTRA_PLUGIN ...]]
+                                  [--exception_equivalence EXCEPTION_EQUIVALENCE]
                                   [--max_uninteresting_iterations MAX_UNINTERESTING_ITERATIONS]
                                   [--per_path_timeout FLOAT]
                                   [--per_condition_timeout FLOAT]
@@ -63,6 +64,11 @@ How do I try it?
       --verbose, -v         Output additional debugging information on stderr
       --extra_plugin EXTRA_PLUGIN [EXTRA_PLUGIN ...]
                             Plugin file(s) you wish to use during the current execution
+      --exception_equivalence EXCEPTION_EQUIVALENCE
+                            Decide how to treat exceptions, while searching for a counter-example.
+                            `ALL` treats all exceptions as equivalent,
+                            `SAME_TYPE`, considers matches on the type.
+                            `TYPE_AND_MESSAGE` matches for the same type and message.
       --max_uninteresting_iterations MAX_UNINTERESTING_ITERATIONS
                             Maximum number of consecutive iterations to run without making
                             significant progress in exploring the codebase.