diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
index e7de1110c7..6053dbf74f 100644
--- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py
+++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -1737,19 +1737,12 @@ def _draw(self, ir_type, kwargs, *, observe, forced, fake_forced):
             debug_report(f"overrun because hit {self.max_length_ir=}")
             self.mark_overrun()
 
-        if self.ir_prefix is not None and observe:
-            if self.index_ir < len(self.ir_prefix):
-                choice = self._pop_choice(ir_type, kwargs, forced=forced)
-            else:
-                try:
-                    choice = (
-                        forced
-                        if forced is not None
-                        else draw_choice(ir_type, kwargs, random=self.__random)
-                    )
-                except StopTest:
-                    debug_report("overrun because draw_choice overran")
-                    self.mark_overrun()
+        if (
+            observe
+            and self.ir_prefix is not None
+            and self.index_ir < len(self.ir_prefix)
+        ):
+            choice = self._pop_choice(ir_type, kwargs, forced=forced)
 
             if forced is None:
                 forced = choice
@@ -2261,7 +2254,7 @@ def draw_bits(
         elif self._bytes_drawn < len(self.__prefix):
             index = self._bytes_drawn
             buf = self.__prefix[index : index + n_bytes]
-            if len(buf) < n_bytes:
+            if len(buf) < n_bytes:  # pragma: no cover # removing soon
                 assert self.__random is not None
                 buf += uniform(self.__random, n_bytes - len(buf))
         else:
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
index fdcb23903c..931636f480 100644
--- a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
+++ b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
@@ -18,17 +18,7 @@
 from datetime import timedelta
 from enum import Enum
 from random import Random, getrandbits
-from typing import (
-    Callable,
-    Final,
-    List,
-    Literal,
-    NoReturn,
-    Optional,
-    Union,
-    cast,
-    overload,
-)
+from typing import Callable, Final, List, Literal, NoReturn, Optional, Union, cast
 
 import attr
 
@@ -283,7 +273,6 @@ def __init__(
         # shrinking where we need to know about the structure of the
         # executed test case.
         self.__data_cache = LRUReusedCache(CACHE_SIZE)
-        self.__data_cache_ir = LRUReusedCache(CACHE_SIZE)
 
         self.reused_previously_shrunk_test_case = False
 
@@ -359,26 +348,8 @@ def _cache_key(self, choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]:
 
     def _cache(self, data: ConjectureData) -> None:
         result = data.as_result()
-        self.__data_cache[data.buffer] = result
-
-        # interesting buffer-based data can mislead the shrinker if we cache them.
-        #
-        #   @given(st.integers())
-        #   def f(n):
-        #     assert n < 100
-        #
-        # may generate two counterexamples, n=101 and n=m > 101, in that order,
-        # where the buffer corresponding to n is large due to eg failed probes.
-        # We shrink m and eventually try n=101, but it is cached to a large buffer
-        # and so the best we can do is n=102, a non-ideal shrink.
-        #
-        # We can cache ir-based buffers fine, which always correspond to the
-        # smallest buffer via forced=. The overhead here is small because almost
-        # all interesting data are ir-based via the shrinker (and that overhead
-        # will tend towards zero as we move generation to the ir).
-        if data.ir_prefix is not None or data.status < Status.INTERESTING:
-            key = self._cache_key(data.choices)
-            self.__data_cache_ir[key] = result
+        key = self._cache_key(data.choices)
+        self.__data_cache[key] = result
 
     def cached_test_function_ir(
         self,
@@ -387,6 +358,14 @@ def cached_test_function_ir(
         error_on_discard: bool = False,
         extend: int = 0,
     ) -> Union[ConjectureResult, _Overrun]:
+        """
+        If ``error_on_discard`` is set to True this will raise ``ContainsDiscard``
+        in preference to running the actual test function. This is to allow us
+        to skip test cases we expect to be redundant in some cases. Note that
+        it may be the case that we don't raise ``ContainsDiscard`` even if the
+        result has discards if we cannot determine from previous runs whether
+        it will have a discard.
+        """
         # node templates represent a not-yet-filled hole and therefore cannot
         # be cached or retrieved from the cache.
         if not any(isinstance(choice, NodeTemplate) for choice in choices):
@@ -395,7 +374,7 @@ def cached_test_function_ir(
             choices = cast(Sequence[ChoiceT], choices)
             key = self._cache_key(choices)
             try:
-                cached = self.__data_cache_ir[key]
+                cached = self.__data_cache[key]
                 # if we have a cached overrun for this key, but we're allowing extensions
                 # of the nodes, it could in fact run to a valid data if we try.
                 if extend == 0 or cached.status is not Status.OVERRUN:
@@ -429,14 +408,19 @@ def kill_branch(self) -> NoReturn:
         else:
             trial_data.freeze()
             key = self._cache_key(trial_data.choices)
-            if trial_data.status is Status.OVERRUN:
+            if trial_data.status > Status.OVERRUN:
+                try:
+                    return self.__data_cache[key]
+                except KeyError:
+                    pass
+            else:
                 # if we simulated to an overrun, then we our result is certainly
                 # an overrun; no need to consult the cache. (and we store this result
                 # for simulation-less lookup later).
-                self.__data_cache_ir[key] = Overrun
+                self.__data_cache[key] = Overrun
                 return Overrun
             try:
-                return self.__data_cache_ir[key]
+                return self.__data_cache[key]
             except KeyError:
                 pass
 
@@ -567,8 +551,8 @@ def test_function(self, data: ConjectureData) -> None:
 
         if data.status == Status.INTERESTING:
             if not self.using_hypothesis_backend:
-                # drive the ir tree through the test function to convert it
-                # to a buffer
+                # replay this failure on the hypothesis backend to ensure it still
+                # finds a failure. otherwise, it is flaky.
                 initial_origin = data.interesting_origin
                 initial_traceback = getattr(
                     data.extra_information, "_expected_traceback", None
@@ -611,13 +595,13 @@ def test_function(self, data: ConjectureData) -> None:
                 if sort_key_ir(data.ir_nodes) < sort_key_ir(existing.ir_nodes):
                     self.shrinks += 1
                     self.downgrade_buffer(ir_to_bytes(existing.choices))
-                    self.__data_cache.unpin(existing.buffer)
+                    self.__data_cache.unpin(self._cache_key(existing.choices))
                     changed = True
 
             if changed:
                 self.save_choices(data.choices)
                 self.interesting_examples[key] = data.as_result()  # type: ignore
-                self.__data_cache.pin(data.buffer, data.as_result())
+                self.__data_cache.pin(self._cache_key(data.choices), data.as_result())
                 self.shrunk_examples.discard(key)
 
             if self.shrinks >= MAX_SHRINKS:
@@ -969,11 +953,13 @@ def generate_new_examples(self) -> None:
         self.debug("Generating new examples")
 
         assert self.should_generate_more()
-        zero_data = self.cached_test_function(bytes(BUFFER_SIZE))
+        zero_data = self.cached_test_function_ir(
+            (NodeTemplate("simplest", size=BUFFER_SIZE),)
+        )
         if zero_data.status > Status.OVERRUN:
             assert isinstance(zero_data, ConjectureResult)
             self.__data_cache.pin(
-                zero_data.buffer, zero_data.as_result()
+                self._cache_key(zero_data.choices), zero_data.as_result()
             )  # Pin forever
 
         if zero_data.status == Status.OVERRUN or (
@@ -1048,7 +1034,7 @@ def generate_new_examples(self) -> None:
             # not whatever is specified by the backend. We can improve this
             # once more things are on the ir.
             if not self.using_hypothesis_backend:
-                data = self.new_conjecture_data(prefix=b"", max_length=BUFFER_SIZE)
+                data = self.new_conjecture_data_ir([], max_length=BUFFER_SIZE)
                 with suppress(BackendCannotProceed):
                     self.test_function(data)
                 continue
@@ -1228,7 +1214,7 @@ def generate_mutations_from(
                     assert isinstance(new_data, ConjectureResult)
                     if (
                         new_data.status >= data.status
-                        and data.buffer != new_data.buffer
+                        and choices_key(data.choices) != choices_key(new_data.choices)
                         and all(
                             k in new_data.target_observations
                             and new_data.target_observations[k] >= v
@@ -1332,32 +1318,6 @@ def new_conjecture_data_ir(
             random=self.random,
         )
 
-    def new_conjecture_data(
-        self,
-        prefix: Union[bytes, bytearray],
-        max_length: int = BUFFER_SIZE,
-        observer: Optional[DataObserver] = None,
-    ) -> ConjectureData:
-        provider = (
-            HypothesisProvider if self._switch_to_hypothesis_provider else self.provider
-        )
-        observer = observer or self.tree.new_observer()
-        if not self.using_hypothesis_backend:
-            observer = DataObserver()
-
-        return ConjectureData(
-            prefix=prefix,
-            max_length=max_length,
-            random=self.random,
-            observer=observer,
-            provider=provider,
-        )
-
-    def new_conjecture_data_for_buffer(
-        self, buffer: Union[bytes, bytearray]
-    ) -> ConjectureData:
-        return self.new_conjecture_data(buffer, max_length=len(buffer))
-
     def shrink_interesting_examples(self) -> None:
         """If we've found interesting examples, try to replace each of them
         with a minimal interesting example with the same interesting_origin.
@@ -1468,88 +1428,6 @@ def new_shrinker(
             in_target_phase=self._current_phase == "target",
         )
 
-    def cached_test_function(
-        self,
-        buffer: Union[bytes, bytearray],
-        *,
-        extend: int = 0,
-    ) -> Union[ConjectureResult, _Overrun]:  # pragma: no cover # removing function soon
-        """Checks the tree to see if we've tested this buffer, and returns the
-        previous result if we have.
-
-        Otherwise we call through to ``test_function``, and return a
-        fresh result.
-
-        If ``error_on_discard`` is set to True this will raise ``ContainsDiscard``
-        in preference to running the actual test function. This is to allow us
-        to skip test cases we expect to be redundant in some cases. Note that
-        it may be the case that we don't raise ``ContainsDiscard`` even if the
-        result has discards if we cannot determine from previous runs whether
-        it will have a discard.
-        """
-        buffer = bytes(buffer)[:BUFFER_SIZE]
-
-        max_length = min(BUFFER_SIZE, len(buffer) + extend)
-
-        @overload
-        def check_result(result: _Overrun) -> _Overrun: ...
-        @overload
-        def check_result(result: ConjectureResult) -> ConjectureResult: ...
-        def check_result(
-            result: Union[_Overrun, ConjectureResult],
-        ) -> Union[_Overrun, ConjectureResult]:
-            assert result is Overrun or (
-                isinstance(result, ConjectureResult) and result.status != Status.OVERRUN
-            )
-            return result
-
-        try:
-            cached = check_result(self.__data_cache[buffer])
-            if cached.status > Status.OVERRUN or extend == 0:
-                return cached
-        except KeyError:
-            pass
-
-        observer = DataObserver()
-        dummy_data = self.new_conjecture_data(
-            prefix=buffer, max_length=max_length, observer=observer
-        )
-
-        if self.using_hypothesis_backend:
-            try:
-                self.tree.simulate_test_function(dummy_data)
-            except PreviouslyUnseenBehaviour:
-                pass
-            else:
-                if dummy_data.status > Status.OVERRUN:
-                    dummy_data.freeze()
-                    try:
-                        return self.__data_cache[dummy_data.buffer]
-                    except KeyError:
-                        pass
-                else:
-                    self.__data_cache[buffer] = Overrun
-                    return Overrun
-
-        # We didn't find a match in the tree, so we need to run the test
-        # function normally. Note that test_function will automatically
-        # add this to the tree so we don't need to update the cache.
-
-        result = None
-
-        data = self.new_conjecture_data(
-            prefix=max((buffer, dummy_data.buffer), key=len), max_length=max_length
-        )
-        self.test_function(data)
-        result = check_result(data.as_result())
-        if extend == 0 or (
-            result is not Overrun
-            and not isinstance(result, _Overrun)
-            and len(result.buffer) <= len(buffer)
-        ):
-            self.__data_cache[buffer] = result
-        return result
-
     def passing_choice_sequences(
         self, prefix: Sequence[IRNode] = ()
     ) -> frozenset[bytes]:
@@ -1558,8 +1436,8 @@ def passing_choice_sequences(
         """
         return frozenset(
             result.ir_nodes
-            for key in self.__data_cache_ir
-            if (result := self.__data_cache_ir[key]).status is Status.VALID
+            for key in self.__data_cache
+            if (result := self.__data_cache[key]).status is Status.VALID
             and startswith(result.ir_nodes, prefix)
         )
 
diff --git a/hypothesis-python/tests/conjecture/test_engine.py b/hypothesis-python/tests/conjecture/test_engine.py
index edef062c12..a912c0c5e9 100644
--- a/hypothesis-python/tests/conjecture/test_engine.py
+++ b/hypothesis-python/tests/conjecture/test_engine.py
@@ -216,7 +216,7 @@ def f(data):
         data.mark_interesting()
 
     runner = ConjectureRunner(f, settings=settings(max_examples=5000, database=None))
-    with buffer_size_limit(2):
+    with buffer_size_limit(4):
         runner.run()
     assert runner.interesting_examples
 
@@ -1505,19 +1505,6 @@ def test(data):
         assert d2.status is Status.VALID
 
 
-def test_draw_bits_partly_from_prefix_and_partly_random():
-    # a draw_bits call which straddles the end of our prefix has a slightly
-    # different code branch.
-    def test(data):
-        # float consumes draw_bits(64)
-        data.draw_float()
-
-    with deterministic_PRNG():
-        runner = ConjectureRunner(test, settings=TEST_SETTINGS)
-        d = runner.cached_test_function(bytes(10), extend=100)
-        assert d.status == Status.VALID
-
-
 def test_can_be_set_to_ignore_limits():
     def test(data):
         data.draw_integer(0, 2**8 - 1)