Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
ea026aa
token-aware utils, first pass
Dec 10, 2025
4093731
tests and type fixes for token-aware utils
Dec 10, 2025
980c1af
condenser api now takes agent llm
Dec 10, 2025
0d2b16d
trigger condensation w/ token limits
Dec 10, 2025
d3d875e
minor refactor of llm summarizing condenser
Dec 10, 2025
7504dba
one last refactor
Dec 10, 2025
753d088
resolution of multiple condensation reasons at once
Dec 10, 2025
226b488
updating tests
Dec 10, 2025
81b5590
events_from_tail calculation fix
Dec 10, 2025
cf710cd
fixing aggressive condensation logic
Dec 10, 2025
de66479
tests for combos of reasons
Dec 10, 2025
3bafbec
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 10, 2025
16a5be5
linting
Dec 10, 2025
bc63019
minor formatting errors
Dec 11, 2025
4bba5fd
ignoring unknown attributes in tests
Dec 11, 2025
119e868
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 11, 2025
093eeb9
fixing type hints with overloaded prepare_llm_messages
Dec 11, 2025
e5518b5
removing TYPE_CHECKING flags
Dec 11, 2025
acc15c8
fix: correct import name in test_resolve_model_config.py
openhands-agent Dec 11, 2025
011b50f
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 16, 2025
4a1fc71
Update openhands-sdk/openhands/sdk/context/condenser/utils.py
csmith49 Dec 21, 2025
6fb0b8f
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 21, 2025
55e5b0e
llm -> summarizing_llm (and linting pass)
Dec 21, 2025
44dd213
fix failing cross tests -- updating the string ref to llm -> summariz…
Dec 21, 2025
fd27c75
Revert "fix failing cross tests -- updating the string ref to llm -> …
Dec 22, 2025
d63d40f
Revert "llm -> summarizing_llm (and linting pass)"
Dec 22, 2025
3c6aebe
condenser api changes: llm -> agent_llm
Dec 22, 2025
8869dd0
linting
Dec 22, 2025
2145e0e
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 22, 2025
f3d1e4f
fixing tests
Dec 22, 2025
aa5d9c6
condenser property in integration test scaffold
Dec 22, 2025
fb7644e
update integration base with max iterations
Dec 23, 2025
3b013f1
token counting integration test
Dec 23, 2025
fe083cd
linting
Dec 23, 2025
0b226c7
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 23, 2025
d698399
fixing import
Dec 23, 2025
7249b12
upping cap for tokens
Dec 23, 2025
bdd0a78
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 23, 2025
1180c9d
one last change to max tokens
Dec 23, 2025
e337ae6
minor indexing bug
Dec 23, 2025
15ae1a6
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 23, 2025
301232e
minor prompt tweak
Dec 23, 2025
23d154d
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 23, 2025
e73b00f
minor prompt tweak to get gpt and devstral to actually do their job
Dec 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openhands-sdk/openhands/sdk/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def step(

# Prepare LLM messages using the utility function
_messages_or_condensation = prepare_llm_messages(
state.events, condenser=self.condenser
state.events, condenser=self.condenser, llm=self.llm
)

# Process condensation event before agent samples another action
Expand Down
13 changes: 9 additions & 4 deletions openhands-sdk/openhands/sdk/agent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def prepare_llm_messages(
events: Sequence[Event],
condenser: None = None,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message]: ...


Expand All @@ -125,13 +126,15 @@ def prepare_llm_messages(
events: Sequence[Event],
condenser: CondenserBase,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message] | Condensation: ...


def prepare_llm_messages(
events: Sequence[Event],
condenser: CondenserBase | None = None,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message] | Condensation:
"""Prepare LLM messages from conversation context.

Expand All @@ -140,13 +143,15 @@ def prepare_llm_messages(
It handles condensation internally and calls the callback when needed.

Args:
state: The conversation state containing events
events: Sequence of events to prepare messages from
condenser: Optional condenser for handling context window limits
additional_messages: Optional additional messages to append
on_event: Optional callback for handling condensation events
llm: Optional LLM instance from the agent, passed to condenser for
token counting or other LLM features

Returns:
List of messages ready for LLM completion
List of messages ready for LLM completion, or a Condensation event
if condensation is needed

Raises:
RuntimeError: If condensation is needed but no callback is provided
Expand All @@ -160,7 +165,7 @@ def prepare_llm_messages(
# produce a list of events, exactly as expected, or a
# new condensation that needs to be processed
if condenser is not None:
condensation_result = condenser.condense(view)
condensation_result = condenser.condense(view, agent_llm=llm)

match condensation_result:
case View():
Expand Down
17 changes: 11 additions & 6 deletions openhands-sdk/openhands/sdk/context/condenser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM
from openhands.sdk.utils.models import (
DiscriminatedUnionMixin,
)
Expand All @@ -28,7 +29,7 @@ class CondenserBase(DiscriminatedUnionMixin, ABC):
"""

@abstractmethod
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
"""Condense a sequence of events into a potentially smaller list.

New condenser strategies should override this method to implement their own
Expand All @@ -37,6 +38,8 @@ def condense(self, view: View) -> View | Condensation:

Args:
view: A view of the history containing all events that should be condensed.
agent_llm: LLM instance used by the agent. Condensers use this for token
counting purposes. Defaults to None.

Returns:
View | Condensation: A condensed view of the events or an event indicating
Expand Down Expand Up @@ -77,18 +80,20 @@ class RollingCondenser(PipelinableCondenserBase, ABC):
"""

@abstractmethod
def should_condense(self, view: View) -> bool:
def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
"""Determine if a view should be condensed."""

@abstractmethod
def get_condensation(self, view: View) -> Condensation:
def get_condensation(
self, view: View, agent_llm: LLM | None = None
) -> Condensation:
"""Get the condensation from a view."""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
# If we trigger the condenser-specific condensation threshold, compute and
# return the condensation.
if self.should_condense(view):
return self.get_condensation(view)
if self.should_condense(view, agent_llm=agent_llm):
return self.get_condensation(view, agent_llm=agent_llm)

# Otherwise we're safe to just return the view.
else:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,43 @@
import os
from collections.abc import Sequence
from enum import Enum

from pydantic import Field, model_validator

from openhands.sdk.context.condenser.base import RollingCondenser
from openhands.sdk.context.condenser.utils import (
get_suffix_length_for_token_reduction,
get_total_token_count,
)
from openhands.sdk.context.prompts import render_template
from openhands.sdk.context.view import View
from openhands.sdk.event.base import LLMConvertibleEvent
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.event.llm_convertible import MessageEvent
from openhands.sdk.llm import LLM, Message, TextContent
from openhands.sdk.observability.laminar import observe


class Reason(Enum):
    """Reasons for condensation."""

    # An explicit, still-unhandled condensation request is present in the view.
    REQUEST = "request"
    # The view's total token count exceeds the configured token budget.
    TOKENS = "tokens"
    # The view contains more events than the configured maximum size.
    EVENTS = "events"


class LLMSummarizingCondenser(RollingCondenser):
"""LLM-based condenser that summarizes forgotten events.

Uses an independent LLM (stored in the `llm` attribute) for generating summaries
of forgotten events. The optional `agent_llm` parameter passed to condense() is
the LLM used by the agent for token counting purposes, and you should not assume
it is the same as the one defined in this condenser.
"""

llm: LLM
max_size: int = Field(default=120, gt=0)
max_tokens: int | None = None
keep_first: int = Field(default=4, ge=0)

@model_validator(mode="after")
Expand All @@ -29,23 +53,47 @@ def validate_keep_first_vs_max_size(self):
def handles_condensation_requests(self) -> bool:
return True

def should_condense(self, view: View) -> bool:
if view.unhandled_condensation_request:
return True
return len(view) > self.max_size
def get_condensation_reasons(
self, view: View, agent_llm: LLM | None = None
) -> set[Reason]:
"""Determine the reasons why the view should be condensed.

Args:
view: The current view to evaluate.
agent_llm: The LLM used by the agent. Required if token counting is needed.

Returns:
A set of Reason enums indicating why condensation is needed.
"""
reasons = set()

@observe(ignore_inputs=["view"])
def get_condensation(self, view: View) -> Condensation:
head = view[: self.keep_first]
target_size = self.max_size // 2
# Reason 1: Unhandled condensation request. The view handles the detection of
# these requests while processing the event stream.
if view.unhandled_condensation_request:
# Condensation triggered by a condensation request
# should be calculated based on the view size.
target_size = len(view) // 2
# Number of events to keep from the tail -- target size, minus however many
# prefix events from the head, minus one for the summarization event
events_from_tail = target_size - len(head) - 1
reasons.add(Reason.REQUEST)

# Reason 2: Token limit is provided and exceeded.
if self.max_tokens and agent_llm:
total_tokens = get_total_token_count(view.events, agent_llm)
if total_tokens > self.max_tokens:
reasons.add(Reason.TOKENS)

# Reason 3: View exceeds maximum size in number of events.
if len(view) > self.max_size:
reasons.add(Reason.EVENTS)

return reasons

def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
    """Report whether the view needs condensing for any reason at all."""
    # A non-empty reason set means at least one resource constraint tripped.
    return bool(self.get_condensation_reasons(view, agent_llm))

def _get_summary_event_content(self, view: View) -> str:
"""Extract the text content from the summary event in the view, if any.

If there is no summary event or it does not contain text content, returns an
empty string.
"""
summary_event_content: str = ""

summary_event = view.summary_event
Expand All @@ -54,9 +102,23 @@ def get_condensation(self, view: View) -> Condensation:
if isinstance(message_content, TextContent):
summary_event_content = message_content.text

# Identify events to be forgotten (those not in head or tail)
forgotten_events = view[self.keep_first : -events_from_tail]
return summary_event_content

def _generate_condensation(
self,
summary_event_content: str,
forgotten_events: Sequence[LLMConvertibleEvent],
) -> Condensation:
"""Generate a condensation by using the condenser's LLM to summarize forgotten
events.

Args:
summary_event_content: The content of the previous summary event.
forgotten_events: The list of events to be summarized.

Returns:
Condensation: The generated condensation object.
"""
# Convert events to strings for the template
event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]

Expand Down Expand Up @@ -87,3 +149,72 @@ def get_condensation(self, view: View) -> Condensation:
summary_offset=self.keep_first,
llm_response_id=llm_response.id,
)

def _get_forgotten_events(
    self, view: View, agent_llm: LLM | None = None
) -> Sequence[LLMConvertibleEvent]:
    """Identify events to be forgotten.

    Relies on the condensation reasons to determine how many events we need
    to drop in order to maintain our resource constraints.

    Args:
        view: The current view from which to identify forgotten events.
        agent_llm: The LLM used by the agent, required for token-based
            calculations.

    Returns:
        A sequence of events to be forgotten.
    """
    reasons = self.get_condensation_reasons(view, agent_llm=agent_llm)
    assert reasons != set(), "No condensation reasons found."

    # Candidate tail lengths (number of most-recent events to keep), one per
    # active reason; the strictest (smallest) wins below.
    suffix_events_to_keep: set[int] = set()

    if Reason.REQUEST in reasons:
        # Condensation triggered by a condensation request is sized relative
        # to the current view length.
        target_size = len(view) // 2
        # Target size, minus the head prefix, minus one for the summarization
        # event. Clamp at zero: on a small view this difference can go
        # negative, which would turn the negative-index slice below into an
        # empty "forgotten" range and silently skip the requested condensation.
        suffix_events_to_keep.add(max(target_size - self.keep_first - 1, 0))

    if Reason.EVENTS in reasons:
        # Event-count condensation is sized relative to the configured cap.
        target_size = self.max_size // 2
        suffix_events_to_keep.add(max(target_size - self.keep_first - 1, 0))

    if Reason.TOKENS in reasons:
        # Compute the number of tokens we need to eliminate to be under half
        # the max_tokens value. We know max_tokens and the agent LLM are not
        # None here because we can't have Reason.TOKENS without them.
        assert self.max_tokens is not None
        assert agent_llm is not None

        total_tokens = get_total_token_count(view.events, agent_llm)
        tokens_to_reduce = total_tokens - (self.max_tokens // 2)

        suffix_length = get_suffix_length_for_token_reduction(
            events=view.events[self.keep_first :],
            llm=agent_llm,
            token_reduction=tokens_to_reduce,
        )

        suffix_events_to_keep.add(suffix_length)

    # We might have multiple reasons to condense, so pick the strictest
    # condensation to ensure all resource constraints are met.
    events_from_tail = min(suffix_events_to_keep)

    # Identify events to be forgotten (those not in head or tail). A slice
    # bound of -0 would mean "keep everything", so handle 0 explicitly: keep
    # only the head and forget the entire tail.
    if events_from_tail == 0:
        return view[self.keep_first :]
    return view[self.keep_first : -events_from_tail]

@observe(ignore_inputs=["view", "agent_llm"])
def get_condensation(
    self, view: View, agent_llm: LLM | None = None
) -> Condensation:
    """Build a Condensation from the prior summary and the events to drop."""
    # The condensation depends only on the previous summary text and on which
    # events have been selected to be forgotten.
    previous_summary = self._get_summary_event_content(view)
    to_forget = self._get_forgotten_events(view, agent_llm=agent_llm)

    return self._generate_condensation(
        summary_event_content=previous_summary,
        forgotten_events=to_forget,
    )
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM


class NoOpCondenser(CondenserBase):
Expand All @@ -9,5 +10,5 @@ class NoOpCondenser(CondenserBase):
Primarily intended for testing purposes.
"""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation: # noqa: ARG002
return view
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM


class PipelineCondenser(CondenserBase):
"""A condenser that applies a sequence of condensers in order.

All condensers are defined primarily by their `condense` method, which takes a
`View` and returns either a new `View` or a `Condensation` event. That means we can
chain multiple condensers together by passing `View`s along and exiting early if any
condenser returns a `Condensation`.
`View` and an optional `agent_llm` parameter, returning either a new `View` or a
`Condensation` event. That means we can chain multiple condensers together by
passing `View`s along and exiting early if any condenser returns a `Condensation`.

For example:

Expand All @@ -20,33 +21,33 @@ class PipelineCondenser(CondenserBase):
CondenserC(...),
])

result = condenser.condense(view)
result = condenser.condense(view, agent_llm=agent_llm)

# Doing the same thing without the pipeline condenser requires more boilerplate
# for the monadic chaining
other_result = view

if isinstance(other_result, View):
other_result = CondenserA(...).condense(other_result)
other_result = CondenserA(...).condense(other_result, agent_llm=agent_llm)

if isinstance(other_result, View):
other_result = CondenserB(...).condense(other_result)
other_result = CondenserB(...).condense(other_result, agent_llm=agent_llm)

if isinstance(other_result, View):
other_result = CondenserC(...).condense(other_result)
other_result = CondenserC(...).condense(other_result, agent_llm=agent_llm)

assert result == other_result
"""

condensers: list[CondenserBase]
"""The list of condensers to apply in order."""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
result: View | Condensation = view
for condenser in self.condensers:
if isinstance(result, Condensation):
break
result = condenser.condense(result)
result = condenser.condense(result, agent_llm=agent_llm)
return result

def handles_condensation_requests(self) -> bool:
Expand Down
Loading
Loading