Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
ea026aa
token-aware utils, first pass
Dec 10, 2025
4093731
tests and type fixes for token-aware utils
Dec 10, 2025
980c1af
condenser api now takes agent llm
Dec 10, 2025
0d2b16d
trigger condensation w/ token limits
Dec 10, 2025
d3d875e
minor refactor of llm summarizing condenser
Dec 10, 2025
7504dba
one last refactor
Dec 10, 2025
753d088
resolution of multiple condensation reasons at once
Dec 10, 2025
226b488
updating tests
Dec 10, 2025
81b5590
events_from_tail calculation fix
Dec 10, 2025
cf710cd
fixing aggressive condensation logic
Dec 10, 2025
de66479
tests for combos of reasons
Dec 10, 2025
3bafbec
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 10, 2025
16a5be5
linting
Dec 10, 2025
bc63019
minor formatting errors
Dec 11, 2025
4bba5fd
ignoring unknown attributes in tests
Dec 11, 2025
119e868
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 11, 2025
093eeb9
fixing type hints with overloaded prepare_llm_messages
Dec 11, 2025
e5518b5
removing TYPE_CHECKING flags
Dec 11, 2025
acc15c8
fix: correct import name in test_resolve_model_config.py
openhands-agent Dec 11, 2025
011b50f
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 16, 2025
4a1fc71
Update openhands-sdk/openhands/sdk/context/condenser/utils.py
csmith49 Dec 21, 2025
6fb0b8f
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 21, 2025
55e5b0e
llm -> summarizing_llm (and linting pass)
Dec 21, 2025
44dd213
fix failing cross tests -- updating the string ref to llm -> summariz…
Dec 21, 2025
fd27c75
Revert "fix failing cross tests -- updating the string ref to llm -> …
Dec 22, 2025
d63d40f
Revert "llm -> summarizing_llm (and linting pass)"
Dec 22, 2025
3c6aebe
condenser api changes: llm -> agent_llm
Dec 22, 2025
8869dd0
linting
Dec 22, 2025
2145e0e
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 22, 2025
f3d1e4f
fixing tests
Dec 22, 2025
aa5d9c6
condenser property in integration test scaffold
Dec 22, 2025
fb7644e
update integration base with max iterations
Dec 23, 2025
3b013f1
token counting integration test
Dec 23, 2025
fe083cd
linting
Dec 23, 2025
0b226c7
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 23, 2025
d698399
fixing import
Dec 23, 2025
7249b12
upping cap for tokens
Dec 23, 2025
bdd0a78
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 23, 2025
1180c9d
one last change to max tokens
Dec 23, 2025
e337ae6
minor indexing bug
Dec 23, 2025
15ae1a6
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 23, 2025
301232e
minor prompt tweak
Dec 23, 2025
23d154d
Merge branch 'main' into csmith49/token-aware-condensation
csmith49 Dec 23, 2025
e73b00f
minor prompt tweak to get gpt and devstral to actually do their job
Dec 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openhands-sdk/openhands/sdk/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def step(

# Prepare LLM messages using the utility function
_messages_or_condensation = prepare_llm_messages(
state.events, condenser=self.condenser
state.events, condenser=self.condenser, llm=self.llm
)

# Process condensation event before agent samples another action
Expand Down
13 changes: 9 additions & 4 deletions openhands-sdk/openhands/sdk/agent/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def prepare_llm_messages(
events: Sequence[Event],
condenser: None = None,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message]: ...


Expand All @@ -125,13 +126,15 @@ def prepare_llm_messages(
events: Sequence[Event],
condenser: CondenserBase,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message] | Condensation: ...


def prepare_llm_messages(
events: Sequence[Event],
condenser: CondenserBase | None = None,
additional_messages: list[Message] | None = None,
llm: LLM | None = None,
) -> list[Message] | Condensation:
"""Prepare LLM messages from conversation context.

Expand All @@ -140,13 +143,15 @@ def prepare_llm_messages(
It handles condensation internally and calls the callback when needed.

Args:
state: The conversation state containing events
events: Sequence of events to prepare messages from
condenser: Optional condenser for handling context window limits
additional_messages: Optional additional messages to append
on_event: Optional callback for handling condensation events
llm: Optional LLM instance from the agent, passed to condenser for
token counting or other LLM features

Returns:
List of messages ready for LLM completion
List of messages ready for LLM completion, or a Condensation event
if condensation is needed

Raises:
RuntimeError: If condensation is needed but no callback is provided
Expand All @@ -160,7 +165,7 @@ def prepare_llm_messages(
# produce a list of events, exactly as expected, or a
# new condensation that needs to be processed
if condenser is not None:
condensation_result = condenser.condense(view)
condensation_result = condenser.condense(view, agent_llm=llm)

match condensation_result:
case View():
Expand Down
17 changes: 11 additions & 6 deletions openhands-sdk/openhands/sdk/context/condenser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM
from openhands.sdk.utils.models import (
DiscriminatedUnionMixin,
)
Expand All @@ -28,7 +29,7 @@ class CondenserBase(DiscriminatedUnionMixin, ABC):
"""

@abstractmethod
def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
"""Condense a sequence of events into a potentially smaller list.

New condenser strategies should override this method to implement their own
Expand All @@ -37,6 +38,8 @@ def condense(self, view: View) -> View | Condensation:

Args:
view: A view of the history containing all events that should be condensed.
agent_llm: LLM instance used by the agent. Condensers use this for token
counting purposes. Defaults to None.

Returns:
View | Condensation: A condensed view of the events or an event indicating
Expand Down Expand Up @@ -77,18 +80,20 @@ class RollingCondenser(PipelinableCondenserBase, ABC):
"""

@abstractmethod
def should_condense(self, view: View) -> bool:
def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
"""Determine if a view should be condensed."""

@abstractmethod
def get_condensation(self, view: View) -> Condensation:
def get_condensation(
self, view: View, agent_llm: LLM | None = None
) -> Condensation:
"""Get the condensation from a view."""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
# If we trigger the condenser-specific condensation threshold, compute and
# return the condensation.
if self.should_condense(view):
return self.get_condensation(view)
if self.should_condense(view, agent_llm=agent_llm):
return self.get_condensation(view, agent_llm=agent_llm)

# Otherwise we're safe to just return the view.
else:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,43 @@
import os
from collections.abc import Sequence
from enum import Enum

from pydantic import Field, model_validator

from openhands.sdk.context.condenser.base import RollingCondenser
from openhands.sdk.context.condenser.utils import (
get_suffix_length_for_token_reduction,
get_total_token_count,
)
from openhands.sdk.context.prompts import render_template
from openhands.sdk.context.view import View
from openhands.sdk.event.base import LLMConvertibleEvent
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.event.llm_convertible import MessageEvent
from openhands.sdk.llm import LLM, Message, TextContent
from openhands.sdk.observability.laminar import observe


class Reason(Enum):
    """Reasons for condensation."""

    # An explicit, still-unhandled condensation request is present in the view.
    REQUEST = "request"
    # The view's total token count exceeds the configured token budget.
    TOKENS = "tokens"
    # The view contains more events than the configured maximum size.
    EVENTS = "events"


class LLMSummarizingCondenser(RollingCondenser):
"""LLM-based condenser that summarizes forgotten events.

Uses an independent LLM (stored in the `llm` attribute) for generating summaries
of forgotten events. The optional `agent_llm` parameter passed to condense() is
the LLM used by the agent for token counting purposes, and you should not assume
it is the same as the one defined in this condenser.
"""

llm: LLM
max_size: int = Field(default=120, gt=0)
max_tokens: int | None = None
keep_first: int = Field(default=4, ge=0)

@model_validator(mode="after")
Expand All @@ -29,23 +53,47 @@ def validate_keep_first_vs_max_size(self):
def handles_condensation_requests(self) -> bool:
return True

def should_condense(self, view: View) -> bool:
if view.unhandled_condensation_request:
return True
return len(view) > self.max_size
def get_condensation_reasons(
self, view: View, agent_llm: LLM | None = None
) -> set[Reason]:
"""Determine the reasons why the view should be condensed.

Args:
view: The current view to evaluate.
agent_llm: The LLM used by the agent. Required if token counting is needed.

Returns:
A set of Reason enums indicating why condensation is needed.
"""
reasons = set()

@observe(ignore_inputs=["view"])
def get_condensation(self, view: View) -> Condensation:
head = view[: self.keep_first]
target_size = self.max_size // 2
# Reason 1: Unhandled condensation request. The view handles the detection of
# these requests while processing the event stream.
if view.unhandled_condensation_request:
# Condensation triggered by a condensation request
# should be calculated based on the view size.
target_size = len(view) // 2
# Number of events to keep from the tail -- target size, minus however many
# prefix events from the head, minus one for the summarization event
events_from_tail = target_size - len(head) - 1
reasons.add(Reason.REQUEST)

# Reason 2: Token limit is provided and exceeded.
if self.max_tokens and agent_llm:
total_tokens = get_total_token_count(view.events, agent_llm)
if total_tokens > self.max_tokens:
reasons.add(Reason.TOKENS)

# Reason 3: View exceeds maximum size in number of events.
if len(view) > self.max_size:
reasons.add(Reason.EVENTS)

return reasons

def should_condense(self, view: View, agent_llm: LLM | None = None) -> bool:
    """Report whether the view needs condensing for any reason at all."""
    # A non-empty reason set means at least one resource constraint tripped.
    return bool(self.get_condensation_reasons(view, agent_llm))

def _get_summary_event_content(self, view: View) -> str:
"""Extract the text content from the summary event in the view, if any.

If there is no summary event or it does not contain text content, returns an
empty string.
"""
summary_event_content: str = ""

summary_event = view.summary_event
Expand All @@ -54,9 +102,23 @@ def get_condensation(self, view: View) -> Condensation:
if isinstance(message_content, TextContent):
summary_event_content = message_content.text

# Identify events to be forgotten (those not in head or tail)
forgotten_events = view[self.keep_first : -events_from_tail]
return summary_event_content

def _generate_condensation(
self,
summary_event_content: str,
forgotten_events: Sequence[LLMConvertibleEvent],
) -> Condensation:
"""Generate a condensation by using the condenser's LLM to summarize forgotten
events.

Args:
summary_event_content: The content of the previous summary event.
forgotten_events: The list of events to be summarized.

Returns:
Condensation: The generated condensation object.
"""
# Convert events to strings for the template
event_strings = [str(forgotten_event) for forgotten_event in forgotten_events]

Expand Down Expand Up @@ -87,3 +149,72 @@ def get_condensation(self, view: View) -> Condensation:
summary_offset=self.keep_first,
llm_response_id=llm_response.id,
)

def _get_forgotten_events(
    self, view: View, agent_llm: LLM | None = None
) -> Sequence[LLMConvertibleEvent]:
    """Identify events to be forgotten.

    Relies on the condensation reasons to determine how many events we need
    to drop in order to maintain our resource constraints.

    Args:
        view: The current view from which to identify forgotten events.
        agent_llm: The LLM used by the agent, required for token-based
            calculations.

    Returns:
        A sequence of events to be forgotten.
    """
    reasons = self.get_condensation_reasons(view, agent_llm=agent_llm)
    assert reasons != set(), "No condensation reasons found."

    # Candidate tail lengths (number of most-recent events to keep), one per
    # active reason; the strictest (smallest) wins below.
    suffix_events_to_keep: set[int] = set()

    if Reason.REQUEST in reasons:
        # Condensation triggered by a condensation request is sized relative
        # to the current view length.
        target_size = len(view) // 2
        # Target size, minus the head prefix, minus one for the summarization
        # event. Clamp at zero: on a small view this difference can go
        # negative, which would turn the negative-index slice below into an
        # empty "forgotten" range and silently skip the requested condensation.
        suffix_events_to_keep.add(max(target_size - self.keep_first - 1, 0))

    if Reason.EVENTS in reasons:
        # Event-count condensation is sized relative to the configured cap.
        target_size = self.max_size // 2
        suffix_events_to_keep.add(max(target_size - self.keep_first - 1, 0))

    if Reason.TOKENS in reasons:
        # Compute the number of tokens we need to eliminate to be under half
        # the max_tokens value. We know max_tokens and the agent LLM are not
        # None here because we can't have Reason.TOKENS without them.
        assert self.max_tokens is not None
        assert agent_llm is not None

        total_tokens = get_total_token_count(view.events, agent_llm)
        tokens_to_reduce = total_tokens - (self.max_tokens // 2)

        suffix_length = get_suffix_length_for_token_reduction(
            events=view.events[self.keep_first :],
            llm=agent_llm,
            token_reduction=tokens_to_reduce,
        )

        suffix_events_to_keep.add(suffix_length)

    # We might have multiple reasons to condense, so pick the strictest
    # condensation to ensure all resource constraints are met.
    events_from_tail = min(suffix_events_to_keep)

    # Identify events to be forgotten (those not in head or tail). A slice
    # bound of -0 would mean "keep everything", so handle 0 explicitly: keep
    # only the head and forget the entire tail.
    if events_from_tail == 0:
        return view[self.keep_first :]
    return view[self.keep_first : -events_from_tail]

@observe(ignore_inputs=["view", "agent_llm"])
def get_condensation(
    self, view: View, agent_llm: LLM | None = None
) -> Condensation:
    """Build a Condensation from the prior summary and the events to drop."""
    # The condensation depends only on the previous summary text and on which
    # events have been selected to be forgotten.
    previous_summary = self._get_summary_event_content(view)
    to_forget = self._get_forgotten_events(view, agent_llm=agent_llm)

    return self._generate_condensation(
        summary_event_content=previous_summary,
        forgotten_events=to_forget,
    )
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM


class NoOpCondenser(CondenserBase):
Expand All @@ -9,5 +10,5 @@ class NoOpCondenser(CondenserBase):
Primarily intended for testing purposes.
"""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation: # noqa: ARG002
return view
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from openhands.sdk.context.condenser.base import CondenserBase
from openhands.sdk.context.view import View
from openhands.sdk.event.condenser import Condensation
from openhands.sdk.llm import LLM


class PipelineCondenser(CondenserBase):
"""A condenser that applies a sequence of condensers in order.

All condensers are defined primarily by their `condense` method, which takes a
`View` and returns either a new `View` or a `Condensation` event. That means we can
chain multiple condensers together by passing `View`s along and exiting early if any
condenser returns a `Condensation`.
`View` and an optional `agent_llm` parameter, returning either a new `View` or a
`Condensation` event. That means we can chain multiple condensers together by
passing `View`s along and exiting early if any condenser returns a `Condensation`.

For example:

Expand All @@ -20,33 +21,33 @@ class PipelineCondenser(CondenserBase):
CondenserC(...),
])

result = condenser.condense(view)
result = condenser.condense(view, agent_llm=agent_llm)

# Doing the same thing without the pipeline condenser requires more boilerplate
# for the monadic chaining
other_result = view

if isinstance(other_result, View):
other_result = CondenserA(...).condense(other_result)
other_result = CondenserA(...).condense(other_result, agent_llm=agent_llm)

if isinstance(other_result, View):
other_result = CondenserB(...).condense(other_result)
other_result = CondenserB(...).condense(other_result, agent_llm=agent_llm)

if isinstance(other_result, View):
other_result = CondenserC(...).condense(other_result)
other_result = CondenserC(...).condense(other_result, agent_llm=agent_llm)

assert result == other_result
"""

condensers: list[CondenserBase]
"""The list of condensers to apply in order."""

def condense(self, view: View) -> View | Condensation:
def condense(self, view: View, agent_llm: LLM | None = None) -> View | Condensation:
result: View | Condensation = view
for condenser in self.condensers:
if isinstance(result, Condensation):
break
result = condenser.condense(result)
result = condenser.condense(result, agent_llm=agent_llm)
return result

def handles_condensation_requests(self) -> bool:
Expand Down
Loading
Loading