Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
86 commits
Select commit Hold shift + click to select a range
563fbe2
comiples
katzdave Jul 18, 2025
aa43f09
tests
katzdave Jul 18, 2025
9ea702b
context usage fix
katzdave Jul 18, 2025
6bbba20
change overhead
katzdave Jul 18, 2025
ff5c9a2
shrink works
katzdave Jul 18, 2025
75c074b
merge
katzdave Jul 21, 2025
52099ce
new prompt
katzdave Jul 21, 2025
f8b37d0
move token counter
katzdave Jul 21, 2025
e83cadc
reset changes to mod.rs and agent.rs to focus on summarization algorithm
katzdave Jul 21, 2025
324c511
rm old summarizer
katzdave Jul 22, 2025
5546bef
shrink token counting
katzdave Jul 22, 2025
dc668bb
reset token counting
katzdave Jul 22, 2025
7b6f17e
fmt
katzdave Jul 22, 2025
c4ec13f
Merge branch 'main' of github.com:block/goose into dkatz/goose-compact2
katzdave Jul 22, 2025
e55dc40
Merge branch 'main' of github.com:block/goose into dkatz/goose-compact2
katzdave Jul 22, 2025
f71fe31
fix test
katzdave Jul 22, 2025
ed54252
Merge branch 'main' of github.com:block/goose into dkatz/goose-compact2
katzdave Jul 23, 2025
970b197
appending to pr: ads auto summarize to one shot (#3600)
michaelneale Jul 24, 2025
0f37623
rm tool call removal fns
katzdave Jul 24, 2025
fcf0f27
Merge branch 'main' of github.com:block/goose into dkatz/goose-compact2
katzdave Jul 24, 2025
ef432b5
one more unused symbol
katzdave Jul 24, 2025
624e3eb
fmt
katzdave Jul 24, 2025
caf4557
split compaction into check fn
katzdave Jul 24, 2025
1c27b65
refactor into agent reply
katzdave Jul 24, 2025
ad7ca3d
add logging
katzdave Jul 25, 2025
94dc7a0
fix token reduction
katzdave Jul 25, 2025
46a8ec7
merge
katzdave Jul 25, 2025
0aa079a
fmt tidy up
michaelneale Jul 25, 2025
23162e4
import the right tool, don't rely on wildcard
michaelneale Jul 25, 2025
8342d35
rm debug logs
katzdave Jul 25, 2025
55dd73a
Merge branch 'main' of github.com:block/goose into dkatz/goose-compact2
katzdave Jul 25, 2025
8e04278
merge
katzdave Jul 25, 2025
4b12c2b
rm extra file + clean comments
katzdave Jul 25, 2025
fc62ac9
merge conflict
katzdave Jul 28, 2025
97fa0f8
fmt
katzdave Jul 28, 2025
b233c74
autocompact splice last message
katzdave Jul 28, 2025
1a1733e
fix threshold
katzdave Jul 28, 2025
2966295
fmt
katzdave Jul 28, 2025
8df19c5
unused
katzdave Jul 28, 2025
d8f07ae
rm stray files
katzdave Jul 28, 2025
c7c2dd8
Merge branch 'main' into dkatz/goose-compact2
michaelneale Jul 29, 2025
158b5d0
merge
katzdave Jul 29, 2025
719086c
Merge branch 'dkatz/goose-compact2' of github.com:block/goose into dk…
katzdave Jul 29, 2025
985dfcd
rm noise
katzdave Jul 29, 2025
4d83217
rip apart old summarization algorithm
katzdave Jul 29, 2025
e7a0db3
rm some token counter standalone scripts
katzdave Jul 29, 2025
3f5b4ed
fix build
katzdave Jul 29, 2025
a7e68b6
replace with shorter summary
katzdave Jul 29, 2025
bffff01
remove dep on summarize and tokenizer
katzdave Jul 29, 2025
35179ac
Merge branch 'main' of github.com:block/goose into dkatz/goose-compact2
katzdave Jul 29, 2025
739eb0b
fix test
katzdave Jul 29, 2025
d72a1fb
merge
katzdave Jul 29, 2025
70d4f6f
messy merge conflict
katzdave Jul 31, 2025
7c69b28
sig change
katzdave Jul 31, 2025
7717ca9
fix summarize tests
katzdave Jul 31, 2025
93f8501
rm redundant tests
katzdave Jul 31, 2025
84ba00f
merge
katzdave Aug 4, 2025
3ebab01
move default token counts
katzdave Aug 4, 2025
f2da80c
v1 ensure_token_usage
katzdave Aug 4, 2025
bc86396
swap to ensure_tokens
katzdave Aug 5, 2025
d678fe9
rm comments
katzdave Aug 5, 2025
e205379
Merge branch 'main' of github.com:block/goose into dkatz/token-counting
katzdave Aug 5, 2025
688e39d
combine back in provider usage
katzdave Aug 5, 2025
1810585
token usage
katzdave Aug 5, 2025
1dfaf7f
fix compiler error
katzdave Aug 5, 2025
bbe59e8
token counting actually feels sane
katzdave Aug 6, 2025
3c82c25
update metadata in /summarize
katzdave Aug 6, 2025
7021485
fix template filling
katzdave Aug 6, 2025
3c9d912
rm file changes
katzdave Aug 6, 2025
68a7a33
trim down message info
katzdave Aug 6, 2025
4032c07
Swap back to debug
katzdave Aug 6, 2025
9e631a8
fmt
katzdave Aug 6, 2025
8ebc10c
rm tokens before
katzdave Aug 6, 2025
8f13415
Merge branch 'main' of github.com:block/goose into dkatz/token-counting
katzdave Aug 6, 2025
f2da61c
messy merge resolved
katzdave Aug 7, 2025
abde666
fmt
katzdave Aug 7, 2025
624557a
Merge branch 'main' of github.com:block/goose into dkatz/token-counting
katzdave Aug 8, 2025
8143d03
Merge branch 'main' of github.com:block/goose into dkatz/token-counting
katzdave Aug 11, 2025
7b05a94
Merge branch 'main' of github.com:block/goose into dkatz/token-counting
katzdave Aug 12, 2025
b887cb5
Merge branch 'main' of github.com:block/goose into dkatz/token-counting
katzdave Aug 12, 2025
205aa72
Merge branch 'main' of github.com:block/goose into dkatz/token-counting
katzdave Aug 13, 2025
9689670
summarization usage fix
katzdave Aug 13, 2025
601ee4b
update message
katzdave Aug 13, 2025
9749033
Merge branch 'main' of github.com:block/goose into dkatz/token-counting
katzdave Aug 13, 2025
bac76c8
rm warning
katzdave Aug 13, 2025
b8c82b7
fmt
katzdave Aug 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/goose-cli/src/commands/web.rs
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ async fn process_message_streaming(

// For now, auto-summarize in web mode
// TODO: Implement proper UI for context handling
let (summarized_messages, _) =
let (summarized_messages, _, _) =
agent.summarize_context(messages.messages()).await?;
{
let mut session_msgs = session_messages.lock().await;
Expand Down
46 changes: 43 additions & 3 deletions crates/goose-cli/src/session/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ impl Session {
message_suffix: &str,
) -> Result<()> {
// Summarize messages to fit within context length
let (summarized_messages, _) = agent.summarize_context(messages.messages()).await?;
let (summarized_messages, _, _) = agent.summarize_context(messages.messages()).await?;
let msg = format!("Context maxed out\n{}\n{}", "-".repeat(50), message_suffix);
output::render_text(&msg, Some(Color::Yellow), true);
*messages = summarized_messages;
Expand Down Expand Up @@ -719,15 +719,15 @@ impl Session {
let provider = self.agent.provider().await?;

// Call the summarize_context method which uses the summarize_messages function
let (summarized_messages, _) = self
let (summarized_messages, _token_counts, summarization_usage) = self
.agent
.summarize_context(self.messages.messages())
.await?;

// Update the session messages with the summarized ones
self.messages = summarized_messages;

// Persist the summarized messages
// Persist the summarized messages and update session metadata with new token counts
if let Some(session_file) = &self.session_file {
let working_dir = std::env::current_dir().ok();
session::persist_messages_with_schedule_id(
Expand All @@ -738,6 +738,46 @@ impl Session {
working_dir,
)
.await?;

// Update session metadata with the new token counts from summarization
if let Some(usage) = summarization_usage {
let session_file_path = session::storage::get_path(
session::storage::Identifier::Path(session_file.to_path_buf()),
)?;
let mut metadata =
session::storage::read_metadata(&session_file_path)?;

// Update token counts with the summarization usage
// Use output tokens as total since that's what's actually in the context going forward
let summary_tokens = usage.usage.output_tokens.unwrap_or(0);
metadata.total_tokens = Some(summary_tokens);
metadata.input_tokens = None; // Clear input tokens since we now have a summary
metadata.output_tokens = Some(summary_tokens);
metadata.message_count = self.messages.len();

// Update accumulated tokens (add the summarization cost)
let accumulate = |a: Option<i32>, b: Option<i32>| -> Option<i32> {
match (a, b) {
(Some(x), Some(y)) => Some(x + y),
_ => a.or(b),
}
};
metadata.accumulated_total_tokens = accumulate(
metadata.accumulated_total_tokens,
usage.usage.total_tokens,
);
metadata.accumulated_input_tokens = accumulate(
metadata.accumulated_input_tokens,
usage.usage.input_tokens,
);
metadata.accumulated_output_tokens = accumulate(
metadata.accumulated_output_tokens,
usage.usage.output_tokens,
);

session::storage::update_metadata(&session_file_path, &metadata)
.await?;
}
}

output::hide_thinking();
Expand Down
2 changes: 1 addition & 1 deletion crates/goose-server/src/routes/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ async fn manage_context(
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
} else if request.manage_action == "summarize" {
(processed_messages, token_counts) = agent
(processed_messages, token_counts, _) = agent
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this still use the old algorithm? Is there any reason not to switch this over?

Also, the truncate above is not used, I think.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not clear on truncate. I will talk to @michaelneale about it, but I think it makes sense not to touch that until the next wave.

This uses the same call that the old algorithm made, but summarize.rs is completely gutted to just have the single-shot summarizer.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think truncate may have been used on the CLI by people using recipes, but as long as summarize works out of the box, I think it is OK to clean it out (ideally we don't want people to have to specify it; in the past it would just break if you didn't specify a strategy). So I think it's OK to get rid of it if it helps?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It can always be done in a later release.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I was just going to pick it apart in the next wave, since this PR is already large.

.summarize_context(&request.messages)
.await
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
Expand Down
8 changes: 0 additions & 8 deletions crates/goose/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,3 @@ path = "examples/agent.rs"
[[example]]
name = "databricks_oauth"
path = "examples/databricks_oauth.rs"

[[example]]
name = "async_token_counter_demo"
path = "examples/async_token_counter_demo.rs"

[[bench]]
name = "tokenization_benchmark"
harness = false
70 changes: 0 additions & 70 deletions crates/goose/benches/tokenization_benchmark.rs

This file was deleted.

98 changes: 0 additions & 98 deletions crates/goose/examples/async_token_counter_demo.rs

This file was deleted.

44 changes: 26 additions & 18 deletions crates/goose/src/agents/agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,13 @@ impl Agent {
&self,
messages: &[Message],
session: &Option<SessionConfig>,
) -> Result<Option<(Conversation, String)>> {
) -> Result<
Option<(
Conversation,
String,
Option<crate::providers::base::ProviderUsage>,
)>,
> {
// Try to get session metadata for more accurate token counts
let session_metadata = if let Some(session_config) = session {
match session::storage::get_path(session_config.id.clone()) {
Expand All @@ -865,21 +871,23 @@ impl Agent {
if compact_result.compacted {
let compacted_messages = compact_result.messages;

// Create compaction notification message
let compaction_msg = if let (Some(before), Some(after)) =
(compact_result.tokens_before, compact_result.tokens_after)
{
format!(
"Auto-compacted context: {} → {} tokens ({:.0}% reduction)\n\n",
before,
after,
(1.0 - (after as f64 / before as f64)) * 100.0
)
} else {
"Auto-compacted context to reduce token usage\n\n".to_string()
};
// Get threshold from config to include in message
let config = crate::config::Config::global();
let threshold = config
.get_param::<f64>("GOOSE_AUTO_COMPACT_THRESHOLD")
.unwrap_or(0.8); // Default to 80%
let threshold_percentage = (threshold * 100.0) as u32;

let compaction_msg = format!(
"Exceeded auto-compact threshold of {}%. Context has been summarized and reduced.\n\n",
threshold_percentage
);

return Ok(Some((compacted_messages, compaction_msg)));
return Ok(Some((
compacted_messages,
compaction_msg,
compact_result.summarization_usage,
)));
}

Ok(None)
Expand All @@ -893,16 +901,16 @@ impl Agent {
cancel_token: Option<CancellationToken>,
) -> Result<BoxStream<'_, Result<AgentEvent>>> {
// Handle auto-compaction before processing
let (messages, compaction_msg) = match self
let (messages, compaction_msg, _summarization_usage) = match self
.handle_auto_compaction(unfixed_conversation.messages(), &session)
.await?
{
Some((compacted_messages, msg)) => (compacted_messages, Some(msg)),
Some((compacted_messages, msg, usage)) => (compacted_messages, Some(msg), usage),
None => {
let context = self
.prepare_reply_context(unfixed_conversation, &session)
.await?;
(context.messages, None)
(context.messages, None, None)
}
};

Expand Down
Loading
Loading