Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/tools/compress.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export function createCompressTool(ctx: PruneToolContext): ReturnType<typeof too
throw new Error("content.summary is required and must be a non-empty string")
}

logger.info("Compress tool invoked")
// logger.info("Compress tool invoked")
// logger.info(
// JSON.stringify({
// startString: startString?.substring(0, 50) + "...",
Expand Down
263 changes: 213 additions & 50 deletions lib/tools/utils.ts
Original file line number Diff line number Diff line change
@@ -1,82 +1,252 @@
import { partial_ratio } from "fuzzball"
import type { WithParts, CompressSummary } from "../state"
import type { Logger } from "../logger"

/**
* Searches messages for a string and returns the message ID where it's found.
* Searches in text parts, tool outputs, tool inputs, and other textual content.
* Also searches through existing compress summaries to enable chained compression.
* Throws an error if the string is not found or found more than once.
*/
export function findStringInMessages(
export interface FuzzyConfig {
minScore: number
minGap: number
}

export const DEFAULT_FUZZY_CONFIG: FuzzyConfig = {
minScore: 95,
minGap: 15,
}

interface MatchResult {
messageId: string
messageIndex: number
score: number
matchType: "exact" | "fuzzy"
}

function extractMessageContent(msg: WithParts): string {
const parts = Array.isArray(msg.parts) ? msg.parts : []
let content = ""

for (const part of parts) {
const p = part as Record<string, unknown>

switch (part.type) {
case "text":
case "reasoning":
if (typeof p.text === "string") {
content += " " + p.text
}
break

case "tool": {
const state = p.state as Record<string, unknown> | undefined
if (!state) break

// Include tool output (completed or error)
if (state.status === "completed" && typeof state.output === "string") {
content += " " + state.output
} else if (state.status === "error" && typeof state.error === "string") {
content += " " + state.error
}

// Include tool input
if (state.input) {
content +=
" " +
(typeof state.input === "string"
? state.input
: JSON.stringify(state.input))
}
break
}

case "compaction":
if (typeof p.summary === "string") {
content += " " + p.summary
}
break

case "subtask":
if (typeof p.summary === "string") {
content += " " + p.summary
}
if (typeof p.result === "string") {
content += " " + p.result
}
break
}
}

return content
}

function findExactMatches(
messages: WithParts[],
searchString: string,
logger: Logger,
compressSummaries: CompressSummary[] = [],
stringType: "startString" | "endString",
): { messageId: string; messageIndex: number } {
const matches: { messageId: string; messageIndex: number }[] = []
compressSummaries: CompressSummary[],
): MatchResult[] {
const matches: MatchResult[] = []
const seenMessageIds = new Set<string>()

// First, search through existing compress summaries
// This allows referencing text from previous compress operations
// Search compress summaries first
for (const summary of compressSummaries) {
if (summary.summary.includes(searchString)) {
const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId)
if (anchorIndex !== -1) {
if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) {
seenMessageIds.add(summary.anchorMessageId)
matches.push({
messageId: summary.anchorMessageId,
messageIndex: anchorIndex,
score: 100,
matchType: "exact",
})
}
}
}

// Then search through raw messages
// Search raw messages
for (let i = 0; i < messages.length; i++) {
const msg = messages[i]
const parts = Array.isArray(msg.parts) ? msg.parts : []
if (seenMessageIds.has(msg.info.id)) continue

for (const part of parts) {
let content = ""
const content = extractMessageContent(msg)
if (content.includes(searchString)) {
seenMessageIds.add(msg.info.id)
matches.push({
messageId: msg.info.id,
messageIndex: i,
score: 100,
matchType: "exact",
})
}
}

if (part.type === "text" && typeof part.text === "string") {
content = part.text
} else if (part.type === "tool" && part.state?.status === "completed") {
if (typeof part.state.output === "string") {
content = part.state.output
}
if (part.state.input) {
const inputStr =
typeof part.state.input === "string"
? part.state.input
: JSON.stringify(part.state.input)
content += " " + inputStr
}
}
return matches
}

if (content.includes(searchString)) {
matches.push({ messageId: msg.info.id, messageIndex: i })
function findFuzzyMatches(
messages: WithParts[],
searchString: string,
compressSummaries: CompressSummary[],
minScore: number,
): MatchResult[] {
const matches: MatchResult[] = []
const seenMessageIds = new Set<string>()

// Search compress summaries first
for (const summary of compressSummaries) {
const score = partial_ratio(searchString, summary.summary)
if (score >= minScore) {
const anchorIndex = messages.findIndex((m) => m.info.id === summary.anchorMessageId)
if (anchorIndex !== -1 && !seenMessageIds.has(summary.anchorMessageId)) {
seenMessageIds.add(summary.anchorMessageId)
matches.push({
messageId: summary.anchorMessageId,
messageIndex: anchorIndex,
score,
matchType: "fuzzy",
})
}
}
}

if (matches.length === 0) {
// Search raw messages
for (let i = 0; i < messages.length; i++) {
const msg = messages[i]
if (seenMessageIds.has(msg.info.id)) continue

const content = extractMessageContent(msg)
const score = partial_ratio(searchString, content)
if (score >= minScore) {
seenMessageIds.add(msg.info.id)
matches.push({
messageId: msg.info.id,
messageIndex: i,
score,
matchType: "fuzzy",
})
}
}

return matches
}

export function findStringInMessages(
messages: WithParts[],
searchString: string,
logger: Logger,
compressSummaries: CompressSummary[] = [],
stringType: "startString" | "endString",
fuzzyConfig: FuzzyConfig = DEFAULT_FUZZY_CONFIG,
): { messageId: string; messageIndex: number } {
const searchableMessages = messages.length > 1 ? messages.slice(0, -1) : messages
const lastMessage = messages.length > 0 ? messages[messages.length - 1] : undefined

const exactMatches = findExactMatches(searchableMessages, searchString, compressSummaries)

if (exactMatches.length === 1) {
return { messageId: exactMatches[0].messageId, messageIndex: exactMatches[0].messageIndex }
}

if (exactMatches.length > 1) {
throw new Error(
`${stringType} not found in conversation. Make sure the string exists and is spelled exactly as it appears.`,
`Found multiple matches for ${stringType}. ` +
`Provide more surrounding context to uniquely identify the intended match.`,
)
}

if (matches.length > 1) {
const fuzzyMatches = findFuzzyMatches(
searchableMessages,
searchString,
compressSummaries,
fuzzyConfig.minScore,
)

if (fuzzyMatches.length === 0) {
if (lastMessage) {
const lastMsgContent = extractMessageContent(lastMessage)
const lastMsgIndex = messages.length - 1
if (lastMsgContent.includes(searchString)) {
// logger.info(
// `${stringType} found in last message (last resort) at index ${lastMsgIndex}`,
// )
return {
messageId: lastMessage.info.id,
messageIndex: lastMsgIndex,
}
}
}

throw new Error(
`Found multiple matches for ${stringType}. Provide more surrounding context to uniquely identify the intended match.`,
`${stringType} not found in conversation. ` +
`Make sure the string exists and is spelled exactly as it appears.`,
)
}

return matches[0]
fuzzyMatches.sort((a, b) => b.score - a.score)

const best = fuzzyMatches[0]
const secondBest = fuzzyMatches[1]

// Log fuzzy match candidates
// logger.info(
// `Fuzzy match for ${stringType}: best=${best.score}% (msg ${best.messageIndex})` +
// (secondBest
// ? `, secondBest=${secondBest.score}% (msg ${secondBest.messageIndex})`
// : ""),
// )

// Check confidence gap - best must be significantly better than second best
if (secondBest && best.score - secondBest.score < fuzzyConfig.minGap) {
throw new Error(
`Found multiple matches for ${stringType}. ` +
`Provide more unique surrounding context to disambiguate.`,
)
}

logger.info(
`Fuzzy matched ${stringType} with ${best.score}% confidence at message index ${best.messageIndex}`,
)

return { messageId: best.messageId, messageIndex: best.messageIndex }
}

/**
* Collects all tool callIDs from messages between start and end indices (inclusive).
*/
export function collectToolIdsInRange(
messages: WithParts[],
startIndex: number,
Expand All @@ -100,9 +270,6 @@ export function collectToolIdsInRange(
return toolIds
}

/**
* Collects all message IDs from messages between start and end indices (inclusive).
*/
export function collectMessageIdsInRange(
messages: WithParts[],
startIndex: number,
Expand All @@ -120,10 +287,6 @@ export function collectMessageIdsInRange(
return messageIds
}

/**
* Collects all textual content (text parts, tool inputs, and tool outputs)
* from a range of messages. Used for token estimation.
*/
export function collectContentInRange(
messages: WithParts[],
startIndex: number,
Expand Down
30 changes: 30 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
"dependencies": {
"@anthropic-ai/tokenizer": "^0.0.4",
"@opencode-ai/sdk": "^1.1.48",
"fuzzball": "^2.2.3",
"jsonc-parser": "^3.3.1",
"ulid": "^3.0.2",
"zod": "^4.3.6"
Expand Down