Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ Thumbs.db
# OpenCode
.opencode/

# Python cache
__pycache__/
*.py[cod]
*$py.class

# Generated prompt files (from scripts/generate-prompts.ts)
lib/prompts/**/*.generated.ts

Expand All @@ -40,4 +45,4 @@ test-update.ts
docs/
SCHEMA_NOTES.md

repomix-output.xml
repomix-output.xml
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ For model-facing behavior (prompts and tool calls), this capability is always ad

### Tool

**Compress** — Exposes a single `compress` tool with one method: match a conversation range using `startString` and `endString`, then replace it with a technical summary.
**Compress** — Exposes a single `compress` tool with one method: select a conversation range using injected `startId` and `endId` (`mNNNN` or `bN`), then replace it with a technical summary.

The model can use that same method at different scales: tiny ranges for noise cleanup, focused ranges for preserving key findings, and full chapters for completed work.

Expand Down
7 changes: 6 additions & 1 deletion lib/hooks.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import type { SessionState, WithParts } from "./state"
import type { Logger } from "./logger"
import type { PluginConfig } from "./config"
import { assignMessageRefs } from "./message-ids"
import { syncToolCache } from "./state/tool-cache"
import { deduplicate, supersedeWrites, purgeErrors } from "./strategies"
import { prune, insertCompressToolContext } from "./messages"
import { prune, insertCompressToolContext, insertMessageIdContext } from "./messages"
import { buildToolIdList, isIgnoredUserMessage } from "./messages/utils"
import { checkSession } from "./state"
import { renderSystemPrompt } from "./prompts"
Expand Down Expand Up @@ -104,6 +105,8 @@ export function createChatMessageTransformHandler(

cacheSystemPromptTokens(state, output.messages)

assignMessageRefs(state, output.messages)

syncToolCache(state, config, logger, output.messages)
buildToolIdList(state, output.messages)

Expand All @@ -113,6 +116,8 @@ export function createChatMessageTransformHandler(

prune(state, logger, config, output.messages)

insertMessageIdContext(state, output.messages)

insertCompressToolContext(state, config, logger, output.messages)

applyPendingManualTriggerPrompt(state, output.messages, logger)
Expand Down
132 changes: 132 additions & 0 deletions lib/message-ids.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import type { SessionState, WithParts } from "./state"

// Matches a message reference: the letter `m` followed by exactly four decimal digits (captured).
const MESSAGE_REF_REGEX = /^m(\d{4})$/
// Matches a compressed-block reference: the letter `b` followed by a positive integer
// with no leading zero (captured).
const BLOCK_REF_REGEX = /^b([1-9]\d*)$/

// Number of digits a message index is zero-padded to when formatted.
const MESSAGE_REF_WIDTH = 4
// Lowest message index accepted by formatMessageRef and the starting point for allocation.
const MESSAGE_REF_MIN_INDEX = 0
// Highest message index accepted by formatMessageRef; allocation fails once this is passed.
export const MESSAGE_REF_MAX_INDEX = 9999

/**
 * Result of parsing a boundary ID string, discriminated by `kind`.
 * Produced by parseBoundaryId in this module.
 */
export type ParsedBoundaryId =
| {
// Boundary refers to a message (`m` + four digits).
kind: "message"
// Canonical reference string, as returned by formatMessageRef.
ref: string
// Numeric message index parsed from the reference.
index: number
}
| {
// Boundary refers to a compressed block (`b` + positive integer).
kind: "compressed-block"
// Canonical reference string, as returned by formatBlockRef.
ref: string
// Numeric block ID parsed from the reference.
blockId: number
}

/**
 * Renders a message index as its `m`-prefixed reference string.
 *
 * @param index - Integer index between MESSAGE_REF_MIN_INDEX and MESSAGE_REF_MAX_INDEX inclusive.
 * @returns `m` followed by the index zero-padded to MESSAGE_REF_WIDTH digits.
 * @throws Error when `index` is not an integer or lies outside the supported range.
 */
export function formatMessageRef(index: number): string {
    const withinBounds =
        Number.isInteger(index) &&
        index >= MESSAGE_REF_MIN_INDEX &&
        index <= MESSAGE_REF_MAX_INDEX

    if (withinBounds) {
        const digits = String(index).padStart(MESSAGE_REF_WIDTH, "0")
        return "m" + digits
    }

    throw new Error(
        `Message ID index out of bounds: ${index}. Supported range is 0-${MESSAGE_REF_MAX_INDEX}.`,
    )
}

/**
 * Renders a compressed-block ID as its `b`-prefixed reference string.
 *
 * @param blockId - Integer block ID, 1 or greater.
 * @returns `b` followed by the decimal block ID.
 * @throws Error when `blockId` is not an integer or is less than 1.
 */
export function formatBlockRef(blockId: number): string {
    const isValid = Number.isInteger(blockId) && blockId >= 1
    if (isValid) {
        return "b" + String(blockId)
    }
    throw new Error(`Invalid block ID: ${blockId}`)
}

/**
 * Parses a message reference string into its numeric index.
 *
 * The input is trimmed and lower-cased before being matched against MESSAGE_REF_REGEX.
 *
 * @param ref - Candidate reference text.
 * @returns The parsed index, or `null` when the text is not a message reference.
 */
export function parseMessageRef(ref: string): number | null {
    const captured = MESSAGE_REF_REGEX.exec(ref.trim().toLowerCase())
    if (captured === null) {
        return null
    }

    const parsed = Number.parseInt(captured[1], 10)
    if (!Number.isInteger(parsed)) {
        return null
    }
    return parsed
}

/**
 * Parses a compressed-block reference string into its numeric block ID.
 *
 * The input is trimmed and lower-cased before being matched against BLOCK_REF_REGEX.
 *
 * @param ref - Candidate reference text.
 * @returns The parsed block ID, or `null` when the text is not a block reference
 *          or its digits cannot be represented exactly as a number.
 */
export function parseBlockRef(ref: string): number | null {
    const normalized = ref.trim().toLowerCase()
    const match = BLOCK_REF_REGEX.exec(normalized)
    if (!match) {
        return null
    }
    const id = Number.parseInt(match[1], 10)
    // The regex allows arbitrarily many digits. Beyond Number.MAX_SAFE_INTEGER the
    // parsed value is rounded, so it would silently denote a different block ID than
    // the one written; reject such input instead of returning a wrong ID.
    return Number.isSafeInteger(id) ? id : null
}

/**
 * Parses a boundary ID, which may be either a message reference or a
 * compressed-block reference.
 *
 * Message references are tried first, then block references.
 *
 * @param id - Candidate boundary ID text; trimmed and lower-cased before parsing.
 * @returns A tagged description of the boundary with its canonical `ref`,
 *          or `null` when the text matches neither form.
 */
export function parseBoundaryId(id: string): ParsedBoundaryId | null {
    const candidate = id.trim().toLowerCase()

    const index = parseMessageRef(candidate)
    if (index !== null) {
        return { kind: "message", ref: formatMessageRef(index), index }
    }

    const blockId = parseBlockRef(candidate)
    if (blockId === null) {
        return null
    }

    return { kind: "compressed-block", ref: formatBlockRef(blockId), blockId }
}

/**
 * Builds the marker text that labels a message with its reference.
 *
 * @param ref - Reference string to embed in the marker.
 * @returns The text `Message ID: ` followed by `ref`.
 */
export function formatMessageIdMarker(ref: string): string {
    const prefix = "Message ID: "
    return prefix.concat(ref)
}

/**
 * Ensures every message with a usable raw ID has a message reference recorded in
 * `state.messageIds`.
 *
 * Messages whose `info.id` is not a non-empty string are skipped. For a raw ID that
 * already has a reference, the reverse (`byRef`) entry is rewritten if it does not
 * point back to that raw ID. Otherwise a new reference is allocated and stored in
 * both maps.
 *
 * @param state - Session state holding the `byRawId` / `byRef` maps.
 * @param messages - Messages to scan, in order.
 * @returns The number of references newly allocated by this call.
 */
export function assignMessageRefs(state: SessionState, messages: WithParts[]): number {
    let newlyAssigned = 0

    for (const { info } of messages) {
        const rawId = info.id
        if (typeof rawId !== "string" || rawId === "") {
            continue
        }

        const knownRef = state.messageIds.byRawId.get(rawId)
        if (knownRef) {
            // Already aliased: only repair the reverse lookup when it has drifted.
            const reverseTarget = state.messageIds.byRef.get(knownRef)
            if (reverseTarget !== rawId) {
                state.messageIds.byRef.set(knownRef, rawId)
            }
        } else {
            const freshRef = allocateNextMessageRef(state)
            state.messageIds.byRawId.set(rawId, freshRef)
            state.messageIds.byRef.set(freshRef, rawId)
            newlyAssigned += 1
        }
    }

    return newlyAssigned
}

/**
 * Finds the next unused message reference and advances the allocation cursor.
 *
 * Scanning starts at `state.messageIds.nextRef` (clamped up to MESSAGE_REF_MIN_INDEX,
 * or MESSAGE_REF_MIN_INDEX itself when the stored value is not an integer) and skips
 * any reference already present in `byRef`. On success `nextRef` is set to one past
 * the index that was handed out.
 *
 * @param state - Session state holding the allocation cursor and the `byRef` map.
 * @returns The newly reserved reference string.
 * @throws Error when no free index remains up to MESSAGE_REF_MAX_INDEX.
 */
function allocateNextMessageRef(state: SessionState): string {
    const start = Number.isInteger(state.messageIds.nextRef)
        ? Math.max(MESSAGE_REF_MIN_INDEX, state.messageIds.nextRef)
        : MESSAGE_REF_MIN_INDEX

    for (let index = start; index <= MESSAGE_REF_MAX_INDEX; index += 1) {
        const ref = formatMessageRef(index)
        if (state.messageIds.byRef.has(ref)) {
            continue
        }
        state.messageIds.nextRef = index + 1
        return ref
    }

    throw new Error(
        `Message ID alias capacity exceeded. Cannot allocate more than ${formatMessageRef(MESSAGE_REF_MAX_INDEX)} aliases in this session.`,
    )
}
1 change: 1 addition & 0 deletions lib/messages/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export { prune } from "./prune"
export { insertCompressToolContext } from "./inject/inject"
export { insertMessageIdContext } from "./inject/inject"
51 changes: 51 additions & 0 deletions lib/messages/inject/inject.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import type { SessionState, WithParts } from "../../state"
import type { Logger } from "../../logger"
import type { PluginConfig } from "../../config"
import { formatMessageIdMarker } from "../../message-ids"
import { createSyntheticTextPart, createSyntheticToolPart, isIgnoredUserMessage } from "../utils"
import {
addAnchor,
applyAnchoredNudge,
Expand Down Expand Up @@ -56,3 +58,52 @@ export const insertCompressToolContext = (
persistAnchors(state, logger)
}
}

/**
 * Appends a message-ID marker to each user and assistant message that has a
 * reference recorded in `state.messageIds.byRawId`.
 *
 * - User messages rejected by `isIgnoredUserMessage` are skipped.
 * - Messages without a recorded reference are skipped.
 * - User messages receive a synthetic text part, unless a text part whose trimmed
 *   text equals the marker is already present.
 * - Assistant messages receive a synthetic tool part, unless a completed
 *   `context_info` tool part whose trimmed string output equals the marker is
 *   already present.
 * - Messages with any other role are left untouched.
 *
 * @param state - Session state providing the raw-ID to reference map.
 * @param messages - Messages to annotate; their `parts` arrays are mutated in place.
 */
export const insertMessageIdContext = (state: SessionState, messages: WithParts[]): void => {
    const toolModelId = getModelInfo(messages).modelId || ""

    for (const message of messages) {
        const role = message.info.role
        if (role === "user" && isIgnoredUserMessage(message)) {
            continue
        }

        const ref = state.messageIds.byRawId.get(message.info.id)
        if (!ref) {
            continue
        }

        const marker = formatMessageIdMarker(ref)

        if (role === "user") {
            let alreadyMarked = false
            for (const part of message.parts) {
                if (part.type === "text" && part.text.trim() === marker) {
                    alreadyMarked = true
                    break
                }
            }
            if (!alreadyMarked) {
                message.parts.push(createSyntheticTextPart(message, marker))
            }
        } else if (role === "assistant") {
            const alreadyMarked = message.parts.some(
                (part) =>
                    part.type === "tool" &&
                    part.tool === "context_info" &&
                    part.state?.status === "completed" &&
                    typeof part.state.output === "string" &&
                    part.state.output.trim() === marker,
            )
            if (!alreadyMarked) {
                message.parts.push(createSyntheticToolPart(message, marker, toolModelId))
            }
        }
    }
}
71 changes: 41 additions & 30 deletions lib/prompts/compress.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,29 @@ USER INTENT FIDELITY
When the compressed range includes user messages, preserve the user's intent with extra care. Do not change scope, constraints, priorities, acceptance criteria, or requested outcomes.
Directly quote user messages when they are short enough to include safely. Direct quotes are preferred when they best preserve exact meaning.

COMPRESSED BLOCK PLACEHOLDERS
When the selected range includes previously compressed blocks, use this exact placeholder format when referencing one:

- `{block_N}`

Rules:

- Include every required block placeholder exactly once.
- Do not invent placeholders for blocks outside the selected range.
- Treat `{block_N}` placeholders as RESERVED TOKENS. Do not emit `{block_N}` text anywhere except intentional placeholders.
- If you need to mention a block in prose, use plain text like `compressed bN` (without curly braces).
- Preflight check before finalizing: the set of `{block_N}` placeholders in your summary must exactly match the required set, with no duplicates.

These placeholders are semantic references. They will be replaced with the full stored compressed block content when the tool processes your output.

FLOW PRESERVATION WITH PLACEHOLDERS
When you use compressed block placeholders, write the surrounding summary text so it still reads correctly AFTER placeholder expansion.

- Treat each placeholder as a stand-in for a full conversation segment, not as a short label.
- Ensure transitions before and after each placeholder preserve chronology and causality.
- Do not write text that depends on the placeholder staying literal (for example, "as noted in {block_2}").
- Your final meaning must be coherent once each placeholder is replaced with its full compressed block content.

Yet be LEAN. Strip away the noise: failed attempts that led nowhere, verbose tool outputs, back-and-forth exploration. What remains should be pure signal - golden nuggets of detail that preserve full understanding with zero ambiguity.

THE WAYS OF COMPRESS
Expand All @@ -43,7 +66,7 @@ Exploration exhausted and patterns understood
Compress smaller ranges when:
You need to discard dead-end noise without waiting for a whole chapter to close
You need to preserve key findings from a narrow slice while freeing context quickly
You can bound a stale range cleanly with unique boundaries
You can bound a stale range cleanly with injected IDs

Do NOT compress when:
You may need exact code, error messages, or file contents from the range in the immediate next steps
Expand All @@ -52,40 +75,28 @@ You cannot identify reliable boundaries yet

Before compressing, ask: _"Is this range closed enough to become summary-only right now?"_ Compression is irreversible. The summary replaces everything in the range.

BOUNDARY MATCHING
You specify boundaries by matching unique text strings in the conversation. CRITICAL: In code-centric conversations, strings repeat often. Provide sufficiently unique text to match exactly once. Be conservative and choose longer, highly specific boundaries when in doubt. If a match fails (not found or found multiple times), the tool will error - extend your boundary string with more surrounding context in order to make SURE the tool does NOT error.

WHERE TO PICK STRINGS FROM (important for reliable matching):

- Your own assistant text responses (MOST RELIABLE - always stored verbatim)
- The user's own words in their messages
- Tool result output text (distinctive substrings within the output)
- Previous compress summaries
- Tool input string values (LEAST RELIABLE - only single concrete field values, not keys or schema fields, may be transformed by AI SDK)

NEVER USE GENERIC OR REPEATING STRINGS:
BOUNDARY IDS
You specify boundaries by ID.

Tool status messages repeat identically across every invocation. These are ALWAYS ambiguous:
Use the injected IDs visible in the conversation:

- "Edit applied successfully." (appears in EVERY successful edit)
- "File written successfully" or any tool success/error boilerplate
- Common tool output patterns that are identical across calls
- `mNNNN` IDs identify raw messages
- `bN` IDs identify previously compressed blocks

Instead, combine the generic output with surrounding unique context (a file path, a specific code snippet, or your own unique assistant text).
Rules:

Each boundary string you choose MUST be unique to ONE specific message. Before using a string, ask: "Could this exact text appear in any other place in this conversation?" If yes, extend it or pick a different string.
- Pick `startId` and `endId` directly from injected IDs in context.
- IDs must exist in the current visible context.
- `startId` must appear before `endId`.
- Prefer boundaries that produce short, closed ranges.

WHERE TO NEVER PICK STRINGS FROM:
ID SOURCES

- `<system-reminder>` tags or any XML wrapper/meta-commentary around messages
- Injected system instructions (plan mode text, max-steps warnings, mode-switch text, environment info)
- Reasoning parts or chain-of-thought text
- File/directory listing framing text (e.g. "Called the Read tool with the following input...")
- Strings that span across message or part boundaries
- Entire serialized JSON objects (key ordering may differ - pick a distinctive substring within instead)
- User messages include a text marker with their `mNNNN` ID.
- Assistant messages include a `context_info` tool marker with their `mNNNN` ID.
- Compressed blocks are addressable by `bN` IDs.

CRITICAL: AVOID USING TOOL INPUT VALUES
NEVER use tool input schema keys or field names as boundary strings (e.g., "startString", "endString", "filePath", "content"). These may be transformed by the AI SDK and are not reliable. The ONLY acceptable use of tool input strings is a SINGLE concrete field VALUE (not the key), and even then, prefer using assistant text, user messages, or tool result outputs instead. When in doubt, choose boundaries from your own assistant responses or distinctive user message content.
Do not invent IDs. Use only IDs that are present in context.

PARALLEL COMPRESS EXECUTION
When multiple independent ranges are ready and their boundaries do not overlap, launch MULTIPLE `compress` calls in parallel in a single response. This is the PREFERRED pattern over a single large-range compression when the work can be safely split. Run compression sequentially only when ranges overlap or when a later range depends on the result of an earlier compression.
Expand All @@ -96,8 +107,8 @@ THE FORMAT OF COMPRESS
{
topic: string, // Short label (3-5 words) - e.g., "Auth System Exploration"
content: {
startString: string, // Unique text string marking the beginning of the range
endString: string, // Unique text string marking the end of the range
startId: string, // Boundary ID at range start: mNNNN or bN
endId: string, // Boundary ID at range end: mNNNN or bN
summary: string // Complete technical summary replacing all content in the range
}
}
Expand Down
5 changes: 1 addition & 4 deletions lib/prompts/nudge.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,7 @@ Do not jump to a single broad range when the same cleanup can be done safely wit

If you are performing a critical atomic operation, do not interrupt it, but make sure to perform context management rapidly.

BE VERY MINDFUL of the startString and endString you use for compression for RELIABLE boundary matching. NEVER use generic tool outputs like "Edit applied successfully." or generic status message as boundaries. Use unique assistant text or distinctive content instead with enough surrounding context to ensure uniqueness.

CRITICAL: AVOID USING TOOL INPUT VALUES AS BOUNDARIES
NEVER use tool input schema keys or field names. The ONLY acceptable use of tool input strings is a SINGLE concrete field VALUE (not the key), and even then, prefer assistant text, user messages, or tool result outputs instead.
Use injected boundary IDs for compression (`mNNNN` for messages, `bN` for compressed blocks). Pick IDs that are visible in context and ensure `startId` appears before `endId`.

Ensure your summaries are inclusive of all parts of the range.
If the compressed range includes user messages, preserve user intent exactly. Prefer direct quotes for short user messages to avoid semantic drift.
Expand Down
1 change: 1 addition & 0 deletions lib/state/persistence.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ export async function loadSessionState(
(s): s is CompressSummary =>
s !== null &&
typeof s === "object" &&
typeof s.blockId === "number" &&
typeof s.anchorMessageId === "string" &&
typeof s.summary === "string",
)
Expand Down
Loading