diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx index 0169c68e617..fbc8e6e05fe 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx @@ -28,6 +28,7 @@ import { Prompt, type PromptRef } from "@tui/component/prompt" import type { AssistantMessage, Part, ToolPart, UserMessage, TextPart, ReasoningPart } from "@opencode-ai/sdk" import { useLocal } from "@tui/context/local" import { Locale } from "@/util/locale" +import { Token } from "@/util/token" import type { Tool } from "@/tool/tool" import type { ReadTool } from "@/tool/read" import type { WriteTool } from "@/tool/write" @@ -80,6 +81,7 @@ const context = createContext<{ conceal: () => boolean showThinking: () => boolean showTimestamps: () => boolean + showTokens: () => boolean }>() function use() { @@ -106,11 +108,20 @@ export function Session() { return messages().findLast((x) => x.role === "assistant") }) + const local = useLocal() + + const contextLimit = createMemo(() => { + const c = local.model.current() + const provider = sync.data.provider.find((p) => p.id === c.providerID) + return provider?.models[c.modelID]?.limit.context ?? 200000 + }) + const dimensions = useTerminalDimensions() const [sidebar, setSidebar] = createSignal<"show" | "hide" | "auto">(kv.get("sidebar", "auto")) const [conceal, setConceal] = createSignal(true) const [showThinking, setShowThinking] = createSignal(true) const [showTimestamps, setShowTimestamps] = createSignal(kv.get("timestamps", "hide") === "show") + const [showTokens, setShowTokens] = createSignal(kv.get("tokens", "hide") === "show") const wide = createMemo(() => dimensions().width > 120) const sidebarVisible = createMemo(() => sidebar() === "show" || (sidebar() === "auto" && wide())) @@ -204,8 +215,6 @@ export function Session() { }, 50) } - const local = useLocal() - function moveChild(direction: number) { const parentID = session()?.parentID ?? session()?.id let children = sync.data.session @@ -428,6 +437,19 @@ export function Session() { dialog.clear() }, }, + { + title: "Toggle tokens", + value: "session.toggle.tokens", + category: "Session", + onSelect: (dialog) => { + setShowTokens((prev) => { + const next = !prev + kv.set("tokens", next ? "show" : "hide") + return next + }) + dialog.clear() + }, + }, { title: "Page up", value: "session.page.up", @@ -729,6 +751,7 @@ export function Session() { conceal, showThinking, showTimestamps, + showTokens, }} > @@ -864,6 +887,7 @@ export function Session() { last={lastAssistant()?.id === message.id} message={message as AssistantMessage} parts={sync.data.part[message.id] ?? []} + contextLimit={contextLimit()} /> @@ -917,6 +941,13 @@ function UserMessage(props: { const queued = createMemo(() => props.pending && props.message.id > props.pending) const color = createMemo(() => (queued() ? theme.accent : theme.secondary)) + const individualTokens = createMemo(() => { + return props.parts.reduce((sum, part) => { + if (part.type === "text") return sum + Token.estimate(part.text) + return sum + }, 0) + }) + const compaction = createMemo(() => props.parts.find((x) => x.type === "compaction")) return ( @@ -977,6 +1008,9 @@ function UserMessage(props: { > QUEUED + 0}> + ⬝~{individualTokens().toLocaleString()} tok + @@ -994,7 +1028,8 @@ function UserMessage(props: { ) } -function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; last: boolean }) { +function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; last: boolean; contextLimit: number }) { + const ctx = use() const local = useLocal() const { theme } = useTheme() const sync = useSync() @@ -1004,12 +1039,71 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las return props.message.finish && !["tool-calls", "unknown"].includes(props.message.finish) }) + // Find the parent user message (reused by duration and token calculations) + const user = createMemo(() => messages().find((x) => x.role === "user" && x.id === props.message.parentID)) + const duration = createMemo(() => { if (!final()) return 0 if (!props.message.time.completed) return 0 - const user = messages().find((x) => x.role === "user" && x.id === props.message.parentID) - if (!user || !user.time) return 0 - return props.message.time.completed - user.time.created + const u = user() + if (!u || !u.time) return 0 + return props.message.time.completed - u.time.created + }) + + // OUT tokens (sent TO API) - includes user text + tool results from previous assistant + const outEstimate = createMemo(() => props.message.sentEstimate) + + // IN tokens (from API TO computer) + const inTokens = createMemo(() => props.message.tokens.output) + const inEstimate = createMemo(() => props.message.outputEstimate) + + // Reasoning tokens (must be defined BEFORE inDisplay) + const reasoningTokens = createMemo(() => props.message.tokens.reasoning) + const reasoningEstimate = createMemo(() => props.message.reasoningEstimate) + + const outDisplay = createMemo(() => { + const estimate = outEstimate() + if (estimate !== undefined) return "~" + estimate.toLocaleString() + const tokens = props.message.tokens.input + if (tokens > 0) return tokens.toLocaleString() + return "0" + }) + + const inDisplay = createMemo(() => { + const estimate = inEstimate() + if (estimate !== undefined) return "~" + estimate.toLocaleString() + const tokens = inTokens() + if (tokens > 0) return tokens.toLocaleString() + // Show ~0 during streaming when we have reasoning but no output yet + if (reasoningEstimate() !== undefined || reasoningTokens() > 0) return "~0" + return undefined + }) + + const tokensDisplay = createMemo(() => { + const inVal = inDisplay() + if (!inVal) return undefined + return `${inVal}↓/${outDisplay()}↑` + }) + + const reasoningDisplay = createMemo(() => { + const estimate = reasoningEstimate() + if (estimate !== undefined) return "~" + estimate.toLocaleString() + const tokens = reasoningTokens() + if (tokens > 0) return tokens.toLocaleString() + return undefined + }) + + const contextEstimate = createMemo(() => props.message.contextEstimate) + + const cumulativeTokens = createMemo(() => { + const estimate = contextEstimate() + if (estimate !== undefined) return estimate + return props.message.tokens.input + props.message.tokens.cache.read + props.message.tokens.cache.write + }) + + const percentage = createMemo(() => { + if (!props.contextLimit) return 0 + return Math.round((cumulativeTokens() / props.contextLimit) * 100) }) return ( @@ -1053,6 +1147,22 @@ function AssistantMessage(props: { message: AssistantMessage; parts: Part[]; las ⬝{Locale.duration(duration())} + + + {" "} + ⬝ {tokensDisplay()} tok + + {" · "} + {reasoningDisplay()} think + + 0 || inEstimate() !== undefined || reasoningEstimate() !== undefined} + > + {" · "} + {cumulativeTokens().toLocaleString()} context ({percentage()}%) + + + diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts index 1255d39f0b8..bfd935537b2 100644 --- a/packages/opencode/src/session/compaction.ts +++ b/packages/opencode/src/session/compaction.ts @@ -98,6 +98,9 @@ export namespace SessionCompaction { }) { const model = await Provider.getModel(input.model.providerID, input.model.modelID) const system = [...SystemPrompt.compaction(model.providerID)] + const lastFinished = input.messages.find((m) => m.info.role === "assistant" && m.info.finish)?.info as + | MessageV2.Assistant + | undefined const msg = (await Session.updateMessage({ id: Identifier.ascending("message"), role: "assistant", @@ -121,6 +124,10 @@ export namespace SessionCompaction { time: { created: Date.now(), }, + outputEstimate: lastFinished?.outputEstimate, + reasoningEstimate: lastFinished?.reasoningEstimate, + contextEstimate: lastFinished?.contextEstimate, + sentEstimate: lastFinished?.sentEstimate, })) as MessageV2.Assistant const processor = SessionProcessor.create({ assistantMessage: msg, diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index c451ae2b38d..e9e6d5e72d8 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -301,6 +301,8 @@ export namespace MessageV2 { }), system: z.string().optional(), tools: z.record(z.string(), z.boolean()).optional(), + sentEstimate: z.number().optional(), + contextEstimate: z.number().optional(), }).meta({ ref: "UserMessage", }) @@ -360,6 +362,10 @@ export namespace MessageV2 { write: z.number(), }), }), + outputEstimate: z.number().optional(), + reasoningEstimate: z.number().optional(), + contextEstimate: z.number().optional(), + sentEstimate: z.number().optional(), finish: z.string().optional(), }).meta({ ref: "AssistantMessage", diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 6d1125c66b3..570879df1b4 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -11,6 +11,7 @@ import { SessionSummary } from "./summary" import { Bus } from "@/bus" import { SessionRetry } from "./retry" import { SessionStatus } from "./status" +import { Token } from "@/util/token" export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 @@ -40,6 +41,9 @@ export namespace SessionProcessor { }, async process(fn: () => StreamTextResult, never>) { log.info("process") + // Initialize from existing estimates (convert tokens to characters) to accumulate across multiple process() calls + let reasoningTotal = Token.toCharCount(input.assistantMessage.reasoningEstimate ?? 0) + let textTotal = Token.toCharCount(input.assistantMessage.outputEstimate ?? 0) while (true) { try { let currentText: MessageV2.TextPart | undefined @@ -75,7 +79,15 @@ export namespace SessionProcessor { const part = reasoningMap[value.id] part.text += value.text if (value.providerMetadata) part.metadata = value.providerMetadata - if (part.text) await Session.updatePart({ part, delta: value.text }) + if (part.text) { + const active = Object.values(reasoningMap).reduce((sum, p) => sum + p.text.length, 0) + const estimate = Token.toTokenEstimate(Math.max(0, reasoningTotal + active)) + if (input.assistantMessage.reasoningEstimate !== estimate) { + input.assistantMessage.reasoningEstimate = estimate + await Session.updateMessage(input.assistantMessage) + } + await Session.updatePart({ part, delta: value.text }) + } } break @@ -89,6 +101,7 @@ export namespace SessionProcessor { end: Date.now(), } if (value.providerMetadata) part.metadata = value.providerMetadata + reasoningTotal += part.text.length await Session.updatePart(part) delete reasoningMap[value.id] } @@ -248,6 +261,8 @@ export namespace SessionProcessor { input.assistantMessage.finish = value.finishReason input.assistantMessage.cost += usage.cost input.assistantMessage.tokens = usage.tokens + input.assistantMessage.contextEstimate = + usage.tokens.input + usage.tokens.cache.read + usage.tokens.cache.write await Session.updatePart({ id: Identifier.ascending("part"), reason: value.finishReason, @@ -297,11 +312,17 @@ export namespace SessionProcessor { if (currentText) { currentText.text += value.text if (value.providerMetadata) currentText.metadata = value.providerMetadata - if (currentText.text) + if (currentText.text) { + const estimate = Token.toTokenEstimate(Math.max(0, textTotal + currentText.text.length)) + if (input.assistantMessage.outputEstimate !== estimate) { + input.assistantMessage.outputEstimate = estimate + await Session.updateMessage(input.assistantMessage) + } await Session.updatePart({ part: currentText, delta: value.text, }) + } } break @@ -313,6 +334,7 @@ export namespace SessionProcessor { end: Date.now(), } if (value.providerMetadata) currentText.metadata = value.providerMetadata + textTotal += currentText.text.length await Session.updatePart(currentText) } currentText = undefined diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index b3c3c467168..9b36b3d2632 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -48,6 +48,7 @@ import { fn } from "@/util/fn" import { SessionProcessor } from "./processor" import { TaskTool } from "@/tool/task" import { SessionStatus } from "./status" +import { Token } from "@/util/token" // @ts-ignore globalThis.AI_SDK_LOG_WARNINGS = false @@ -313,71 +314,50 @@ export namespace SessionPrompt { time: { created: Date.now(), }, + outputEstimate: lastFinished?.outputEstimate, + reasoningEstimate: lastFinished?.reasoningEstimate, + contextEstimate: lastFinished?.contextEstimate, + sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0), })) as MessageV2.Assistant - let part = (await Session.updatePart({ + + const part: MessageV2.ToolPart = { + type: "tool", id: Identifier.ascending("part"), messageID: assistantMessage.id, - sessionID: assistantMessage.sessionID, - type: "tool", + sessionID, + tool: "task", callID: ulid(), - tool: TaskTool.id, state: { status: "running", - input: { - prompt: task.prompt, - description: task.description, - subagent_type: task.agent, - }, time: { start: Date.now(), }, - }, - })) as MessageV2.ToolPart - const result = await taskTool - .execute( - { + input: { prompt: task.prompt, description: task.description, subagent_type: task.agent, }, - { - agent: task.agent, - messageID: assistantMessage.id, - sessionID: sessionID, - abort, - async metadata(input) { - await Session.updatePart({ - ...part, - type: "tool", - state: { - ...part.state, - ...input, - }, - } satisfies MessageV2.ToolPart) - }, - }, - ) - .catch(() => {}) - assistantMessage.finish = "tool-calls" - assistantMessage.time.completed = Date.now() - await Session.updateMessage(assistantMessage) - if (result && part.state.status === "running") { - await Session.updatePart({ - ...part, - state: { - status: "completed", - input: part.state.input, - title: result.title, - metadata: result.metadata, - output: result.output, - attachments: result.attachments, - time: { - ...part.state.time, - end: Date.now(), - }, - }, - } satisfies MessageV2.ToolPart) + }, } + await Session.updatePart(part) + + const result = await taskTool.execute( + { + prompt: task.prompt, + description: task.description, + subagent_type: task.agent, + }, + { + sessionID, + abort, + agent: lastUser.agent, + messageID: assistantMessage.id, + callID: part.callID, + extra: { providerID: model.providerID, modelID: model.modelID }, + metadata: async () => {}, + }, + ) + if (!result) { await Session.updatePart({ ...part, @@ -433,6 +413,17 @@ export namespace SessionPrompt { messages: msgs, agent, }) + + // Calculate tokens for tool results from previous assistant that will be sent in this API call + // Reuse parts from already-loaded messages to avoid redundant query + let toolResultTokens = 0 + if (lastAssistant && step > 1) { + const assistantMessage = msgs.find((m) => m.info.id === lastAssistant.id) + if (assistantMessage) { + toolResultTokens = Token.calculateToolResultTokens(assistantMessage.parts) + } + } + const processor = SessionProcessor.create({ assistantMessage: (await Session.updateMessage({ id: Identifier.ascending("message"), @@ -456,6 +447,10 @@ export namespace SessionPrompt { created: Date.now(), }, sessionID, + outputEstimate: lastFinished?.outputEstimate, + reasoningEstimate: lastFinished?.reasoningEstimate, + contextEstimate: lastFinished?.contextEstimate, + sentEstimate: (lastAssistant?.sentEstimate || 0) + (lastUser.sentEstimate || 0), })) as MessageV2.Assistant, sessionID: sessionID, model: model.info, @@ -1065,6 +1060,25 @@ export namespace SessionPrompt { }, ) + const userText = parts + .filter((p) => p.type === "text" && !p.ignored) + .map((p) => (p as MessageV2.TextPart).text) + .join("") + + // Calculate user message tokens + let sentTokens = Token.estimate(userText) + + // Add tokens from tool results that will be sent with this message + // Tool results from the previous assistant message are included in the API request + const msgs = await MessageV2.filterCompacted(MessageV2.stream(input.sessionID)) + const lastAssistant = msgs.findLast((m) => m.info.role === "assistant") + if (lastAssistant) { + sentTokens += Token.calculateToolResultTokens(lastAssistant.parts) + } + + info.sentEstimate = sentTokens + info.contextEstimate = sentTokens + await Session.updateMessage(info) for (const part of parts) { await Session.updatePart(part) @@ -1134,6 +1148,8 @@ export namespace SessionPrompt { providerID: model.providerID, modelID: model.modelID, }, + sentEstimate: 0, + contextEstimate: 0, } await Session.updateMessage(userMsg) const userPart: MessageV2.Part = { @@ -1146,6 +1162,12 @@ export namespace SessionPrompt { } await Session.updatePart(userPart) + const msgs = await MessageV2.filterCompacted(MessageV2.stream(input.sessionID)) + const lastFinished = msgs.find((m) => m.info.role === "assistant" && m.info.finish)?.info as + | MessageV2.Assistant + | undefined + const lastAssistant = msgs.find((m) => m.info.role === "assistant")?.info as MessageV2.Assistant | undefined + const msg: MessageV2.Assistant = { id: Identifier.ascending("message"), sessionID: input.sessionID, @@ -1168,6 +1190,10 @@ export namespace SessionPrompt { }, modelID: model.modelID, providerID: model.providerID, + outputEstimate: lastFinished?.outputEstimate, + reasoningEstimate: lastFinished?.reasoningEstimate, + contextEstimate: lastFinished?.contextEstimate, + sentEstimate: (lastAssistant?.sentEstimate || 0) + (userMsg.sentEstimate || 0), } await Session.updateMessage(msg) const part: MessageV2.Part = { diff --git a/packages/opencode/src/util/token.ts b/packages/opencode/src/util/token.ts index cee5adc3771..fc47a98ae0a 100644 --- a/packages/opencode/src/util/token.ts +++ b/packages/opencode/src/util/token.ts @@ -4,4 +4,50 @@ export namespace Token { export function estimate(input: string) { return Math.max(0, Math.round((input || "").length / CHARS_PER_TOKEN)) } + + /** + * Convert token estimate to character count + * Used when accumulating text across stream deltas + */ + export function toCharCount(tokenEstimate: number): number { + return tokenEstimate * CHARS_PER_TOKEN + } + + /** + * Convert character count to token estimate + * Used when converting accumulated text back to tokens + */ + export function toTokenEstimate(charCount: number): number { + return Math.round(charCount / CHARS_PER_TOKEN) + } + + /** + * Calculate tokens for tool results that will be sent to the API + * Includes tool input JSON, output (or compaction message), and errors + */ + export function calculateToolResultTokens(parts: Array<{ type: string; state?: any }>) { + let tokens = 0 + for (const part of parts) { + if (part.type === "tool") { + // Add null check for part.state + if (!part.state) continue + + // Safe access to input + if (part.state.input) { + tokens += estimate(JSON.stringify(part.state.input)) + } + + if (part.state.status === "completed") { + // Use optional chaining for compacted check + const output = part.state.time?.compacted ? "[Old tool result content cleared]" : (part.state.output ?? "") + tokens += estimate(output) + } + + if (part.state.status === "error" && part.state.error) { + tokens += estimate(part.state.error) + } + } + } + return tokens + } } diff --git a/packages/sdk/js/src/gen/types.gen.ts b/packages/sdk/js/src/gen/types.gen.ts index e2e611db13a..c6c86adb993 100644 --- a/packages/sdk/js/src/gen/types.gen.ts +++ b/packages/sdk/js/src/gen/types.gen.ts @@ -130,6 +130,10 @@ export type AssistantMessage = { write: number } } + outputEstimate?: number + reasoningEstimate?: number + contextEstimate?: number + sentEstimate?: number finish?: string }