47 changes: 47 additions & 0 deletions packages/opencode/src/provider/provider.ts
@@ -41,6 +41,41 @@ import { ProviderTransform } from "./transform"
 export namespace Provider {
   const log = Log.create({ service: "provider" })
 
+  const DEFAULT_OLLAMA_CONTEXT = 4096
+
+  async function isOllamaServer(baseURL: string): Promise<boolean> {
+    try {
+      const ollamaBase = baseURL.replace(/\/v1\/?$/, "")
+      const response = await fetch(ollamaBase, {
+        signal: AbortSignal.timeout(2000),
+      })
+      if (!response.ok) return false
+      const text = await response.text()
+      return text === "Ollama is running"
+    } catch {
+      return false
+    }
+  }
+
+  async function fetchOllamaModelContext(baseURL: string, modelName: string): Promise<number> {
+    try {
+      const ollamaBase = baseURL.replace(/\/v1\/?$/, "")
+      const response = await fetch(`${ollamaBase}/api/show`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ name: modelName }),
+        signal: AbortSignal.timeout(2000),
+      })
+      if (!response.ok) return DEFAULT_OLLAMA_CONTEXT
+      const data = (await response.json()) as { parameters?: string }
+      const match = data.parameters?.match(/num_ctx\s+(\d+)/)
+      if (match) return parseInt(match[1], 10)
+    } catch {
+      // Error querying Ollama - use default
+    }
+    return DEFAULT_OLLAMA_CONTEXT
+  }
+
   function isGpt5OrLater(modelID: string): boolean {
     const match = /^gpt-(\d+)/.exec(modelID)
     if (!match) {
@@ -806,6 +841,18 @@ export namespace Provider {
       parsed.models[modelID] = parsedModel
     }
     database[providerID] = parsed
+
+    // Fetch context limits from Ollama API if this is an Ollama server
+    // Only query Ollama if no limit was configured (config takes priority)
+    if (parsed.options.baseURL && (await isOllamaServer(parsed.options.baseURL))) {
+      const contextFetches = Object.entries(parsed.models).map(async ([modelID, model]) => {
+        if (model.limit.context === 0) {
+          const context = await fetchOllamaModelContext(parsed.options.baseURL, modelID)
+          model.limit.context = context
+        }
+      })
+      await Promise.all(contextFetches)
+    }
   }
 
   // load env
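Both helpers lean on stable parts of Ollama's HTTP API: the server's root endpoint answers with the literal plain-text banner "Ollama is running", and POST /api/show returns model metadata whose `parameters` field is a multi-line string of Modelfile parameters, which mentions num_ctx only when the model was created with an explicit value (hence the DEFAULT_OLLAMA_CONTEXT fallback). A minimal standalone sketch of the two probes, assuming a default local install at http://localhost:11434 and a pulled "llama3" model, both of which are placeholders rather than anything in the PR:

// Probe 1: the root endpoint returns a fixed plain-text banner.
const base = "http://localhost:11434" // assumed default Ollama address
const banner = await fetch(base, { signal: AbortSignal.timeout(2000) }).then((r) => r.text())
console.log(banner) // "Ollama is running"

// Probe 2: /api/show returns model metadata; `parameters` is a multi-line
// string of Modelfile parameters and includes num_ctx only when set explicitly.
const show = (await fetch(`${base}/api/show`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ name: "llama3" }), // "llama3" is a placeholder model
}).then((r) => r.json())) as { parameters?: string }

const match = show.parameters?.match(/num_ctx\s+(\d+)/)
console.log(match ? parseInt(match[1], 10) : 4096) // same fallback as the PR

Since the per-model lookups are independent, the second hunk fires them through Promise.all rather than awaiting each /api/show call in sequence, so provider startup costs roughly the latency of a single call even when many models are pulled.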
4 changes: 3 additions & 1 deletion packages/opencode/src/session/compaction.ts
@@ -27,13 +27,15 @@ export namespace SessionCompaction {
     ),
   }
 
+  const OUTPUT_RESERVE_RATIO = 0.10
+
   export async function isOverflow(input: { tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
     const config = await Config.get()
     if (config.compaction?.auto === false) return false
     const context = input.model.limit.context
     if (context === 0) return false
     const count = input.tokens.input + input.tokens.cache.read + input.tokens.output
-    const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || SessionPrompt.OUTPUT_TOKEN_MAX
+    const output = Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) || Math.min(Math.floor(context * OUTPUT_RESERVE_RATIO), SessionPrompt.OUTPUT_TOKEN_MAX)
     const usable = input.model.limit.input || context - output
     return count > usable
   }
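The compaction change guards a degenerate case: when a model reports no output limit, Math.min(limit.output, OUTPUT_TOKEN_MAX) is 0 and falsy, and the old fallback then reserved the full OUTPUT_TOKEN_MAX, which can exceed a small local model's entire context, driving usable negative so every session looks overflowed. The new fallback caps the reserve at 10% of the context instead. A worked sketch with assumed numbers (a 4096-token Ollama context, and OUTPUT_TOKEN_MAX taken as 32_000 purely for illustration; the real constant lives in SessionPrompt):

const context = 4096
const limitOutput = 0 // model reports no output limit
const OUTPUT_TOKEN_MAX = 32_000 // assumed value, for illustration only
const OUTPUT_RESERVE_RATIO = 0.1

// Old fallback: Math.min(0, 32000) is 0 (falsy), so the full max is reserved
// and the usable budget goes negative.
const oldOutput = Math.min(limitOutput, OUTPUT_TOKEN_MAX) || OUTPUT_TOKEN_MAX
console.log(oldOutput, context - oldOutput) // 32000, -27904

// New fallback: reserve 10% of context, still capped at OUTPUT_TOKEN_MAX.
const newOutput =
  Math.min(limitOutput, OUTPUT_TOKEN_MAX) ||
  Math.min(Math.floor(context * OUTPUT_RESERVE_RATIO), OUTPUT_TOKEN_MAX)
console.log(newOutput, context - newOutput) // 409, 3687

With the old fallback any token count at all triggers compaction immediately; with the new one, roughly 90% of the window stays usable for input.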