23 changes: 21 additions & 2 deletions packages/types/src/model.ts
@@ -18,6 +18,22 @@ export const reasoningEffortWithMinimalSchema = z.union([reasoningEffortsSchema,

export type ReasoningEffortWithMinimal = z.infer<typeof reasoningEffortWithMinimalSchema>

/**
* Extended Reasoning Effort (includes "none" and "minimal")
* Note: "disable" is a UI/control value, not a value sent as effort
*/
export const reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high"] as const

export const reasoningEffortExtendedSchema = z.enum(reasoningEffortsExtended)

export type ReasoningEffortExtended = z.infer<typeof reasoningEffortExtendedSchema>

/**
* Reasoning Effort user setting (includes "disable")
*/
export const reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high"] as const
export const reasoningEffortSettingSchema = z.enum(reasoningEffortSettingValues)

/**
* Verbosity
*/
@@ -67,7 +83,9 @@ export const modelInfoSchema = z.object({
supportsTemperature: z.boolean().optional(),
defaultTemperature: z.number().optional(),
requiredReasoningBudget: z.boolean().optional(),
supportsReasoningEffort: z.boolean().optional(),
supportsReasoningEffort: z
.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high"]))])
.optional(),
requiredReasoningEffort: z.boolean().optional(),
preserveReasoning: z.boolean().optional(),
supportedParameters: z.array(modelParametersSchema).optional(),
@@ -76,7 +94,8 @@ export const modelInfoSchema = z.object({
cacheWritesPrice: z.number().optional(),
cacheReadsPrice: z.number().optional(),
description: z.string().optional(),
reasoningEffort: reasoningEffortsSchema.optional(),
// Default effort value for models that support reasoning effort
reasoningEffort: reasoningEffortExtendedSchema.optional(),
minTokensPerCachePoint: z.number().optional(),
maxCachePoints: z.number().optional(),
cachableFields: z.array(z.string()).optional(),
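For reference, a minimal sketch (not part of this PR) of how a consumer might narrow the widened `supportsReasoningEffort` field, which is now either the legacy boolean or a list of permitted effort values. The local `ReasoningCapableModel` shape and `isEffortSupported` helper are illustrative assumptions, not exports of the package:

```typescript
// Hypothetical consumer-side helper; mirrors the shapes added in model.ts.
type ReasoningEffortSetting = "disable" | "none" | "minimal" | "low" | "medium" | "high"

interface ReasoningCapableModel {
	// Legacy boolean (on/off) or an explicit list of accepted efforts.
	supportsReasoningEffort?: boolean | readonly ReasoningEffortSetting[]
	// Default effort for models that support reasoning effort.
	reasoningEffort?: Exclude<ReasoningEffortSetting, "disable">
}

function isEffortSupported(model: ReasoningCapableModel, effort: ReasoningEffortSetting): boolean {
	const support = model.supportsReasoningEffort
	if (support === undefined || support === false) return false
	if (support === true) return true // legacy semantics: any effort is accepted
	return support.includes(effort)
}

// Example: a GPT-5.1-style entry that accepts "none" but not "minimal".
const gpt51Like: ReasoningCapableModel = {
	supportsReasoningEffort: ["none", "low", "medium", "high"],
	reasoningEffort: "medium",
}
console.log(isEffortSupported(gpt51Like, "minimal")) // false
console.log(isEffortSupported(gpt51Like, "none")) // true
```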
4 changes: 2 additions & 2 deletions packages/types/src/provider-settings.ts
@@ -1,6 +1,6 @@
import { z } from "zod"

import { modelInfoSchema, reasoningEffortWithMinimalSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js"
import { modelInfoSchema, reasoningEffortSettingSchema, verbosityLevelsSchema, serviceTierSchema } from "./model.js"
import { codebaseIndexProviderSchema } from "./codebase-index.js"
import {
anthropicModels,
@@ -176,7 +176,7 @@ const baseProviderSettingsSchema = z.object({

// Model reasoning.
enableReasoningEffort: z.boolean().optional(),
reasoningEffort: reasoningEffortWithMinimalSchema.optional(),
reasoningEffort: reasoningEffortSettingSchema.optional(),
modelMaxTokens: z.number().optional(),
modelMaxThinkingTokens: z.number().optional(),

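The new `reasoningEffortSettingSchema` lets the user-facing setting carry "disable", which the comment in model.ts marks as a UI/control value rather than something sent as effort. The mapping below is an assumption about how that value might be translated when a request is built; the actual request-building code is not part of this diff:

```typescript
// Illustrative only: "disable" (or an unset value) means no reasoning effort
// is sent at all; every other setting value passes through unchanged.
type ReasoningEffortSetting = "disable" | "none" | "minimal" | "low" | "medium" | "high"
type ReasoningEffortParam = Exclude<ReasoningEffortSetting, "disable">

function toRequestEffort(setting?: ReasoningEffortSetting): ReasoningEffortParam | undefined {
	if (!setting || setting === "disable") return undefined
	return setting
}

console.log(toRequestEffort("disable")) // undefined -> omit the parameter
console.log(toRequestEffort("none")) // "none" -> sent explicitly
```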
154 changes: 124 additions & 30 deletions packages/types/src/providers/openai.ts
@@ -3,85 +3,128 @@ import type { ModelInfo } from "../model.js"
// https://openai.com/api/pricing/
export type OpenAiNativeModelId = keyof typeof openAiNativeModels

export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-5-2025-08-07"
export const openAiNativeDefaultModelId: OpenAiNativeModelId = "gpt-5.1"

export const openAiNativeModels = {
"gpt-5-chat-latest": {
"gpt-5.1": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: false,
supportsReasoningEffort: ["none", "low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 1.25,
outputPrice: 10.0,
cacheReadsPrice: 0.13,
description: "GPT-5 Chat Latest: Optimized for conversational AI and non-reasoning tasks",
cacheReadsPrice: 0.125,
supportsVerbosity: true,
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
],
description: "GPT-5.1: The best model for coding and agentic tasks across domains",
},
"gpt-5-2025-08-07": {
"gpt-5.1-codex": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
supportsReasoningEffort: ["low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 1.25,
outputPrice: 10.0,
cacheReadsPrice: 0.13,
description: "GPT-5: The best model for coding and agentic tasks across domains",
// supportsVerbosity is a new capability; ensure ModelInfo includes it
cacheReadsPrice: 0.125,
supportsTemperature: false,
tiers: [{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 }],
description: "GPT-5.1 Codex: A version of GPT-5.1 optimized for agentic coding in Codex",
},
"gpt-5.1-codex-mini": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: ["low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 0.25,
outputPrice: 2.0,
cacheReadsPrice: 0.025,
supportsTemperature: false,
description: "GPT-5.1 Codex mini: A version of GPT-5.1 optimized for agentic coding in Codex",
},
"gpt-5": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 1.25,
outputPrice: 10.0,
cacheReadsPrice: 0.125,
supportsVerbosity: true,
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
],
description: "GPT-5: The best model for coding and agentic tasks across domains",
},
"gpt-5-mini-2025-08-07": {
"gpt-5-mini": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 0.25,
outputPrice: 2.0,
cacheReadsPrice: 0.03,
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
cacheReadsPrice: 0.025,
supportsVerbosity: true,
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 400000, inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 },
{ name: "priority", contextWindow: 400000, inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 },
],
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
},
"gpt-5-nano-2025-08-07": {
"gpt-5-codex": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
supportsReasoningEffort: ["low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 1.25,
outputPrice: 10.0,
cacheReadsPrice: 0.125,
supportsTemperature: false,
tiers: [{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 }],
description: "GPT-5-Codex: A version of GPT-5 optimized for agentic coding in Codex",
},
"gpt-5-nano": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 0.05,
outputPrice: 0.4,
cacheReadsPrice: 0.01,
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
cacheReadsPrice: 0.005,
supportsVerbosity: true,
supportsTemperature: false,
tiers: [{ name: "flex", contextWindow: 400000, inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }],
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
},
"gpt-5-codex": {
"gpt-5-chat-latest": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: true,
reasoningEffort: "medium",
inputPrice: 1.25,
outputPrice: 10.0,
cacheReadsPrice: 0.13,
description: "GPT-5-Codex: A version of GPT-5 optimized for agentic coding in Codex",
supportsVerbosity: true,
supportsTemperature: false,
cacheReadsPrice: 0.125,
description: "GPT-5 Chat: Optimized for conversational AI and non-reasoning tasks",
},
"gpt-4.1": {
maxTokens: 32_768,
@@ -130,7 +173,7 @@ export const openAiNativeModels = {
inputPrice: 2.0,
outputPrice: 8.0,
cacheReadsPrice: 0.5,
supportsReasoningEffort: true,
supportsReasoningEffort: ["low", "medium", "high"],
reasoningEffort: "medium",
supportsTemperature: false,
tiers: [
@@ -168,7 +211,7 @@ export const openAiNativeModels = {
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.275,
supportsReasoningEffort: true,
supportsReasoningEffort: ["low", "medium", "high"],
reasoningEffort: "medium",
supportsTemperature: false,
tiers: [
@@ -206,7 +249,7 @@ export const openAiNativeModels = {
inputPrice: 1.1,
outputPrice: 4.4,
cacheReadsPrice: 0.55,
supportsReasoningEffort: true,
supportsReasoningEffort: ["low", "medium", "high"],
reasoningEffort: "medium",
supportsTemperature: false,
},
@@ -295,11 +338,63 @@ export const openAiNativeModels = {
supportsPromptCache: false,
inputPrice: 1.5,
outputPrice: 6,
cacheReadsPrice: 0,
cacheReadsPrice: 0.375,
supportsTemperature: false,
description:
"Codex Mini: Cloud-based software engineering agent powered by codex-1, a version of o3 optimized for coding tasks. Trained with reinforcement learning to generate human-style code, adhere to instructions, and iteratively run tests.",
},
// Dated clones (snapshots) preserved for backward compatibility
"gpt-5-2025-08-07": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 1.25,
outputPrice: 10.0,
cacheReadsPrice: 0.125,
supportsVerbosity: true,
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 400000, inputPrice: 0.625, outputPrice: 5.0, cacheReadsPrice: 0.0625 },
{ name: "priority", contextWindow: 400000, inputPrice: 2.5, outputPrice: 20.0, cacheReadsPrice: 0.25 },
],
description: "GPT-5: The best model for coding and agentic tasks across domains",
},
"gpt-5-mini-2025-08-07": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 0.25,
outputPrice: 2.0,
cacheReadsPrice: 0.025,
supportsVerbosity: true,
supportsTemperature: false,
tiers: [
{ name: "flex", contextWindow: 400000, inputPrice: 0.125, outputPrice: 1.0, cacheReadsPrice: 0.0125 },
{ name: "priority", contextWindow: 400000, inputPrice: 0.45, outputPrice: 3.6, cacheReadsPrice: 0.045 },
],
description: "GPT-5 Mini: A faster, more cost-efficient version of GPT-5 for well-defined tasks",
},
"gpt-5-nano-2025-08-07": {
maxTokens: 128000,
contextWindow: 400000,
supportsImages: true,
supportsPromptCache: true,
supportsReasoningEffort: ["minimal", "low", "medium", "high"],
reasoningEffort: "medium",
inputPrice: 0.05,
outputPrice: 0.4,
cacheReadsPrice: 0.005,
supportsVerbosity: true,
supportsTemperature: false,
tiers: [{ name: "flex", contextWindow: 400000, inputPrice: 0.025, outputPrice: 0.2, cacheReadsPrice: 0.0025 }],
description: "GPT-5 Nano: Fastest, most cost-efficient version of GPT-5",
},
} as const satisfies Record<string, ModelInfo>

export const openAiModelInfoSaneDefaults: ModelInfo = {
@@ -316,6 +411,5 @@ export const openAiModelInfoSaneDefaults: ModelInfo = {
export const azureOpenAiDefaultApiVersion = "2024-08-01-preview"

export const OPENAI_NATIVE_DEFAULT_TEMPERATURE = 0
export const GPT5_DEFAULT_TEMPERATURE = 1.0

export const OPENAI_AZURE_AI_INFERENCE_PATH = "/models/chat/completions"
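As a rough usage note (not part of the PR), the per-model price fields above can be combined into a cost estimate. This assumes, as is conventional for these model tables, that `inputPrice`, `outputPrice`, and `cacheReadsPrice` are USD per million tokens; the figures and helper are illustrative only:

```typescript
// Cost sketch against the gpt-5.1 base-tier pricing shown above.
const pricing = { inputPrice: 1.25, outputPrice: 10.0, cacheReadsPrice: 0.125 }

function estimateCostUsd(uncachedInputTokens: number, cachedInputTokens: number, outputTokens: number): number {
	return (
		(uncachedInputTokens / 1_000_000) * pricing.inputPrice +
		(cachedInputTokens / 1_000_000) * pricing.cacheReadsPrice +
		(outputTokens / 1_000_000) * pricing.outputPrice
	)
}

// e.g. 80k uncached input + 200k cached input + 15k output ≈ $0.275
console.log(estimateCostUsd(80_000, 200_000, 15_000).toFixed(3))
```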