140 changes: 140 additions & 0 deletions src/api/providers/__tests__/anthropic-vertex.spec.ts
@@ -601,6 +601,146 @@ describe("VertexHandler", () => {
text: "Second thinking block",
})
})

it("should filter out internal reasoning blocks before sending to API", async () => {
handler = new AnthropicVertexHandler({
apiModelId: "claude-3-5-sonnet-v2@20241022",
vertexProjectId: "test-project",
vertexRegion: "us-central1",
})

const mockCreate = vitest.fn().mockImplementation(async (options) => {
return {
async *[Symbol.asyncIterator]() {
yield {
type: "message_start",
message: {
usage: {
input_tokens: 10,
output_tokens: 0,
},
},
}
yield {
type: "content_block_start",
index: 0,
content_block: {
type: "text",
text: "Response",
},
}
},
}
})
;(handler["client"].messages as any).create = mockCreate

// Messages with internal reasoning blocks (from stored conversation history)
const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Hello",
},
{
role: "assistant",
content: [
{
type: "reasoning" as any,
text: "This is internal reasoning that should be filtered",
},
{
type: "text",
text: "This is the response",
},
],
},
{
role: "user",
content: "Continue",
},
]

const stream = handler.createMessage(systemPrompt, messagesWithReasoning)
const chunks: ApiStreamChunk[] = []

for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify the API was called with filtered messages (no reasoning blocks)
const calledMessages = mockCreate.mock.calls[0][0].messages
expect(calledMessages).toHaveLength(3)

// Check user message 1
expect(calledMessages[0]).toMatchObject({
role: "user",
})

// Check assistant message - should have reasoning block filtered out
const assistantMessage = calledMessages.find((m: any) => m.role === "assistant")
expect(assistantMessage).toBeDefined()
expect(assistantMessage.content).toEqual([{ type: "text", text: "This is the response" }])

// Verify reasoning blocks were NOT sent to the API
expect(assistantMessage.content).not.toContainEqual(expect.objectContaining({ type: "reasoning" }))
})

it("should filter empty messages after removing all reasoning blocks", async () => {
handler = new AnthropicVertexHandler({
apiModelId: "claude-3-5-sonnet-v2@20241022",
vertexProjectId: "test-project",
vertexRegion: "us-central1",
})

const mockCreate = vitest.fn().mockImplementation(async (options) => {
return {
async *[Symbol.asyncIterator]() {
yield {
type: "message_start",
message: {
usage: {
input_tokens: 10,
output_tokens: 0,
},
},
}
},
}
})
;(handler["client"].messages as any).create = mockCreate

// Message with only reasoning content (should be completely filtered)
const messagesWithOnlyReasoning: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Hello",
},
{
role: "assistant",
content: [
{
type: "reasoning" as any,
text: "Only reasoning, no actual text",
},
],
},
{
role: "user",
content: "Continue",
},
]

const stream = handler.createMessage(systemPrompt, messagesWithOnlyReasoning)
const chunks: ApiStreamChunk[] = []

for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify empty message was filtered out
const calledMessages = mockCreate.mock.calls[0][0].messages
expect(calledMessages).toHaveLength(2) // Only the two user messages
expect(calledMessages.every((m: any) => m.role === "user")).toBe(true)
})
})

describe("completePrompt", () => {
95 changes: 95 additions & 0 deletions src/api/providers/__tests__/anthropic.spec.ts
@@ -289,4 +289,99 @@ describe("AnthropicHandler", () => {
expect(model.info.outputPrice).toBe(22.5)
})
})

describe("reasoning block filtering", () => {
const systemPrompt = "You are a helpful assistant."

it("should filter out internal reasoning blocks before sending to API", async () => {
handler = new AnthropicHandler({
apiKey: "test-api-key",
apiModelId: "claude-3-5-sonnet-20241022",
})

// Messages with internal reasoning blocks (from stored conversation history)
const messagesWithReasoning: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Hello",
},
{
role: "assistant",
content: [
{
type: "reasoning" as any,
text: "This is internal reasoning that should be filtered",
},
{
type: "text",
text: "This is the response",
},
],
},
{
role: "user",
content: "Continue",
},
]

const stream = handler.createMessage(systemPrompt, messagesWithReasoning)
const chunks: any[] = []

for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify the API was called with filtered messages (no reasoning blocks)
const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
expect(calledMessages).toHaveLength(3)

// Check assistant message - should have reasoning block filtered out
const assistantMessage = calledMessages.find((m: any) => m.role === "assistant")
expect(assistantMessage).toBeDefined()
expect(assistantMessage.content).toEqual([{ type: "text", text: "This is the response" }])

// Verify reasoning blocks were NOT sent to the API
expect(assistantMessage.content).not.toContainEqual(expect.objectContaining({ type: "reasoning" }))
})

it("should filter empty messages after removing all reasoning blocks", async () => {
handler = new AnthropicHandler({
apiKey: "test-api-key",
apiModelId: "claude-3-5-sonnet-20241022",
})

// Message with only reasoning content (should be completely filtered)
const messagesWithOnlyReasoning: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: "Hello",
},
{
role: "assistant",
content: [
{
type: "reasoning" as any,
text: "Only reasoning, no actual text",
},
],
},
{
role: "user",
content: "Continue",
},
]

const stream = handler.createMessage(systemPrompt, messagesWithOnlyReasoning)
const chunks: any[] = []

for await (const chunk of stream) {
chunks.push(chunk)
}

// Verify empty message was filtered out
const calledMessages = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0].messages
expect(calledMessages.length).toBe(2) // Only the two user messages
expect(calledMessages.every((m: any) => m.role === "user")).toBe(true)
})
})
})
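
Note: the new `src/api/transform/anthropic-filter.ts` module that both provider files below import is not expanded anywhere in this diff. Judging only from the test expectations above, a minimal sketch of what `filterNonAnthropicBlocks` has to do might look like the following; the exact allow-list of block types is an assumption, not the PR's actual list:

```ts
import { Anthropic } from "@anthropic-ai/sdk"

// Assumed allow-list; the real module may keep a different set of block types.
const ANTHROPIC_BLOCK_TYPES = new Set([
	"text",
	"image",
	"tool_use",
	"tool_result",
	"document",
	"thinking",
	"redacted_thinking",
])

export function filterNonAnthropicBlocks(
	messages: Anthropic.Messages.MessageParam[],
): Anthropic.Messages.MessageParam[] {
	return (
		messages
			.map((message) => {
				// String content has no blocks to filter.
				if (typeof message.content === "string") {
					return message
				}
				// Drop anything the Anthropic API does not accept (e.g. "reasoning").
				const content = message.content.filter((block) => ANTHROPIC_BLOCK_TYPES.has(block.type))
				return { ...message, content }
			})
			// Drop messages whose content emptied out (reasoning-only assistant turns).
			.filter((message) => typeof message.content === "string" || message.content.length > 0)
	)
}
```

This matches both test expectations: the mixed assistant turn keeps only its text block, and the reasoning-only assistant turn disappears entirely, leaving just the two user messages.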
12 changes: 11 additions & 1 deletion src/api/providers/anthropic-vertex.ts
@@ -16,6 +16,7 @@ import { safeJsonParse } from "../../shared/safeJsonParse"
import { ApiStream } from "../transform/stream"
import { addCacheBreakpoints } from "../transform/caching/vertex"
import { getModelParams } from "../transform/model-params"
import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"

import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
@@ -70,6 +71,9 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
reasoning: thinking,
} = this.getModel()

// Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API
const sanitizedMessages = filterNonAnthropicBlocks(messages)

/**
* Vertex API has specific limitations for prompt caching:
* 1. Maximum of 4 blocks can have cache_control
@@ -92,7 +96,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple
system: supportsPromptCache
? [{ text: systemPrompt, type: "text" as const, cache_control: { type: "ephemeral" } }]
: systemPrompt,
messages: supportsPromptCache ? addCacheBreakpoints(messages) : messages,
messages: supportsPromptCache ? addCacheBreakpoints(sanitizedMessages) : sanitizedMessages,
stream: true,
}

@@ -158,6 +162,12 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple

break
}
case "content_block_stop": {
// Block complete - no action needed for now.
// Note: Signature for multi-turn thinking would require using stream.finalMessage()
// after iteration completes, which requires restructuring the streaming approach.
break
}
}
}
}
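
The `content_block_stop` comment above points at a known gap: thinking-block signatures for multi-turn exchanges are only available on the assembled final message. A rough sketch of the restructuring it alludes to, assuming the SDK's `MessageStream` helper (`client.messages.stream(...)` plus `finalMessage()`) behaves the same on the Vertex client; this is not something the PR ships:

```ts
import { Anthropic } from "@anthropic-ai/sdk"

// Hypothetical restructuring, not part of this PR. The streaming helper keeps
// the assembled final message, so signed thinking blocks survive the stream.
async function streamWithSignatures(
	client: Anthropic,
	params: Anthropic.Messages.MessageCreateParamsNonStreaming,
) {
	const stream = client.messages.stream(params)

	for await (const event of stream) {
		// ...translate events into ApiStreamChunks exactly as the switch above does...
	}

	// Resolves once the stream has ended; thinking blocks on the final message
	// carry the signature that must be echoed back verbatim on the next turn.
	const finalMessage = await stream.finalMessage()
	return finalMessage.content.filter((block) => block.type === "thinking")
}
```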
13 changes: 10 additions & 3 deletions src/api/providers/anthropic.ts
@@ -14,6 +14,7 @@ import type { ApiHandlerOptions } from "../../shared/api"

import { ApiStream } from "../transform/stream"
import { getModelParams } from "../transform/model-params"
import { filterNonAnthropicBlocks } from "../transform/anthropic-filter"

import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
@@ -45,6 +46,9 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
const cacheControl: CacheControlEphemeral = { type: "ephemeral" }
let { id: modelId, betas = [], maxTokens, temperature, reasoning: thinking } = this.getModel()

// Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API
const sanitizedMessages = filterNonAnthropicBlocks(messages)

// Add 1M context beta flag if enabled for Claude Sonnet 4 and 4.5
if (
(modelId === "claude-sonnet-4-20250514" || modelId === "claude-sonnet-4-5") &&
@@ -75,7 +79,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
* know the last message to retrieve from the cache for the
* current request.
*/
const userMsgIndices = messages.reduce(
const userMsgIndices = sanitizedMessages.reduce(
(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
[] as number[],
)
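
Switching the reduce above from `messages` to `sanitizedMessages` is load-bearing: the filter can drop a reasoning-only assistant turn entirely, shifting every later index, so the breakpoint indices must be computed on the array that is actually sent. A toy illustration:

```ts
type Msg = { role: "user" | "assistant" }

// After filtering, a reasoning-only assistant turn that sat between the first
// two user messages is gone, so every later index has shifted down by one.
const sanitized: Msg[] = [{ role: "user" }, { role: "user" }, { role: "assistant" }, { role: "user" }]

const userMsgIndices = sanitized.reduce(
	(acc, msg, index) => (msg.role === "user" ? [...acc, index] : acc),
	[] as number[],
)

const lastUserMsgIndex = userMsgIndices[userMsgIndices.length - 1] ?? -1 // 3
const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 // 1
// Computed against the unfiltered five-message array these would be 4 and 2,
// attaching cache_control to the wrong turns in the request actually sent.
```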
@@ -91,7 +95,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
thinking,
// Setting cache breakpoint for system prompt so new tasks can reuse it.
system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }],
messages: messages.map((message, index) => {
messages: sanitizedMessages.map((message, index) => {
if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) {
return {
...message,
@@ -142,7 +146,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa
max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
temperature,
system: [{ text: systemPrompt, type: "text" }],
messages,
messages: sanitizedMessages,
stream: true,
})) as any
break
@@ -227,6 +231,9 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa

break
case "content_block_stop":
// Block complete - no action needed for now.
// Note: Signature for multi-turn thinking would require using stream.finalMessage()
// after iteration completes, which requires restructuring the streaming approach.
break
}
}
1 change: 1 addition & 0 deletions src/api/providers/gemini.ts
@@ -193,6 +193,7 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl
}

const params: GenerateContentParameters = { model, contents, config }

try {
const result = await this.client.models.generateContentStream(params)
