From 3dfd56c59ee6ce39218dde4a95ae3aef0c58b3f6 Mon Sep 17 00:00:00 2001 From: Soby Chacko Date: Wed, 20 Aug 2025 20:47:43 -0400 Subject: [PATCH] GH-1403: Add Anthropic prompt caching via AnthropicChatOptions - Add cacheControl field to AnthropicChatOptions with builder method - Create AnthropicCacheType enum with EPHEMERAL type for type-safe cache creation - Update AnthropicChatModel.createRequest() to apply cache control from options to user message ContentBlocks - Extend ContentBlock record with cacheControl parameter and constructor for API compatibility - Update Usage record to include cacheCreationInputTokens and cacheReadInputTokens fields - Update StreamHelper to handle new Usage constructor with cache token parameters - Add AnthropicApiIT.chatWithPromptCache() test for low-level API validation - Add AnthropicChatModelIT.chatWithPromptCacheViaOptions() integration test - Add comprehensive unit tests for AnthropicChatOptions cache control functionality - Update documentation with cacheControl() method examples and usage patterns Cache control is configured through AnthropicChatOptions rather than message classes to maintain provider portability. The cache control gets applied during request creation in AnthropicChatModel when building ContentBlocks for user messages. 
Original implementation provided by @Claudio-code (Claudio Silva Junior) See https://github.com/spring-projects/spring-ai/pull/4139/commits/15e50263e515312c159d4176a2914f760bcce465 Fixes https://github.com/spring-projects/spring-ai/issues/1403 Signed-off-by: Soby Chacko --- .../ai/anthropic/AnthropicChatModel.java | 16 +- .../ai/anthropic/AnthropicChatOptions.java | 29 ++- .../ai/anthropic/api/AnthropicApi.java | 37 +++- .../ai/anthropic/api/AnthropicCacheType.java | 57 ++++++ .../ai/anthropic/api/StreamHelper.java | 12 +- .../ai/anthropic/AnthropicChatModelIT.java | 54 ++++++ .../anthropic/AnthropicChatOptionsTests.java | 108 +++++++++++ .../ai/anthropic/api/AnthropicApiIT.java | 35 ++++ .../ROOT/pages/api/chat/anthropic-chat.adoc | 175 ++++++++++++++++++ 9 files changed, 506 insertions(+), 17 deletions(-) create mode 100644 models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java index 5ea1195c3a7..0485e552584 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatModel.java @@ -483,12 +483,25 @@ private Map mergeHttpHeaders(Map runtimeHttpHead ChatCompletionRequest createRequest(Prompt prompt, boolean stream) { + // Get cache control from options + AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions(); + AnthropicApi.ChatCompletionRequest.CacheControl cacheControl = (requestOptions != null) + ? 
requestOptions.getCacheControl() : null; + List userMessages = prompt.getInstructions() .stream() .filter(message -> message.getMessageType() != MessageType.SYSTEM) .map(message -> { if (message.getMessageType() == MessageType.USER) { - List contents = new ArrayList<>(List.of(new ContentBlock(message.getText()))); + List contents = new ArrayList<>(); + + // Apply cache control if enabled for user messages + if (cacheControl != null) { + contents.add(new ContentBlock(message.getText(), cacheControl)); + } + else { + contents.add(new ContentBlock(message.getText())); + } if (message instanceof UserMessage userMessage) { if (!CollectionUtils.isEmpty(userMessage.getMedia())) { List mediaContent = userMessage.getMedia().stream().map(media -> { @@ -538,7 +551,6 @@ else if (message.getMessageType() == MessageType.TOOL) { ChatCompletionRequest request = new ChatCompletionRequest(this.defaultOptions.getModel(), userMessages, systemPrompt, this.defaultOptions.getMaxTokens(), this.defaultOptions.getTemperature(), stream); - AnthropicChatOptions requestOptions = (AnthropicChatOptions) prompt.getOptions(); request = ModelOptionsUtils.merge(requestOptions, request, ChatCompletionRequest.class); // Add the tool definitions to the request's tools parameter. 
diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java index dbfbee561c8..16421eb04d0 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java @@ -44,6 +44,7 @@ * @author Thomas Vitale * @author Alexandros Pappas * @author Ilayaperumal Gopinathan + * @author Soby Chacko * @since 1.0.0 */ @JsonInclude(Include.NON_NULL) @@ -59,6 +60,20 @@ public class AnthropicChatOptions implements ToolCallingChatOptions { private @JsonProperty("top_k") Integer topK; private @JsonProperty("thinking") ChatCompletionRequest.ThinkingConfig thinking; + /** + * Cache control for user messages. When set, enables caching for user messages. + * Uses the existing CacheControl record from AnthropicApi.ChatCompletionRequest. + */ + private @JsonProperty("cache_control") ChatCompletionRequest.CacheControl cacheControl; + + public ChatCompletionRequest.CacheControl getCacheControl() { + return this.cacheControl; + } + + public void setCacheControl(ChatCompletionRequest.CacheControl cacheControl) { + this.cacheControl = cacheControl; + } + /** * Collection of {@link ToolCallback}s to be used for tool calling in the chat * completion requests. @@ -111,6 +126,7 @@ public static AnthropicChatOptions fromOptions(AnthropicChatOptions fromOptions) .internalToolExecutionEnabled(fromOptions.getInternalToolExecutionEnabled()) .toolContext(fromOptions.getToolContext() != null ? new HashMap<>(fromOptions.getToolContext()) : null) .httpHeaders(fromOptions.getHttpHeaders() != null ? 
new HashMap<>(fromOptions.getHttpHeaders()) : null) + .cacheControl(fromOptions.getCacheControl()) .build(); } @@ -282,14 +298,15 @@ public boolean equals(Object o) { && Objects.equals(this.toolNames, that.toolNames) && Objects.equals(this.internalToolExecutionEnabled, that.internalToolExecutionEnabled) && Objects.equals(this.toolContext, that.toolContext) - && Objects.equals(this.httpHeaders, that.httpHeaders); + && Objects.equals(this.httpHeaders, that.httpHeaders) + && Objects.equals(this.cacheControl, that.cacheControl); } @Override public int hashCode() { return Objects.hash(this.model, this.maxTokens, this.metadata, this.stopSequences, this.temperature, this.topP, this.topK, this.thinking, this.toolCallbacks, this.toolNames, this.internalToolExecutionEnabled, - this.toolContext, this.httpHeaders); + this.toolContext, this.httpHeaders, this.cacheControl); } public static class Builder { @@ -389,6 +406,14 @@ public Builder httpHeaders(Map httpHeaders) { return this; } + /** + * Set cache control for user messages + */ + public Builder cacheControl(ChatCompletionRequest.CacheControl cacheControl) { + this.options.cacheControl = cacheControl; + return this; + } + public AnthropicChatOptions build() { return this.options; } diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java index b573ff8a139..e7bb4d0406f 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicApi.java @@ -35,6 +35,7 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl; import org.springframework.ai.anthropic.api.StreamHelper.ChatCompletionResponseBuilder; import 
org.springframework.ai.model.ApiKey; import org.springframework.ai.model.ChatModelDescription; @@ -65,6 +66,7 @@ * @author Jonghoon Park * @author Claudio Silva Junior * @author Filip Hrisafov + * @author Soby Chacko * @since 1.0.0 */ public final class AnthropicApi { @@ -557,6 +559,14 @@ public record Metadata(@JsonProperty("user_id") String userId) { } + /** + * @param type is the cache type supported by anthropic. Doc + */ + @JsonInclude(Include.NON_NULL) + public record CacheControl(String type) { + } + /** * Configuration for the model's thinking mode. * @@ -763,8 +773,11 @@ public record ContentBlock( @JsonProperty("thinking") String thinking, // Redacted Thinking only - @JsonProperty("data") String data - ) { + @JsonProperty("data") String data, + + // cache object + @JsonProperty("cache_control") CacheControl cacheControl + ) { // @formatter:on /** @@ -782,7 +795,7 @@ public ContentBlock(String mediaType, String data) { * @param source The source of the content. */ public ContentBlock(Type type, Source source) { - this(type, source, null, null, null, null, null, null, null, null, null, null); + this(type, source, null, null, null, null, null, null, null, null, null, null, null); } /** @@ -790,7 +803,7 @@ public ContentBlock(Type type, Source source) { * @param source The source of the content. */ public ContentBlock(Source source) { - this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null); + this(Type.IMAGE, source, null, null, null, null, null, null, null, null, null, null, null); } /** @@ -798,7 +811,11 @@ public ContentBlock(Source source) { * @param text The text of the content. 
*/ public ContentBlock(String text) { - this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null); + this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, null); + } + + public ContentBlock(String text, CacheControl cache) { + this(Type.TEXT, null, text, null, null, null, null, null, null, null, null, null, cache); } // Tool result @@ -809,7 +826,7 @@ public ContentBlock(String text) { * @param content The content of the tool result. */ public ContentBlock(Type type, String toolUseId, String content) { - this(type, null, null, null, null, null, null, toolUseId, content, null, null, null); + this(type, null, null, null, null, null, null, toolUseId, content, null, null, null, null); } /** @@ -820,7 +837,7 @@ public ContentBlock(Type type, String toolUseId, String content) { * @param index The index of the content block. */ public ContentBlock(Type type, Source source, String text, Integer index) { - this(type, source, text, index, null, null, null, null, null, null, null, null); + this(type, source, text, index, null, null, null, null, null, null, null, null, null); } // Tool use input JSON delta streaming @@ -832,7 +849,7 @@ public ContentBlock(Type type, Source source, String text, Integer index) { * @param input The input of the tool use. 
*/ public ContentBlock(Type type, String id, String name, Map input) { - this(type, null, null, null, id, name, input, null, null, null, null, null); + this(type, null, null, null, id, name, input, null, null, null, null, null, null); } /** @@ -1026,7 +1043,9 @@ public record ChatCompletionResponse( public record Usage( // @formatter:off @JsonProperty("input_tokens") Integer inputTokens, - @JsonProperty("output_tokens") Integer outputTokens) { + @JsonProperty("output_tokens") Integer outputTokens, + @JsonProperty("cache_creation_input_tokens") Integer cacheCreationInputTokens, + @JsonProperty("cache_read_input_tokens") Integer cacheReadInputTokens) { // @formatter:off } diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java new file mode 100644 index 00000000000..0348670573a --- /dev/null +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheType.java @@ -0,0 +1,57 @@ +/* + * Copyright 2025-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.springframework.ai.anthropic.api; + +import java.util.function.Supplier; + +import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl; + +/** + * Cache types supported by Anthropic's prompt caching feature. + * + *

+ * Prompt caching allows reusing frequently used prompts to reduce costs and improve + * response times for repeated interactions. + * + * @see Anthropic Prompt + * Caching + * @author Claudio Silva Junior + * @author Soby Chacko + */ +public enum AnthropicCacheType { + + /** + * Ephemeral cache with 5-minute lifetime, refreshed on each use. + */ + EPHEMERAL(() -> new CacheControl("ephemeral")); + + private final Supplier value; + + AnthropicCacheType(Supplier value) { + this.value = value; + } + + /** + * Returns a new CacheControl instance for this cache type. + * @return a CacheControl instance configured for this cache type + */ + public CacheControl cacheControl() { + return this.value.get(); + } + +} diff --git a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java index 673685e6d13..ca519a11d0e 100644 --- a/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java +++ b/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/StreamHelper.java @@ -55,6 +55,8 @@ * @author Christian Tzolov * @author Jihoon Kim * @author Alexandros Pappas + * @author Claudio Silva Junior + * @author Soby Chacko * @since 1.0.0 */ public class StreamHelper { @@ -159,7 +161,7 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_START)) { } else if (contentBlockStartEvent.contentBlock() instanceof ContentBlockThinking thinkingBlock) { ContentBlock cb = new ContentBlock(Type.THINKING, null, null, contentBlockStartEvent.index(), null, - null, null, null, null, null, thinkingBlock.thinking(), null); + null, null, null, null, null, thinkingBlock.thinking(), null, null); contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb)); } else { @@ -176,12 +178,12 @@ else if (event.type().equals(EventType.CONTENT_BLOCK_DELTA)) { } else if (contentBlockDeltaEvent.delta() 
instanceof ContentBlockDeltaThinking thinking) { ContentBlock cb = new ContentBlock(Type.THINKING_DELTA, null, null, contentBlockDeltaEvent.index(), - null, null, null, null, null, null, thinking.thinking(), null); + null, null, null, null, null, null, thinking.thinking(), null, null); contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb)); } else if (contentBlockDeltaEvent.delta() instanceof ContentBlockDeltaSignature sig) { ContentBlock cb = new ContentBlock(Type.SIGNATURE_DELTA, null, null, contentBlockDeltaEvent.index(), - null, null, null, null, null, sig.signature(), null, null); + null, null, null, null, null, sig.signature(), null, null, null); contentBlockReference.get().withType(event.type().name()).withContent(List.of(cb)); } else { @@ -205,7 +207,9 @@ else if (event.type().equals(EventType.MESSAGE_DELTA)) { if (messageDeltaEvent.usage() != null) { Usage totalUsage = new Usage(contentBlockReference.get().usage.inputTokens(), - messageDeltaEvent.usage().outputTokens()); + messageDeltaEvent.usage().outputTokens(), + contentBlockReference.get().usage.cacheCreationInputTokens(), + contentBlockReference.get().usage.cacheReadInputTokens()); contentBlockReference.get().withUsage(totalUsage); } } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java index 6570d5ee6a6..c522f75cf4b 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatModelIT.java @@ -32,6 +32,7 @@ import reactor.core.publisher.Flux; import org.springframework.ai.anthropic.api.AnthropicApi; +import org.springframework.ai.anthropic.api.AnthropicCacheType; import org.springframework.ai.anthropic.api.tool.MockWeatherService; import 
org.springframework.ai.chat.client.ChatClient; import org.springframework.ai.chat.messages.AssistantMessage; @@ -491,6 +492,59 @@ void testToolUseContentBlock() { } } + @Test + void chatWithPromptCacheViaOptions() { + String userMessageText = "It could be either a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which " + + "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" " + + "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English " + + "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". " + + "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\""; + + // Repeat content to meet minimum token requirements for caching (1024+ tokens) + String largeContent = userMessageText.repeat(20); + + // First request - should create cache + ChatResponse firstResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)), + AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue()) + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .maxTokens(100) + .temperature(0.8) + .build())); + + // Access native Anthropic usage data + AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata().getUsage().getNativeUsage(); + + // Verify first request created cache + assertThat(firstUsage.cacheCreationInputTokens()).isGreaterThan(0); + assertThat(firstUsage.cacheReadInputTokens()).isEqualTo(0); + + // Second request with identical content - should read from cache + ChatResponse secondResponse = this.chatModel.call(new Prompt(List.of(new UserMessage(largeContent)), + AnthropicChatOptions.builder() + .model(AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue()) + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .maxTokens(100) + .temperature(0.8) + .build())); + 
+ // Access native Anthropic usage data + AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata().getUsage().getNativeUsage(); + + // Verify second request used cache + assertThat(secondUsage.cacheCreationInputTokens()).isEqualTo(0); + assertThat(secondUsage.cacheReadInputTokens()).isGreaterThan(0); + + // Both responses should be valid + assertThat(firstResponse.getResult().getOutput().getText()).isNotBlank(); + assertThat(secondResponse.getResult().getOutput().getText()).isNotBlank(); + + logger.info("First request - Cache creation: {}, Cache read: {}", firstUsage.cacheCreationInputTokens(), + firstUsage.cacheReadInputTokens()); + logger.info("Second request - Cache creation: {}, Cache read: {}", secondUsage.cacheCreationInputTokens(), + secondUsage.cacheReadInputTokens()); + } + record ActorsFilmsRecord(String actor, List movies) { } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java index d9470070e95..6cc4c689022 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java +++ b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicChatOptionsTests.java @@ -22,7 +22,9 @@ import org.junit.jupiter.api.Test; +import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.CacheControl; import org.springframework.ai.anthropic.api.AnthropicApi.ChatCompletionRequest.Metadata; +import org.springframework.ai.anthropic.api.AnthropicCacheType; import static org.assertj.core.api.Assertions.assertThat; @@ -30,6 +32,7 @@ * Tests for {@link AnthropicChatOptions}. 
* * @author Alexandros Pappas + * @author Soby Chacko */ class AnthropicChatOptionsTests { @@ -471,4 +474,109 @@ void testSetterOverwriteBehavior() { assertThat(options.getMaxTokens()).isEqualTo(10); } + @Test + void testCacheControlBuilder() { + CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions options = AnthropicChatOptions.builder() + .model("test-model") + .cacheControl(cacheControl) + .build(); + + assertThat(options.getCacheControl()).isEqualTo(cacheControl); + assertThat(options.getCacheControl().type()).isEqualTo("ephemeral"); + } + + @Test + void testCacheControlDefaultValue() { + AnthropicChatOptions options = new AnthropicChatOptions(); + assertThat(options.getCacheControl()).isNull(); + } + + @Test + void testCacheControlEqualsAndHashCode() { + CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions options1 = AnthropicChatOptions.builder() + .model("test-model") + .cacheControl(cacheControl) + .build(); + + AnthropicChatOptions options2 = AnthropicChatOptions.builder() + .model("test-model") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .build(); + + AnthropicChatOptions options3 = AnthropicChatOptions.builder().model("test-model").build(); + + assertThat(options1).isEqualTo(options2); + assertThat(options1.hashCode()).isEqualTo(options2.hashCode()); + + assertThat(options1).isNotEqualTo(options3); + assertThat(options1.hashCode()).isNotEqualTo(options3.hashCode()); + } + + @Test + void testCacheControlCopy() { + CacheControl originalCacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions original = AnthropicChatOptions.builder() + .model("test-model") + .cacheControl(originalCacheControl) + .build(); + + AnthropicChatOptions copied = original.copy(); + + assertThat(copied).isNotSameAs(original).isEqualTo(original); + assertThat(copied.getCacheControl()).isEqualTo(original.getCacheControl()); + 
assertThat(copied.getCacheControl()).isEqualTo(originalCacheControl); + } + + @Test + void testCacheControlWithNullValue() { + AnthropicChatOptions options = AnthropicChatOptions.builder().model("test-model").cacheControl(null).build(); + + assertThat(options.getCacheControl()).isNull(); + } + + @Test + void testBuilderWithAllFieldsIncludingCacheControl() { + CacheControl cacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions options = AnthropicChatOptions.builder() + .model("test-model") + .maxTokens(100) + .stopSequences(List.of("stop1", "stop2")) + .temperature(0.7) + .topP(0.8) + .topK(50) + .metadata(new Metadata("userId_123")) + .cacheControl(cacheControl) + .build(); + + assertThat(options) + .extracting("model", "maxTokens", "stopSequences", "temperature", "topP", "topK", "metadata", + "cacheControl") + .containsExactly("test-model", 100, List.of("stop1", "stop2"), 0.7, 0.8, 50, new Metadata("userId_123"), + cacheControl); + } + + @Test + void testCacheControlMutationDoesNotAffectOriginal() { + CacheControl originalCacheControl = AnthropicCacheType.EPHEMERAL.cacheControl(); + + AnthropicChatOptions original = AnthropicChatOptions.builder() + .model("original-model") + .cacheControl(originalCacheControl) + .build(); + + AnthropicChatOptions copy = original.copy(); + copy.setCacheControl(null); + + // Original should remain unchanged + assertThat(original.getCacheControl()).isEqualTo(originalCacheControl); + // Copy should have null cache control + assertThat(copy.getCacheControl()).isNull(); + } + } diff --git a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java index c78386fb7ce..d6800dbdb74 100644 --- a/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java +++ 
b/models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/api/AnthropicApiIT.java @@ -44,6 +44,8 @@ * @author Christian Tzolov * @author Jihoon Kim * @author Alexandros Pappas + * @author Claudio Silva Junior + * @author Soby Chacko */ @EnabledIfEnvironmentVariable(named = "ANTHROPIC_API_KEY", matches = ".+") public class AnthropicApiIT { @@ -70,6 +72,39 @@ public class AnthropicApiIT { } """))); + @Test + void chatWithPromptCache() { + String userMessageText = "It could be either a contraction of the full title Quenta Silmarillion (\"Tale of the Silmarils\") or also a plain Genitive which " + + "(as in Ancient Greek) signifies reference. This genitive is translated in English with \"about\" or \"of\" " + + "constructions; the titles of the chapters in The Silmarillion are examples of this genitive in poetic English " + + "(Of the Sindar, Of Men, Of the Darkening of Valinor etc), where \"of\" means \"about\" or \"concerning\". " + + "In the same way, Silmarillion can be taken to mean \"Of/About the Silmarils\""; + + AnthropicMessage chatCompletionMessage = new AnthropicMessage( + List.of(new ContentBlock(userMessageText.repeat(20), AnthropicCacheType.EPHEMERAL.cacheControl())), + Role.USER); + + ChatCompletionRequest chatCompletionRequest = new ChatCompletionRequest( + AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), List.of(chatCompletionMessage), null, 100, 0.8, + false); + + // First request - creates cache + AnthropicApi.Usage createdCacheToken = this.anthropicApi.chatCompletionEntity(chatCompletionRequest) + .getBody() + .usage(); + + assertThat(createdCacheToken.cacheCreationInputTokens()).isGreaterThan(0); + assertThat(createdCacheToken.cacheReadInputTokens()).isEqualTo(0); + + // Second request - reads from cache (same request) + AnthropicApi.Usage readCacheToken = this.anthropicApi.chatCompletionEntity(chatCompletionRequest) + .getBody() + .usage(); + + assertThat(readCacheToken.cacheCreationInputTokens()).isEqualTo(0); + 
assertThat(readCacheToken.cacheReadInputTokens()).isGreaterThan(0); + } + @Test void chatCompletionEntity() { diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc index 2094ab4ee17..f8d08b31e8a 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/anthropic-chat.adoc @@ -191,6 +191,181 @@ ChatResponse response = chatModel.call( TIP: In addition to the model specific https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/AnthropicChatOptions.java[AnthropicChatOptions] you can use a portable link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/ChatOptions.java[ChatOptions] instance, created with the link:https://github.com/spring-projects/spring-ai/blob/main/spring-ai-model/src/main/java/org/springframework/ai/chat/prompt/DefaultChatOptionsBuilder.java[ChatOptions#builder()]. +== Prompt Caching + +Anthropic's prompt caching feature allows you to cache frequently used prompts to reduce costs and improve response times for repeated interactions. +When you cache a prompt, subsequent identical requests can reuse the cached content, significantly reducing the number of input tokens processed. + +[NOTE] +==== +*Supported Models* + +Prompt caching is currently supported on Claude Opus 4, Claude Sonnet 4, Claude Sonnet 3.7, Claude Sonnet 3.5, Claude Haiku 3.5, Claude Haiku 3, and Claude Opus 3. 
+==== + +=== Cache Types + +Spring AI supports Anthropic's cache types through the `AnthropicCacheType` enum: + +* `EPHEMERAL`: Temporary caching suitable for short-term reuse within a session + +=== Enabling Prompt Caching + +To enable prompt caching, use the `cacheControl()` method in `AnthropicChatOptions`: + +==== Basic Usage + +[source,java] +---- +// Enable caching with ephemeral type +ChatResponse response = chatModel.call( + new Prompt( + List.of(new UserMessage("Large content to be cached...")), + AnthropicChatOptions.builder() + .model("claude-3-5-sonnet-latest") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .build() + ) +); +---- + +==== Using ChatClient Fluent API + +[source,java] +---- +String response = ChatClient.create(chatModel) + .prompt() + .user("Analyze this large document: " + document) + .options(AnthropicChatOptions.builder() + .model("claude-3-5-sonnet-latest") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .build()) + .call() + .content(); +---- + +=== Usage Example + +Here's a complete example demonstrating prompt caching with cost tracking: + +[source,java] +---- +// Create content that will be reused multiple times +String largeContent = "Large document content that meets minimum token requirements..."; + +// First request - creates cache +ChatResponse firstResponse = chatModel.call( + new Prompt( + List.of(new UserMessage(largeContent)), + AnthropicChatOptions.builder() + .model("claude-3-haiku-20240307") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .maxTokens(100) + .build() + ) +); + +// Access cache-related token usage +AnthropicApi.Usage firstUsage = (AnthropicApi.Usage) firstResponse.getMetadata() + .getUsage().getNativeUsage(); + +System.out.println("Cache creation tokens: " + firstUsage.cacheCreationInputTokens()); +System.out.println("Cache read tokens: " + firstUsage.cacheReadInputTokens()); + +// Second request with identical content - reads from cache +ChatResponse 
secondResponse = chatModel.call( + new Prompt( + List.of(new UserMessage(largeContent)), + AnthropicChatOptions.builder() + .model("claude-3-haiku-20240307") + .cacheControl(AnthropicCacheType.EPHEMERAL.cacheControl()) + .maxTokens(100) + .build() + ) +); + +AnthropicApi.Usage secondUsage = (AnthropicApi.Usage) secondResponse.getMetadata() + .getUsage().getNativeUsage(); + +System.out.println("Cache creation tokens: " + secondUsage.cacheCreationInputTokens()); +System.out.println("Cache read tokens: " + secondUsage.cacheReadInputTokens()); +---- + +=== Token Usage Tracking + +The `Usage` record provides detailed information about cache-related token consumption. +To access Anthropic-specific cache metrics, use the `getNativeUsage()` method: + +[source,java] +---- +AnthropicApi.Usage usage = (AnthropicApi.Usage) response.getMetadata() + .getUsage().getNativeUsage(); +---- + +Cache-specific metrics include: + +* `cacheCreationInputTokens()`: Returns the number of tokens used when creating a cache entry +* `cacheReadInputTokens()`: Returns the number of tokens read from an existing cache entry + +When you first send a cached prompt: +- `cacheCreationInputTokens()` will be greater than 0 +- `cacheReadInputTokens()` will be 0 + +When you send the same cached prompt again: +- `cacheCreationInputTokens()` will be 0 +- `cacheReadInputTokens()` will be greater than 0 + +=== Best Practices + +1. **Cache Long Prompts**: Focus on caching prompts that meet the minimum token requirements (1024+ tokens for most models, 2048+ for Haiku models). + +2. **Reuse Identical Content**: Caching works best with exact matches of prompt content. +Even small changes will require a new cache entry. + +3. **Monitor Token Usage**: Use the enhanced usage statistics to track cache effectiveness and optimize your caching strategy. + +4. **Place Static Content First**: Position cached content (system instructions, context, examples) at the beginning of your prompt for optimal performance. + +5. 
**5-Minute Cache Lifetime**: Ephemeral caches expire after 5 minutes of inactivity. +Each time cached content is accessed, the 5-minute timer resets. + +=== Low-level API Usage + +When using the low-level `AnthropicApi` directly, you can specify cache control through the `ContentBlock` constructor: + +[source,java] +---- +// Create content block with cache control +ContentBlock cachedContent = new ContentBlock( + "", + AnthropicCacheType.EPHEMERAL.cacheControl() +); + +AnthropicMessage message = new AnthropicMessage( + List.of(cachedContent), + Role.USER +); + +ChatCompletionRequest request = new ChatCompletionRequest( + AnthropicApi.ChatModel.CLAUDE_3_HAIKU.getValue(), + List.of(message), + null, 100, 0.8, false +); + +ResponseEntity response = anthropicApi.chatCompletionEntity(request); + +// Access cache-related token usage +Usage usage = response.getBody().usage(); +System.out.println("Cache creation tokens: " + usage.cacheCreationInputTokens()); +System.out.println("Cache read tokens: " + usage.cacheReadInputTokens()); +---- + +=== Implementation Details + +Cache control is configured through `AnthropicChatOptions` rather than individual messages. +This preserves compatibility when switching between different AI providers. +The cache control gets applied during request creation in `AnthropicChatModel`. + == Thinking Anthropic Claude models support a "thinking" feature that allows the model to show its reasoning process before providing a final answer. This feature enables more transparent and detailed problem-solving, particularly for complex questions that require step-by-step reasoning.