Skip to content

Commit 38ea4ff

Browse files
committed
Anthropic Prompt Caching: Fix SYSTEM_ONLY cache strategy to not explicitly cache tools
Squashed 2 commits from PR #4537 Signed-off-by: Soby Chacko <soby.chacko@broadcom.com>
1 parent a897177 commit 38ea4ff

File tree

5 files changed

+466
-37
lines changed

5 files changed

+466
-37
lines changed

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java

Lines changed: 78 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,31 +22,104 @@
2222
* system → messages.
2323
*
2424
* @author Mark Pollack
25+
* @author Soby Chacko
2526
* @since 1.1.0
2627
*/
2728
public enum AnthropicCacheStrategy {
2829

2930
/**
30-
* No caching (default behavior).
31+
* No caching (default behavior). All content is processed fresh on each request.
32+
* <p>
33+
* Use this when:
34+
* <ul>
35+
* <li>Requests are one-off or highly variable</li>
36+
* <li>Content doesn't meet minimum token requirements (1024+ tokens)</li>
37+
* <li>You want to avoid caching overhead</li>
38+
* </ul>
3139
*/
3240
NONE,
3341

42+
/**
43+
* Cache tool definitions only. Places a cache breakpoint on the last tool, while
44+
* system messages and conversation history remain uncached and are processed fresh on
45+
* each request.
46+
* <p>
47+
* Use this when:
48+
* <ul>
49+
* <li>Tool definitions are large and stable (5000+ tokens)</li>
50+
* <li>System prompts change frequently or are small (&lt;500 tokens)</li>
51+
* <li>You want to share cached tools across different system contexts (e.g.,
52+
* multi-tenant applications, A/B testing system prompts)</li>
53+
* <li>Tool definitions rarely change</li>
54+
* </ul>
55+
* <p>
56+
* <strong>Important:</strong> Changing any tool definition will invalidate this cache
57+
* entry. Due to Anthropic's cascade invalidation, tool changes will also invalidate
58+
* any downstream cache breakpoints (system, messages) if used in combination with
59+
* other strategies.
60+
*/
61+
TOOLS_ONLY,
62+
3463
/**
3564
* Cache system instructions only. Places a cache breakpoint on the system message
36-
* content.
65+
* content. Tools are cached implicitly via Anthropic's automatic ~20-block lookback
66+
* mechanism (content before the cache breakpoint is included in the cache).
67+
* <p>
68+
* Use this when:
69+
* <ul>
70+
* <li>System prompts are large and stable (1024+ tokens)</li>
71+
* <li>The set of tool definitions is relatively small (&lt;20 tools)</li>
72+
* <li>You want simple, single-breakpoint caching</li>
73+
* </ul>
74+
* <p>
75+
* <strong>Note:</strong> Changing tools will invalidate the cache since tools are
76+
* part of the cache prefix (they appear before system in the request hierarchy).
3777
*/
3878
SYSTEM_ONLY,
3979

4080
/**
4181
* Cache system instructions and tool definitions. Places cache breakpoints on the
42-
* last tool and system message content.
82+
* last tool (breakpoint 1) and system message content (breakpoint 2).
83+
* <p>
84+
* Use this when:
85+
* <ul>
86+
* <li>Both tools and system prompts are large and stable</li>
87+
* <li>You have many tools (20+ tools, beyond the automatic lookback window)</li>
88+
* <li>You want deterministic, explicit caching of both components</li>
89+
* <li>System prompts may change independently of tools</li>
90+
* </ul>
91+
* <p>
92+
* <strong>Behavior:</strong>
93+
* <ul>
94+
* <li>If only tools change: Both caches invalidated (tools + system)</li>
95+
* <li>If only system changes: Tools cache remains valid, system cache
96+
* invalidated</li>
97+
* </ul>
98+
* This allows efficient reuse of tool cache when only system prompts are updated.
4399
*/
44100
SYSTEM_AND_TOOLS,
45101

46102
/**
47103
* Cache the entire conversation history up to (but not including) the current user
48-
* question. This is ideal for multi-turn conversations where you want to reuse the
49-
* conversation context while asking new questions.
104+
* question. Places a cache breakpoint on the last user message in the conversation
105+
* history, enabling incremental caching as the conversation grows.
106+
* <p>
107+
* Use this when:
108+
* <ul>
109+
* <li>Building multi-turn conversational applications (chatbots, assistants)</li>
110+
* <li>Conversation history is large and grows over time</li>
111+
* <li>You want to reuse conversation context while asking new questions</li>
112+
* <li>Using chat memory advisors or conversation persistence</li>
113+
* </ul>
114+
* <p>
115+
* <strong>Behavior:</strong> Each turn builds on the previous cached prefix. The
116+
* cache grows incrementally: Request 1 caches [Message1], Request 2 caches [Message1
117+
* + Message2], etc. This provides significant cost savings (90%+) and performance
118+
* improvements for long conversations.
119+
* <p>
120+
* <strong>Important:</strong> Changing tools or system prompts will invalidate the
121+
* entire conversation cache due to cascade invalidation. Tool and system stability is
122+
* critical for this strategy.
50123
*/
51124
CONVERSATION_HISTORY
52125

models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
* definition messages.
4040
*
4141
* @author Austin Dase
42+
* @author Soby Chacko
4243
* @since 1.1.0
4344
**/
4445
public class CacheEligibilityResolver {
@@ -84,6 +85,7 @@ private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrate
8485
return switch (anthropicCacheStrategy) {
8586
case NONE -> Set.of();
8687
case SYSTEM_ONLY, SYSTEM_AND_TOOLS -> Set.of(MessageType.SYSTEM);
88+
case TOOLS_ONLY -> Set.of(); // No message types cached, only tool definitions
8789
case CONVERSATION_HISTORY -> Set.of(MessageType.values());
8890
};
8991
}
@@ -108,11 +110,17 @@ public AnthropicApi.ChatCompletionRequest.CacheControl resolve(MessageType messa
108110
}
109111

110112
public AnthropicApi.ChatCompletionRequest.CacheControl resolveToolCacheControl() {
111-
// Tool definitions are only cache-eligible when caching is enabled and
112-
// the strategy includes SYSTEM messages (SYSTEM_ONLY, SYSTEM_AND_TOOLS, or
113-
// CONVERSATION_HISTORY). When NONE, tools must not be cached.
114-
if (!isCachingEnabled() || !this.cacheEligibleMessageTypes.contains(TOOL_DEFINITION_MESSAGE_TYPE)
115-
|| this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
113+
// Tool definitions are cache-eligible for TOOLS_ONLY, SYSTEM_AND_TOOLS, and
114+
// CONVERSATION_HISTORY strategies. SYSTEM_ONLY caches only system messages,
115+
// relying on Anthropic's cache hierarchy to implicitly cache tools.
116+
if (this.cacheStrategy != AnthropicCacheStrategy.TOOLS_ONLY
117+
&& this.cacheStrategy != AnthropicCacheStrategy.SYSTEM_AND_TOOLS
118+
&& this.cacheStrategy != AnthropicCacheStrategy.CONVERSATION_HISTORY) {
119+
logger.debug("Caching not enabled for tool definition, cacheStrategy={}", this.cacheStrategy);
120+
return null;
121+
}
122+
123+
if (this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
116124
logger.debug("Caching not enabled for tool definition, usedBreakpoints={}",
117125
this.cacheBreakpointTracker.getCount());
118126
return null;

models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,17 @@ void testSystemOnlyCacheStrategy() throws Exception {
104104
this.mockWebServer
105105
.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
106106

107+
// Create tool callback to verify that tools are NOT explicitly marked with cache_control under SYSTEM_ONLY
108+
var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
109+
MethodToolCallback toolCallback = MethodToolCallback.builder()
110+
.toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build())
111+
.toolMethod(toolMethod)
112+
.build();
113+
107114
// Test with SYSTEM_ONLY cache strategy
108115
AnthropicChatOptions options = AnthropicChatOptions.builder()
109116
.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
117+
.toolCallbacks(List.of(toolCallback))
110118
.build();
111119

112120
Prompt prompt = new Prompt(
@@ -130,6 +138,18 @@ void testSystemOnlyCacheStrategy() throws Exception {
130138
assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
131139
}
132140

141+
// If tools are present, verify that they DO NOT have cache_control (key
142+
// difference from SYSTEM_AND_TOOLS)
143+
if (requestBody.has("tools")) {
144+
JsonNode toolsArray = requestBody.get("tools");
145+
assertThat(toolsArray.isArray()).isTrue();
146+
// Verify NO tool has cache_control
147+
for (int i = 0; i < toolsArray.size(); i++) {
148+
JsonNode tool = toolsArray.get(i);
149+
assertThat(tool.has("cache_control")).isFalse();
150+
}
151+
}
152+
133153
// Verify response
134154
assertThat(response).isNotNull();
135155
assertThat(response.getResult().getOutput().getText()).contains("Hello!");

0 commit comments

Comments
 (0)