spring-projects
diff --git a/‎models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java‎
Lines changed: 78 additions & 5 deletions b/‎models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/AnthropicCacheStrategy.java‎
Lines changed: 78 additions & 5 deletions
diff --git a/‎models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java‎
Lines changed: 13 additions & 5 deletions b/‎models/spring-ai-anthropic/src/main/java/org/springframework/ai/anthropic/api/utils/CacheEligibilityResolver.java‎
Lines changed: 13 additions & 5 deletions
diff --git a/‎models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java‎
Lines changed: 20 additions & 0 deletions b/‎models/spring-ai-anthropic/src/test/java/org/springframework/ai/anthropic/AnthropicPromptCachingMockTest.java‎
Lines changed: 20 additions & 0 deletions
@@ -22,31 +22,104 @@
  * system → messages.
  *
  * @author Mark Pollack
+ * @author Soby Chacko
  * @since 1.1.0
  */
 public enum AnthropicCacheStrategy {
 
 	/**
-	 * No caching (default behavior).
+	 * No caching (default behavior). All content is processed fresh on each request.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Requests are one-off or highly variable</li>
+	 * <li>Content doesn't meet the model's minimum cacheable prompt length (1024 tokens
+	 * or more, depending on the model)</li>
+	 * <li>You want to avoid caching overhead</li>
+	 * </ul>
 	 */
 	NONE,
 
+	/**
+	 * Cache tool definitions only. Places a cache breakpoint on the last tool, while
+	 * system messages and conversation history remain uncached and are processed fresh on
+	 * each request.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Tool definitions are large and stable (5000+ tokens)</li>
+	 * <li>System prompts change frequently or are small (&lt;500 tokens)</li>
+	 * <li>You want to share cached tools across different system contexts (e.g.,
+	 * multi-tenant applications, A/B testing system prompts)</li>
+	 * <li>Tool definitions rarely change</li>
+	 * </ul>
+	 * <p>
+	 * <strong>Important:</strong> Changing any tool definition will invalidate this cache
+	 * entry. Due to Anthropic's cascade invalidation, tool changes will also invalidate
+	 * any downstream cache breakpoints (system, messages) if used in combination with
+	 * other strategies.
+	 */
+	TOOLS_ONLY,
+
 	/**
 	 * Cache system instructions only. Places a cache breakpoint on the system message
-	 * content.
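+	 * content. Tools are cached implicitly because they precede the system content in
+	 * the request hierarchy, so they are part of the cached prefix that ends at the
+	 * system breakpoint. (Anthropic's automatic ~20-block lookback only limits how far
+	 * back from a breakpoint earlier cache entries are searched for.)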
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>System prompts are large and stable (1024+ tokens)</li>
+	 * <li>Tool definitions are relatively small (&lt;20 tools)</li>
+	 * <li>You want simple, single-breakpoint caching</li>
+	 * </ul>
+	 * <p>
+	 * <strong>Note:</strong> Changing tools will invalidate the cache since tools are
+	 * part of the cache prefix (they appear before system in the request hierarchy).
 	 */
 	SYSTEM_ONLY,
 
 	/**
 	 * Cache system instructions and tool definitions. Places cache breakpoints on the
-	 * last tool and system message content.
+	 * last tool (breakpoint 1) and system message content (breakpoint 2).
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Both tools and system prompts are large and stable</li>
+	 * <li>You have many tools (20+ tools, beyond the automatic lookback window)</li>
+	 * <li>You want deterministic, explicit caching of both components</li>
+	 * <li>System prompts may change independently of tools</li>
+	 * </ul>
+	 * <p>
+	 * <strong>Behavior:</strong>
+	 * <ul>
+	 * <li>If only tools change: Both caches invalidated (tools + system)</li>
+	 * <li>If only system changes: Tools cache remains valid, system cache
+	 * invalidated</li>
+	 * </ul>
+	 * This allows efficient reuse of tool cache when only system prompts are updated.
 	 */
 	SYSTEM_AND_TOOLS,
 
 	/**
 	 * Cache the entire conversation history up to (but not including) the current user
-	 * question. This is ideal for multi-turn conversations where you want to reuse the
-	 * conversation context while asking new questions.
+	 * question. Places a cache breakpoint on the last user message in the conversation
+	 * history, enabling incremental caching as the conversation grows.
+	 * <p>
+	 * Use this when:
+	 * <ul>
+	 * <li>Building multi-turn conversational applications (chatbots, assistants)</li>
+	 * <li>Conversation history is large and grows over time</li>
+	 * <li>You want to reuse conversation context while asking new questions</li>
+	 * <li>Using chat memory advisors or conversation persistence</li>
+	 * </ul>
+	 * <p>
+	 * <strong>Behavior:</strong> Each turn builds on the previous cached prefix. The
+	 * cache grows incrementally: Request 1 caches [Message1], Request 2 caches [Message1
+	 * + Message2], etc. Tokens read from the cache are billed at a fraction of the
+	 * normal input price (up to 90% cheaper), which provides significant cost savings
+	 * and performance improvements for long conversations.
+	 * <p>
+	 * <strong>Important:</strong> Changing tools or system prompts will invalidate the
+	 * entire conversation cache due to cascade invalidation. Tool and system stability is
+	 * critical for this strategy.
 	 */
 	CONVERSATION_HISTORY
 
 
@@ -39,6 +39,7 @@
  * definition messages.
  *
  * @author Austin Dase
+ * @author Soby Chacko
  * @since 1.1.0
  **/
 public class CacheEligibilityResolver {
@@ -84,6 +85,7 @@ private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrate
 		return switch (anthropicCacheStrategy) {
 			case NONE -> Set.of();
 			case SYSTEM_ONLY, SYSTEM_AND_TOOLS -> Set.of(MessageType.SYSTEM);
+			case TOOLS_ONLY -> Set.of(); // No message types cached, only tool definitions
 			case CONVERSATION_HISTORY -> Set.of(MessageType.values());
 		};
 	}
@@ -108,11 +110,17 @@ public AnthropicApi.ChatCompletionRequest.CacheControl resolve(MessageType messa
 	}
 
 	public AnthropicApi.ChatCompletionRequest.CacheControl resolveToolCacheControl() {
-		// Tool definitions are only cache-eligible when caching is enabled and
-		// the strategy includes SYSTEM messages (SYSTEM_ONLY, SYSTEM_AND_TOOLS, or
-		// CONVERSATION_HISTORY). When NONE, tools must not be cached.
-		if (!isCachingEnabled() || !this.cacheEligibleMessageTypes.contains(TOOL_DEFINITION_MESSAGE_TYPE)
-				|| this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
+		// Tool definitions are cache-eligible for TOOLS_ONLY, SYSTEM_AND_TOOLS, and
+		// CONVERSATION_HISTORY strategies. SYSTEM_ONLY caches only system messages,
+		// relying on Anthropic's cache hierarchy to implicitly cache tools.
+		if (this.cacheStrategy != AnthropicCacheStrategy.TOOLS_ONLY
+				&& this.cacheStrategy != AnthropicCacheStrategy.SYSTEM_AND_TOOLS
+				&& this.cacheStrategy != AnthropicCacheStrategy.CONVERSATION_HISTORY) {
+			logger.debug("Caching not enabled for tool definition, cacheStrategy={}", this.cacheStrategy);
+			return null;
+		}
+
+		if (this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
 			logger.debug("Caching not enabled for tool definition, usedBreakpoints={}",
 					this.cacheBreakpointTracker.getCount());
 			return null;
 
@@ -104,9 +104,17 @@ void testSystemOnlyCacheStrategy() throws Exception {
 		this.mockWebServer
 			.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));
 
+		// Create tool callback to test that tools are NOT cached with SYSTEM_ONLY
+		var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
+		MethodToolCallback toolCallback = MethodToolCallback.builder()
+			.toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build())
+			.toolMethod(toolMethod)
+			.build();
+
 		// Test with SYSTEM_ONLY cache strategy
 		AnthropicChatOptions options = AnthropicChatOptions.builder()
 			.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
+			.toolCallbacks(List.of(toolCallback))
 			.build();
 
 		Prompt prompt = new Prompt(
@@ -130,6 +138,18 @@ void testSystemOnlyCacheStrategy() throws Exception {
 			assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
 		}
 
+		// Verify tools exist but DO NOT have cache_control (key difference from
+		// SYSTEM_AND_TOOLS)
+		if (requestBody.has("tools")) {
+			JsonNode toolsArray = requestBody.get("tools");
+			assertThat(toolsArray.isArray()).isTrue();
+			// Verify NO tool has cache_control
+			for (int i = 0; i < toolsArray.size(); i++) {
+				JsonNode tool = toolsArray.get(i);
+				assertThat(tool.has("cache_control")).isFalse();
+			}
+		}
+
 		// Verify response
 		assertThat(response).isNotNull();
 		assertThat(response.getResult().getOutput().getText()).contains("Hello!");