From effc83e8873e14c78ae1b9478547495ebc2a0b77 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Fri, 21 Nov 2025 12:18:52 -0500
Subject: [PATCH 1/3] fix: implement model cache refresh to prevent stale disk cache

- Add refreshModels() to force a fresh API fetch, bypassing the cache
- Add initializeModelCacheRefresh() for background refresh on extension load
- Update flushModels() with an optional refresh parameter
- Refresh public providers (OpenRouter, Glama, Vercel AI Gateway) on startup
- Update manual refresh triggers so they actually fetch fresh data
- Use atomic writes to keep the cache available during refresh

Previously, the disk cache was written once and never refreshed, causing
stale model info (pricing, context windows, newly added models) to persist
indefinitely. This fix ensures the cache is refreshed on extension load and
whenever users manually refresh models.
---
 src/api/providers/fetchers/lmstudio.ts    |   4 +-
 src/api/providers/fetchers/modelCache.ts  | 116 +++++++++++++++++++++-
 src/core/webview/webviewMessageHandler.ts |  14 +--
 src/extension.ts                          |  17 ++--
 4 files changed, 131 insertions(+), 20 deletions(-)

diff --git a/src/api/providers/fetchers/lmstudio.ts b/src/api/providers/fetchers/lmstudio.ts
index de3f804c28a..3068a962d85 100644
--- a/src/api/providers/fetchers/lmstudio.ts
+++ b/src/api/providers/fetchers/lmstudio.ts
@@ -18,8 +18,8 @@ export const forceFullModelDetailsLoad = async (baseUrl: string, modelId: string
 	const client = new LMStudioClient({ baseUrl: lmsUrl })
 	await client.llm.model(modelId)
 
-	await flushModels("lmstudio")
-	await getModels({ provider: "lmstudio" }) // Force cache update now.
+	// Flush and refresh cache to get updated model details
+	await flushModels("lmstudio", true)
 
 	// Mark this model as having full details loaded.
 	modelsWithLoadedDetails.add(modelId)
diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index 16b1cf07906..17df3c255b9 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -145,13 +145,127 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
 	}
 }
 
+/**
+ * Force-refresh models from API, bypassing cache.
+ * Uses atomic writes so cache remains available during refresh.
+ *
+ * @param options - Provider options for fetching models
+ * @returns Fresh models from API
+ */
+export const refreshModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
+	const { provider } = options
+
+	let models: ModelRecord
+
+	try {
+		// Force fresh API fetch - skip getModelsFromCache() check
+		switch (provider) {
+			case "openrouter":
+				models = await getOpenRouterModels()
+				break
+			case "requesty":
+				models = await getRequestyModels(options.baseUrl, options.apiKey)
+				break
+			case "glama":
+				models = await getGlamaModels()
+				break
+			case "unbound":
+				models = await getUnboundModels(options.apiKey)
+				break
+			case "litellm":
+				models = await getLiteLLMModels(options.apiKey, options.baseUrl)
+				break
+			case "ollama":
+				models = await getOllamaModels(options.baseUrl, options.apiKey)
+				break
+			case "lmstudio":
+				models = await getLMStudioModels(options.baseUrl)
+				break
+			case "deepinfra":
+				models = await getDeepInfraModels(options.apiKey, options.baseUrl)
+				break
+			case "io-intelligence":
+				models = await getIOIntelligenceModels(options.apiKey)
+				break
+			case "vercel-ai-gateway":
+				models = await getVercelAiGatewayModels()
+				break
+			case "huggingface":
+				models = await getHuggingFaceModels()
+				break
+			case "roo": {
+				const rooBaseUrl =
+					options.baseUrl ?? process.env.ROO_CODE_PROVIDER_URL ?? "https://api.roocode.com/proxy"
+				models = await getRooModels(rooBaseUrl, options.apiKey)
+				break
+			}
+			case "chutes":
+				models = await getChutesModels(options.apiKey)
+				break
+			default: {
+				const exhaustiveCheck: never = provider
+				throw new Error(`Unknown provider: ${exhaustiveCheck}`)
+			}
+		}
+
+		// Update memory cache first
+		memoryCache.set(provider, models)
+
+		// Atomically write to disk (safeWriteJson handles atomic writes)
+		await writeModels(provider, models).catch((err) =>
+			console.error(`[refreshModels] Error writing ${provider} models to disk:`, err),
+		)
+
+		return models
+	} catch (error) {
+		console.debug(`[refreshModels] Failed to refresh ${provider}:`, error)
+		// On error, return existing cache if available (graceful degradation)
+		return getModelsFromCache(provider) || {}
+	}
+}
+
+/**
+ * Initialize background model cache refresh.
+ * Refreshes public provider caches without blocking or requiring auth.
+ * Should be called once during extension activation.
+ */
+export async function initializeModelCacheRefresh(): Promise<void> {
+	// Wait for extension to fully activate before refreshing
+	setTimeout(async () => {
+		// Providers that work without API keys
+		const publicProviders: Array<{ provider: RouterName; options: GetModelsOptions }> = [
+			{ provider: "openrouter", options: { provider: "openrouter" } },
+			{ provider: "glama", options: { provider: "glama" } },
+			{ provider: "vercel-ai-gateway", options: { provider: "vercel-ai-gateway" } },
+		]
+
+		// Refresh each provider in background (fire and forget)
+		for (const { options } of publicProviders) {
+			refreshModels(options).catch(() => {
+				// Fail silently - old cache remains available
+			})
+
+			// Small delay between refreshes to avoid API rate limits
+			await new Promise((resolve) => setTimeout(resolve, 500))
+		}
+	}, 2000)
+}
+
 /**
  * Flush models memory cache for a specific router.
  *
  * @param router - The router to flush models for.
+ * @param refresh - If true, trigger a background refresh of fresh data from the API
  */
-export const flushModels = async (router: RouterName) => {
+export const flushModels = async (router: RouterName, refresh: boolean = false): Promise<void> => {
 	memoryCache.del(router)
+
+	if (refresh) {
+		// Trigger background refresh - don't await to avoid blocking
+		refreshModels({ provider: router } as GetModelsOptions).catch((error) => {
+			console.error(`[flushModels] Refresh failed for ${router}:`, error)
+		})
+	}
 }
 
 /**
diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts
index 8f89a9ec516..e6779b4764d 100644
--- a/src/core/webview/webviewMessageHandler.ts
+++ b/src/core/webview/webviewMessageHandler.ts
@@ -790,7 +790,7 @@ export const webviewMessageHandler = async (
 			break
 		case "flushRouterModels":
 			const routerNameFlush: RouterName = toRouterName(message.text)
-			await flushModels(routerNameFlush)
+			await flushModels(routerNameFlush, true)
 			break
 		case "requestRouterModels":
 			const { apiConfiguration } = await provider.getState()
@@ -932,8 +932,8 @@ export const webviewMessageHandler = async (
 			// Specific handler for Ollama models only.
 			const { apiConfiguration: ollamaApiConfig } = await provider.getState()
 			try {
-				// Flush cache first to ensure fresh models.
-				await flushModels("ollama")
+				// Flush cache and refresh to ensure fresh models.
+				await flushModels("ollama", true)
 
 				const ollamaModels = await getModels({
 					provider: "ollama",
@@ -954,8 +954,8 @@ export const webviewMessageHandler = async (
 			// Specific handler for LM Studio models only.
 			const { apiConfiguration: lmStudioApiConfig } = await provider.getState()
 			try {
-				// Flush cache first to ensure fresh models.
-				await flushModels("lmstudio")
+				// Flush cache and refresh to ensure fresh models.
+				await flushModels("lmstudio", true)
 
 				const lmStudioModels = await getModels({
 					provider: "lmstudio",
@@ -977,8 +977,8 @@ export const webviewMessageHandler = async (
 		case "requestRooModels": {
 			// Specific handler for Roo models only - flushes cache to ensure fresh auth token is used
 			try {
-				// Flush cache first to ensure fresh models with current auth state
-				await flushModels("roo")
+				// Flush cache and refresh to ensure fresh models with current auth state
+				await flushModels("roo", true)
 
 				const rooModels = await getModels({
 					provider: "roo",
diff --git a/src/extension.ts b/src/extension.ts
index bf0ceec02c2..f19a1b8f784 100644
--- a/src/extension.ts
+++ b/src/extension.ts
@@ -40,7 +40,7 @@ import {
 	CodeActionProvider,
 } from "./activate"
 import { initializeI18n } from "./i18n"
-import { flushModels, getModels } from "./api/providers/fetchers/modelCache"
+import { flushModels, getModels, initializeModelCacheRefresh } from "./api/providers/fetchers/modelCache"
 
 /**
  * Built using https://github.com/microsoft/vscode-webview-ui-toolkit
@@ -145,17 +145,11 @@ export async function activate(context: vscode.ExtensionContext) {
 		// Handle Roo models cache based on auth state
 		const handleRooModelsCache = async () => {
 			try {
-				await flushModels("roo")
+				// Flush and refresh cache on auth state changes
+				await flushModels("roo", true)
 
 				if (data.state === "active-session") {
-					// Reload models with the new auth token
-					const sessionToken = cloudService?.authService?.getSessionToken()
-					await getModels({
-						provider: "roo",
-						baseUrl: process.env.ROO_CODE_PROVIDER_URL ?? "https://api.roocode.com/proxy",
-						apiKey: sessionToken,
-					})
-					cloudLogger(`[authStateChangedHandler] Reloaded Roo models cache for active session`)
+					cloudLogger(`[authStateChangedHandler] Refreshed Roo models cache for active session`)
 				} else {
 					cloudLogger(`[authStateChangedHandler] Flushed Roo models cache on logout`)
 				}
@@ -353,6 +347,9 @@ export async function activate(context: vscode.ExtensionContext) {
 		})
 	}
 
+	// Initialize background model cache refresh
+	initializeModelCacheRefresh()
+
 	return new API(outputChannel, provider, socketPath, enableLogging)
 }

From 77eb3c281dc36d7d2432a59d3fcd9f48b79f6257 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Fri, 21 Nov 2025 12:53:34 -0500
Subject: [PATCH 2/3] fix: prevent race condition in flushModels with refresh

Keep the old cache in memory when refresh=true to avoid a gap in cache
availability. This prevents getModels() from triggering a fresh API fetch
if called immediately after flushModels(router, true).

The refreshModels() function will atomically replace the memory cache when
the refresh completes, maintaining graceful degradation.
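
For illustration, the call sequence this patch guards against (hypothetical
caller; "openrouter" stands in for any RouterName):

    // flushModels() used to delete the memory cache entry up front,
    // so a read racing the background refresh missed the cache:
    await flushModels("openrouter", true) // starts refreshModels() in the background
    // Before this patch, this call could run before the refresh finished
    // and fall through to a second, redundant provider fetch:
    const models = await getModels({ provider: "openrouter" })
    // Now the stale entry stays in memoryCache until refreshModels()
    // replaces it, so getModels() keeps answering from cache.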
---
 src/api/providers/fetchers/modelCache.ts | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index 17df3c255b9..1e367f7f7d1 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -258,13 +258,16 @@ export async function initializeModelCacheRefresh(): Promise<void> {
  * @param refresh - If true, trigger a background refresh of fresh data from the API
  */
 export const flushModels = async (router: RouterName, refresh: boolean = false): Promise<void> => {
-	memoryCache.del(router)
-
 	if (refresh) {
-		// Trigger background refresh - don't await to avoid blocking
+		// Don't delete memory cache - let refreshModels atomically replace it.
+		// This prevents a race condition where getModels() might be called
+		// before the refresh completes, avoiding a gap in cache availability
 		refreshModels({ provider: router } as GetModelsOptions).catch((error) => {
 			console.error(`[flushModels] Refresh failed for ${router}:`, error)
 		})
+	} else {
+		// Only delete memory cache when not refreshing
+		memoryCache.del(router)
 	}
 }

From 04af1c9b48766f70601f135c6b5fef8cbae15076 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Fri, 21 Nov 2025 17:15:44 -0500
Subject: [PATCH 3/3] refactor: extract fetchModelsFromProvider to avoid duplication

Extract the provider switch statement into a shared
fetchModelsFromProvider() function to eliminate duplication between
getModels() and refreshModels().

This ensures the provider list is maintained in only one place, making it
easier to add new providers without forgetting to update both functions.
---
 src/api/providers/fetchers/modelCache.ts | 173 +++++++++--------------
 1 file changed, 70 insertions(+), 103 deletions(-)

diff --git a/src/api/providers/fetchers/modelCache.ts b/src/api/providers/fetchers/modelCache.ts
index 1e367f7f7d1..50edbf274a8 100644
--- a/src/api/providers/fetchers/modelCache.ts
+++ b/src/api/providers/fetchers/modelCache.ts
@@ -49,6 +49,74 @@ async function readModels(router: RouterName): Promise<ModelRecord | undefined> {
 	return exists ? JSON.parse(await fs.readFile(filePath, "utf8")) : undefined
 }
 
+/**
+ * Fetch models from the provider API.
+ * Extracted to avoid duplication between getModels() and refreshModels().
+ *
+ * @param options - Provider options for fetching models
+ * @returns Fresh models from the provider API
+ */
+async function fetchModelsFromProvider(options: GetModelsOptions): Promise<ModelRecord> {
+	const { provider } = options
+
+	let models: ModelRecord
+
+	switch (provider) {
+		case "openrouter":
+			models = await getOpenRouterModels()
+			break
+		case "requesty":
+			// Requesty models endpoint requires an API key for per-user custom policies.
+			models = await getRequestyModels(options.baseUrl, options.apiKey)
+			break
+		case "glama":
+			models = await getGlamaModels()
+			break
+		case "unbound":
+			// Unbound models endpoint requires an API key to fetch application specific models.
+			models = await getUnboundModels(options.apiKey)
+			break
+		case "litellm":
+			// Type safety ensures apiKey and baseUrl are always provided for LiteLLM.
+			models = await getLiteLLMModels(options.apiKey, options.baseUrl)
+			break
+		case "ollama":
+			models = await getOllamaModels(options.baseUrl, options.apiKey)
+			break
+		case "lmstudio":
+			models = await getLMStudioModels(options.baseUrl)
+			break
+		case "deepinfra":
+			models = await getDeepInfraModels(options.apiKey, options.baseUrl)
+			break
+		case "io-intelligence":
+			models = await getIOIntelligenceModels(options.apiKey)
+			break
+		case "vercel-ai-gateway":
+			models = await getVercelAiGatewayModels()
+			break
+		case "huggingface":
+			models = await getHuggingFaceModels()
+			break
+		case "roo": {
+			// Roo Code Cloud provider requires baseUrl and optional apiKey
+			const rooBaseUrl = options.baseUrl ?? process.env.ROO_CODE_PROVIDER_URL ?? "https://api.roocode.com/proxy"
+			models = await getRooModels(rooBaseUrl, options.apiKey)
+			break
+		}
+		case "chutes":
+			models = await getChutesModels(options.apiKey)
+			break
+		default: {
+			// Ensures router is exhaustively checked if RouterName is a strict union.
+			const exhaustiveCheck: never = provider
+			throw new Error(`Unknown provider: ${exhaustiveCheck}`)
+		}
+	}
+
+	return models
+}
+
 /**
  * Get models from the cache or fetch them from the provider and cache them.
  * There are two caches:
@@ -70,59 +138,7 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
 	}
 
 	try {
-		switch (provider) {
-			case "openrouter":
-				models = await getOpenRouterModels()
-				break
-			case "requesty":
-				// Requesty models endpoint requires an API key for per-user custom policies.
-				models = await getRequestyModels(options.baseUrl, options.apiKey)
-				break
-			case "glama":
-				models = await getGlamaModels()
-				break
-			case "unbound":
-				// Unbound models endpoint requires an API key to fetch application specific models.
-				models = await getUnboundModels(options.apiKey)
-				break
-			case "litellm":
-				// Type safety ensures apiKey and baseUrl are always provided for LiteLLM.
-				models = await getLiteLLMModels(options.apiKey, options.baseUrl)
-				break
-			case "ollama":
-				models = await getOllamaModels(options.baseUrl, options.apiKey)
-				break
-			case "lmstudio":
-				models = await getLMStudioModels(options.baseUrl)
-				break
-			case "deepinfra":
-				models = await getDeepInfraModels(options.apiKey, options.baseUrl)
-				break
-			case "io-intelligence":
-				models = await getIOIntelligenceModels(options.apiKey)
-				break
-			case "vercel-ai-gateway":
-				models = await getVercelAiGatewayModels()
-				break
-			case "huggingface":
-				models = await getHuggingFaceModels()
-				break
-			case "roo": {
-				// Roo Code Cloud provider requires baseUrl and optional apiKey
-				const rooBaseUrl =
-					options.baseUrl ?? process.env.ROO_CODE_PROVIDER_URL ?? "https://api.roocode.com/proxy"
-				models = await getRooModels(rooBaseUrl, options.apiKey)
-				break
-			}
-			case "chutes":
-				models = await getChutesModels(options.apiKey)
-				break
-			default: {
-				// Ensures router is exhaustively checked if RouterName is a strict union.
-				const exhaustiveCheck: never = provider
-				throw new Error(`Unknown provider: ${exhaustiveCheck}`)
-			}
-		}
+		models = await fetchModelsFromProvider(options)
 
 		// Cache the fetched models (even if empty, to signify a successful fetch with no models).
 		memoryCache.set(provider, models)
 
@@ -155,58 +171,9 @@ export const getModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
 export const refreshModels = async (options: GetModelsOptions): Promise<ModelRecord> => {
 	const { provider } = options
 
-	let models: ModelRecord
-
 	try {
 		// Force fresh API fetch - skip getModelsFromCache() check
-		switch (provider) {
-			case "openrouter":
-				models = await getOpenRouterModels()
-				break
-			case "requesty":
-				models = await getRequestyModels(options.baseUrl, options.apiKey)
-				break
-			case "glama":
-				models = await getGlamaModels()
-				break
-			case "unbound":
-				models = await getUnboundModels(options.apiKey)
-				break
-			case "litellm":
-				models = await getLiteLLMModels(options.apiKey, options.baseUrl)
-				break
-			case "ollama":
-				models = await getOllamaModels(options.baseUrl, options.apiKey)
-				break
-			case "lmstudio":
-				models = await getLMStudioModels(options.baseUrl)
-				break
-			case "deepinfra":
-				models = await getDeepInfraModels(options.apiKey, options.baseUrl)
-				break
-			case "io-intelligence":
-				models = await getIOIntelligenceModels(options.apiKey)
-				break
-			case "vercel-ai-gateway":
-				models = await getVercelAiGatewayModels()
-				break
-			case "huggingface":
-				models = await getHuggingFaceModels()
-				break
-			case "roo": {
-				const rooBaseUrl =
-					options.baseUrl ?? process.env.ROO_CODE_PROVIDER_URL ?? "https://api.roocode.com/proxy"
-				models = await getRooModels(rooBaseUrl, options.apiKey)
-				break
-			}
-			case "chutes":
-				models = await getChutesModels(options.apiKey)
-				break
-			default: {
-				const exhaustiveCheck: never = provider
-				throw new Error(`Unknown provider: ${exhaustiveCheck}`)
-			}
-		}
+		const models = await fetchModelsFromProvider(options)
 
 		// Update memory cache first
 		memoryCache.set(provider, models)
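
Taken together, the series leaves three entry points into the model cache.
A minimal usage sketch (illustrative only; "openrouter" stands in for any
RouterName, and the import path follows src/extension.ts):

    import {
        flushModels,
        getModels,
        initializeModelCacheRefresh,
        refreshModels,
    } from "./api/providers/fetchers/modelCache"

    // Read path: memory cache, then disk cache, then a provider fetch.
    const models = await getModels({ provider: "openrouter" })

    // Manual refresh: non-blocking; the old entry stays warm until
    // refreshModels() atomically replaces it.
    await flushModels("openrouter", true)

    // Forced refresh: awaits the provider fetch and falls back to the
    // existing cache (or {}) on error.
    const fresh = await refreshModels({ provider: "openrouter" })

    // Extension activation: delayed background refresh of public providers.
    initializeModelCacheRefresh()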