RooCodeInc · mrubens · Jul 23, 2025 · Jul 23, 2025 · Jul 23, 2025 · Jul 23, 2025
@@ -32,6 +32,7 @@ export const providerNames = [
 	"groq",
 	"chutes",
 	"litellm",
+	"huggingface",
 ] as const
 
 export const providerNamesSchema = z.enum(providerNames)
@@ -219,6 +220,12 @@ const groqSchema = apiModelIdProviderModelSchema.extend({
 	groqApiKey: z.string().optional(),
 })
 
+const huggingFaceSchema = baseProviderSettingsSchema.extend({ // Hugging Face provider settings; every field is an optional string
+	huggingFaceApiKey: z.string().optional(), // HuggingFaceHandler's constructor throws when its options carry no value for this key
+	huggingFaceModelId: z.string().optional(), // listed in MODEL_ID_KEYS; HuggingFaceHandler falls back to a built-in default model when unset
+	huggingFaceInferenceProvider: z.string().optional(), // NOTE(review): not read anywhere in the visible code — presumably selects the inference provider; confirm
+})
+
 const chutesSchema = apiModelIdProviderModelSchema.extend({
 	chutesApiKey: z.string().optional(),
 })
@@ -256,6 +263,7 @@ export const providerSettingsSchemaDiscriminated = z.discriminatedUnion("apiProv
 	fakeAiSchema.merge(z.object({ apiProvider: z.literal("fake-ai") })),
 	xaiSchema.merge(z.object({ apiProvider: z.literal("xai") })),
 	groqSchema.merge(z.object({ apiProvider: z.literal("groq") })),
+	huggingFaceSchema.merge(z.object({ apiProvider: z.literal("huggingface") })),
 	chutesSchema.merge(z.object({ apiProvider: z.literal("chutes") })),
 	litellmSchema.merge(z.object({ apiProvider: z.literal("litellm") })),
 	defaultSchema,
@@ -285,6 +293,7 @@ export const providerSettingsSchema = z.object({
 	...fakeAiSchema.shape,
 	...xaiSchema.shape,
 	...groqSchema.shape,
+	...huggingFaceSchema.shape,
 	...chutesSchema.shape,
 	...litellmSchema.shape,
 	...codebaseIndexProviderSchema.shape,
@@ -304,6 +313,7 @@ export const MODEL_ID_KEYS: Partial<keyof ProviderSettings>[] = [
 	"unboundModelId",
 	"requestyModelId",
 	"litellmModelId",
+	"huggingFaceModelId",
 ]
 
 export const getModelId = (settings: ProviderSettings): string | undefined => {

@@ -0,0 +1,17 @@
+import { fetchHuggingFaceModels, type HuggingFaceModel } from "../services/huggingface-models"
+
+export interface HuggingFaceModelsResponse { // Envelope returned by getHuggingFaceModels()
+	models: HuggingFaceModel[] // the list produced by fetchHuggingFaceModels()
+	cached: boolean // currently always false; getHuggingFaceModels() does not track cache hits
+	timestamp: number // Date.now() taken when the envelope was built
+}
+
+/**
+ * Loads the Hugging Face model list and wraps it in a response envelope.
+ *
+ * @returns The models from `fetchHuggingFaceModels()`, a `cached` flag that is
+ * always `false`, and the epoch-millisecond time at which the envelope was built.
+ */
+export async function getHuggingFaceModels(): Promise<HuggingFaceModelsResponse> {
+	const response: HuggingFaceModelsResponse = {
+		models: await fetchHuggingFaceModels(),
+		// We could enhance this to track if data came from cache
+		cached: false,
+		timestamp: Date.now(),
+	}
+
+	return response
+}
@@ -26,6 +26,7 @@ import {
 	FakeAIHandler,
 	XAIHandler,
 	GroqHandler,
+	HuggingFaceHandler,
 	ChutesHandler,
 	LiteLLMHandler,
 	ClaudeCodeHandler,
@@ -108,6 +109,8 @@ export function buildApiHandler(configuration: ProviderSettings): ApiHandler {
 			return new XAIHandler(options)
 		case "groq":
 			return new GroqHandler(options)
+		case "huggingface":
+			return new HuggingFaceHandler(options)
 		case "chutes":
 			return new ChutesHandler(options)
 		case "litellm":

@@ -0,0 +1,99 @@
+import OpenAI from "openai"
+import { Anthropic } from "@anthropic-ai/sdk"
+
+import type { ApiHandlerOptions } from "../../shared/api"
+import { ApiStream } from "../transform/stream"
+import { convertToOpenAiMessages } from "../transform/openai-format"
+import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
+import { DEFAULT_HEADERS } from "./constants"
+import { BaseProvider } from "./base-provider"
+
+/**
+ * API handler that sends chat completions to the Hugging Face router through
+ * the OpenAI client.
+ */
+export class HuggingFaceHandler extends BaseProvider implements SingleCompletionHandler {
+	private client: OpenAI
+	private options: ApiHandlerOptions
+
+	/**
+	 * @param options Handler options; `huggingFaceApiKey` must be set.
+	 * @throws Error when no Hugging Face API key is present in `options`.
+	 */
+	constructor(options: ApiHandlerOptions) {
+		super()
+		this.options = options
+
+		const apiKey = this.options.huggingFaceApiKey
+		if (!apiKey) {
+			throw new Error("Hugging Face API key is required")
+		}
+
+		this.client = new OpenAI({
+			baseURL: "https://router.huggingface.co/v1",
+			apiKey,
+			defaultHeaders: DEFAULT_HEADERS,
+		})
+	}
+
+	/** Returns the configured model id, or the built-in default when none is set. */
+	private resolveModelId(): string {
+		return this.options.huggingFaceModelId || "meta-llama/Llama-3.3-70B-Instruct"
+	}
+
+	/**
+	 * Streams a chat completion, yielding text chunks and usage records as they arrive.
+	 *
+	 * @param systemPrompt Sent as the leading system message.
+	 * @param messages Conversation history, converted with `convertToOpenAiMessages`.
+	 * @param metadata Accepted for interface compatibility; not used here.
+	 */
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const request: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
+			model: this.resolveModelId(),
+			// Falls back to 0.7 only when no temperature is configured (0 is respected)
+			temperature: this.options.modelTemperature ?? 0.7,
+			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			stream: true,
+			stream_options: { include_usage: true },
+		}
+
+		const completionStream = await this.client.chat.completions.create(request)
+
+		for await (const part of completionStream) {
+			const text = part.choices[0]?.delta?.content
+			if (text) {
+				yield { type: "text", text }
+			}
+
+			const usage = part.usage
+			if (usage) {
+				yield {
+					type: "usage",
+					inputTokens: usage.prompt_tokens || 0,
+					outputTokens: usage.completion_tokens || 0,
+				}
+			}
+		}
+	}
+
+	/**
+	 * Runs a single non-streaming completion for `prompt`.
+	 *
+	 * @returns The first choice's message content, or `""` when there is none.
+	 * @throws Error prefixed with "Hugging Face completion error:" when the request
+	 * fails with an `Error`; any other thrown value is rethrown unchanged.
+	 */
+	async completePrompt(prompt: string): Promise<string> {
+		const model = this.resolveModelId()
+
+		try {
+			const completion = await this.client.chat.completions.create({
+				model,
+				messages: [{ role: "user", content: prompt }],
+			})
+
+			return completion.choices[0]?.message.content || ""
+		} catch (error) {
+			if (!(error instanceof Error)) {
+				throw error
+			}
+
+			throw new Error(`Hugging Face completion error: ${error.message}`)
+		}
+	}
+
+	/**
+	 * Returns the active model id together with a fixed capability description
+	 * that does not vary with the selected model.
+	 */
+	override getModel() {
+		return {
+			id: this.resolveModelId(),
+			info: {
+				maxTokens: 8192,
+				contextWindow: 131072,
+				supportsImages: false,
+				supportsPromptCache: false,
+			},
+		}
+	}
+}
@@ -9,6 +9,7 @@ export { FakeAIHandler } from "./fake-ai"
 export { GeminiHandler } from "./gemini"
 export { GlamaHandler } from "./glama"
 export { GroqHandler } from "./groq"
+export { HuggingFaceHandler } from "./huggingface"
 export { HumanRelayHandler } from "./human-relay"
 export { LiteLLMHandler } from "./lite-llm"
 export { LmStudioHandler } from "./lm-studio"

@@ -674,6 +674,22 @@ export const webviewMessageHandler = async (
 			// TODO: Cache like we do for OpenRouter, etc?
 			provider.postMessageToWebview({ type: "vsCodeLmModels", vsCodeLmModels })
 			break
+		case "requestHuggingFaceModels":
+			try {
+				const { getHuggingFaceModels } = await import("../../api/huggingface-models")
+				const huggingFaceModelsResponse = await getHuggingFaceModels()
+				provider.postMessageToWebview({
+					type: "huggingFaceModels",
+					huggingFaceModels: huggingFaceModelsResponse.models,
+				})
+			} catch (error) {
+				console.error("Failed to fetch Hugging Face models:", error)
+				provider.postMessageToWebview({
+					type: "huggingFaceModels",
+					huggingFaceModels: [],
+				})
+			}
+			break
 		case "openImage":
 			openImage(message.text!, { values: message.values })
 			break

@@ -0,0 +1,171 @@
+export interface HuggingFaceModel { // One entry of the model list that fetchHuggingFaceModels() reads from the API response
+	_id: string // NOTE(review): presumably the Hub's internal record id — confirm against the API
+	id: string // model identifier; fetchHuggingFaceModels() sorts on it case-insensitively
+	inferenceProviderMapping: InferenceProviderMapping[] // models whose mapping is empty are filtered out by fetchHuggingFaceModels()
+	trendingScore: number // requested through the "trendingScore" expand parameter
+	config: ModelConfig // requested through the "config" expand parameter
+	tags: string[] // requested through the "tags" expand parameter
+	pipeline_tag: "text-generation" | "image-text-to-text" // the two pipeline tags fetchHuggingFaceModels() queries for
+	library_name?: string // requested through the "library_name" expand parameter; may be absent
+}
+
+export interface InferenceProviderMapping { // One element of HuggingFaceModel.inferenceProviderMapping
+	provider: string // NOTE(review): presumably the inference provider's name — confirm against the API
+	providerId: string // NOTE(review): presumably the model's id on that provider — confirm against the API
+	status: "live" | "staging" | "error" // NOTE(review): the visible code never filters on this value
+	task: "conversational" // same literal as the "conversational" filter sent in DEFAULT_PARAMS
+}
+
+export interface ModelConfig { // Type of HuggingFaceModel.config
+	architectures: string[]
+	model_type: string
+	tokenizer_config?: { // optional; NOTE(review): not read anywhere in the visible code
+		chat_template?: string | Array<{ name: string; template: string }> // either one template string or a list of named templates
+		model_max_length?: number
+	}
+}
+
+interface HuggingFaceApiParams { // Query parameters that buildApiUrl() serialises onto BASE_URL
+	pipeline_tag?: "text-generation" | "image-text-to-text" // supplied per request by fetchHuggingFaceModels()
+	filter: string
+	inference_provider: string
+	limit: number
+	expand: string[] // each item is sent as its own "expand[]" query parameter
+}
+
+const DEFAULT_PARAMS: HuggingFaceApiParams = { // Parameters shared by both requests; fetchHuggingFaceModels() adds pipeline_tag on top
+	filter: "conversational",
+	inference_provider: "all",
+	limit: 100, // NOTE(review): presumably caps the number of models returned per request — confirm against the API
+	expand: [ // extra fields requested for each model
+		"inferenceProviderMapping",
+		"config",
+		"library_name",
+		"pipeline_tag",
+		"tags",
+		"mask_token", // NOTE(review): requested, but HuggingFaceModel declares no matching field
+		"trendingScore",
+	],
+}
+
+const BASE_URL = "https://huggingface.co/api/models" // endpoint that buildApiUrl() appends query parameters to
+const CACHE_DURATION = 1000 * 60 * 60 // 1 hour in milliseconds; a cache entry younger than this is served without refetching
+
+interface CacheEntry { // Snapshot of the last model list stored by fetchHuggingFaceModels()
+	data: HuggingFaceModel[] // the filtered and sorted model list
+	timestamp: number // Date.now() captured at the start of the call that produced this entry
+	status: "success" | "partial" | "error" // "partial": at least one request failed; "error": a later refresh threw and this entry was served stale
+}
+
+let cache: CacheEntry | null = null // module-level in-memory cache; null until the first fetch stores a result or after clearCache()
+
+/**
+ * Builds the models-API request URL for the given query parameters.
+ *
+ * Every non-array value is stringified and appended under its own key, in
+ * property order; the `expand` items follow, each as a separate `expand[]`
+ * parameter.
+ *
+ * @param params Query parameters to serialise.
+ * @returns The absolute URL as a string.
+ */
+function buildApiUrl(params: HuggingFaceApiParams): string {
+	const endpoint = new URL(BASE_URL)
+	const query = endpoint.searchParams
+
+	// Scalar parameters first; array-valued ones are handled below
+	for (const [name, raw] of Object.entries(params)) {
+		if (Array.isArray(raw)) {
+			continue
+		}
+		query.append(name, String(raw))
+	}
+
+	// Array parameter: one "expand[]" entry per requested field
+	for (const field of params.expand) {
+		query.append("expand[]", field)
+	}
+
+	return endpoint.toString()
+}
+
+const headers: HeadersInit = { // Request headers attached to every models-API call via requestInit
+	"Upgrade-Insecure-Requests": "1", // NOTE(review): this and the Sec-Fetch-* entries resemble what a browser sends for a page navigation — confirm the API needs them
+	"Sec-Fetch-Dest": "document",
+	"Sec-Fetch-Mode": "navigate",
+	"Sec-Fetch-Site": "none",
+	"Sec-Fetch-User": "?1",
+	Priority: "u=0, i",
+	Pragma: "no-cache", // together with Cache-Control below, asks intermediaries not to serve a cached response
+	"Cache-Control": "no-cache",
+}
+
+const requestInit: RequestInit = { // Options passed to both fetch() calls in fetchHuggingFaceModels()
+	credentials: "include", // NOTE(review): browser-oriented option — confirm it has any effect in the runtime this executes in
+	headers,
+	method: "GET",
+	mode: "cors", // NOTE(review): browser-oriented option — confirm it has any effect in the runtime this executes in
+}
+
+/**
+ * Reads the model array out of one settled fetch result.
+ *
+ * Never rejects: logs and returns `null` when the request was rejected, was
+ * answered with a non-OK status, could not be parsed as JSON, or did not
+ * produce an array.
+ *
+ * @param result Settled outcome of one `fetch()` call.
+ * @param label Pipeline tag used in log messages.
+ */
+async function readModelsFromResult(
+	result: PromiseSettledResult<Response>,
+	label: string,
+): Promise<HuggingFaceModel[] | null> {
+	if (result.status !== "fulfilled" || !result.value.ok) {
+		console.error(`Failed to fetch ${label} models:`, result)
+		return null
+	}
+
+	try {
+		const payload: unknown = await result.value.json()
+		if (Array.isArray(payload)) {
+			return payload as HuggingFaceModel[]
+		}
+		console.error(`Unexpected response shape for ${label} models:`, payload)
+	} catch (error) {
+		// A malformed body on one request must not discard the other request's models
+		console.error(`Failed to parse ${label} models:`, error)
+	}
+
+	return null
+}
+
+/**
+ * Returns the conversational Hugging Face models that have at least one
+ * inference provider, sorted case-insensitively by id.
+ *
+ * Results are kept in a module-level cache for `CACHE_DURATION`. Text-generation
+ * and image-text-to-text models are requested in parallel; if only one request
+ * succeeds, its models are returned and cached with status "partial". If both
+ * fail, nothing is cached: the previous (stale) cache is returned when present,
+ * otherwise an empty array.
+ *
+ * @returns The model list; never rejects.
+ */
+export async function fetchHuggingFaceModels(): Promise<HuggingFaceModel[]> {
+	const now = Date.now()
+
+	// Serve from the cache while it is still fresh
+	if (cache && now - cache.timestamp < CACHE_DURATION) {
+		console.log("Using cached Hugging Face models")
+		return cache.data
+	}
+
+	try {
+		console.log("Fetching Hugging Face models from API...")
+
+		// Fetch both text-generation and image-text-to-text models in parallel
+		const [textGenResult, imgTextResult] = await Promise.allSettled([
+			fetch(buildApiUrl({ ...DEFAULT_PARAMS, pipeline_tag: "text-generation" }), requestInit),
+			fetch(buildApiUrl({ ...DEFAULT_PARAMS, pipeline_tag: "image-text-to-text" }), requestInit),
+		])
+
+		// readModelsFromResult never rejects, so Promise.all cannot short-circuit here
+		const [textGenModels, imgTextModels] = await Promise.all([
+			readModelsFromResult(textGenResult, "text-generation"),
+			readModelsFromResult(imgTextResult, "image-text-to-text"),
+		])
+
+		// A total failure must not overwrite the cache with an empty list for the
+		// next hour; hand over to the catch block, which falls back to stale data
+		if (textGenModels === null && imgTextModels === null) {
+			throw new Error("Both Hugging Face model requests failed")
+		}
+
+		const hasErrors = textGenModels === null || imgTextModels === null
+
+		// Combine, drop malformed entries and models without providers, then sort
+		const allModels = [...(textGenModels ?? []), ...(imgTextModels ?? [])]
+			.filter(
+				(model) =>
+					typeof model?.id === "string" &&
+					Array.isArray(model.inferenceProviderMapping) &&
+					model.inferenceProviderMapping.length > 0,
+			)
+			.sort((a, b) => a.id.toLowerCase().localeCompare(b.id.toLowerCase()))
+
+		// Update cache
+		cache = {
+			data: allModels,
+			timestamp: now,
+			status: hasErrors ? "partial" : "success",
+		}
+
+		console.log(`Fetched ${allModels.length} Hugging Face models (status: ${cache.status})`)
+		return allModels
+	} catch (error) {
+		console.error("Error fetching Hugging Face models:", error)
+
+		// Return cached data if available
+		if (cache) {
+			console.log("Using stale cached data due to fetch error")
+			cache.status = "error"
+			return cache.data
+		}
+
+		// No cache available, return empty array
+		return []
+	}
+}
+
+/**
+ * Returns the model list currently held in the module-level cache without
+ * checking its age or triggering a fetch.
+ *
+ * @returns The cached models, or `null` when nothing is cached.
+ */
+export function getCachedModels(): HuggingFaceModel[] | null {
+	if (!cache) {
+		return null
+	}
+
+	return cache.data || null
+}
+
+/**
+ * Discards the module-level model cache so that the next
+ * `fetchHuggingFaceModels()` call goes to the API again.
+ */
+export function clearCache(): void {
+	cache = null
+}