diff --git a/.changeset/cyan-turkeys-sneeze.md b/.changeset/cyan-turkeys-sneeze.md new file mode 100644 index 000000000..ddafcb422 --- /dev/null +++ b/.changeset/cyan-turkeys-sneeze.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Feat: re-add --ask-models diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index dc17cddd4..61fa7a8d2 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -23,7 +23,7 @@ jobs: os: [macos-latest, windows-latest, ubuntu-22.04] frameworks: ["fastapi"] datasources: ["--no-files", "--example-file", "--llamacloud"] - template-types: ["streaming", "llamaindexserver"] + template-types: ["llamaindexserver"] defaults: run: shell: bash @@ -105,7 +105,7 @@ jobs: os: [macos-latest, windows-latest, ubuntu-22.04] frameworks: ["nextjs"] datasources: ["--no-files", "--example-file", "--llamacloud"] - template-types: ["streaming", "llamaindexserver"] + template-types: ["llamaindexserver"] defaults: run: shell: bash diff --git a/packages/create-llama/e2e/shared/reflex_template.spec.ts b/packages/create-llama/e2e/shared/reflex_template.spec.ts deleted file mode 100644 index 28d0db466..000000000 --- a/packages/create-llama/e2e/shared/reflex_template.spec.ts +++ /dev/null @@ -1,63 +0,0 @@ -import { expect, test } from "@playwright/test"; -import { ChildProcess } from "child_process"; -import fs from "fs"; -import path from "path"; -import { TemplateFramework, TemplateUseCase } from "../../helpers"; -import { createTestDir, runCreateLlama } from "../utils"; - -const templateFramework: TemplateFramework = process.env.FRAMEWORK - ? (process.env.FRAMEWORK as TemplateFramework) - : "fastapi"; -const dataSource: string = process.env.DATASOURCE - ? process.env.DATASOURCE - : "--example-file"; -const templateUseCases: TemplateUseCase[] = ["extractor", "contract_review"]; - -// The reflex template currently only works with FastAPI and files (and not on Windows) -if ( - process.platform !== "win32" && - templateFramework === "fastapi" && - dataSource === "--example-file" -) { - for (const useCase of templateUseCases) { - test.describe(`Test reflex template ${useCase} ${templateFramework} ${dataSource}`, async () => { - let appPort: number; - let name: string; - let appProcess: ChildProcess; - let cwd: string; - - // Create reflex app - test.beforeAll(async () => { - cwd = await createTestDir(); - appPort = Math.floor(Math.random() * 10000) + 10000; - const result = await runCreateLlama({ - cwd, - templateType: "reflex", - templateFramework: "fastapi", - dataSource: "--example-file", - vectorDb: "none", - port: appPort, - postInstallAction: "runApp", - useCase, - }); - name = result.projectName; - appProcess = result.appProcess; - }); - - test.afterAll(async () => { - appProcess.kill(); - }); - - test("App folder should exist", async () => { - const dirExists = fs.existsSync(path.join(cwd, name)); - expect(dirExists).toBeTruthy(); - }); - test("Frontend should have a title", async ({ page }) => { - await page.goto(`http://localhost:${appPort}`); - await expect(page.getByText("Built by LlamaIndex")).toBeVisible({ - timeout: 2000 * 60, - }); - }); - }); - } -} diff --git a/packages/create-llama/e2e/shared/streaming_template.spec.ts b/packages/create-llama/e2e/shared/streaming_template.spec.ts deleted file mode 100644 index 7055a1631..000000000 --- a/packages/create-llama/e2e/shared/streaming_template.spec.ts +++ /dev/null @@ -1,127 +0,0 @@ -import { expect, test } from "@playwright/test"; -import { ChildProcess } from "child_process"; -import fs from 
"fs"; -import path from "path"; -import type { - TemplateFramework, - TemplatePostInstallAction, - TemplateUI, -} from "../../helpers"; -import { createTestDir, runCreateLlama, type AppType } from "../utils"; - -const templateFramework: TemplateFramework = process.env.FRAMEWORK - ? (process.env.FRAMEWORK as TemplateFramework) - : "fastapi"; -const dataSource: string = process.env.DATASOURCE - ? process.env.DATASOURCE - : "--example-file"; -const templateUI: TemplateUI = "shadcn"; -const templatePostInstallAction: TemplatePostInstallAction = "runApp"; - -const llamaCloudProjectName = "create-llama"; -const llamaCloudIndexName = "e2e-test"; - -const appType: AppType = templateFramework === "fastapi" ? "--frontend" : ""; -const userMessage = - dataSource !== "--no-files" ? "Physical standard for letters" : "Hello"; - -test.describe(`Test streaming template ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => { - const isNode18 = process.version.startsWith("v18"); - const isLlamaCloud = dataSource === "--llamacloud"; - // llamacloud is using File API which is not supported on node 18 - if (isNode18 && isLlamaCloud) { - test.skip(true, "Skipping tests for Node 18 and LlamaCloud data source"); - } - - let port: number; - let cwd: string; - let name: string; - let appProcess: ChildProcess; - // Only test without using vector db for now - const vectorDb = "none"; - - test.beforeAll(async () => { - port = Math.floor(Math.random() * 10000) + 10000; - cwd = await createTestDir(); - const result = await runCreateLlama({ - cwd, - templateType: "streaming", - templateFramework, - dataSource, - vectorDb, - port, - postInstallAction: templatePostInstallAction, - templateUI, - appType, - llamaCloudProjectName, - llamaCloudIndexName, - }); - name = result.projectName; - appProcess = result.appProcess; - }); - - test("App folder should exist", async () => { - const dirExists = fs.existsSync(path.join(cwd, name)); - expect(dirExists).toBeTruthy(); - }); - - test("Frontend should have a title", async ({ page }) => { - test.skip( - templatePostInstallAction !== "runApp" || templateFramework === "express", - ); - await page.goto(`http://localhost:${port}`); - await expect(page.getByText("Built by LlamaIndex")).toBeVisible(); - }); - - test("Frontend should be able to submit a message and receive a response", async ({ - page, - }) => { - test.skip( - templatePostInstallAction !== "runApp" || templateFramework === "express", - ); - await page.goto(`http://localhost:${port}`); - await page.fill("form textarea", userMessage); - const [response] = await Promise.all([ - page.waitForResponse( - (res) => { - return res.url().includes("/api/chat") && res.status() === 200; - }, - { - timeout: 1000 * 60, - }, - ), - page.click("form button[type=submit]"), - ]); - const text = await response.text(); - console.log("AI response when submitting message: ", text); - expect(response.ok()).toBeTruthy(); - }); - - test("Backend frameworks should response when calling non-streaming chat API", async ({ - request, - }) => { - test.skip(templatePostInstallAction !== "runApp"); - test.skip(templateFramework === "nextjs"); - const response = await request.post( - `http://localhost:${port}/api/chat/request`, - { - data: { - messages: [ - { - role: "user", - content: userMessage, - }, - ], - }, - }, - ); - const text = await response.text(); - console.log("AI response when calling API: ", text); - expect(response.ok()).toBeTruthy(); - }); - - // clean processes - test.afterAll(async () => 
{ - appProcess?.kill(); - }); -}); diff --git a/packages/create-llama/helpers/env-variables.ts b/packages/create-llama/helpers/env-variables.ts index 58c361ed8..44d04e1d7 100644 --- a/packages/create-llama/helpers/env-variables.ts +++ b/packages/create-llama/helpers/env-variables.ts @@ -254,11 +254,6 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => { description: "Name of the embedding model to use.", value: modelConfig.embeddingModel, }, - { - name: "EMBEDDING_DIM", - description: "Dimension of the embedding model to use.", - value: modelConfig.dimensions.toString(), - }, { name: "CONVERSATION_STARTERS", description: "The questions to help users get started (multi-line).", @@ -597,16 +592,9 @@ export const createBackendEnvFile = async ( ...getFrameworkEnvs(opts.framework, opts.template, opts.port), // Add environment variables of each component ...(opts.template === "llamaindexserver" - ? [ - { - name: "OPENAI_API_KEY", - description: "The OpenAI API key to use.", - value: opts.modelConfig.apiKey, - }, - ] + ? [...getModelEnvs(opts.modelConfig)] : [ // don't use this stuff for llama-indexserver - ...getModelEnvs(opts.modelConfig), ...getEngineEnvs(), ...getTemplateEnvs(opts.template), ...getObservabilityEnvs(opts.observability), diff --git a/packages/create-llama/helpers/models.ts b/packages/create-llama/helpers/models.ts new file mode 100644 index 000000000..4963810c4 --- /dev/null +++ b/packages/create-llama/helpers/models.ts @@ -0,0 +1,12 @@ +import { ModelConfig } from "./types"; + +export const getGpt41ModelConfig = (openAiKey?: string): ModelConfig => ({ + provider: "openai", + apiKey: openAiKey, + model: "gpt-4.1", + embeddingModel: "text-embedding-3-large", + dimensions: 1536, + isConfigured(): boolean { + return !!openAiKey; + }, +}); diff --git a/packages/create-llama/helpers/providers/anthropic.ts b/packages/create-llama/helpers/providers/anthropic.ts index 080ffdeae..01092eb0f 100644 --- a/packages/create-llama/helpers/providers/anthropic.ts +++ b/packages/create-llama/helpers/providers/anthropic.ts @@ -31,17 +31,9 @@ const EMBEDDING_MODELS: Record = { const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type AnthropicQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askAnthropicQuestions({ - askModels, - apiKey, -}: AnthropicQuestionsParams): Promise { +export async function askAnthropicQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.ANTHROPIC_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: DEFAULT_DIMENSIONS, @@ -69,35 +61,33 @@ export async function askAnthropicQuestions({ config.apiKey = key || process.env.ANTHROPIC_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - 
config.embeddingModel = embeddingModel; - config.dimensions = - EMBEDDING_MODELS[ - embeddingModel as HuggingFaceEmbeddingModelType - ].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = + EMBEDDING_MODELS[ + embeddingModel as HuggingFaceEmbeddingModelType + ].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/azure.ts b/packages/create-llama/helpers/providers/azure.ts index 8f3a3a710..ec7efa7ac 100644 --- a/packages/create-llama/helpers/providers/azure.ts +++ b/packages/create-llama/helpers/providers/azure.ts @@ -1,5 +1,5 @@ import prompts from "prompts"; -import { ModelConfigParams, ModelConfigQuestionsParams } from "."; +import { ModelConfigParams } from "."; import { questionHandlers } from "../../questions/utils"; const ALL_AZURE_OPENAI_CHAT_MODELS: Record = { @@ -51,12 +51,9 @@ const ALL_AZURE_OPENAI_EMBEDDING_MODELS: Record< const DEFAULT_MODEL = "gpt-4o"; const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large"; -export async function askAzureQuestions({ - openAiKey, - askModels, -}: ModelConfigQuestionsParams): Promise { +export async function askAzureQuestions(): Promise { const config: ModelConfigParams = { - apiKey: openAiKey || process.env.AZURE_OPENAI_KEY, + apiKey: process.env.AZURE_OPENAI_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL), @@ -66,32 +63,30 @@ export async function askAzureQuestions({ }, }; - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: getAvailableModelChoices(), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: getAvailableModelChoices(), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: getAvailableEmbeddingModelChoices(), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = getDimensions(embeddingModel); - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: getAvailableEmbeddingModelChoices(), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = getDimensions(embeddingModel); return config; } diff --git a/packages/create-llama/helpers/providers/gemini.ts b/packages/create-llama/helpers/providers/gemini.ts index 65b556c4d..2d03f52bd 100644 --- a/packages/create-llama/helpers/providers/gemini.ts +++ b/packages/create-llama/helpers/providers/gemini.ts @@ -2,7 +2,15 @@ import prompts from "prompts"; import { ModelConfigParams } from "."; import { questionHandlers, toChoice } from "../../questions/utils"; -const MODELS = ["gemini-1.5-pro-latest", "gemini-pro", "gemini-pro-vision"]; +const MODELS = [ + "gemini-2.5-pro", + "gemini-2.5-flash", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-1.5-pro-latest", + "gemini-pro", + 
"gemini-pro-vision", +]; type ModelData = { dimensions: number; }; @@ -15,17 +23,9 @@ const DEFAULT_MODEL = MODELS[0]; const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type GeminiQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askGeminiQuestions({ - askModels, - apiKey, -}: GeminiQuestionsParams): Promise { +export async function askGeminiQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.GOOGLE_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: DEFAULT_DIMENSIONS, @@ -53,32 +53,30 @@ export async function askGeminiQuestions({ config.apiKey = key || process.env.GOOGLE_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/groq.ts b/packages/create-llama/helpers/providers/groq.ts index 61b82a5dc..aaccdb2e1 100644 --- a/packages/create-llama/helpers/providers/groq.ts +++ b/packages/create-llama/helpers/providers/groq.ts @@ -71,17 +71,9 @@ const EMBEDDING_MODELS: Record = { const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type GroqQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askGroqQuestions({ - askModels, - apiKey, -}: GroqQuestionsParams): Promise { +export async function askGroqQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.GROQ_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: DEFAULT_DIMENSIONS, @@ -109,37 +101,35 @@ export async function askGroqQuestions({ config.apiKey = key || process.env.GROQ_API_KEY; } - if (askModels) { - const modelChoices = await getAvailableModelChoicesGroq(config.apiKey!); + const modelChoices = await getAvailableModelChoicesGroq(config.apiKey!); - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: modelChoices, - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: modelChoices, + initial: 0, + }, + 
questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = - EMBEDDING_MODELS[ - embeddingModel as HuggingFaceEmbeddingModelType - ].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = + EMBEDDING_MODELS[ + embeddingModel as HuggingFaceEmbeddingModelType + ].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/huggingface.ts b/packages/create-llama/helpers/providers/huggingface.ts index 039b8e323..d49837ae5 100644 --- a/packages/create-llama/helpers/providers/huggingface.ts +++ b/packages/create-llama/helpers/providers/huggingface.ts @@ -21,13 +21,7 @@ const DEFAULT_MODEL = MODELS[0]; const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type HuggingfaceQuestionsParams = { - askModels: boolean; -}; - -export async function askHuggingfaceQuestions({ - askModels, -}: HuggingfaceQuestionsParams): Promise { +export async function askHuggingfaceQuestions(): Promise { const config: ModelConfigParams = { model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, @@ -37,32 +31,30 @@ export async function askHuggingfaceQuestions({ }, }; - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which Hugging Face model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which Hugging Face model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/index.ts b/packages/create-llama/helpers/providers/index.ts index 9f29b8b06..37d25169b 100644 --- a/packages/create-llama/helpers/providers/index.ts +++ b/packages/create-llama/helpers/providers/index.ts @@ -1,6 +1,6 @@ import prompts from "prompts"; import { questionHandlers } from "../../questions/utils"; -import { ModelConfig, ModelProvider, TemplateFramework } from "../types"; +import { ModelConfig, TemplateFramework } from "../types"; import { askAnthropicQuestions } from "./anthropic"; import { 
askAzureQuestions } from "./azure"; import { askGeminiQuestions } from "./gemini"; @@ -11,81 +11,68 @@ import { askMistralQuestions } from "./mistral"; import { askOllamaQuestions } from "./ollama"; import { askOpenAIQuestions } from "./openai"; -const DEFAULT_MODEL_PROVIDER = "openai"; - export type ModelConfigQuestionsParams = { - openAiKey?: string; - askModels: boolean; framework?: TemplateFramework; }; export type ModelConfigParams = Omit; export async function askModelConfig({ - askModels, - openAiKey, framework, }: ModelConfigQuestionsParams): Promise { - let modelProvider: ModelProvider = DEFAULT_MODEL_PROVIDER; - if (askModels) { - const choices = [ - { title: "OpenAI", value: "openai" }, - { title: "Groq", value: "groq" }, - { title: "Ollama", value: "ollama" }, - { title: "Anthropic", value: "anthropic" }, - { title: "Gemini", value: "gemini" }, - { title: "Mistral", value: "mistral" }, - { title: "AzureOpenAI", value: "azure-openai" }, - ]; + const choices = [ + { title: "OpenAI", value: "openai" }, + { title: "Groq", value: "groq" }, + { title: "Ollama", value: "ollama" }, + { title: "Anthropic", value: "anthropic" }, + { title: "Gemini", value: "gemini" }, + { title: "Mistral", value: "mistral" }, + { title: "AzureOpenAI", value: "azure-openai" }, + ]; - if (framework === "fastapi") { - choices.push({ title: "T-Systems", value: "t-systems" }); - choices.push({ title: "Huggingface", value: "huggingface" }); - } - const { provider } = await prompts( - { - type: "select", - name: "provider", - message: "Which model provider would you like to use", - choices: choices, - initial: 0, - }, - questionHandlers, - ); - modelProvider = provider; + if (framework === "fastapi") { + choices.push({ title: "T-Systems", value: "t-systems" }); + choices.push({ title: "Huggingface", value: "huggingface" }); } + const { provider: modelProvider } = await prompts( + { + type: "select", + name: "provider", + message: "Which model provider would you like to use", + choices: choices, + initial: 0, + }, + questionHandlers, + ); let modelConfig: ModelConfigParams; switch (modelProvider) { case "ollama": - modelConfig = await askOllamaQuestions({ askModels }); + modelConfig = await askOllamaQuestions(); break; case "groq": - modelConfig = await askGroqQuestions({ askModels }); + modelConfig = await askGroqQuestions(); break; case "anthropic": - modelConfig = await askAnthropicQuestions({ askModels }); + modelConfig = await askAnthropicQuestions(); break; case "gemini": - modelConfig = await askGeminiQuestions({ askModels }); + modelConfig = await askGeminiQuestions(); break; case "mistral": - modelConfig = await askMistralQuestions({ askModels }); + modelConfig = await askMistralQuestions(); break; case "azure-openai": - modelConfig = await askAzureQuestions({ askModels }); + modelConfig = await askAzureQuestions(); break; case "t-systems": - modelConfig = await askLLMHubQuestions({ askModels }); + modelConfig = await askLLMHubQuestions(); break; case "huggingface": - modelConfig = await askHuggingfaceQuestions({ askModels }); + modelConfig = await askHuggingfaceQuestions(); break; default: - modelConfig = await askOpenAIQuestions({ - openAiKey, - askModels, - }); + modelConfig = await askOpenAIQuestions(); } return { ...modelConfig, diff --git a/packages/create-llama/helpers/providers/llmhub.ts b/packages/create-llama/helpers/providers/llmhub.ts index 531e5e431..a1c8b6d5e 100644 --- a/packages/create-llama/helpers/providers/llmhub.ts +++ b/packages/create-llama/helpers/providers/llmhub.ts @@ -31,17 
+31,9 @@ const LLMHUB_EMBEDDING_MODELS = [ "text-embedding-bge-m3", ]; -type LLMHubQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askLLMHubQuestions({ - askModels, - apiKey, -}: LLMHubQuestionsParams): Promise { +export async function askLLMHubQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.T_SYSTEMS_LLMHUB_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL), @@ -61,11 +53,10 @@ export async function askLLMHubQuestions({ { type: "text", name: "key", - message: askModels - ? "Please provide your LLMHub API key (or leave blank to use T_SYSTEMS_LLMHUB_API_KEY env variable):" - : "Please provide your LLMHub API key (leave blank to skip):", + message: + "Please provide your LLMHub API key (or leave blank to use T_SYSTEMS_LLMHUB_API_KEY env variable):", validate: (value: string) => { - if (askModels && !value) { + if (!value) { if (process.env.T_SYSTEMS_LLMHUB_API_KEY) { return true; } @@ -79,32 +70,30 @@ export async function askLLMHubQuestions({ config.apiKey = key || process.env.T_SYSTEMS_LLMHUB_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: await getAvailableModelChoices(false, config.apiKey), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: await getAvailableModelChoices(false, config.apiKey), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: await getAvailableModelChoices(true, config.apiKey), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = getDimensions(embeddingModel); - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: await getAvailableModelChoices(true, config.apiKey), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = getDimensions(embeddingModel); return config; } diff --git a/packages/create-llama/helpers/providers/mistral.ts b/packages/create-llama/helpers/providers/mistral.ts index 1b11ae544..d6f10c19f 100644 --- a/packages/create-llama/helpers/providers/mistral.ts +++ b/packages/create-llama/helpers/providers/mistral.ts @@ -14,17 +14,9 @@ const DEFAULT_MODEL = MODELS[0]; const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type MistralQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askMistralQuestions({ - askModels, - apiKey, -}: MistralQuestionsParams): Promise { +export async function askMistralQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.MISTRAL_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: DEFAULT_DIMENSIONS, @@ -52,32 +44,30 @@ export async function askMistralQuestions({ config.apiKey = key || process.env.MISTRAL_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which 
LLM model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/ollama.ts b/packages/create-llama/helpers/providers/ollama.ts index b9c797e0e..c48df5618 100644 --- a/packages/create-llama/helpers/providers/ollama.ts +++ b/packages/create-llama/helpers/providers/ollama.ts @@ -17,13 +17,7 @@ const EMBEDDING_MODELS: Record = { }; const DEFAULT_EMBEDDING_MODEL: string = Object.keys(EMBEDDING_MODELS)[0]; -type OllamaQuestionsParams = { - askModels: boolean; -}; - -export async function askOllamaQuestions({ - askModels, -}: OllamaQuestionsParams): Promise { +export async function askOllamaQuestions(): Promise { const config: ModelConfigParams = { model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, @@ -33,34 +27,32 @@ export async function askOllamaQuestions({ }, }; - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - await ensureModel(model); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + await ensureModel(model); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - await ensureModel(embeddingModel); - config.embeddingModel = embeddingModel; - config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + await ensureModel(embeddingModel); + config.embeddingModel = embeddingModel; + config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/openai.ts b/packages/create-llama/helpers/providers/openai.ts index c26ff4c4f..f2f6d9152 100644 --- a/packages/create-llama/helpers/providers/openai.ts +++ b/packages/create-llama/helpers/providers/openai.ts @@ -2,7 +2,7 @@ import got from "got"; import ora from "ora"; import { 
red } from "picocolors"; import prompts from "prompts"; -import { ModelConfigParams, ModelConfigQuestionsParams } from "."; +import { ModelConfigParams } from "."; import { isCI } from "../../questions"; import { questionHandlers } from "../../questions/utils"; @@ -11,12 +11,9 @@ const OPENAI_API_URL = "https://api.openai.com/v1"; const DEFAULT_MODEL = "gpt-4o-mini"; const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large"; -export async function askOpenAIQuestions({ - openAiKey, - askModels, -}: ModelConfigQuestionsParams): Promise { +export async function askOpenAIQuestions(): Promise { const config: ModelConfigParams = { - apiKey: openAiKey, + apiKey: process.env.OPENAI_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL), @@ -36,11 +33,10 @@ export async function askOpenAIQuestions({ { type: "text", name: "key", - message: askModels - ? "Please provide your OpenAI API key (or leave blank to use OPENAI_API_KEY env variable):" - : "Please provide your OpenAI API key (leave blank to skip):", + message: + "Please provide your OpenAI API key (or leave blank to use OPENAI_API_KEY env variable):", validate: (value: string) => { - if (askModels && !value) { + if (!value) { if (process.env.OPENAI_API_KEY) { return true; } @@ -54,32 +50,30 @@ export async function askOpenAIQuestions({ config.apiKey = key || process.env.OPENAI_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: await getAvailableModelChoices(false, config.apiKey), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: await getAvailableModelChoices(false, config.apiKey), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: await getAvailableModelChoices(true, config.apiKey), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = getDimensions(embeddingModel); - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: await getAvailableModelChoices(true, config.apiKey), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = getDimensions(embeddingModel); return config; } diff --git a/packages/create-llama/helpers/python.ts b/packages/create-llama/helpers/python.ts index 8c874d782..b3a571bad 100644 --- a/packages/create-llama/helpers/python.ts +++ b/packages/create-llama/helpers/python.ts @@ -209,12 +209,12 @@ const getAdditionalDependencies = ( break; case "gemini": dependencies.push({ - name: "llama-index-llms-gemini", - version: ">=0.4.0,<0.5.0", + name: "llama-index-llms-google-genai", + version: ">=0.2.0,<0.3.0", }); dependencies.push({ - name: "llama-index-embeddings-gemini", - version: ">=0.3.0,<0.4.0", + name: "llama-index-embeddings-google-genai", + version: ">=0.2.0,<0.3.0", }); break; case "mistral": @@ -571,7 +571,11 @@ const installLlamaIndexServerTemplate = async ({ root, useCase, useLlamaParse, -}: Pick) => { + modelConfig, +}: Pick< + InstallTemplateArgs, + "root" | "useCase" | "useLlamaParse" | 
"modelConfig" +>) => { if (!useCase) { console.log( red( @@ -586,6 +590,17 @@ const installLlamaIndexServerTemplate = async ({ cwd: path.join(templatesDir, "components", "use-cases", "python", useCase), }); + // copy model provider settings to app folder + await copy("**", path.join(root, "app"), { + cwd: path.join( + templatesDir, + "components", + "providers", + "python", + modelConfig.provider, + ), + }); + // Copy custom UI component code await copy(`*`, path.join(root, "components"), { parents: true, @@ -677,6 +692,7 @@ export const installPythonTemplate = async ({ root, useCase, useLlamaParse, + modelConfig, }); } else { await installLegacyPythonTemplate({ diff --git a/packages/create-llama/helpers/typescript.ts b/packages/create-llama/helpers/typescript.ts index f09c47070..807580eb1 100644 --- a/packages/create-llama/helpers/typescript.ts +++ b/packages/create-llama/helpers/typescript.ts @@ -13,7 +13,11 @@ const installLlamaIndexServerTemplate = async ({ root, useCase, vectorDb, -}: Pick) => { + modelConfig, +}: Pick< + InstallTemplateArgs, + "root" | "useCase" | "vectorDb" | "modelConfig" +>) => { if (!useCase) { console.log( red( @@ -32,6 +36,17 @@ const installLlamaIndexServerTemplate = async ({ process.exit(1); } + // copy model provider settings to app folder + await copy("**", path.join(root, "src", "app"), { + cwd: path.join( + templatesDir, + "components", + "providers", + "typescript", + modelConfig.provider, + ), + }); + await copy("**", path.join(root), { cwd: path.join( templatesDir, @@ -346,6 +361,7 @@ export const installTSTemplate = async ({ root, useCase, vectorDb, + modelConfig, }); } else { await installLegacyTSTemplate({ diff --git a/packages/create-llama/questions/ci.ts b/packages/create-llama/questions/ci.ts index 07027a301..77dd38024 100644 --- a/packages/create-llama/questions/ci.ts +++ b/packages/create-llama/questions/ci.ts @@ -1,4 +1,4 @@ -import { askModelConfig } from "../helpers/providers"; +import { getGpt41ModelConfig } from "../helpers/models"; import { QuestionArgs, QuestionResults } from "./types"; const defaults: Omit = { @@ -21,10 +21,6 @@ export async function getCIQuestionResults( return { ...defaults, ...program, - modelConfig: await askModelConfig({ - openAiKey: program.openAiKey, - askModels: false, - framework: program.framework, - }), + modelConfig: getGpt41ModelConfig(program.openAiKey), }; } diff --git a/packages/create-llama/questions/questions.ts b/packages/create-llama/questions/questions.ts index 559839975..97b571762 100644 --- a/packages/create-llama/questions/questions.ts +++ b/packages/create-llama/questions/questions.ts @@ -237,8 +237,6 @@ export const askProQuestions = async (program: QuestionArgs) => { if (!program.modelConfig) { const modelConfig = await askModelConfig({ - openAiKey: program.openAiKey, - askModels: program.askModels ?? 
false, framework: program.framework, }); program.modelConfig = modelConfig; diff --git a/packages/create-llama/questions/simple.ts b/packages/create-llama/questions/simple.ts index 56a45f3f5..cd54fcc1a 100644 --- a/packages/create-llama/questions/simple.ts +++ b/packages/create-llama/questions/simple.ts @@ -1,6 +1,7 @@ import prompts from "prompts"; import { NO_DATA_USE_CASES } from "../helpers/constant"; import { EXAMPLE_10K_SEC_FILES, EXAMPLE_FILE } from "../helpers/datasources"; +import { getGpt41ModelConfig } from "../helpers/models"; import { askModelConfig } from "../helpers/providers"; import { getTools } from "../helpers/tools"; import { ModelConfig, TemplateFramework } from "../helpers/types"; @@ -135,59 +136,59 @@ const convertAnswers = async ( args: PureQuestionArgs, answers: SimpleAnswers, ): Promise => { - const MODEL_GPT41: ModelConfig = { - provider: "openai", - apiKey: args.openAiKey, - model: "gpt-4.1", - embeddingModel: "text-embedding-3-large", - dimensions: 1536, - isConfigured(): boolean { - return !!args.openAiKey; - }, - }; + const modelGpt41 = getGpt41ModelConfig(args.openAiKey); const lookup: Record< AppType, Pick & { - modelConfig?: ModelConfig; + modelConfig: ModelConfig; } > = { agentic_rag: { template: "llamaindexserver", dataSources: [EXAMPLE_FILE], + modelConfig: modelGpt41, }, financial_report: { template: "llamaindexserver", dataSources: EXAMPLE_10K_SEC_FILES, tools: getTools(["interpreter", "document_generator"]), - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, deep_research: { template: "llamaindexserver", dataSources: EXAMPLE_10K_SEC_FILES, tools: [], - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, code_generator: { template: "llamaindexserver", dataSources: [], tools: [], - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, document_generator: { template: "llamaindexserver", dataSources: [], tools: [], - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, hitl: { template: "llamaindexserver", dataSources: [], tools: [], - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, }; const results = lookup[answers.appType]; + + let modelConfig = results.modelConfig; + if (args.askModels) { + modelConfig = await askModelConfig({ + framework: answers.language, + }); + } + return { framework: answers.language, useCase: answers.appType, @@ -196,13 +197,7 @@ const convertAnswers = async ( useLlamaParse: answers.useLlamaCloud, vectorDb: answers.useLlamaCloud ? "llamacloud" : "none", ...results, - modelConfig: - results.modelConfig ?? - (await askModelConfig({ - openAiKey: args.openAiKey, - askModels: args.askModels ?? 
false, - framework: answers.language, - })), + modelConfig, frontend: true, }; }; diff --git a/packages/create-llama/templates/components/providers/python/anthropic/settings.py b/packages/create-llama/templates/components/providers/python/anthropic/settings.py new file mode 100644 index 000000000..e5780a92e --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/anthropic/settings.py @@ -0,0 +1,21 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.fastembed import FastEmbedEmbedding +from llama_index.llms.anthropic import Anthropic + +EMBEDDING_MODEL_MAP = { + "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", + "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2", +} + + +def init_settings(): + if os.getenv("ANTHROPIC_API_KEY") is None: + raise RuntimeError("ANTHROPIC_API_KEY is missing in environment variables") + Settings.llm = Anthropic(model=os.getenv("MODEL") or "claude-3-sonnet") + # This will download the model automatically if it is not already downloaded + embed_model_name = EMBEDDING_MODEL_MAP[ + os.getenv("EMBEDDING_MODEL") or "all-MiniLM-L6-v2" + ] + Settings.embed_model = FastEmbedEmbedding(model_name=embed_model_name) diff --git a/packages/create-llama/templates/components/providers/python/azure-openai/settings.py b/packages/create-llama/templates/components/providers/python/azure-openai/settings.py new file mode 100644 index 000000000..fe7dd1e3a --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/azure-openai/settings.py @@ -0,0 +1,40 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding +from llama_index.llms.azure_openai import AzureOpenAI + + +def init_settings(): + api_key = os.getenv("AZURE_OPENAI_API_KEY") + endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + llm_deployment = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT") + embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") + api_version = os.getenv("AZURE_OPENAI_API_VERSION") + if api_key is None: + raise RuntimeError("AZURE_OPENAI_API_KEY is missing in environment variables") + if endpoint is None: + raise RuntimeError("AZURE_OPENAI_ENDPOINT is missing in environment variables") + if llm_deployment is None: + raise RuntimeError( + "AZURE_OPENAI_LLM_DEPLOYMENT is missing in environment variables" + ) + if embedding_deployment is None: + raise RuntimeError( + "AZURE_OPENAI_EMBEDDING_DEPLOYMENT is missing in environment variables" + ) + + azure_config = { + "api_key": api_key, + "azure_endpoint": endpoint, + "api_version": api_version, + } + + Settings.llm = AzureOpenAI( + model="gpt-4.1", deployment_name=llm_deployment, **azure_config + ) + Settings.embed_model = AzureOpenAIEmbedding( + model="text-embedding-3-large", + deployment_name=embedding_deployment, + **azure_config, + ) diff --git a/packages/create-llama/templates/components/providers/python/gemini/settings.py b/packages/create-llama/templates/components/providers/python/gemini/settings.py new file mode 100644 index 000000000..d31ba0bf2 --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/gemini/settings.py @@ -0,0 +1,14 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.google_genai import GoogleGenAIEmbedding +from llama_index.llms.google_genai import GoogleGenAI + + +def init_settings(): + if os.getenv("GOOGLE_API_KEY") is None: + raise RuntimeError("GOOGLE_API_KEY is missing in environment variables") + 
Settings.llm = GoogleGenAI(model=os.getenv("MODEL") or "gemini-2.0-flash") + Settings.embed_model = GoogleGenAIEmbedding( + model=os.getenv("EMBEDDING_MODEL") or "text-embedding-004" + ) diff --git a/packages/create-llama/templates/components/providers/python/groq/settings.py b/packages/create-llama/templates/components/providers/python/groq/settings.py new file mode 100644 index 000000000..194fee181 --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/groq/settings.py @@ -0,0 +1,21 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.fastembed import FastEmbedEmbedding +from llama_index.llms.groq import Groq + +EMBEDDING_MODEL_MAP = { + "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", + "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2", +} + + +def init_settings(): + if os.getenv("GROQ_API_KEY") is None: + raise RuntimeError("GROQ_API_KEY is missing in environment variables") + Settings.llm = Groq(model=os.getenv("MODEL") or "llama-3.1-8b-instant") + # This will download the model automatically if it is not already downloaded + embed_model_name = EMBEDDING_MODEL_MAP[ + os.getenv("EMBEDDING_MODEL") or "all-MiniLM-L6-v2" + ] + Settings.embed_model = FastEmbedEmbedding(model_name=embed_model_name) diff --git a/packages/create-llama/templates/components/providers/python/huggingface/settings.py b/packages/create-llama/templates/components/providers/python/huggingface/settings.py new file mode 100644 index 000000000..f5459ff7b --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/huggingface/settings.py @@ -0,0 +1,10 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.llms.huggingface import HuggingFaceLLM + + +def init_settings(): + Settings.llm = HuggingFaceLLM(model_name=os.getenv("MODEL")) + Settings.embed_model = HuggingFaceEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) diff --git a/packages/create-llama/templates/components/providers/python/ollama/settings.py b/packages/create-llama/templates/components/providers/python/ollama/settings.py new file mode 100644 index 000000000..91cd909d7 --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/ollama/settings.py @@ -0,0 +1,16 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.ollama import OllamaEmbedding +from llama_index.llms.ollama import Ollama + + +def init_settings(): + if os.getenv("OLLAMA_BASE_URL") is None: + raise RuntimeError("OLLAMA_BASE_URL is missing in environment variables") + base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" + llm_model = os.getenv("MODEL") or "llama3.1" + embed_model = os.getenv("EMBEDDING_MODEL") or "nomic-embed-text" + + Settings.llm = Ollama(model=llm_model, base_url=base_url) + Settings.embed_model = OllamaEmbedding(model=embed_model, base_url=base_url) diff --git a/packages/create-llama/templates/components/providers/python/openai/settings.py b/packages/create-llama/templates/components/providers/python/openai/settings.py new file mode 100644 index 000000000..e648ae87f --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/openai/settings.py @@ -0,0 +1,14 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.llms.openai import OpenAI + + +def init_settings(): + if os.getenv("OPENAI_API_KEY") is None: + raise 
RuntimeError("OPENAI_API_KEY is missing in environment variables") + Settings.llm = OpenAI(model=os.getenv("MODEL") or "gpt-4.1") + Settings.embed_model = OpenAIEmbedding( + model=os.getenv("EMBEDDING_MODEL") or "text-embedding-3-large" + ) diff --git a/packages/create-llama/templates/components/providers/python/t-systems/settings.py b/packages/create-llama/templates/components/providers/python/t-systems/settings.py new file mode 100644 index 000000000..c144553fc --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/t-systems/settings.py @@ -0,0 +1,72 @@ +import logging +import os +from typing import Dict + +from llama_index.core.settings import Settings +from llama_index.embeddings.openai import OpenAIEmbedding + +logger = logging.getLogger(__name__) + +DEFAULT_MODEL = "gpt-3.5-turbo" +DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large" + + +class TSIEmbedding(OpenAIEmbedding): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._query_engine = self._text_engine = self.model_name + + +def llm_config_from_env() -> Dict: + from llama_index.core.constants import DEFAULT_TEMPERATURE + + model = os.getenv("MODEL", DEFAULT_MODEL) + temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) + max_tokens = os.getenv("LLM_MAX_TOKENS") + api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") + api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") + + config = { + "model": model, + "api_key": api_key, + "api_base": api_base, + "temperature": float(temperature), + "max_tokens": int(max_tokens) if max_tokens is not None else None, + } + return config + + +def embedding_config_from_env() -> Dict: + from llama_index.core.constants import DEFAULT_EMBEDDING_DIM + + model = os.getenv("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL) + dimension = os.getenv("EMBEDDING_DIM", DEFAULT_EMBEDDING_DIM) + api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") + api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") + + config = { + "model_name": model, + "dimension": int(dimension) if dimension is not None else None, + "api_key": api_key, + "api_base": api_base, + } + return config + + +def init_settings(): + try: + from llama_index.llms.openai_like import OpenAILike + except ImportError: + logger.error("Failed to import OpenAILike. 
Make sure llama_index is installed.") + raise + + llm_configs = llm_config_from_env() + embedding_configs = embedding_config_from_env() + + Settings.embed_model = TSIEmbedding(**embedding_configs) + Settings.llm = OpenAILike( + **llm_configs, + is_chat_model=True, + is_function_calling_model=False, + context_window=4096, + ) diff --git a/packages/create-llama/templates/components/providers/typescript/anthropic/provider.ts b/packages/create-llama/templates/components/providers/typescript/anthropic/settings.ts similarity index 94% rename from packages/create-llama/templates/components/providers/typescript/anthropic/provider.ts rename to packages/create-llama/templates/components/providers/typescript/anthropic/settings.ts index 750d0e38f..81085a924 100644 --- a/packages/create-llama/templates/components/providers/typescript/anthropic/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/anthropic/settings.ts @@ -5,7 +5,7 @@ import { import { HuggingFaceEmbedding } from "@llamaindex/huggingface"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { const embedModelMap: Record = { "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2", "all-mpnet-base-v2": "Xenova/all-mpnet-base-v2", diff --git a/packages/create-llama/templates/components/providers/typescript/azure-openai/provider.ts b/packages/create-llama/templates/components/providers/typescript/azure-openai/settings.ts similarity index 97% rename from packages/create-llama/templates/components/providers/typescript/azure-openai/provider.ts rename to packages/create-llama/templates/components/providers/typescript/azure-openai/settings.ts index 739c645dc..bb79df638 100644 --- a/packages/create-llama/templates/components/providers/typescript/azure-openai/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/azure-openai/settings.ts @@ -1,7 +1,7 @@ import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { // Map Azure OpenAI model names to OpenAI model names (only for TS) const AZURE_OPENAI_MODEL_MAP: Record = { "gpt-35-turbo": "gpt-3.5-turbo", diff --git a/packages/create-llama/templates/components/providers/typescript/gemini/provider.ts b/packages/create-llama/templates/components/providers/typescript/gemini/settings.ts similarity index 91% rename from packages/create-llama/templates/components/providers/typescript/gemini/provider.ts rename to packages/create-llama/templates/components/providers/typescript/gemini/settings.ts index cb4f6d8f9..aa250c94b 100644 --- a/packages/create-llama/templates/components/providers/typescript/gemini/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/gemini/settings.ts @@ -6,7 +6,7 @@ import { } from "@llamaindex/google"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { Settings.llm = new Gemini({ model: process.env.MODEL as GEMINI_MODEL, }); diff --git a/packages/create-llama/templates/components/providers/typescript/groq/provider.ts b/packages/create-llama/templates/components/providers/typescript/groq/settings.ts similarity index 93% rename from packages/create-llama/templates/components/providers/typescript/groq/provider.ts rename to packages/create-llama/templates/components/providers/typescript/groq/settings.ts index fe597c802..e1426ee25 100644 --- 
a/packages/create-llama/templates/components/providers/typescript/groq/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/groq/settings.ts @@ -2,7 +2,7 @@ import { Groq } from "@llamaindex/groq"; import { HuggingFaceEmbedding } from "@llamaindex/huggingface"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { const embedModelMap: Record = { "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2", "all-mpnet-base-v2": "Xenova/all-mpnet-base-v2", diff --git a/packages/create-llama/templates/components/providers/typescript/mistral/provider.ts b/packages/create-llama/templates/components/providers/typescript/mistral/settings.ts similarity index 92% rename from packages/create-llama/templates/components/providers/typescript/mistral/provider.ts rename to packages/create-llama/templates/components/providers/typescript/mistral/settings.ts index fe91620f6..68e01cfa8 100644 --- a/packages/create-llama/templates/components/providers/typescript/mistral/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/mistral/settings.ts @@ -6,7 +6,7 @@ import { } from "@llamaindex/mistral"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { Settings.llm = new MistralAI({ model: process.env.MODEL as keyof typeof ALL_AVAILABLE_MISTRAL_MODELS, }); diff --git a/packages/create-llama/templates/components/providers/typescript/ollama/provider.ts b/packages/create-llama/templates/components/providers/typescript/ollama/settings.ts similarity index 91% rename from packages/create-llama/templates/components/providers/typescript/ollama/provider.ts rename to packages/create-llama/templates/components/providers/typescript/ollama/settings.ts index a26b8da1f..d13614737 100644 --- a/packages/create-llama/templates/components/providers/typescript/ollama/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/ollama/settings.ts @@ -1,7 +1,7 @@ import { Ollama, OllamaEmbedding } from "@llamaindex/ollama"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { const config = { host: process.env.OLLAMA_BASE_URL ?? "http://127.0.0.1:11434", }; diff --git a/packages/create-llama/templates/components/providers/typescript/openai/provider.ts b/packages/create-llama/templates/components/providers/typescript/openai/settings.ts similarity index 93% rename from packages/create-llama/templates/components/providers/typescript/openai/provider.ts rename to packages/create-llama/templates/components/providers/typescript/openai/settings.ts index 09c49ec8a..1be026d65 100644 --- a/packages/create-llama/templates/components/providers/typescript/openai/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/openai/settings.ts @@ -1,7 +1,7 @@ import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { Settings.llm = new OpenAI({ model: process.env.MODEL ?? 
"gpt-4o-mini", maxTokens: process.env.LLM_MAX_TOKENS diff --git a/packages/create-llama/templates/components/settings/typescript/settings.ts b/packages/create-llama/templates/components/settings/typescript/settings.ts deleted file mode 100644 index ce2c3b94d..000000000 --- a/packages/create-llama/templates/components/settings/typescript/settings.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { Settings } from "llamaindex"; -import { setupProvider } from "./provider"; - -const CHUNK_SIZE = 512; -const CHUNK_OVERLAP = 20; - -export const initSettings = async () => { - console.log(`Using '${process.env.MODEL_PROVIDER}' model provider`); - - if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) { - throw new Error("'MODEL' and 'EMBEDDING_MODEL' env variables must be set."); - } - - Settings.chunkSize = CHUNK_SIZE; - Settings.chunkOverlap = CHUNK_OVERLAP; - - setupProvider(); -}; diff --git a/packages/create-llama/templates/types/llamaindexserver/fastapi/generate.py b/packages/create-llama/templates/types/llamaindexserver/fastapi/generate.py index 687c5b991..2b792288f 100644 --- a/packages/create-llama/templates/types/llamaindexserver/fastapi/generate.py +++ b/packages/create-llama/templates/types/llamaindexserver/fastapi/generate.py @@ -2,7 +2,6 @@ import os from dotenv import load_dotenv -from llama_index.llms.openai import OpenAI logging.basicConfig(level=logging.INFO) logger = logging.getLogger() @@ -44,8 +43,13 @@ def generate_ui_for_workflow(): """ import asyncio + from app.settings import init_settings + from llama_index.core.settings import Settings from main import COMPONENT_DIR + load_dotenv() + init_settings() + # To generate UI components for additional event types, # import the corresponding data model (e.g., MyCustomEventData) # and run the generate_ui_for_workflow function with the imported model. 
@@ -56,8 +60,9 @@ def generate_ui_for_workflow(): raise ImportError("Couldn't generate UI component for the current workflow.") from llama_index.server.gen_ui import generate_event_component - # works also well with Claude 3.7 Sonnet or Gemini Pro 2.5 - llm = OpenAI(model="gpt-4.1") - code = asyncio.run(generate_event_component(event_cls=UIEventData, llm=llm)) + # works well with OpenAI gpt-4.1, Claude 3.7 Sonnet or Gemini Pro 2.5 + code = asyncio.run( + generate_event_component(event_cls=UIEventData, llm=Settings.llm) + ) with open(f"{COMPONENT_DIR}/ui_event.jsx", "w") as f: f.write(code) diff --git a/python/llama-index-server/llama_index/server/api/routers/chat.py b/python/llama-index-server/llama_index/server/api/routers/chat.py index 57580d7eb..4c0abb9e4 100644 --- a/python/llama-index-server/llama_index/server/api/routers/chat.py +++ b/python/llama-index-server/llama_index/server/api/routers/chat.py @@ -6,7 +6,6 @@ from fastapi import APIRouter, BackgroundTasks, HTTPException from fastapi.responses import StreamingResponse - from llama_index.core.agent.workflow.workflow_events import ( AgentInput, AgentSetup, @@ -36,6 +35,7 @@ from llama_index.server.services.file import FileService from llama_index.server.services.llamacloud import LlamaCloudFileService from llama_index.server.services.workflow import HITLWorkflowService +from pydantic_core import PydanticSerializationError def chat_router( @@ -193,7 +193,12 @@ async def _text_stream( else: # Ignore unnecessary agent workflow events if not isinstance(event, (AgentInput, AgentSetup)): - yield VercelStreamResponse.convert_data(event.model_dump()) + try: + yield VercelStreamResponse.convert_data(event.model_dump()) + except PydanticSerializationError: + logger.warning(f"Error serializing event: {event}") + # Skip events that can't be serialized + pass await handler.wait_for_completion() except asyncio.CancelledError:
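For context on the last hunk, here is a minimal standalone sketch (not part of this PR) of the failure mode the new try/except in `chat_router` guards against: an event whose field serializer fails inside `model_dump()`. The `ArtifactEvent` class, its field, and its serializer are hypothetical and not taken from llama-index-server; only `PydanticSerializationError` and `model_dump()` come from pydantic itself.

```python
# Hypothetical event model illustrating a PydanticSerializationError during
# model_dump(); names are illustrative only, not from llama-index-server.
from typing import Any

from pydantic import BaseModel, field_serializer
from pydantic_core import PydanticSerializationError


class ArtifactEvent(BaseModel):
    """Hypothetical workflow event carrying a payload that cannot be serialized."""

    name: str
    payload: Any  # e.g. a client object or file handle attached by a tool

    @field_serializer("payload")
    def _serialize_payload(self, value: Any) -> dict:
        # pydantic wraps exceptions raised here into PydanticSerializationError
        raise TypeError(f"cannot serialize {type(value).__name__}")


event = ArtifactEvent(name="report", payload=object())

try:
    data = event.model_dump()  # same call the streaming loop makes
except PydanticSerializationError as exc:
    # mirrors chat.py: log a warning and skip the event instead of breaking the stream
    print(f"Error serializing event: {exc}")
    data = None
```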