diff --git a/.changeset/cyan-turkeys-sneeze.md b/.changeset/cyan-turkeys-sneeze.md new file mode 100644 index 000000000..ddafcb422 --- /dev/null +++ b/.changeset/cyan-turkeys-sneeze.md @@ -0,0 +1,5 @@ +--- +"create-llama": patch +--- + +Feat: re-add --ask-models diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index dc17cddd4..61fa7a8d2 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -23,7 +23,7 @@ jobs: os: [macos-latest, windows-latest, ubuntu-22.04] frameworks: ["fastapi"] datasources: ["--no-files", "--example-file", "--llamacloud"] - template-types: ["streaming", "llamaindexserver"] + template-types: ["llamaindexserver"] defaults: run: shell: bash @@ -105,7 +105,7 @@ jobs: os: [macos-latest, windows-latest, ubuntu-22.04] frameworks: ["nextjs"] datasources: ["--no-files", "--example-file", "--llamacloud"] - template-types: ["streaming", "llamaindexserver"] + template-types: ["llamaindexserver"] defaults: run: shell: bash diff --git a/packages/create-llama/e2e/shared/reflex_template.spec.ts b/packages/create-llama/e2e/shared/reflex_template.spec.ts deleted file mode 100644 index 28d0db466..000000000 --- a/packages/create-llama/e2e/shared/reflex_template.spec.ts +++ /dev/null @@ -1,63 +0,0 @@ -import { expect, test } from "@playwright/test"; -import { ChildProcess } from "child_process"; -import fs from "fs"; -import path from "path"; -import { TemplateFramework, TemplateUseCase } from "../../helpers"; -import { createTestDir, runCreateLlama } from "../utils"; - -const templateFramework: TemplateFramework = process.env.FRAMEWORK - ? (process.env.FRAMEWORK as TemplateFramework) - : "fastapi"; -const dataSource: string = process.env.DATASOURCE - ? process.env.DATASOURCE - : "--example-file"; -const templateUseCases: TemplateUseCase[] = ["extractor", "contract_review"]; - -// The reflex template currently only works with FastAPI and files (and not on Windows) -if ( - process.platform !== "win32" && - templateFramework === "fastapi" && - dataSource === "--example-file" -) { - for (const useCase of templateUseCases) { - test.describe(`Test reflex template ${useCase} ${templateFramework} ${dataSource}`, async () => { - let appPort: number; - let name: string; - let appProcess: ChildProcess; - let cwd: string; - - // Create reflex app - test.beforeAll(async () => { - cwd = await createTestDir(); - appPort = Math.floor(Math.random() * 10000) + 10000; - const result = await runCreateLlama({ - cwd, - templateType: "reflex", - templateFramework: "fastapi", - dataSource: "--example-file", - vectorDb: "none", - port: appPort, - postInstallAction: "runApp", - useCase, - }); - name = result.projectName; - appProcess = result.appProcess; - }); - - test.afterAll(async () => { - appProcess.kill(); - }); - - test("App folder should exist", async () => { - const dirExists = fs.existsSync(path.join(cwd, name)); - expect(dirExists).toBeTruthy(); - }); - test("Frontend should have a title", async ({ page }) => { - await page.goto(`http://localhost:${appPort}`); - await expect(page.getByText("Built by LlamaIndex")).toBeVisible({ - timeout: 2000 * 60, - }); - }); - }); - } -} diff --git a/packages/create-llama/e2e/shared/streaming_template.spec.ts b/packages/create-llama/e2e/shared/streaming_template.spec.ts deleted file mode 100644 index 7055a1631..000000000 --- a/packages/create-llama/e2e/shared/streaming_template.spec.ts +++ /dev/null @@ -1,127 +0,0 @@ -import { expect, test } from "@playwright/test"; -import { ChildProcess } from "child_process"; -import fs from 
"fs"; -import path from "path"; -import type { - TemplateFramework, - TemplatePostInstallAction, - TemplateUI, -} from "../../helpers"; -import { createTestDir, runCreateLlama, type AppType } from "../utils"; - -const templateFramework: TemplateFramework = process.env.FRAMEWORK - ? (process.env.FRAMEWORK as TemplateFramework) - : "fastapi"; -const dataSource: string = process.env.DATASOURCE - ? process.env.DATASOURCE - : "--example-file"; -const templateUI: TemplateUI = "shadcn"; -const templatePostInstallAction: TemplatePostInstallAction = "runApp"; - -const llamaCloudProjectName = "create-llama"; -const llamaCloudIndexName = "e2e-test"; - -const appType: AppType = templateFramework === "fastapi" ? "--frontend" : ""; -const userMessage = - dataSource !== "--no-files" ? "Physical standard for letters" : "Hello"; - -test.describe(`Test streaming template ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => { - const isNode18 = process.version.startsWith("v18"); - const isLlamaCloud = dataSource === "--llamacloud"; - // llamacloud is using File API which is not supported on node 18 - if (isNode18 && isLlamaCloud) { - test.skip(true, "Skipping tests for Node 18 and LlamaCloud data source"); - } - - let port: number; - let cwd: string; - let name: string; - let appProcess: ChildProcess; - // Only test without using vector db for now - const vectorDb = "none"; - - test.beforeAll(async () => { - port = Math.floor(Math.random() * 10000) + 10000; - cwd = await createTestDir(); - const result = await runCreateLlama({ - cwd, - templateType: "streaming", - templateFramework, - dataSource, - vectorDb, - port, - postInstallAction: templatePostInstallAction, - templateUI, - appType, - llamaCloudProjectName, - llamaCloudIndexName, - }); - name = result.projectName; - appProcess = result.appProcess; - }); - - test("App folder should exist", async () => { - const dirExists = fs.existsSync(path.join(cwd, name)); - expect(dirExists).toBeTruthy(); - }); - - test("Frontend should have a title", async ({ page }) => { - test.skip( - templatePostInstallAction !== "runApp" || templateFramework === "express", - ); - await page.goto(`http://localhost:${port}`); - await expect(page.getByText("Built by LlamaIndex")).toBeVisible(); - }); - - test("Frontend should be able to submit a message and receive a response", async ({ - page, - }) => { - test.skip( - templatePostInstallAction !== "runApp" || templateFramework === "express", - ); - await page.goto(`http://localhost:${port}`); - await page.fill("form textarea", userMessage); - const [response] = await Promise.all([ - page.waitForResponse( - (res) => { - return res.url().includes("/api/chat") && res.status() === 200; - }, - { - timeout: 1000 * 60, - }, - ), - page.click("form button[type=submit]"), - ]); - const text = await response.text(); - console.log("AI response when submitting message: ", text); - expect(response.ok()).toBeTruthy(); - }); - - test("Backend frameworks should response when calling non-streaming chat API", async ({ - request, - }) => { - test.skip(templatePostInstallAction !== "runApp"); - test.skip(templateFramework === "nextjs"); - const response = await request.post( - `http://localhost:${port}/api/chat/request`, - { - data: { - messages: [ - { - role: "user", - content: userMessage, - }, - ], - }, - }, - ); - const text = await response.text(); - console.log("AI response when calling API: ", text); - expect(response.ok()).toBeTruthy(); - }); - - // clean processes - test.afterAll(async () => 
{ - appProcess?.kill(); - }); -}); diff --git a/packages/create-llama/helpers/env-variables.ts b/packages/create-llama/helpers/env-variables.ts index 58c361ed8..44d04e1d7 100644 --- a/packages/create-llama/helpers/env-variables.ts +++ b/packages/create-llama/helpers/env-variables.ts @@ -254,11 +254,6 @@ const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => { description: "Name of the embedding model to use.", value: modelConfig.embeddingModel, }, - { - name: "EMBEDDING_DIM", - description: "Dimension of the embedding model to use.", - value: modelConfig.dimensions.toString(), - }, { name: "CONVERSATION_STARTERS", description: "The questions to help users get started (multi-line).", @@ -597,16 +592,9 @@ export const createBackendEnvFile = async ( ...getFrameworkEnvs(opts.framework, opts.template, opts.port), // Add environment variables of each component ...(opts.template === "llamaindexserver" - ? [ - { - name: "OPENAI_API_KEY", - description: "The OpenAI API key to use.", - value: opts.modelConfig.apiKey, - }, - ] + ? [...getModelEnvs(opts.modelConfig)] : [ // don't use this stuff for llama-indexserver - ...getModelEnvs(opts.modelConfig), ...getEngineEnvs(), ...getTemplateEnvs(opts.template), ...getObservabilityEnvs(opts.observability), diff --git a/packages/create-llama/helpers/models.ts b/packages/create-llama/helpers/models.ts new file mode 100644 index 000000000..4963810c4 --- /dev/null +++ b/packages/create-llama/helpers/models.ts @@ -0,0 +1,12 @@ +import { ModelConfig } from "./types"; + +export const getGpt41ModelConfig = (openAiKey?: string): ModelConfig => ({ + provider: "openai", + apiKey: openAiKey, + model: "gpt-4.1", + embeddingModel: "text-embedding-3-large", + dimensions: 1536, + isConfigured(): boolean { + return !!openAiKey; + }, +}); diff --git a/packages/create-llama/helpers/providers/anthropic.ts b/packages/create-llama/helpers/providers/anthropic.ts index 080ffdeae..01092eb0f 100644 --- a/packages/create-llama/helpers/providers/anthropic.ts +++ b/packages/create-llama/helpers/providers/anthropic.ts @@ -31,17 +31,9 @@ const EMBEDDING_MODELS: Record = { const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type AnthropicQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askAnthropicQuestions({ - askModels, - apiKey, -}: AnthropicQuestionsParams): Promise { +export async function askAnthropicQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.ANTHROPIC_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: DEFAULT_DIMENSIONS, @@ -69,35 +61,33 @@ export async function askAnthropicQuestions({ config.apiKey = key || process.env.ANTHROPIC_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - 
config.embeddingModel = embeddingModel; - config.dimensions = - EMBEDDING_MODELS[ - embeddingModel as HuggingFaceEmbeddingModelType - ].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = + EMBEDDING_MODELS[ + embeddingModel as HuggingFaceEmbeddingModelType + ].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/azure.ts b/packages/create-llama/helpers/providers/azure.ts index 8f3a3a710..ec7efa7ac 100644 --- a/packages/create-llama/helpers/providers/azure.ts +++ b/packages/create-llama/helpers/providers/azure.ts @@ -1,5 +1,5 @@ import prompts from "prompts"; -import { ModelConfigParams, ModelConfigQuestionsParams } from "."; +import { ModelConfigParams } from "."; import { questionHandlers } from "../../questions/utils"; const ALL_AZURE_OPENAI_CHAT_MODELS: Record = { @@ -51,12 +51,9 @@ const ALL_AZURE_OPENAI_EMBEDDING_MODELS: Record< const DEFAULT_MODEL = "gpt-4o"; const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large"; -export async function askAzureQuestions({ - openAiKey, - askModels, -}: ModelConfigQuestionsParams): Promise { +export async function askAzureQuestions(): Promise { const config: ModelConfigParams = { - apiKey: openAiKey || process.env.AZURE_OPENAI_KEY, + apiKey: process.env.AZURE_OPENAI_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL), @@ -66,32 +63,30 @@ export async function askAzureQuestions({ }, }; - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: getAvailableModelChoices(), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: getAvailableModelChoices(), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: getAvailableEmbeddingModelChoices(), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = getDimensions(embeddingModel); - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: getAvailableEmbeddingModelChoices(), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = getDimensions(embeddingModel); return config; } diff --git a/packages/create-llama/helpers/providers/gemini.ts b/packages/create-llama/helpers/providers/gemini.ts index 65b556c4d..2d03f52bd 100644 --- a/packages/create-llama/helpers/providers/gemini.ts +++ b/packages/create-llama/helpers/providers/gemini.ts @@ -2,7 +2,15 @@ import prompts from "prompts"; import { ModelConfigParams } from "."; import { questionHandlers, toChoice } from "../../questions/utils"; -const MODELS = ["gemini-1.5-pro-latest", "gemini-pro", "gemini-pro-vision"]; +const MODELS = [ + "gemini-2.5-pro", + "gemini-2.5-flash", + "gemini-2.0-flash", + "gemini-2.0-flash-lite", + "gemini-1.5-pro-latest", + "gemini-pro", + 
"gemini-pro-vision", +]; type ModelData = { dimensions: number; }; @@ -15,17 +23,9 @@ const DEFAULT_MODEL = MODELS[0]; const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type GeminiQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askGeminiQuestions({ - askModels, - apiKey, -}: GeminiQuestionsParams): Promise { +export async function askGeminiQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.GOOGLE_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: DEFAULT_DIMENSIONS, @@ -53,32 +53,30 @@ export async function askGeminiQuestions({ config.apiKey = key || process.env.GOOGLE_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/groq.ts b/packages/create-llama/helpers/providers/groq.ts index 61b82a5dc..aaccdb2e1 100644 --- a/packages/create-llama/helpers/providers/groq.ts +++ b/packages/create-llama/helpers/providers/groq.ts @@ -71,17 +71,9 @@ const EMBEDDING_MODELS: Record = { const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type GroqQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askGroqQuestions({ - askModels, - apiKey, -}: GroqQuestionsParams): Promise { +export async function askGroqQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.GROQ_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: DEFAULT_DIMENSIONS, @@ -109,37 +101,35 @@ export async function askGroqQuestions({ config.apiKey = key || process.env.GROQ_API_KEY; } - if (askModels) { - const modelChoices = await getAvailableModelChoicesGroq(config.apiKey!); + const modelChoices = await getAvailableModelChoicesGroq(config.apiKey!); - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: modelChoices, - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: modelChoices, + initial: 0, + }, + 
questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = - EMBEDDING_MODELS[ - embeddingModel as HuggingFaceEmbeddingModelType - ].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = + EMBEDDING_MODELS[ + embeddingModel as HuggingFaceEmbeddingModelType + ].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/huggingface.ts b/packages/create-llama/helpers/providers/huggingface.ts index 039b8e323..d49837ae5 100644 --- a/packages/create-llama/helpers/providers/huggingface.ts +++ b/packages/create-llama/helpers/providers/huggingface.ts @@ -21,13 +21,7 @@ const DEFAULT_MODEL = MODELS[0]; const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type HuggingfaceQuestionsParams = { - askModels: boolean; -}; - -export async function askHuggingfaceQuestions({ - askModels, -}: HuggingfaceQuestionsParams): Promise { +export async function askHuggingfaceQuestions(): Promise { const config: ModelConfigParams = { model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, @@ -37,32 +31,30 @@ export async function askHuggingfaceQuestions({ }, }; - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which Hugging Face model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which Hugging Face model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/index.ts b/packages/create-llama/helpers/providers/index.ts index 9f29b8b06..37d25169b 100644 --- a/packages/create-llama/helpers/providers/index.ts +++ b/packages/create-llama/helpers/providers/index.ts @@ -1,6 +1,6 @@ import prompts from "prompts"; import { questionHandlers } from "../../questions/utils"; -import { ModelConfig, ModelProvider, TemplateFramework } from "../types"; +import { ModelConfig, TemplateFramework } from "../types"; import { askAnthropicQuestions } from "./anthropic"; import { 
askAzureQuestions } from "./azure"; import { askGeminiQuestions } from "./gemini"; @@ -11,81 +11,68 @@ import { askMistralQuestions } from "./mistral"; import { askOllamaQuestions } from "./ollama"; import { askOpenAIQuestions } from "./openai"; -const DEFAULT_MODEL_PROVIDER = "openai"; - export type ModelConfigQuestionsParams = { - openAiKey?: string; - askModels: boolean; framework?: TemplateFramework; }; export type ModelConfigParams = Omit; export async function askModelConfig({ - askModels, - openAiKey, framework, }: ModelConfigQuestionsParams): Promise { - let modelProvider: ModelProvider = DEFAULT_MODEL_PROVIDER; - if (askModels) { - const choices = [ - { title: "OpenAI", value: "openai" }, - { title: "Groq", value: "groq" }, - { title: "Ollama", value: "ollama" }, - { title: "Anthropic", value: "anthropic" }, - { title: "Gemini", value: "gemini" }, - { title: "Mistral", value: "mistral" }, - { title: "AzureOpenAI", value: "azure-openai" }, - ]; + const choices = [ + { title: "OpenAI", value: "openai" }, + { title: "Groq", value: "groq" }, + { title: "Ollama", value: "ollama" }, + { title: "Anthropic", value: "anthropic" }, + { title: "Gemini", value: "gemini" }, + { title: "Mistral", value: "mistral" }, + { title: "AzureOpenAI", value: "azure-openai" }, + ]; - if (framework === "fastapi") { - choices.push({ title: "T-Systems", value: "t-systems" }); - choices.push({ title: "Huggingface", value: "huggingface" }); - } - const { provider } = await prompts( - { - type: "select", - name: "provider", - message: "Which model provider would you like to use", - choices: choices, - initial: 0, - }, - questionHandlers, - ); - modelProvider = provider; + if (framework === "fastapi") { + choices.push({ title: "T-Systems", value: "t-systems" }); + choices.push({ title: "Huggingface", value: "huggingface" }); } + const { provider: modelProvider } = await prompts( + { + type: "select", + name: "provider", + message: "Which model provider would you like to use", + choices: choices, + initial: 0, + }, + questionHandlers, + ); let modelConfig: ModelConfigParams; switch (modelProvider) { case "ollama": - modelConfig = await askOllamaQuestions({ askModels }); + modelConfig = await askOllamaQuestions(); break; case "groq": - modelConfig = await askGroqQuestions({ askModels }); + modelConfig = await askGroqQuestions(); break; case "anthropic": - modelConfig = await askAnthropicQuestions({ askModels }); + modelConfig = await askAnthropicQuestions(); break; case "gemini": - modelConfig = await askGeminiQuestions({ askModels }); + modelConfig = await askGeminiQuestions(); break; case "mistral": - modelConfig = await askMistralQuestions({ askModels }); + modelConfig = await askMistralQuestions(); break; case "azure-openai": - modelConfig = await askAzureQuestions({ askModels }); + modelConfig = await askAzureQuestions(); break; case "t-systems": - modelConfig = await askLLMHubQuestions({ askModels }); + modelConfig = await askLLMHubQuestions(); break; case "huggingface": - modelConfig = await askHuggingfaceQuestions({ askModels }); + modelConfig = await askHuggingfaceQuestions(); break; default: - modelConfig = await askOpenAIQuestions({ - openAiKey, - askModels, - }); + modelConfig = await askOpenAIQuestions(); } return { ...modelConfig, diff --git a/packages/create-llama/helpers/providers/llmhub.ts b/packages/create-llama/helpers/providers/llmhub.ts index 531e5e431..a1c8b6d5e 100644 --- a/packages/create-llama/helpers/providers/llmhub.ts +++ b/packages/create-llama/helpers/providers/llmhub.ts @@ -31,17 
+31,9 @@ const LLMHUB_EMBEDDING_MODELS = [ "text-embedding-bge-m3", ]; -type LLMHubQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askLLMHubQuestions({ - askModels, - apiKey, -}: LLMHubQuestionsParams): Promise { +export async function askLLMHubQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.T_SYSTEMS_LLMHUB_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL), @@ -61,11 +53,10 @@ export async function askLLMHubQuestions({ { type: "text", name: "key", - message: askModels - ? "Please provide your LLMHub API key (or leave blank to use T_SYSTEMS_LLMHUB_API_KEY env variable):" - : "Please provide your LLMHub API key (leave blank to skip):", + message: + "Please provide your LLMHub API key (or leave blank to use T_SYSTEMS_LLMHUB_API_KEY env variable):", validate: (value: string) => { - if (askModels && !value) { + if (!value) { if (process.env.T_SYSTEMS_LLMHUB_API_KEY) { return true; } @@ -79,32 +70,30 @@ export async function askLLMHubQuestions({ config.apiKey = key || process.env.T_SYSTEMS_LLMHUB_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: await getAvailableModelChoices(false, config.apiKey), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: await getAvailableModelChoices(false, config.apiKey), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: await getAvailableModelChoices(true, config.apiKey), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = getDimensions(embeddingModel); - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: await getAvailableModelChoices(true, config.apiKey), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = getDimensions(embeddingModel); return config; } diff --git a/packages/create-llama/helpers/providers/mistral.ts b/packages/create-llama/helpers/providers/mistral.ts index 1b11ae544..d6f10c19f 100644 --- a/packages/create-llama/helpers/providers/mistral.ts +++ b/packages/create-llama/helpers/providers/mistral.ts @@ -14,17 +14,9 @@ const DEFAULT_MODEL = MODELS[0]; const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0]; const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions; -type MistralQuestionsParams = { - apiKey?: string; - askModels: boolean; -}; - -export async function askMistralQuestions({ - askModels, - apiKey, -}: MistralQuestionsParams): Promise { +export async function askMistralQuestions(): Promise { const config: ModelConfigParams = { - apiKey, + apiKey: process.env.MISTRAL_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: DEFAULT_DIMENSIONS, @@ -52,32 +44,30 @@ export async function askMistralQuestions({ config.apiKey = key || process.env.MISTRAL_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which 
LLM model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/ollama.ts b/packages/create-llama/helpers/providers/ollama.ts index b9c797e0e..c48df5618 100644 --- a/packages/create-llama/helpers/providers/ollama.ts +++ b/packages/create-llama/helpers/providers/ollama.ts @@ -17,13 +17,7 @@ const EMBEDDING_MODELS: Record = { }; const DEFAULT_EMBEDDING_MODEL: string = Object.keys(EMBEDDING_MODELS)[0]; -type OllamaQuestionsParams = { - askModels: boolean; -}; - -export async function askOllamaQuestions({ - askModels, -}: OllamaQuestionsParams): Promise { +export async function askOllamaQuestions(): Promise { const config: ModelConfigParams = { model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, @@ -33,34 +27,32 @@ export async function askOllamaQuestions({ }, }; - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: MODELS.map(toChoice), - initial: 0, - }, - questionHandlers, - ); - await ensureModel(model); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: MODELS.map(toChoice), + initial: 0, + }, + questionHandlers, + ); + await ensureModel(model); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: Object.keys(EMBEDDING_MODELS).map(toChoice), - initial: 0, - }, - questionHandlers, - ); - await ensureModel(embeddingModel); - config.embeddingModel = embeddingModel; - config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: Object.keys(EMBEDDING_MODELS).map(toChoice), + initial: 0, + }, + questionHandlers, + ); + await ensureModel(embeddingModel); + config.embeddingModel = embeddingModel; + config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions; return config; } diff --git a/packages/create-llama/helpers/providers/openai.ts b/packages/create-llama/helpers/providers/openai.ts index c26ff4c4f..f2f6d9152 100644 --- a/packages/create-llama/helpers/providers/openai.ts +++ b/packages/create-llama/helpers/providers/openai.ts @@ -2,7 +2,7 @@ import got from "got"; import ora from "ora"; import { 
red } from "picocolors"; import prompts from "prompts"; -import { ModelConfigParams, ModelConfigQuestionsParams } from "."; +import { ModelConfigParams } from "."; import { isCI } from "../../questions"; import { questionHandlers } from "../../questions/utils"; @@ -11,12 +11,9 @@ const OPENAI_API_URL = "https://api.openai.com/v1"; const DEFAULT_MODEL = "gpt-4o-mini"; const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large"; -export async function askOpenAIQuestions({ - openAiKey, - askModels, -}: ModelConfigQuestionsParams): Promise { +export async function askOpenAIQuestions(): Promise { const config: ModelConfigParams = { - apiKey: openAiKey, + apiKey: process.env.OPENAI_API_KEY, model: DEFAULT_MODEL, embeddingModel: DEFAULT_EMBEDDING_MODEL, dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL), @@ -36,11 +33,10 @@ export async function askOpenAIQuestions({ { type: "text", name: "key", - message: askModels - ? "Please provide your OpenAI API key (or leave blank to use OPENAI_API_KEY env variable):" - : "Please provide your OpenAI API key (leave blank to skip):", + message: + "Please provide your OpenAI API key (or leave blank to use OPENAI_API_KEY env variable):", validate: (value: string) => { - if (askModels && !value) { + if (!value) { if (process.env.OPENAI_API_KEY) { return true; } @@ -54,32 +50,30 @@ export async function askOpenAIQuestions({ config.apiKey = key || process.env.OPENAI_API_KEY; } - if (askModels) { - const { model } = await prompts( - { - type: "select", - name: "model", - message: "Which LLM model would you like to use?", - choices: await getAvailableModelChoices(false, config.apiKey), - initial: 0, - }, - questionHandlers, - ); - config.model = model; + const { model } = await prompts( + { + type: "select", + name: "model", + message: "Which LLM model would you like to use?", + choices: await getAvailableModelChoices(false, config.apiKey), + initial: 0, + }, + questionHandlers, + ); + config.model = model; - const { embeddingModel } = await prompts( - { - type: "select", - name: "embeddingModel", - message: "Which embedding model would you like to use?", - choices: await getAvailableModelChoices(true, config.apiKey), - initial: 0, - }, - questionHandlers, - ); - config.embeddingModel = embeddingModel; - config.dimensions = getDimensions(embeddingModel); - } + const { embeddingModel } = await prompts( + { + type: "select", + name: "embeddingModel", + message: "Which embedding model would you like to use?", + choices: await getAvailableModelChoices(true, config.apiKey), + initial: 0, + }, + questionHandlers, + ); + config.embeddingModel = embeddingModel; + config.dimensions = getDimensions(embeddingModel); return config; } diff --git a/packages/create-llama/helpers/python.ts b/packages/create-llama/helpers/python.ts index 8c874d782..b3a571bad 100644 --- a/packages/create-llama/helpers/python.ts +++ b/packages/create-llama/helpers/python.ts @@ -209,12 +209,12 @@ const getAdditionalDependencies = ( break; case "gemini": dependencies.push({ - name: "llama-index-llms-gemini", - version: ">=0.4.0,<0.5.0", + name: "llama-index-llms-google-genai", + version: ">=0.2.0,<0.3.0", }); dependencies.push({ - name: "llama-index-embeddings-gemini", - version: ">=0.3.0,<0.4.0", + name: "llama-index-embeddings-google-genai", + version: ">=0.2.0,<0.3.0", }); break; case "mistral": @@ -571,7 +571,11 @@ const installLlamaIndexServerTemplate = async ({ root, useCase, useLlamaParse, -}: Pick) => { + modelConfig, +}: Pick< + InstallTemplateArgs, + "root" | "useCase" | "useLlamaParse" | 
"modelConfig" +>) => { if (!useCase) { console.log( red( @@ -586,6 +590,17 @@ const installLlamaIndexServerTemplate = async ({ cwd: path.join(templatesDir, "components", "use-cases", "python", useCase), }); + // copy model provider settings to app folder + await copy("**", path.join(root, "app"), { + cwd: path.join( + templatesDir, + "components", + "providers", + "python", + modelConfig.provider, + ), + }); + // Copy custom UI component code await copy(`*`, path.join(root, "components"), { parents: true, @@ -677,6 +692,7 @@ export const installPythonTemplate = async ({ root, useCase, useLlamaParse, + modelConfig, }); } else { await installLegacyPythonTemplate({ diff --git a/packages/create-llama/helpers/typescript.ts b/packages/create-llama/helpers/typescript.ts index f09c47070..807580eb1 100644 --- a/packages/create-llama/helpers/typescript.ts +++ b/packages/create-llama/helpers/typescript.ts @@ -13,7 +13,11 @@ const installLlamaIndexServerTemplate = async ({ root, useCase, vectorDb, -}: Pick) => { + modelConfig, +}: Pick< + InstallTemplateArgs, + "root" | "useCase" | "vectorDb" | "modelConfig" +>) => { if (!useCase) { console.log( red( @@ -32,6 +36,17 @@ const installLlamaIndexServerTemplate = async ({ process.exit(1); } + // copy model provider settings to app folder + await copy("**", path.join(root, "src", "app"), { + cwd: path.join( + templatesDir, + "components", + "providers", + "typescript", + modelConfig.provider, + ), + }); + await copy("**", path.join(root), { cwd: path.join( templatesDir, @@ -346,6 +361,7 @@ export const installTSTemplate = async ({ root, useCase, vectorDb, + modelConfig, }); } else { await installLegacyTSTemplate({ diff --git a/packages/create-llama/questions/ci.ts b/packages/create-llama/questions/ci.ts index 07027a301..77dd38024 100644 --- a/packages/create-llama/questions/ci.ts +++ b/packages/create-llama/questions/ci.ts @@ -1,4 +1,4 @@ -import { askModelConfig } from "../helpers/providers"; +import { getGpt41ModelConfig } from "../helpers/models"; import { QuestionArgs, QuestionResults } from "./types"; const defaults: Omit = { @@ -21,10 +21,6 @@ export async function getCIQuestionResults( return { ...defaults, ...program, - modelConfig: await askModelConfig({ - openAiKey: program.openAiKey, - askModels: false, - framework: program.framework, - }), + modelConfig: getGpt41ModelConfig(program.openAiKey), }; } diff --git a/packages/create-llama/questions/questions.ts b/packages/create-llama/questions/questions.ts index 559839975..97b571762 100644 --- a/packages/create-llama/questions/questions.ts +++ b/packages/create-llama/questions/questions.ts @@ -237,8 +237,6 @@ export const askProQuestions = async (program: QuestionArgs) => { if (!program.modelConfig) { const modelConfig = await askModelConfig({ - openAiKey: program.openAiKey, - askModels: program.askModels ?? 
false, framework: program.framework, }); program.modelConfig = modelConfig; diff --git a/packages/create-llama/questions/simple.ts b/packages/create-llama/questions/simple.ts index 56a45f3f5..cd54fcc1a 100644 --- a/packages/create-llama/questions/simple.ts +++ b/packages/create-llama/questions/simple.ts @@ -1,6 +1,7 @@ import prompts from "prompts"; import { NO_DATA_USE_CASES } from "../helpers/constant"; import { EXAMPLE_10K_SEC_FILES, EXAMPLE_FILE } from "../helpers/datasources"; +import { getGpt41ModelConfig } from "../helpers/models"; import { askModelConfig } from "../helpers/providers"; import { getTools } from "../helpers/tools"; import { ModelConfig, TemplateFramework } from "../helpers/types"; @@ -135,59 +136,59 @@ const convertAnswers = async ( args: PureQuestionArgs, answers: SimpleAnswers, ): Promise => { - const MODEL_GPT41: ModelConfig = { - provider: "openai", - apiKey: args.openAiKey, - model: "gpt-4.1", - embeddingModel: "text-embedding-3-large", - dimensions: 1536, - isConfigured(): boolean { - return !!args.openAiKey; - }, - }; + const modelGpt41 = getGpt41ModelConfig(args.openAiKey); const lookup: Record< AppType, Pick & { - modelConfig?: ModelConfig; + modelConfig: ModelConfig; } > = { agentic_rag: { template: "llamaindexserver", dataSources: [EXAMPLE_FILE], + modelConfig: modelGpt41, }, financial_report: { template: "llamaindexserver", dataSources: EXAMPLE_10K_SEC_FILES, tools: getTools(["interpreter", "document_generator"]), - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, deep_research: { template: "llamaindexserver", dataSources: EXAMPLE_10K_SEC_FILES, tools: [], - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, code_generator: { template: "llamaindexserver", dataSources: [], tools: [], - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, document_generator: { template: "llamaindexserver", dataSources: [], tools: [], - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, hitl: { template: "llamaindexserver", dataSources: [], tools: [], - modelConfig: MODEL_GPT41, + modelConfig: modelGpt41, }, }; const results = lookup[answers.appType]; + + let modelConfig = results.modelConfig; + if (args.askModels) { + modelConfig = await askModelConfig({ + framework: answers.language, + }); + } + return { framework: answers.language, useCase: answers.appType, @@ -196,13 +197,7 @@ const convertAnswers = async ( useLlamaParse: answers.useLlamaCloud, vectorDb: answers.useLlamaCloud ? "llamacloud" : "none", ...results, - modelConfig: - results.modelConfig ?? - (await askModelConfig({ - openAiKey: args.openAiKey, - askModels: args.askModels ?? 
false, - framework: answers.language, - })), + modelConfig, frontend: true, }; }; diff --git a/packages/create-llama/templates/components/providers/python/anthropic/settings.py b/packages/create-llama/templates/components/providers/python/anthropic/settings.py new file mode 100644 index 000000000..e5780a92e --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/anthropic/settings.py @@ -0,0 +1,21 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.fastembed import FastEmbedEmbedding +from llama_index.llms.anthropic import Anthropic + +EMBEDDING_MODEL_MAP = { + "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", + "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2", +} + + +def init_settings(): + if os.getenv("ANTHROPIC_API_KEY") is None: + raise RuntimeError("ANTHROPIC_API_KEY is missing in environment variables") + Settings.llm = Anthropic(model=os.getenv("MODEL") or "claude-3-sonnet") + # This will download the model automatically if it is not already downloaded + embed_model_name = EMBEDDING_MODEL_MAP[ + os.getenv("EMBEDDING_MODEL") or "all-MiniLM-L6-v2" + ] + Settings.embed_model = FastEmbedEmbedding(model_name=embed_model_name) diff --git a/packages/create-llama/templates/components/providers/python/azure-openai/settings.py b/packages/create-llama/templates/components/providers/python/azure-openai/settings.py new file mode 100644 index 000000000..fe7dd1e3a --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/azure-openai/settings.py @@ -0,0 +1,40 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding +from llama_index.llms.azure_openai import AzureOpenAI + + +def init_settings(): + api_key = os.getenv("AZURE_OPENAI_API_KEY") + endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + llm_deployment = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT") + embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT") + api_version = os.getenv("AZURE_OPENAI_API_VERSION") + if api_key is None: + raise RuntimeError("AZURE_OPENAI_API_KEY is missing in environment variables") + if endpoint is None: + raise RuntimeError("AZURE_OPENAI_ENDPOINT is missing in environment variables") + if llm_deployment is None: + raise RuntimeError( + "AZURE_OPENAI_LLM_DEPLOYMENT is missing in environment variables" + ) + if embedding_deployment is None: + raise RuntimeError( + "AZURE_OPENAI_EMBEDDING_DEPLOYMENT is missing in environment variables" + ) + + azure_config = { + "api_key": api_key, + "azure_endpoint": endpoint, + "api_version": api_version, + } + + Settings.llm = AzureOpenAI( + model="gpt-4.1", deployment_name=llm_deployment, **azure_config + ) + Settings.embed_model = AzureOpenAIEmbedding( + model="text-embedding-3-large", + deployment_name=embedding_deployment, + **azure_config, + ) diff --git a/packages/create-llama/templates/components/providers/python/gemini/settings.py b/packages/create-llama/templates/components/providers/python/gemini/settings.py new file mode 100644 index 000000000..d31ba0bf2 --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/gemini/settings.py @@ -0,0 +1,14 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.google_genai import GoogleGenAIEmbedding +from llama_index.llms.google_genai import GoogleGenAI + + +def init_settings(): + if os.getenv("GOOGLE_API_KEY") is None: + raise RuntimeError("GOOGLE_API_KEY is missing in environment variables") + 
Settings.llm = GoogleGenAI(model=os.getenv("MODEL") or "gemini-2.0-flash") + Settings.embed_model = GoogleGenAIEmbedding( + model=os.getenv("EMBEDDING_MODEL") or "text-embedding-004" + ) diff --git a/packages/create-llama/templates/components/providers/python/groq/settings.py b/packages/create-llama/templates/components/providers/python/groq/settings.py new file mode 100644 index 000000000..194fee181 --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/groq/settings.py @@ -0,0 +1,21 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.fastembed import FastEmbedEmbedding +from llama_index.llms.groq import Groq + +EMBEDDING_MODEL_MAP = { + "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", + "all-mpnet-base-v2": "sentence-transformers/all-mpnet-base-v2", +} + + +def init_settings(): + if os.getenv("GROQ_API_KEY") is None: + raise RuntimeError("GROQ_API_KEY is missing in environment variables") + Settings.llm = Groq(model=os.getenv("MODEL") or "llama-3.1-8b-instant") + # This will download the model automatically if it is not already downloaded + embed_model_name = EMBEDDING_MODEL_MAP[ + os.getenv("EMBEDDING_MODEL") or "all-MiniLM-L6-v2" + ] + Settings.embed_model = FastEmbedEmbedding(model_name=embed_model_name) diff --git a/packages/create-llama/templates/components/providers/python/huggingface/settings.py b/packages/create-llama/templates/components/providers/python/huggingface/settings.py new file mode 100644 index 000000000..f5459ff7b --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/huggingface/settings.py @@ -0,0 +1,10 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.huggingface import HuggingFaceEmbedding +from llama_index.llms.huggingface import HuggingFaceLLM + + +def init_settings(): + Settings.llm = HuggingFaceLLM(model_name=os.getenv("MODEL")) + Settings.embed_model = HuggingFaceEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) diff --git a/packages/create-llama/templates/components/providers/python/ollama/settings.py b/packages/create-llama/templates/components/providers/python/ollama/settings.py new file mode 100644 index 000000000..91cd909d7 --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/ollama/settings.py @@ -0,0 +1,16 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.ollama import OllamaEmbedding +from llama_index.llms.ollama import Ollama + + +def init_settings(): + if os.getenv("OLLAMA_BASE_URL") is None: + raise RuntimeError("OLLAMA_BASE_URL is missing in environment variables") + base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" + llm_model = os.getenv("MODEL") or "llama3.1" + embed_model = os.getenv("EMBEDDING_MODEL") or "nomic-embed-text" + + Settings.llm = Ollama(model=llm_model, base_url=base_url) + Settings.embed_model = OllamaEmbedding(model=embed_model, base_url=base_url) diff --git a/packages/create-llama/templates/components/providers/python/openai/settings.py b/packages/create-llama/templates/components/providers/python/openai/settings.py new file mode 100644 index 000000000..e648ae87f --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/openai/settings.py @@ -0,0 +1,14 @@ +import os + +from llama_index.core import Settings +from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.llms.openai import OpenAI + + +def init_settings(): + if os.getenv("OPENAI_API_KEY") is None: + raise 
RuntimeError("OPENAI_API_KEY is missing in environment variables") + Settings.llm = OpenAI(model=os.getenv("MODEL") or "gpt-4.1") + Settings.embed_model = OpenAIEmbedding( + model=os.getenv("EMBEDDING_MODEL") or "text-embedding-3-large" + ) diff --git a/packages/create-llama/templates/components/providers/python/t-systems/settings.py b/packages/create-llama/templates/components/providers/python/t-systems/settings.py new file mode 100644 index 000000000..c144553fc --- /dev/null +++ b/packages/create-llama/templates/components/providers/python/t-systems/settings.py @@ -0,0 +1,72 @@ +import logging +import os +from typing import Dict + +from llama_index.core.settings import Settings +from llama_index.embeddings.openai import OpenAIEmbedding + +logger = logging.getLogger(__name__) + +DEFAULT_MODEL = "gpt-3.5-turbo" +DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large" + + +class TSIEmbedding(OpenAIEmbedding): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._query_engine = self._text_engine = self.model_name + + +def llm_config_from_env() -> Dict: + from llama_index.core.constants import DEFAULT_TEMPERATURE + + model = os.getenv("MODEL", DEFAULT_MODEL) + temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) + max_tokens = os.getenv("LLM_MAX_TOKENS") + api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") + api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") + + config = { + "model": model, + "api_key": api_key, + "api_base": api_base, + "temperature": float(temperature), + "max_tokens": int(max_tokens) if max_tokens is not None else None, + } + return config + + +def embedding_config_from_env() -> Dict: + from llama_index.core.constants import DEFAULT_EMBEDDING_DIM + + model = os.getenv("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL) + dimension = os.getenv("EMBEDDING_DIM", DEFAULT_EMBEDDING_DIM) + api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY") + api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL") + + config = { + "model_name": model, + "dimension": int(dimension) if dimension is not None else None, + "api_key": api_key, + "api_base": api_base, + } + return config + + +def init_settings(): + try: + from llama_index.llms.openai_like import OpenAILike + except ImportError: + logger.error("Failed to import OpenAILike. 
Make sure llama_index is installed.") + raise + + llm_configs = llm_config_from_env() + embedding_configs = embedding_config_from_env() + + Settings.embed_model = TSIEmbedding(**embedding_configs) + Settings.llm = OpenAILike( + **llm_configs, + is_chat_model=True, + is_function_calling_model=False, + context_window=4096, + ) diff --git a/packages/create-llama/templates/components/providers/typescript/anthropic/provider.ts b/packages/create-llama/templates/components/providers/typescript/anthropic/settings.ts similarity index 94% rename from packages/create-llama/templates/components/providers/typescript/anthropic/provider.ts rename to packages/create-llama/templates/components/providers/typescript/anthropic/settings.ts index 750d0e38f..81085a924 100644 --- a/packages/create-llama/templates/components/providers/typescript/anthropic/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/anthropic/settings.ts @@ -5,7 +5,7 @@ import { import { HuggingFaceEmbedding } from "@llamaindex/huggingface"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { const embedModelMap: Record = { "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2", "all-mpnet-base-v2": "Xenova/all-mpnet-base-v2", diff --git a/packages/create-llama/templates/components/providers/typescript/azure-openai/provider.ts b/packages/create-llama/templates/components/providers/typescript/azure-openai/settings.ts similarity index 97% rename from packages/create-llama/templates/components/providers/typescript/azure-openai/provider.ts rename to packages/create-llama/templates/components/providers/typescript/azure-openai/settings.ts index 739c645dc..bb79df638 100644 --- a/packages/create-llama/templates/components/providers/typescript/azure-openai/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/azure-openai/settings.ts @@ -1,7 +1,7 @@ import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { // Map Azure OpenAI model names to OpenAI model names (only for TS) const AZURE_OPENAI_MODEL_MAP: Record = { "gpt-35-turbo": "gpt-3.5-turbo", diff --git a/packages/create-llama/templates/components/providers/typescript/gemini/provider.ts b/packages/create-llama/templates/components/providers/typescript/gemini/settings.ts similarity index 91% rename from packages/create-llama/templates/components/providers/typescript/gemini/provider.ts rename to packages/create-llama/templates/components/providers/typescript/gemini/settings.ts index cb4f6d8f9..aa250c94b 100644 --- a/packages/create-llama/templates/components/providers/typescript/gemini/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/gemini/settings.ts @@ -6,7 +6,7 @@ import { } from "@llamaindex/google"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { Settings.llm = new Gemini({ model: process.env.MODEL as GEMINI_MODEL, }); diff --git a/packages/create-llama/templates/components/providers/typescript/groq/provider.ts b/packages/create-llama/templates/components/providers/typescript/groq/settings.ts similarity index 93% rename from packages/create-llama/templates/components/providers/typescript/groq/provider.ts rename to packages/create-llama/templates/components/providers/typescript/groq/settings.ts index fe597c802..e1426ee25 100644 --- 
a/packages/create-llama/templates/components/providers/typescript/groq/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/groq/settings.ts @@ -2,7 +2,7 @@ import { Groq } from "@llamaindex/groq"; import { HuggingFaceEmbedding } from "@llamaindex/huggingface"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { const embedModelMap: Record = { "all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2", "all-mpnet-base-v2": "Xenova/all-mpnet-base-v2", diff --git a/packages/create-llama/templates/components/providers/typescript/mistral/provider.ts b/packages/create-llama/templates/components/providers/typescript/mistral/settings.ts similarity index 92% rename from packages/create-llama/templates/components/providers/typescript/mistral/provider.ts rename to packages/create-llama/templates/components/providers/typescript/mistral/settings.ts index fe91620f6..68e01cfa8 100644 --- a/packages/create-llama/templates/components/providers/typescript/mistral/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/mistral/settings.ts @@ -6,7 +6,7 @@ import { } from "@llamaindex/mistral"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { Settings.llm = new MistralAI({ model: process.env.MODEL as keyof typeof ALL_AVAILABLE_MISTRAL_MODELS, }); diff --git a/packages/create-llama/templates/components/providers/typescript/ollama/provider.ts b/packages/create-llama/templates/components/providers/typescript/ollama/settings.ts similarity index 91% rename from packages/create-llama/templates/components/providers/typescript/ollama/provider.ts rename to packages/create-llama/templates/components/providers/typescript/ollama/settings.ts index a26b8da1f..d13614737 100644 --- a/packages/create-llama/templates/components/providers/typescript/ollama/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/ollama/settings.ts @@ -1,7 +1,7 @@ import { Ollama, OllamaEmbedding } from "@llamaindex/ollama"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { const config = { host: process.env.OLLAMA_BASE_URL ?? "http://127.0.0.1:11434", }; diff --git a/packages/create-llama/templates/components/providers/typescript/openai/provider.ts b/packages/create-llama/templates/components/providers/typescript/openai/settings.ts similarity index 93% rename from packages/create-llama/templates/components/providers/typescript/openai/provider.ts rename to packages/create-llama/templates/components/providers/typescript/openai/settings.ts index 09c49ec8a..1be026d65 100644 --- a/packages/create-llama/templates/components/providers/typescript/openai/provider.ts +++ b/packages/create-llama/templates/components/providers/typescript/openai/settings.ts @@ -1,7 +1,7 @@ import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai"; import { Settings } from "llamaindex"; -export function setupProvider() { +export function initSettings() { Settings.llm = new OpenAI({ model: process.env.MODEL ?? 
"gpt-4o-mini", maxTokens: process.env.LLM_MAX_TOKENS diff --git a/packages/create-llama/templates/components/settings/typescript/settings.ts b/packages/create-llama/templates/components/settings/typescript/settings.ts deleted file mode 100644 index ce2c3b94d..000000000 --- a/packages/create-llama/templates/components/settings/typescript/settings.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { Settings } from "llamaindex"; -import { setupProvider } from "./provider"; - -const CHUNK_SIZE = 512; -const CHUNK_OVERLAP = 20; - -export const initSettings = async () => { - console.log(`Using '${process.env.MODEL_PROVIDER}' model provider`); - - if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) { - throw new Error("'MODEL' and 'EMBEDDING_MODEL' env variables must be set."); - } - - Settings.chunkSize = CHUNK_SIZE; - Settings.chunkOverlap = CHUNK_OVERLAP; - - setupProvider(); -}; diff --git a/packages/create-llama/templates/types/llamaindexserver/fastapi/generate.py b/packages/create-llama/templates/types/llamaindexserver/fastapi/generate.py index 687c5b991..2b792288f 100644 --- a/packages/create-llama/templates/types/llamaindexserver/fastapi/generate.py +++ b/packages/create-llama/templates/types/llamaindexserver/fastapi/generate.py @@ -2,7 +2,6 @@ import os from dotenv import load_dotenv -from llama_index.llms.openai import OpenAI logging.basicConfig(level=logging.INFO) logger = logging.getLogger() @@ -44,8 +43,13 @@ def generate_ui_for_workflow(): """ import asyncio + from app.settings import init_settings + from llama_index.core.settings import Settings from main import COMPONENT_DIR + load_dotenv() + init_settings() + # To generate UI components for additional event types, # import the corresponding data model (e.g., MyCustomEventData) # and run the generate_ui_for_workflow function with the imported model. 
@@ -56,8 +60,9 @@ def generate_ui_for_workflow(): raise ImportError("Couldn't generate UI component for the current workflow.") from llama_index.server.gen_ui import generate_event_component - # works also well with Claude 3.7 Sonnet or Gemini Pro 2.5 - llm = OpenAI(model="gpt-4.1") - code = asyncio.run(generate_event_component(event_cls=UIEventData, llm=llm)) + # works well with OpenAI gpt-4.1, Claude 3.7 Sonnet or Gemini Pro 2.5 + code = asyncio.run( + generate_event_component(event_cls=UIEventData, llm=Settings.llm) + ) with open(f"{COMPONENT_DIR}/ui_event.jsx", "w") as f: f.write(code) diff --git a/python/llama-index-server/llama_index/server/api/routers/chat.py b/python/llama-index-server/llama_index/server/api/routers/chat.py index 57580d7eb..4c0abb9e4 100644 --- a/python/llama-index-server/llama_index/server/api/routers/chat.py +++ b/python/llama-index-server/llama_index/server/api/routers/chat.py @@ -6,7 +6,6 @@ from fastapi import APIRouter, BackgroundTasks, HTTPException from fastapi.responses import StreamingResponse - from llama_index.core.agent.workflow.workflow_events import ( AgentInput, AgentSetup, @@ -36,6 +35,7 @@ from llama_index.server.services.file import FileService from llama_index.server.services.llamacloud import LlamaCloudFileService from llama_index.server.services.workflow import HITLWorkflowService +from pydantic_core import PydanticSerializationError def chat_router( @@ -193,7 +193,12 @@ async def _text_stream( else: # Ignore unnecessary agent workflow events if not isinstance(event, (AgentInput, AgentSetup)): - yield VercelStreamResponse.convert_data(event.model_dump()) + try: + yield VercelStreamResponse.convert_data(event.model_dump()) + except PydanticSerializationError: + logger.warning(f"Error serializing event: {event}") + # Skip events that can't be serialized + pass await handler.wait_for_completion() except asyncio.CancelledError:
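For context on the last hunk, here is a minimal standalone sketch (not part of this PR) of the failure mode the new try/except in `chat_router` guards against: an event whose field serializer fails inside `model_dump()`. The `ArtifactEvent` class, its field, and its serializer are hypothetical and not taken from llama-index-server; only `PydanticSerializationError` and `model_dump()` come from pydantic itself.

```python
# Hypothetical event model illustrating a PydanticSerializationError during
# model_dump(); names are illustrative only, not from llama-index-server.
from typing import Any

from pydantic import BaseModel, field_serializer
from pydantic_core import PydanticSerializationError


class ArtifactEvent(BaseModel):
    """Hypothetical workflow event carrying a payload that cannot be serialized."""

    name: str
    payload: Any  # e.g. a client object or file handle attached by a tool

    @field_serializer("payload")
    def _serialize_payload(self, value: Any) -> dict:
        # pydantic wraps exceptions raised here into PydanticSerializationError
        raise TypeError(f"cannot serialize {type(value).__name__}")


event = ArtifactEvent(name="report", payload=object())

try:
    data = event.model_dump()  # same call the streaming loop makes
except PydanticSerializationError as exc:
    # mirrors chat.py: log a warning and skip the event instead of breaking the stream
    print(f"Error serializing event: {exc}")
    data = None
```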