104 changes: 84 additions & 20 deletions chat/.env.example
@@ -1,28 +1,92 @@
# LLM Configuration
# For local Ollama (default)
# I have found that llama3.1:8b and granite3.3:8b give the best results,
# with a slight edge to granite.
LLM_ENDPOINT=http://localhost:11434/v1
MODEL_NAME=granite3.3:8b

# For OpenAI
# Sippy AI Agent Configuration
# Copy this file to .env and configure for your setup

# =============================================================================
# Model Configuration - Choose ONE of the following setups:
# =============================================================================

# -----------------------------------------------------------------------------
# Option 1: Local Ollama
# -----------------------------------------------------------------------------
# LLM_ENDPOINT=http://localhost:11434/v1
# MODEL_NAME=llama3.1:8b

# Other popular Ollama models:
# MODEL_NAME=llama3.2:latest
# MODEL_NAME=mistral:latest

# -----------------------------------------------------------------------------
# Option 2: OpenAI
# -----------------------------------------------------------------------------
# LLM_ENDPOINT=https://api.openai.com/v1
# MODEL_NAME=gpt-4
# OPENAI_API_KEY=your_openai_api_key_here
# MODEL_NAME=gpt-4o
# OPENAI_API_KEY=sk-your-openai-api-key-here

# For Google Gemini
# Other OpenAI models:
# MODEL_NAME=gpt-4o-mini
# MODEL_NAME=gpt-4-turbo
# MODEL_NAME=gpt-3.5-turbo

# -----------------------------------------------------------------------------
# Option 3: Google Gemini via AI Studio API
# -----------------------------------------------------------------------------
# MODEL_NAME=gemini-1.5-pro
# GOOGLE_API_KEY=your-google-api-key-here

# OR use service account credentials:
# MODEL_NAME=gemini-2.5-flash
# GOOGLE_API_KEY=your_google_api_key_here
# or GOOGLE_APPLICATION_CREDENTIALS=path_to_json_credentials
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json

# -----------------------------------------------------------------------------
# Option 4: Claude via Google Vertex AI (Recommended for Claude)
# -----------------------------------------------------------------------------
# Using gcloud auth (recommended for local development):
# MODEL_NAME=claude-sonnet-4-5
# GOOGLE_PROJECT_ID=your-gcp-project-id
# GOOGLE_LOCATION=us-central1

# Sippy API Configuration (for future use)
SIPPY_API_URL=https://sippy.dptools.openshift.org
# OR using service account credentials:
# MODEL_NAME=claude-sonnet-4-5
# GOOGLE_PROJECT_ID=your-gcp-project-id
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
# GOOGLE_LOCATION=us-central1

# Sippy database connection for accessing data
#SIPPY_READ_ONLY_DATABASE_DSN=postgresql://readonly_user:password@host:5432/sippy
# =============================================================================
# Model Parameters
# =============================================================================
TEMPERATURE=0.0

# Jira Configuration (for known incident tracking)
# Token budget for Claude's extended thinking feature (only used when --thinking is enabled)
# EXTENDED_THINKING_BUDGET=10000

# =============================================================================
# Sippy Configuration
# =============================================================================
# Sippy API URL (required for most tools to work)
SIPPY_API_URL=https://sippy.dptools.openshift.org/api

# Optional: Database access for advanced SQL queries (use read-only user!)
# SIPPY_READ_ONLY_DATABASE_DSN=postgresql://readonly_user:password@host:5432/sippy

# =============================================================================
# Jira Configuration (Optional - for incident tracking)
# =============================================================================
JIRA_URL=https://issues.redhat.com

# Specify the MCP configuration to use.
MCP_CONFIG_FILE=mcp_config.json
# =============================================================================
# Agent Behavior
# =============================================================================
# Maximum number of tool call iterations before stopping
MAX_ITERATIONS=15

# Maximum execution time in seconds (default: 300 = 5 minutes)
MAX_EXECUTION_TIME=300

# AI Persona (default, zorp, etc.)
PERSONA=default

# =============================================================================
# MCP (Model Context Protocol) Integration (Optional)
# =============================================================================
# Path to MCP servers configuration file
# MCP_CONFIG_FILE=mcp_config.json
4 changes: 4 additions & 0 deletions chat/.gitignore
@@ -139,3 +139,7 @@ dmypy.json
.DS_Store
Thumbs.db
mcp_config.json


# Configuration for chat app
models.yaml
91 changes: 89 additions & 2 deletions chat/README.md
@@ -33,7 +33,7 @@ cp .env.example .env
Edit `.env` for your LLM setup, according to the instructions in the
.env file.

**Optional: Database Access**
#### Optional: Database Access

To enable direct database queries (a fallback for when the standard tools don't provide enough information), set:

@@ -43,7 +43,91 @@ SIPPY_READ_ONLY_DATABASE_DSN=postgresql://readonly_user:password@host:5432/sippy

**Important:** Use a read-only database user for security. The tool enforces read-only queries at the application level as well.
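
For example, a read-only role might be provisioned like this (a sketch, assuming PostgreSQL and the role/database names from the DSN example above; adapt names and passwords to your environment):

```bash
# Sketch: create a read-only role for the Sippy database (names are placeholders)
psql -d sippy -c "CREATE ROLE readonly_user LOGIN PASSWORD 'change-me';"
psql -d sippy -c "GRANT CONNECT ON DATABASE sippy TO readonly_user;"
psql -d sippy -c "GRANT USAGE ON SCHEMA public TO readonly_user;"
psql -d sippy -c "GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly_user;"
```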

### 3. Run the Agent
#### Optional: Claude Models via Google Vertex AI

To use Claude models through Google's Vertex AI, you need:

1. A Google Cloud project with Vertex AI API enabled
2. Authentication via `gcloud auth` OR service account credentials
3. Claude models enabled in your project (requires allowlist access)
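
For the first requirement, the Vertex AI API can typically be enabled with gcloud (a sketch; the project ID is a placeholder):

```bash
# Sketch: enable the Vertex AI API for your project (project ID is a placeholder)
gcloud services enable aiplatform.googleapis.com --project=your-gcp-project-id
```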

**Option 1: Using gcloud auth (recommended for local development):**

```bash
# Login with your Google Cloud account
gcloud auth application-default login

# Set required environment variables
MODEL_NAME=claude-sonnet-4-5
GOOGLE_PROJECT_ID=your-gcp-project-id
GOOGLE_LOCATION=us-central1 # Optional, defaults to us-central1
```

**Option 2: Using service account credentials:**

```bash
MODEL_NAME=claude-sonnet-4-5
GOOGLE_PROJECT_ID=your-gcp-project-id
GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
GOOGLE_LOCATION=us-central1 # Optional, defaults to us-central1
```

**Claude Extended Thinking:**
When using Claude with `--thinking` enabled, the model can use its extended thinking feature to show detailed reasoning. You can control the token budget:
```bash
# Use extended thinking with custom budget (if supported by your model/region)
python main.py chat --model claude-sonnet-4-5 --thinking --thinking-budget 15000

# Or set via environment variable
export EXTENDED_THINKING_BUDGET=15000

# If you encounter 400 errors, extended thinking may not be available
# Disable it by setting the budget to 0:
python main.py chat --model claude-sonnet-4-5 --thinking --thinking-budget 0
```

**Important Notes:**
- Extended thinking **automatically sets temperature to 1.0** (required by Claude API)
- Extended thinking availability may vary by Claude model version and Vertex AI region
- If you encounter errors, set the budget to 0: `--thinking` will still show the agent's tool usage and reasoning, just without Claude's extended thinking

### 3. Multiple Model Configuration (Optional)

Sippy Chat supports running with multiple AI models that users can switch between via the web UI. This is configured using a `models.yaml` file.

**Create models.yaml:**

```bash
cp models.yaml.example models.yaml
# Edit models.yaml to configure your models
```

**Configuration Options:**

- `id`: Unique identifier for the model (required)
- `name`: Display name shown in the UI (required)
- `description`: Brief description shown in the UI (optional)
- `model_name`: The actual model name to use with the provider (required)
- `endpoint`: API endpoint URL (required for OpenAI-compatible APIs, empty for Vertex AI)
- `temperature`: Temperature setting for the model (optional, default: 0.0)
- `extended_thinking_budget`: Token budget for Claude's extended thinking (optional, default: 0)
- `default`: Set to true to make this the default model (optional, only one should be true)
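
Putting these together, a minimal `models.yaml` might look like this (a sketch; see `models.yaml.example` in this PR for a fuller version):

```yaml
# Sketch: two models, one marked default; model names are illustrative
models:
  - id: "gemini-2.5-flash"
    name: "Gemini 2.5 Flash"
    model_name: "gemini-2.5-flash"
    default: true

  - id: "claude-sonnet-4.5"
    name: "Claude Sonnet 4.5"
    model_name: "claude-sonnet-4-5@20250929"
    temperature: 0.0
```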

**Important Notes:**

- Environment variables (API keys, credentials) are still required and shared across all models
- Users can switch models mid-conversation via the Settings panel in the web UI
- If `models.yaml` doesn't exist, the system falls back to using a single model from environment variables

**Start the server with models.yaml:**

```bash
python main.py serve --models-config models.yaml
```

If `models.yaml` exists in the `chat/` directory, it will be loaded automatically without the `--models-config` flag.

### 4. Run the Agent

**Interactive Chat CLI:**
```bash
@@ -72,6 +156,9 @@ python main.py chat --model gemini-1.5-pro

# Using Google Gemini with service account
python main.py serve --model gemini-1.5-pro --google-credentials /path/to/credentials.json

# Using Claude models via Google Vertex AI
python main.py serve --model claude-3-5-sonnet@20240620
```

**Get help:**
12 changes: 11 additions & 1 deletion chat/main.py
@@ -36,7 +36,11 @@ def common_options(f):
        click.option("--max-iterations", default=None, type=int, help="Maximum number of agent iterations (default: 25)"),
        click.option("--timeout", default=None, type=int, help="Maximum execution time in seconds (default: 1800 = 30 minutes)"),
        click.option("--google-credentials", default=None, help="Path to Google service account credentials JSON file"),
        click.option("--google-project", default=None, help="Google Cloud project ID (required for Claude models via Vertex AI)"),
        click.option("--google-location", default=None, help="Google Cloud location/region for Vertex AI (default: us-central1)"),
        click.option("--thinking-budget", default=None, type=int, help="Token budget for Claude's extended thinking (default: 10000)"),
        click.option("--mcp-config", default=None, help="Path to MCP servers config file"),
        click.option("--models-config", default=None, help="Path to models.yaml config file"),
    ]
    for option in reversed(options):
        f = option(f)
@@ -63,6 +67,12 @@ def apply_config_overrides(config: Config, **kwargs) -> None:
        config.max_execution_time = kwargs["timeout"]
    if kwargs.get("google_credentials") is not None:
        config.google_credentials_file = kwargs["google_credentials"]
    if kwargs.get("google_project") is not None:
        config.google_project_id = kwargs["google_project"]
    if kwargs.get("google_location") is not None:
        config.google_location = kwargs["google_location"]
    if kwargs.get("thinking_budget") is not None:
        config.extended_thinking_budget = kwargs["thinking_budget"]
    if kwargs.get("mcp_config") is not None:
        config.mcp_config_file = kwargs["mcp_config"]

@@ -147,7 +157,7 @@ def serve(host: str, port: int, metrics_port: Optional[int], reload: bool, **kwa
        console.print(f"[dim]Persona: {config.persona}[/dim]")
        console.print()

        server = SippyWebServer(config, metrics_port=metrics_port)
        server = SippyWebServer(config, metrics_port=metrics_port, models_config_path=kwargs.get("models_config"))
        server.run(host=host, port=port, reload=reload)

    except ValueError as e:
46 changes: 46 additions & 0 deletions chat/models.yaml.example
@@ -0,0 +1,46 @@
# Models Configuration for Sippy Chat
#
# This file defines the available AI models that users can select from in the chat interface.
# Users can switch between models mid-conversation via the Settings page.
#
# Configuration options per model:
# id: Unique identifier for the model (required)
# name: Display name shown in the UI (required)
# description: Brief description shown in the UI (optional)
# model_name: The actual model name to use with the provider (required)
# endpoint: API endpoint URL (required for OpenAI-compatible APIs, empty for Vertex AI)
# temperature: Temperature setting for the model (optional, default: 0.0)
# extended_thinking_budget: Token budget for Claude's extended thinking (optional, default: 0)
# default: Set to true to make this the default model (optional, only one should be true)

models:
  # Google Gemini
  - id: "gemini-2.5-flash"
    name: "Gemini 2.5 Flash"
    description: "Google's Gemini 2.5 Flash - best cost/performance"
    model_name: "gemini-2.5-flash"
    default: true

  - id: "gemini-2.5-pro"
    name: "Gemini 2.5 Pro"
    description: "Google's Gemini 2.5 Pro with large context window"
    model_name: "gemini-2.5-pro"

  - id: "gemini-3-pro-preview"
    name: "Gemini 3.0 Pro Preview"
    description: "Preview of Google's Next Pro Model"
    model_name: "gemini-3-pro-preview"

  # Claude via Google Vertex AI
  - id: "claude-sonnet-4.5"
    name: "Claude Sonnet 4.5"
    description: "Capable model for complex CI analysis"
    model_name: "claude-sonnet-4-5@20250929"

  # Claude via Google Vertex AI
  - id: "claude-sonnet-4.5-thinking"
    name: "Claude Sonnet 4.5 (Thinking)"
    description: "Capable model for complex CI analysis with extended thinking"
    model_name: "claude-sonnet-4-5@20250929"
    temperature: 1.0  # Required when setting thinking budget
    extended_thinking_budget: 10000
15 changes: 9 additions & 6 deletions chat/requirements.txt
@@ -1,13 +1,16 @@
langgraph>=0.2.0
langchain>=0.1.0
langchain-openai>=0.1.0
langchain-google-genai>=2.0.0
langchain-community>=0.0.20
langchain-core>=0.1.0
langgraph>=1.0.0
langchain>=1.0.0
langchain-openai>=1.0.0
langchain-google-genai>=3.0.0
langchain-google-vertexai>=3.0.0
langchain-community>=0.4.0
langchain-core>=1.0.0
anthropic>=0.20.0
click>=8.0.0
rich>=13.0.0
python-dotenv>=1.0.0
pydantic>=2.0.0
pyyaml>=6.0.0
httpx>=0.25.0
typing-extensions>=4.5.0
fastapi>=0.104.0