Skip to content

Commit 1d2bc37

Browse files
stbenjam authored and claude committed
Add multi-model support with configuration system
- Add support for multiple LLM models including ChatVertexAnthropic
- Implement models.yaml configuration system for model management
- Add model selection UI in chat settings
- Update environment configuration and documentation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent f78f207 commit 1d2bc37

File tree

16 files changed

+820
-89
lines changed

16 files changed

+820
-89
lines changed

chat/.env.example

Lines changed: 84 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,92 @@
1-
# LLM Configuration
2-
# For local Ollama (default)
3-
# I have found that llama3.1:8b and granite3.3:8b give the best results,
4-
# with a slight edge to granite.
5-
LLM_ENDPOINT=http://localhost:11434/v1
6-
MODEL_NAME=granite3.3:8b
7-
8-
# For OpenAI
1+
# Sippy AI Agent Configuration
2+
# Copy this file to .env and configure for your setup
3+
4+
# =============================================================================
5+
# Model Configuration - Choose ONE of the following setups:
6+
# =============================================================================
7+
8+
# -----------------------------------------------------------------------------
9+
# Option 1: Local Ollama
10+
# -----------------------------------------------------------------------------
11+
# LLM_ENDPOINT=http://localhost:11434/v1
12+
# MODEL_NAME=llama3.1:8b
13+
14+
# Other popular Ollama models:
15+
# MODEL_NAME=llama3.2:latest
16+
# MODEL_NAME=mistral:latest
17+
18+
# -----------------------------------------------------------------------------
19+
# Option 2: OpenAI
20+
# -----------------------------------------------------------------------------
921
# LLM_ENDPOINT=https://api.openai.com/v1
10-
# MODEL_NAME=gpt-4
11-
# OPENAI_API_KEY=your_openai_api_key_here
22+
# MODEL_NAME=gpt-4o
23+
# OPENAI_API_KEY=sk-your-openai-api-key-here
1224

13-
# For Google Gemini
25+
# Other OpenAI models:
26+
# MODEL_NAME=gpt-4o-mini
27+
# MODEL_NAME=gpt-4-turbo
28+
# MODEL_NAME=gpt-3.5-turbo
29+
30+
# -----------------------------------------------------------------------------
31+
# Option 3: Google Gemini via AI Studio API
32+
# -----------------------------------------------------------------------------
33+
# MODEL_NAME=gemini-1.5-pro
34+
# GOOGLE_API_KEY=your-google-api-key-here
35+
36+
# OR use service account credentials:
1437
# MODEL_NAME=gemini-2.5-flash
15-
# GOOGLE_API_KEY=your_google_api_key_here
16-
# or GOOGLE_APPLICATION_CREDENTIALS=path_to_json_credentials
38+
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
39+
40+
# -----------------------------------------------------------------------------
41+
# Option 4: Claude via Google Vertex AI (Recommended for Claude)
42+
# -----------------------------------------------------------------------------
43+
# Using gcloud auth (recommended for local development):
44+
# MODEL_NAME=claude-sonnet-4-5
45+
# GOOGLE_PROJECT_ID=your-gcp-project-id
46+
# GOOGLE_LOCATION=us-central1
1747

18-
# Sippy API Configuration (for future use)
19-
SIPPY_API_URL=https://sippy.dptools.openshift.org
48+
# OR using service account credentials:
49+
# MODEL_NAME=claude-sonnet-4-5
50+
# GOOGLE_PROJECT_ID=your-gcp-project-id
51+
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
52+
# GOOGLE_LOCATION=us-central1
2053

21-
# Sippy database connection for accessing data
22-
#SIPPY_READ_ONLY_DATABASE_DSN=postgresql://readonly_user:password@host:5432/sippy
54+
# =============================================================================
55+
# Model Parameters
56+
# =============================================================================
57+
TEMPERATURE=0.0
2358

24-
# Jira Configuration (for known incident tracking)
59+
# Token budget for Claude's extended thinking feature (only used when --thinking is enabled)
60+
# EXTENDED_THINKING_BUDGET=10000
61+
62+
# =============================================================================
63+
# Sippy Configuration
64+
# =============================================================================
65+
# Sippy API URL (required for most tools to work)
66+
SIPPY_API_URL=https://sippy.dptools.openshift.org/api
67+
68+
# Optional: Database access for advanced SQL queries (use read-only user!)
69+
# SIPPY_READ_ONLY_DATABASE_DSN=postgresql://readonly_user:password@host:5432/sippy
70+
71+
# =============================================================================
72+
# Jira Configuration (Optional - for incident tracking)
73+
# =============================================================================
2574
JIRA_URL=https://issues.redhat.com
2675

27-
# Specify the MCP configuration to use.
28-
MCP_CONFIG_FILE=mcp_config.json
76+
# =============================================================================
77+
# Agent Behavior
78+
# =============================================================================
79+
# Maximum number of tool call iterations before stopping
80+
MAX_ITERATIONS=15
81+
82+
# Maximum execution time in seconds (default: 300 = 5 minutes)
83+
MAX_EXECUTION_TIME=300
84+
85+
# AI Persona (default, zorp, etc.)
86+
PERSONA=default
87+
88+
# =============================================================================
89+
# MCP (Model Context Protocol) Integration (Optional)
90+
# =============================================================================
91+
# Path to MCP servers configuration file
92+
# MCP_CONFIG_FILE=mcp_config.json

chat/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,7 @@ dmypy.json
139139
.DS_Store
140140
Thumbs.db
141141
mcp_config.json
142+
143+
144+
# Configuration for chat app
145+
models.yaml

chat/README.md

Lines changed: 88 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,91 @@ SIPPY_READ_ONLY_DATABASE_DSN=postgresql://readonly_user:password@host:5432/sippy
4343

4444
**Important:** Use a read-only database user for security. The tool enforces read-only queries at the application level as well.
4545

46-
### 3. Run the Agent
46+
**Optional: Claude Models via Google Vertex AI**
47+
48+
To use Claude models through Google's Vertex AI, you need:
49+
50+
1. A Google Cloud project with Vertex AI API enabled
51+
2. Authentication via `gcloud auth` OR service account credentials
52+
3. Claude models enabled in your project (requires allowlist access)
53+
54+
**Option 1: Using gcloud auth (recommended for local development):**
55+
56+
```bash
57+
# Login with your Google Cloud account
58+
gcloud auth application-default login
59+
60+
# Set required environment variables
61+
MODEL_NAME=claude-sonnet-4-5
62+
GOOGLE_PROJECT_ID=your-gcp-project-id
63+
GOOGLE_LOCATION=us-central1 # Optional, defaults to us-central1
64+
```
65+
66+
**Option 2: Using service account credentials:**
67+
68+
```bash
69+
MODEL_NAME=claude-sonnet-4-5
70+
GOOGLE_PROJECT_ID=your-gcp-project-id
71+
GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
72+
GOOGLE_LOCATION=us-central1 # Optional, defaults to us-central1
73+
```
74+
75+
**Claude Extended Thinking:**
76+
When using Claude with `--thinking` enabled, the model can use its extended thinking feature to show detailed reasoning. You can control the token budget:
77+
```bash
78+
# Use extended thinking with custom budget (if supported by your model/region)
79+
python main.py chat --model claude-sonnet-4-5 --thinking --thinking-budget 15000
80+
81+
# Or set via environment variable
82+
export EXTENDED_THINKING_BUDGET=15000
83+
84+
# If you encounter 400 errors, extended thinking may not be available
85+
# Disable it by setting the budget to 0:
86+
python main.py chat --model claude-sonnet-4-5 --thinking --thinking-budget 0
87+
```
88+
89+
**Important Notes:**
90+
- Extended thinking **automatically sets temperature to 1.0** (required by Claude API)
91+
- Extended thinking availability may vary by Claude model version and Vertex AI region
92+
- If you encounter errors, you can still use `--thinking` to see the agent's tool usage and reasoning without Claude's extended thinking by setting budget to 0
93+
94+
### 3. Multiple Model Configuration (Optional)
95+
96+
Sippy Chat supports running with multiple AI models that users can switch between via the web UI. This is configured using a `models.yaml` file.
97+
98+
**Create models.yaml:**
99+
100+
```bash
101+
cp models.yaml.example models.yaml
102+
# Edit models.yaml to configure your models
103+
```
104+
105+
**Configuration Options:**
106+
107+
- `id`: Unique identifier for the model (required)
108+
- `name`: Display name shown in the UI (required)
109+
- `description`: Brief description shown in the UI (optional)
110+
- `model_name`: The actual model name to use with the provider (required)
111+
- `endpoint`: API endpoint URL (required for OpenAI-compatible APIs, empty for Vertex AI)
112+
- `temperature`: Temperature setting for the model (optional, default: 0.0)
113+
- `extended_thinking_budget`: Token budget for Claude's extended thinking (optional, default: 0)
114+
- `default`: Set to true to make this the default model (optional, only one should be true)
115+
116+
**Important Notes:**
117+
118+
- Environment variables (API keys, credentials) are still required and shared across all models
119+
- Users can switch models mid-conversation via the Settings panel in the web UI
120+
- If `models.yaml` doesn't exist, the system falls back to using a single model from environment variables
121+
122+
**Start the server with models.yaml:**
123+
124+
```bash
125+
python main.py serve --models-config models.yaml
126+
```
127+
128+
If `models.yaml` exists in the `chat/` directory, it will be loaded automatically without the `--models-config` flag.
129+
130+
### 4. Run the Agent
47131

48132
**Interactive Chat CLI:**
49133
```bash
@@ -72,6 +156,9 @@ python main.py chat --model gemini-1.5-pro
72156

73157
# Using Google Gemini with service account
74158
python main.py serve --model gemini-1.5-pro --google-credentials /path/to/credentials.json
159+
160+
# Using Claude models via Google Vertex AI
161+
python main.py serve --model claude-3-5-sonnet@20240620
75162
```
76163

77164
**Get help:**

chat/main.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ def common_options(f):
3636
click.option("--max-iterations", default=None, type=int, help="Maximum number of agent iterations (default: 25)"),
3737
click.option("--timeout", default=None, type=int, help="Maximum execution time in seconds (default: 1800 = 30 minutes)"),
3838
click.option("--google-credentials", default=None, help="Path to Google service account credentials JSON file"),
39+
click.option("--google-project", default=None, help="Google Cloud project ID (required for Claude models via Vertex AI)"),
40+
click.option("--google-location", default=None, help="Google Cloud location/region for Vertex AI (default: us-central1)"),
41+
click.option("--thinking-budget", default=None, type=int, help="Token budget for Claude's extended thinking (default: 10000)"),
3942
click.option("--mcp-config", default=None, help="Path to MCP servers config file"),
43+
click.option("--models-config", default=None, help="Path to models.yaml config file"),
4044
]
4145
for option in reversed(options):
4246
f = option(f)
@@ -63,6 +67,12 @@ def apply_config_overrides(config: Config, **kwargs) -> None:
6367
config.max_execution_time = kwargs["timeout"]
6468
if kwargs.get("google_credentials") is not None:
6569
config.google_credentials_file = kwargs["google_credentials"]
70+
if kwargs.get("google_project") is not None:
71+
config.google_project_id = kwargs["google_project"]
72+
if kwargs.get("google_location") is not None:
73+
config.google_location = kwargs["google_location"]
74+
if kwargs.get("thinking_budget") is not None:
75+
config.extended_thinking_budget = kwargs["thinking_budget"]
6676
if kwargs.get("mcp_config") is not None:
6777
config.mcp_config_file = kwargs["mcp_config"]
6878

@@ -147,7 +157,7 @@ def serve(host: str, port: int, metrics_port: Optional[int], reload: bool, **kwa
147157
console.print(f"[dim]Persona: {config.persona}[/dim]")
148158
console.print()
149159

150-
server = SippyWebServer(config, metrics_port=metrics_port)
160+
server = SippyWebServer(config, metrics_port=metrics_port, models_config_path=kwargs.get("models_config"))
151161
server.run(host=host, port=port, reload=reload)
152162

153163
except ValueError as e:

chat/models.yaml.example

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Models Configuration for Sippy Chat
2+
#
3+
# This file defines the available AI models that users can select from in the chat interface.
4+
# Users can switch between models mid-conversation via the Settings page.
5+
#
6+
# Configuration options per model:
7+
# id: Unique identifier for the model (required)
8+
# name: Display name shown in the UI (required)
9+
# description: Brief description shown in the UI (optional)
10+
# model_name: The actual model name to use with the provider (required)
11+
# endpoint: API endpoint URL (required for OpenAI-compatible APIs, empty for Vertex AI)
12+
# temperature: Temperature setting for the model (optional, default: 0.0)
13+
# extended_thinking_budget: Token budget for Claude's extended thinking (optional, default: 0)
14+
# default: Set to true to make this the default model (optional, only one should be true)
15+
16+
models:
17+
# Google Gemini
18+
- id: "gemini-2.5-flash"
19+
name: "Gemini 2.5 Flash"
20+
description: "Google's Gemini 2.5 Flash - best cost/performance"
21+
model_name: "gemini-2.5-flash"
22+
default: true
23+
24+
- id: "gemini-2.5-pro"
25+
name: "Gemini 2.5 Pro"
26+
description: "Google's Gemini 2.5 Pro with large context window"
27+
model_name: "gemini-2.5-pro"
28+
29+
# Claude via Google Vertex AI
30+
- id: "claude-sonnet-4.5"
31+
name: "Claude Sonnet 4.5"
32+
description: "Capable model for complex CI analysis"
33+
model_name: "claude-sonnet-4-5@20250929"
34+
35+
# Claude via Google Vertex AI
36+
- id: "claude-sonnet-4.5-thinking"
37+
name: "Claude Sonnet 4.5 (Thinking)"
38+
description: "Capable model for complex CI analysis with extended thinking"
39+
model_name: "claude-sonnet-4-5@20250929"
40+
temperature: 1.0 # Required when setting thinking budget
41+
extended_thinking_budget: 10000

chat/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,15 @@ langgraph>=0.2.0
22
langchain>=0.1.0
33
langchain-openai>=0.1.0
44
langchain-google-genai>=2.0.0
5+
langchain-google-vertexai>=2.0.0
56
langchain-community>=0.0.20
67
langchain-core>=0.1.0
8+
anthropic>=0.20.0
79
click>=8.0.0
810
rich>=13.0.0
911
python-dotenv>=1.0.0
1012
pydantic>=2.0.0
13+
pyyaml>=6.0.0
1114
httpx>=0.25.0
1215
typing-extensions>=4.5.0
1316
fastapi>=0.104.0

0 commit comments

Comments
 (0)