bug-ops · bug-ops · Feb 19, 2026 · Feb 19, 2026 · Feb 19, 2026 · Feb 19, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Redundant syntax highlighting and markdown parsing on every TUI frame: per-message render cache with content-hash keying (#501)
 
 ### Added
+- Vision (image input) support across Claude, OpenAI, and Ollama providers (#490)
+- `MessagePart::Image` content type with base64 serialization
+- `LlmProvider::supports_vision()` trait method for runtime capability detection
+- Claude structured content with `AnthropicContentBlock::Image` variant
+- OpenAI array content format with `image_url` data-URI encoding
+- Ollama `with_images()` support with optional `vision_model` config for dedicated model routing
+- `/image <path>` command in CLI and TUI channels
+- Telegram photo message handling with pre-download size guard
+- `vision_model` field in `[llm.ollama]` config section and `--init` wizard update
+- 20 MiB max image size limit and path traversal protection
 - Interactive configuration wizard via `zeph init` subcommand with 5-step setup (LLM provider, memory, channels, secrets backend, config generation)
 - clap-based CLI argument parsing with `--help`, `--version` support
 - `Serialize` derive on `Config` and all nested types for TOML generation

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -15,6 +15,7 @@ age = { version = "0.11.2", default-features = false }
 clap = { version = "4.5", features = ["derive"] }
 dialoguer = "0.11"
 anyhow = "1.0"
+base64 = "0.22"
 candle-core = { version = "0.9", default-features = false }
 candle-nn = { version = "0.9", default-features = false }
 candle-transformers = { version = "0.9", default-features = false }

diff --git a/README.md b/README.md
@@ -240,6 +240,7 @@ Skills **evolve**: failure detection triggers self-reflection, and the agent gen
 | **MCP** | Connect external tool servers (stdio + HTTP) with SSRF protection |
 | **A2A** | Agent-to-agent communication via JSON-RPC 2.0 with SSE streaming |
 | **Audio input** | Speech-to-text via OpenAI Whisper API or local Candle Whisper (offline, feature-gated); Telegram and Slack audio files transcribed automatically |
+| **Vision** | Image input via CLI (`/image`), TUI (`/image`), and Telegram photo messages; supported by Claude, OpenAI, and Ollama providers (20 MiB max, automatic MIME detection) |
 | **Channels** | CLI, Telegram (text + voice), Discord, Slack, TUI — all with streaming support |
 | **Gateway** | HTTP webhook ingestion with bearer auth and rate limiting |
 | **Native tool_use** | Structured tool calling via Claude/OpenAI APIs; text fallback for local models |

diff --git a/crates/zeph-channels/Cargo.toml b/crates/zeph-channels/Cargo.toml
@@ -33,6 +33,7 @@ harness = false
 
 [dev-dependencies]
 criterion.workspace = true
+tempfile.workspace = true
 
 [lints]
 workspace = true
diff --git a/crates/zeph-channels/README.md b/crates/zeph-channels/README.md
@@ -10,8 +10,8 @@ Implements I/O channel adapters that connect the agent to different frontends. S
 
 | Module | Description |
 |--------|-------------|
-| `cli` | `CliChannel` — interactive terminal I/O |
-| `telegram` | Telegram adapter via teloxide with streaming; voice/audio message detection and file download |
+| `cli` | `CliChannel` — interactive terminal I/O with `/image` command for vision input |
+| `telegram` | Telegram adapter via teloxide with streaming; voice/audio message detection and file download; photo message support for vision input |
 | `discord` | Discord adapter (optional feature) |
 | `slack` | Slack adapter (optional feature); audio file detection and download with Bearer auth |
 | `any` | `AnyChannel` — enum dispatch over all channels |

diff --git a/crates/zeph-channels/src/cli.rs b/crates/zeph-channels/src/cli.rs
@@ -1,4 +1,4 @@
-use zeph_core::channel::{Channel, ChannelError, ChannelMessage};
+use zeph_core::channel::{Attachment, AttachmentKind, Channel, ChannelError, ChannelMessage};
 
 /// CLI channel that reads from stdin and writes to stdout.
 #[derive(Debug)]
@@ -54,6 +54,33 @@ impl Channel for CliChannel {
         // Reset accumulated for new response
         self.accumulated.clear();
 
+        // Handle /image <path> command by reading the file into an attachment
+        if let Some(path) = trimmed.strip_prefix("/image").map(str::trim) {
+            if path.is_empty() {
+                println!("Usage: /image <path>");
+                return Ok(Some(ChannelMessage {
+                    text: String::new(),
+                    attachments: vec![],
+                }));
+            }
+            let path_owned = path.to_owned();
+            let data = tokio::fs::read(&path_owned)
+                .await
+                .map_err(ChannelError::Io)?;
+            let filename = std::path::Path::new(&path_owned)
+                .file_name()
+                .and_then(|n| n.to_str())
+                .map(str::to_owned);
+            return Ok(Some(ChannelMessage {
+                text: String::new(),
+                attachments: vec![Attachment {
+                    kind: AttachmentKind::Image,
+                    data,
+                    filename,
+                }],
+            }));
+        }
+
         Ok(Some(ChannelMessage {
             text: trimmed.to_string(),
             attachments: vec![],
@@ -143,4 +170,64 @@ mod tests {
         let mut ch = CliChannel::new();
         ch.flush_chunks().await.unwrap();
     }
+
+    #[tokio::test]
+    async fn image_command_valid_file_creates_attachment() {
+        use std::io::Write;
+
+        let mut tmp = tempfile::NamedTempFile::new().unwrap();
+        let image_bytes = b"\x89PNG\r\n\x1a\nfake-image-data";
+        tmp.write_all(image_bytes).unwrap();
+        tmp.flush().unwrap();
+
+        let path = tmp.path().to_str().unwrap().to_owned();
+        let filename = tmp.path().file_name().unwrap().to_str().unwrap().to_owned();
+
+        // Replays the /image <path> parsing inline (strip prefix, trim, read file); CliChannel itself is not invoked
+        let trimmed = format!("/image {path}");
+        let arg = trimmed.strip_prefix("/image").map(str::trim).unwrap();
+        assert!(!arg.is_empty());
+
+        let data = tokio::fs::read(arg).await.unwrap();
+        let parsed_filename = std::path::Path::new(arg)
+            .file_name()
+            .and_then(|n| n.to_str())
+            .map(str::to_owned);
+
+        assert_eq!(data, image_bytes);
+        assert_eq!(parsed_filename, Some(filename));
+
+        let attachment = Attachment {
+            kind: AttachmentKind::Image,
+            data,
+            filename: parsed_filename,
+        };
+        assert_eq!(attachment.kind, AttachmentKind::Image);
+        assert_eq!(attachment.data, image_bytes);
+    }
+
+    #[tokio::test]
+    async fn image_command_missing_file_returns_io_error() {
+        let result = tokio::fs::read("/nonexistent/path/image.png").await;
+        assert!(result.is_err());
+        // Wrap the read error in ChannelError::Io by hand; this checks only that the variant can hold it, not the channel's own error mapping
+        let err = ChannelError::Io(result.unwrap_err());
+        assert!(matches!(err, ChannelError::Io(_)));
+    }
+
+    #[test]
+    fn image_command_empty_args_detected() {
+        // Bare "/image" with nothing after the prefix yields an empty arg
+        let trimmed = "/image";
+        let arg = trimmed.strip_prefix("/image").map(str::trim).unwrap_or("");
+        assert!(arg.is_empty());
+
+        // "/image   " (prefix followed only by spaces) trims down to an empty arg as well
+        let trimmed_space = "/image   ";
+        let arg_space = trimmed_space
+            .strip_prefix("/image")
+            .map(str::trim)
+            .unwrap_or("");
+        assert!(arg_space.is_empty());
+    }
 }
diff --git a/crates/zeph-channels/src/telegram.rs b/crates/zeph-channels/src/telegram.rs
@@ -7,6 +7,7 @@ use tokio::sync::mpsc;
 use zeph_core::channel::{Attachment, AttachmentKind, Channel, ChannelError, ChannelMessage};
 
 const MAX_MESSAGE_LEN: usize = 4096;
+const MAX_IMAGE_BYTES: u32 = 20 * 1024 * 1024;
 
 /// Telegram channel adapter using teloxide.
 #[derive(Debug)]
@@ -87,11 +88,11 @@ impl TelegramChannel {
 
                     let audio_file_id = msg
                         .voice()
-                        .map(|v| v.file.id.0.clone())
-                        .or_else(|| msg.audio().map(|a| a.file.id.0.clone()));
+                        .map(|v| (v.file.id.0.clone(), v.file.size))
+                        .or_else(|| msg.audio().map(|a| (a.file.id.0.clone(), a.file.size)));
 
-                    if let Some(file_id) = audio_file_id {
-                        match download_file(&bot, file_id).await {
+                    if let Some((file_id, file_size)) = audio_file_id {
+                        match download_file(&bot, file_id, file_size).await {
                             Ok(data) => {
                                 attachments.push(Attachment {
                                     kind: AttachmentKind::Audio,
@@ -105,6 +106,34 @@ impl TelegramChannel {
                         }
                     }
 
+                    // Handle photo attachments (pick the largest available size)
+                    if let Some(photos) = msg.photo()
+                        && let Some(photo) = photos.iter().max_by_key(|p| p.file.size)
+                    {
+                        if photo.file.size > MAX_IMAGE_BYTES {
+                            tracing::warn!(
+                                size = photo.file.size,
+                                max = MAX_IMAGE_BYTES,
+                                "photo exceeds size limit, skipping"
+                            );
+                        } else {
+                            match download_file(&bot, photo.file.id.0.clone(), photo.file.size)
+                                .await
+                            {
+                                Ok(data) => {
+                                    attachments.push(Attachment {
+                                        kind: AttachmentKind::Image,
+                                        data,
+                                        filename: None,
+                                    });
+                                }
+                                Err(e) => {
+                                    tracing::warn!("failed to download photo attachment: {e}");
+                                }
+                            }
+                        }
+                    }
+
                     if text.is_empty() && attachments.is_empty() {
                         return respond(());
                     }
@@ -228,14 +257,14 @@ impl TelegramChannel {
     }
 }
 
-async fn download_file(bot: &Bot, file_id: String) -> Result<Vec<u8>, String> {
+async fn download_file(bot: &Bot, file_id: String, capacity: u32) -> Result<Vec<u8>, String> {
     use teloxide::net::Download;
 
     let file = bot
         .get_file(file_id.into())
         .await
         .map_err(|e| format!("get_file: {e}"))?;
-    let mut buf: Vec<u8> = Vec::new();
+    let mut buf: Vec<u8> = Vec::with_capacity(capacity as usize);
     bot.download_file(&file.path, &mut buf)
         .await
         .map_err(|e| format!("download_file: {e}"))?;
@@ -447,4 +476,41 @@ mod tests {
         assert!(channel.last_edit.is_none());
         assert!(channel.message_id.is_none());
     }
+
+    #[test]
+    fn max_image_bytes_is_20_mib() {
+        assert_eq!(MAX_IMAGE_BYTES, 20 * 1024 * 1024); // pins the limit at 20 MiB (20 * 2^20 bytes)
+    }
+
+    #[test]
+    fn photo_size_limit_enforcement() {
+        // Restates the photo handler's guard on plain integers; the handler itself is not invoked.
+        // The handler skips a photo only when photo.file.size > MAX_IMAGE_BYTES,
+        // so a photo of exactly MAX_IMAGE_BYTES is still downloaded.
+        let size_within_limit: u32 = MAX_IMAGE_BYTES - 1;
+        let size_at_limit: u32 = MAX_IMAGE_BYTES;
+        let size_over_limit: u32 = MAX_IMAGE_BYTES + 1;
+
+        assert!(size_within_limit <= MAX_IMAGE_BYTES);
+        assert!(size_at_limit <= MAX_IMAGE_BYTES);
+        assert!(size_over_limit > MAX_IMAGE_BYTES);
+    }
+
+    #[test]
+    fn should_not_send_update_within_threshold() {
+        let token = "test_token".to_string();
+        let allowed_users = Vec::new();
+        let mut channel = TelegramChannel::new(token, allowed_users);
+        // Pretend the last edit happened 1 second ago; presumably inside the edit-throttle window (stated as 10 seconds, defined outside this view - confirm)
+        channel.last_edit = Some(Instant::now() - Duration::from_secs(1));
+        assert!(!channel.should_send_update());
+    }
+
+    #[test]
+    fn start_rejects_empty_allowed_users() {
+        let channel = TelegramChannel::new("test_token".to_string(), Vec::new()); // second argument is the (empty) allowed-users list
+        let result = channel.start();
+        assert!(result.is_err());
+        assert!(matches!(result.unwrap_err(), ChannelError::Other(_))); // the rejection must surface as ChannelError::Other
+    }
 }
diff --git a/crates/zeph-core/README.md b/crates/zeph-core/README.md
@@ -12,7 +12,7 @@ Core orchestration crate for the Zeph agent. Manages the main agent loop, bootst
 |--------|-------------|
 | `agent` | `Agent` — main loop driving inference and tool execution |
 | `bootstrap` | `AppBuilder` — fluent builder for application startup |
-| `channel` | `Channel` trait defining I/O adapters; `Attachment` / `AttachmentKind` for multimodal inputs |
+| `channel` | `Channel` trait defining I/O adapters; `Attachment` / `AttachmentKind` for multimodal inputs (images, audio) |
 | `config` | TOML config with `ZEPH_*` env overrides |
 | `context` | LLM context assembly from history, skills, memory |
 | `cost` | Token cost tracking and budgeting |