bug-ops · bug-ops · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026 · Feb 18, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -28,6 +28,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
 - Document ingestion pipeline: load, split, embed, store via Qdrant (#472)
 - File size guard (50 MiB default) and path canonicalization for document loaders
 - Audio input support: `Attachment`/`AttachmentKind` types, `SpeechToText` trait, OpenAI Whisper backend behind `stt` feature flag (#520, #521, #522)
+- Telegram voice and audio message handling with automatic file download (#524)
+- STT bootstrap wiring: `WhisperProvider` created from `[llm.stt]` config behind `stt` feature (#529)
 
 ## [0.10.0] - 2026-02-18
 

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -124,7 +124,7 @@ scheduler = ["dep:zeph-scheduler"]
 otel = ["dep:opentelemetry", "dep:opentelemetry_sdk", "dep:opentelemetry-otlp", "dep:tracing-opentelemetry"]
 pdf = ["zeph-memory/pdf"]
 mock = ["zeph-llm/mock", "zeph-memory/mock"]
-stt = ["zeph-llm/stt"]
+stt = ["zeph-llm/stt", "dep:reqwest"]
 
 [dependencies]
 anyhow.workspace = true
@@ -151,6 +151,7 @@ zeph-tools.workspace = true
 zeph-gateway = { workspace = true, optional = true }
 zeph-scheduler = { workspace = true, optional = true }
 zeph-tui = { workspace = true, optional = true }
+reqwest = { workspace = true, optional = true, features = ["rustls"] }
 
 [dev-dependencies]
 tempfile.workspace = true

diff --git a/README.md b/README.md
@@ -236,8 +236,8 @@ Skills **evolve**: failure detection triggers self-reflection, and the agent gen
 |----------|-------------|
 | **MCP** | Connect external tool servers (stdio + HTTP) with SSRF protection |
 | **A2A** | Agent-to-agent communication via JSON-RPC 2.0 with SSE streaming |
-| **Audio input** | Speech-to-text transcription via OpenAI Whisper (25 MB limit) |
-| **Channels** | CLI, Telegram, Discord, Slack, TUI — all with streaming support |
+| **Audio input** | Speech-to-text via OpenAI Whisper (25 MB limit); Telegram voice messages transcribed automatically |
+| **Channels** | CLI, Telegram (text + voice), Discord, Slack, TUI — all with streaming support |
 | **Gateway** | HTTP webhook ingestion with bearer auth and rate limiting |
 | **Native tool_use** | Structured tool calling via Claude/OpenAI APIs; text fallback for local models |
 

diff --git a/crates/zeph-channels/README.md b/crates/zeph-channels/README.md
@@ -11,7 +11,7 @@ Implements I/O channel adapters that connect the agent to different frontends. S
 | Module | Description |
 |--------|-------------|
 | `cli` | `CliChannel` — interactive terminal I/O |
-| `telegram` | Telegram adapter via teloxide with streaming |
+| `telegram` | Telegram adapter via teloxide with streaming; voice/audio message detection and file download |
 | `discord` | Discord adapter (optional feature) |
 | `slack` | Slack adapter (optional feature) |
 | `any` | `AnyChannel` — enum dispatch over all channels |

diff --git a/crates/zeph-channels/src/telegram.rs b/crates/zeph-channels/src/telegram.rs
@@ -4,7 +4,7 @@ use crate::markdown::markdown_to_telegram;
 use teloxide::prelude::*;
 use teloxide::types::{ChatAction, MessageId, ParseMode};
 use tokio::sync::mpsc;
-use zeph_core::channel::{Channel, ChannelError, ChannelMessage};
+use zeph_core::channel::{Attachment, AttachmentKind, Channel, ChannelError, ChannelMessage};
 
 const MAX_MESSAGE_LEN: usize = 4096;
 
@@ -24,6 +24,7 @@ pub struct TelegramChannel {
 struct IncomingMessage {
     chat_id: ChatId,
     text: String,
+    attachments: Vec<Attachment>,
 }
 
 impl TelegramChannel {
@@ -62,7 +63,7 @@ impl TelegramChannel {
         let allowed = self.allowed_users.clone();
 
         tokio::spawn(async move {
-            let handler = Update::filter_message().endpoint(move |msg: Message, _bot: Bot| {
+            let handler = Update::filter_message().endpoint(move |msg: Message, bot: Bot| {
                 let tx = tx.clone();
                 let allowed = allowed.clone();
                 async move {
@@ -81,14 +82,38 @@ impl TelegramChannel {
                         }
                     }
 
-                    let Some(text) = msg.text() else {
+                    let text = msg.text().unwrap_or_default().to_string();
+                    let mut attachments = Vec::new();
+
+                    let audio_file_id = msg
+                        .voice()
+                        .map(|v| v.file.id.0.clone())
+                        .or_else(|| msg.audio().map(|a| a.file.id.0.clone()));
+
+                    if let Some(file_id) = audio_file_id {
+                        match download_file(&bot, file_id).await {
+                            Ok(data) => {
+                                attachments.push(Attachment {
+                                    kind: AttachmentKind::Audio,
+                                    data,
+                                    filename: msg.audio().and_then(|a| a.file_name.clone()),
+                                });
+                            }
+                            Err(e) => {
+                                tracing::warn!("failed to download audio attachment: {e}");
+                            }
+                        }
+                    }
+
+                    if text.is_empty() && attachments.is_empty() {
                         return respond(());
-                    };
+                    }
 
                     let _ = tx
                         .send(IncomingMessage {
                             chat_id: msg.chat.id,
-                            text: text.to_string(),
+                            text,
+                            attachments,
                         })
                         .await;
 
@@ -203,13 +228,27 @@ impl TelegramChannel {
     }
 }
 
+async fn download_file(bot: &Bot, file_id: String) -> Result<Vec<u8>, String> {
+    use teloxide::net::Download;
+
+    let file = bot
+        .get_file(file_id.into())
+        .await
+        .map_err(|e| format!("get_file: {e}"))?;
+    let mut buf: Vec<u8> = Vec::new();
+    bot.download_file(&file.path, &mut buf)
+        .await
+        .map_err(|e| format!("download_file: {e}"))?;
+    Ok(buf)
+}
+
 impl Channel for TelegramChannel {
     fn try_recv(&mut self) -> Option<ChannelMessage> {
         self.rx.try_recv().ok().map(|incoming| {
             self.chat_id = Some(incoming.chat_id);
             ChannelMessage {
                 text: incoming.text,
-                attachments: vec![],
+                attachments: incoming.attachments,
             }
         })
     }
@@ -252,7 +291,7 @@ impl Channel for TelegramChannel {
 
             return Ok(Some(ChannelMessage {
                 text: incoming.text,
-                attachments: vec![],
+                attachments: incoming.attachments,
             }));
         }
     }

diff --git a/docs/src/getting-started/configuration.md b/docs/src/getting-started/configuration.md
@@ -95,6 +95,11 @@ max_tokens = 4096
 # embedding_model = "text-embedding-3-small"
 # reasoning_effort = "medium"  # low, medium, high (for reasoning models)
 
+[llm.stt]
+provider = "whisper"
+model = "whisper-1"
+# Requires `stt` feature. Uses the OpenAI API key from [llm.openai] or ZEPH_OPENAI_API_KEY.
+
 [skills]
 paths = ["./skills"]
 max_active_skills = 5  # Top-K skills per query via embedding similarity

diff --git a/docs/src/guide/audio-input.md b/docs/src/guide/audio-input.md
@@ -35,6 +35,14 @@ The Whisper provider inherits the OpenAI API key from the `[llm.openai]` section
 | OpenAI Whisper API | `whisper` | `stt` | Available |
 | Local Whisper (candle) | — | — | Planned |
 
+## Telegram Voice Messages
+
+The Telegram channel automatically detects voice and audio messages. When a user sends a voice note or audio file, the adapter downloads the file bytes via the Telegram Bot API and wraps them as an `Attachment` with `AttachmentKind::Audio`. The attachment then follows the standard transcription pipeline described above.
+
+Download failures (network errors, expired file links) are logged at `warn` level and gracefully skipped — the message is delivered without an attachment rather than causing an error.
+
+Bootstrap wiring is automatic: when `[llm.stt]` is present in the config and the `stt` feature is enabled, `main.rs` creates a `WhisperProvider` and injects it into the agent via `with_stt()`. No additional setup is needed beyond the configuration shown above.
+
 ## Limitations
 
 - **25 MB file size limit** — audio files exceeding this are rejected before upload.

diff --git a/docs/src/guide/channels.md b/docs/src/guide/channels.md
@@ -256,7 +256,7 @@ When the queue is full (10 messages), new input is silently dropped until space
 
 ## Attachments
 
-`ChannelMessage` supports an optional `attachments` field carrying `Attachment` values with typed `AttachmentKind` variants (Audio, Image, Video, File). When the `stt` feature is enabled, audio attachments are automatically transcribed before entering the agent loop. See [Audio Input](audio-input.md) for details.
+`ChannelMessage` supports an optional `attachments` field carrying `Attachment` values with typed `AttachmentKind` variants (Audio, Image, Video, File). When the `stt` feature is enabled, audio attachments are automatically transcribed before entering the agent loop. The Telegram channel automatically downloads voice and audio messages and delivers them as attachments. See [Audio Input](audio-input.md) for details.
 
 ## Channel Selection Logic
 

diff --git a/src/main.rs b/src/main.rs
@@ -423,6 +423,35 @@ async fn main() -> anyhow::Result<()> {
     let agent = agent.with_mcp(mcp_tools, mcp_registry, Some(mcp_manager), &config.mcp);
     let agent = agent.with_learning(config.skills.learning.clone());
 
+    #[cfg(feature = "stt")]
+    let agent = if config.llm.stt.is_some() {
+        if let Some(ref api_key) = config.secrets.openai_api_key {
+            let base_url = config
+                .llm
+                .openai
+                .as_ref()
+                .map_or("https://api.openai.com/v1", |o| o.base_url.as_str());
+            let model = config
+                .llm
+                .stt
+                .as_ref()
+                .map_or("whisper-1", |s| s.model.as_str());
+            let whisper = zeph_llm::whisper::WhisperProvider::new(
+                reqwest::Client::new(),
+                api_key.expose(),
+                base_url,
+                model,
+            );
+            tracing::info!("STT enabled via Whisper (model: {model})");
+            agent.with_stt(Box::new(whisper))
+        } else {
+            tracing::warn!("STT configured but ZEPH_OPENAI_API_KEY not found");
+            agent
+        }
+    } else {
+        agent
+    };
+
     #[cfg(feature = "tui")]
     let tui_metrics_rx;
     #[cfg(feature = "tui")]