From 0b329d52882addc941d0ee3079754a0b5c0040f8 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Thu, 19 Feb 2026 16:52:43 +0100 Subject: [PATCH 1/2] release: prepare v0.11.1 --- CHANGELOG.md | 6 +++- Cargo.lock | 26 ++++++++--------- Cargo.toml | 26 ++++++++--------- README.md | 3 +- crates/zeph-scheduler/README.md | 2 +- ...idgets__splash__tests__splash_default.snap | 2 +- docs/src/changelog.md | 11 +++++-- docs/src/getting-started/installation.md | 2 +- docs/src/guide/tui.md | 29 ++++++++++++++++++- 9 files changed, 73 insertions(+), 34 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1db7611..1914650 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +## [0.11.1] - 2026-02-19 + ### Added - Clickable markdown links in TUI via OSC 8 hyperlinks — `[text](url)` renders as terminal-clickable link with URL sanitization and scheme allowlist (#580) - `@`-triggered fuzzy file picker in TUI input — type `@` to search project files by name/path/extension with real-time filtering (#600) +- Command palette in TUI with read-only agent management commands (#599) - Orchestrator provider option in `zeph init` wizard for multi-model routing setup (#597) - `zeph vault` CLI subcommands: `init` (generate age keypair), `set` (store secret), `get` (retrieve secret), `list` (show keys), `rm` (remove secret) (#598) - Atomic file writes for vault operations with temp+rename strategy (#598) @@ -965,7 +968,8 @@ let agent = Agent::new(provider, channel, &skills_prompt, executor); - Agent calls channel.send_typing() before each LLM request - Agent::run() uses tokio::select! 
to race channel messages against shutdown signal -[Unreleased]: https://github.com/bug-ops/zeph/compare/v0.11.0...HEAD +[Unreleased]: https://github.com/bug-ops/zeph/compare/v0.11.1...HEAD +[0.11.1]: https://github.com/bug-ops/zeph/compare/v0.11.0...v0.11.1 [0.11.0]: https://github.com/bug-ops/zeph/compare/v0.10.0...v0.11.0 [0.10.0]: https://github.com/bug-ops/zeph/compare/v0.9.9...v0.10.0 [0.9.9]: https://github.com/bug-ops/zeph/compare/v0.9.8...v0.9.9 diff --git a/Cargo.lock b/Cargo.lock index 0ad3dea..fdbb3e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8841,7 +8841,7 @@ dependencies = [ [[package]] name = "zeph" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "clap", @@ -8875,7 +8875,7 @@ dependencies = [ [[package]] name = "zeph-a2a" -version = "0.11.0" +version = "0.11.1" dependencies = [ "axum 0.8.8", "eventsource-stream", @@ -8899,7 +8899,7 @@ dependencies = [ [[package]] name = "zeph-channels" -version = "0.11.0" +version = "0.11.1" dependencies = [ "axum 0.8.8", "criterion", @@ -8922,7 +8922,7 @@ dependencies = [ [[package]] name = "zeph-core" -version = "0.11.0" +version = "0.11.1" dependencies = [ "age", "anyhow", @@ -8952,7 +8952,7 @@ dependencies = [ [[package]] name = "zeph-gateway" -version = "0.11.0" +version = "0.11.1" dependencies = [ "axum 0.8.8", "blake3", @@ -8969,7 +8969,7 @@ dependencies = [ [[package]] name = "zeph-index" -version = "0.11.0" +version = "0.11.1" dependencies = [ "blake3", "ignore", @@ -9001,7 +9001,7 @@ dependencies = [ [[package]] name = "zeph-llm" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "base64 0.22.1", @@ -9028,7 +9028,7 @@ dependencies = [ [[package]] name = "zeph-mcp" -version = "0.11.0" +version = "0.11.1" dependencies = [ "blake3", "qdrant-client", @@ -9047,7 +9047,7 @@ dependencies = [ [[package]] name = "zeph-memory" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "criterion", @@ -9070,7 +9070,7 @@ dependencies = [ [[package]] name = "zeph-scheduler" 
-version = "0.11.0" +version = "0.11.1" dependencies = [ "chrono", "cron", @@ -9088,7 +9088,7 @@ dependencies = [ [[package]] name = "zeph-skills" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "blake3", @@ -9112,7 +9112,7 @@ dependencies = [ [[package]] name = "zeph-tools" -version = "0.11.0" +version = "0.11.1" dependencies = [ "dirs", "filetime", @@ -9136,7 +9136,7 @@ dependencies = [ [[package]] name = "zeph-tui" -version = "0.11.0" +version = "0.11.1" dependencies = [ "anyhow", "crossterm", diff --git a/Cargo.toml b/Cargo.toml index 4aee284..953c00c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ resolver = "3" [workspace.package] edition = "2024" rust-version = "1.88" -version = "0.11.0" +version = "0.11.1" authors = ["bug-ops"] license = "MIT" repository = "https://github.com/bug-ops/zeph" @@ -88,18 +88,18 @@ unicode-width = "0.2" url = "2.5" uuid = "1.21" cron = "0.15" -zeph-a2a = { path = "crates/zeph-a2a", version = "0.11.0" } -zeph-channels = { path = "crates/zeph-channels", version = "0.11.0" } -zeph-core = { path = "crates/zeph-core", version = "0.11.0" } -zeph-index = { path = "crates/zeph-index", version = "0.11.0" } -zeph-llm = { path = "crates/zeph-llm", version = "0.11.0" } -zeph-mcp = { path = "crates/zeph-mcp", version = "0.11.0" } -zeph-memory = { path = "crates/zeph-memory", version = "0.11.0" } -zeph-skills = { path = "crates/zeph-skills", version = "0.11.0" } -zeph-tools = { path = "crates/zeph-tools", version = "0.11.0" } -zeph-gateway = { path = "crates/zeph-gateway", version = "0.11.0" } -zeph-scheduler = { path = "crates/zeph-scheduler", version = "0.11.0" } -zeph-tui = { path = "crates/zeph-tui", version = "0.11.0" } +zeph-a2a = { path = "crates/zeph-a2a", version = "0.11.1" } +zeph-channels = { path = "crates/zeph-channels", version = "0.11.1" } +zeph-core = { path = "crates/zeph-core", version = "0.11.1" } +zeph-index = { path = "crates/zeph-index", version = "0.11.1" } +zeph-llm = { path = 
"crates/zeph-llm", version = "0.11.1" } +zeph-mcp = { path = "crates/zeph-mcp", version = "0.11.1" } +zeph-memory = { path = "crates/zeph-memory", version = "0.11.1" } +zeph-skills = { path = "crates/zeph-skills", version = "0.11.1" } +zeph-tools = { path = "crates/zeph-tools", version = "0.11.1" } +zeph-gateway = { path = "crates/zeph-gateway", version = "0.11.1" } +zeph-scheduler = { path = "crates/zeph-scheduler", version = "0.11.1" } +zeph-tui = { path = "crates/zeph-tui", version = "0.11.1" } [workspace.lints.clippy] all = "warn" diff --git a/README.md b/README.md index 36251e1..dbf1f31 100644 --- a/README.md +++ b/README.md @@ -274,9 +274,10 @@ Skills **evolve**: failure detection triggers self-reflection, and the agent gen A full terminal UI powered by ratatui — not a separate monitoring tool, but an integrated experience: -- Tree-sitter syntax highlighting and markdown rendering +- Tree-sitter syntax highlighting and markdown rendering with clickable hyperlinks (OSC 8) - Syntax-highlighted diff view for file edits (compact/expanded toggle) - `@`-triggered fuzzy file picker with real-time filtering (nucleo-matcher) +- Command palette for quick access to agent actions - Live metrics: token usage, filter savings, cost tracking, confidence distribution - Conversation history with message queueing - Responsive input handling during streaming with render cache and event batching diff --git a/crates/zeph-scheduler/README.md b/crates/zeph-scheduler/README.md index d6c1618..cb47278 100644 --- a/crates/zeph-scheduler/README.md +++ b/crates/zeph-scheduler/README.md @@ -47,7 +47,7 @@ scheduler.register_handler(&TaskKind::UpdateCheck, Box::new(handler)); Notification format sent via the channel: ``` -New version available: v0.12.0 (current: v0.11.0). +New version available: v0.12.0 (current: v0.11.1). 
Update: https://github.com/bug-ops/zeph/releases/tag/v0.12.0 ``` diff --git a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__splash__tests__splash_default.snap b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__splash__tests__splash_default.snap index 2f90eff..57c23cf 100644 --- a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__splash__tests__splash_default.snap +++ b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__splash__tests__splash_default.snap @@ -14,7 +14,7 @@ expression: output │ ███████╗███████╗██║ ██║ ██║ │ │ ╚══════╝╚══════╝╚═╝ ╚═╝ ╚═╝ │ │ │ -│ v0.11.0 │ +│ v0.11.1 │ │ │ │ Type a message to start. │ │ │ diff --git a/docs/src/changelog.md b/docs/src/changelog.md index 722e6f6..aa9c2c4 100644 --- a/docs/src/changelog.md +++ b/docs/src/changelog.md @@ -6,11 +6,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). See the full [CHANGELOG.md](https://github.com/bug-ops/zeph/blob/main/CHANGELOG.md) in the repository for the complete version history. 
-## [Unreleased] +## [0.11.1] - 2026-02-19 ### Added +- Clickable markdown links in TUI via OSC 8 hyperlinks (#580) - `@`-triggered fuzzy file picker in TUI input (#600) -- Orchestrator provider option in `zeph init` wizard for multi-model routing setup (#597) +- Command palette in TUI with read-only agent management commands (#599) +- Orchestrator provider option in `zeph init` wizard (#597) +- `zeph vault` CLI subcommands: init, set, get, list, rm (#598) +- Auto-update check via GitHub Releases API (#588) + +### Fixed +- `--vault`, `--vault-key`, `--vault-path` CLI flags restored (#587) ## [0.11.0] - 2026-02-19 diff --git a/docs/src/getting-started/installation.md b/docs/src/getting-started/installation.md index 0c0d007..935aa9a 100644 --- a/docs/src/getting-started/installation.md +++ b/docs/src/getting-started/installation.md @@ -19,7 +19,7 @@ ZEPH_INSTALL_DIR=/usr/local/bin curl -fsSL https://github.com/bug-ops/zeph/relea Install a specific version: ```bash -curl -fsSL https://github.com/bug-ops/zeph/releases/latest/download/install.sh | sh -s -- --version v0.11.0 +curl -fsSL https://github.com/bug-ops/zeph/releases/latest/download/install.sh | sh -s -- --version v0.11.1 ``` After installation, run the configuration wizard: diff --git a/docs/src/guide/tui.md b/docs/src/guide/tui.md index a319a9d..79cac13 100644 --- a/docs/src/guide/tui.md +++ b/docs/src/guide/tui.md @@ -24,7 +24,7 @@ ZEPH_TUI=true zeph ```text +-------------------------------------------------------------+ -| Zeph v0.9.8 | Provider: orchestrator | Model: claude-son... | +| Zeph v0.11.1 | Provider: orchestrator | Model: claude-son... 
| +----------------------------------------+--------------------+ | | Skills (3/15) | | | - setup-guide | @@ -77,6 +77,7 @@ ZEPH_TUI=true zeph | `Ctrl+C` | Quit application | | `Ctrl+U` | Clear input line | | `Ctrl+K` | Clear message queue | +| `Ctrl+P` | Open command palette | ### File Picker @@ -92,6 +93,32 @@ Typing `@` in Insert mode opens a fuzzy file search popup above the input area. All other keys are blocked while the picker is visible. +### Command Palette + +Press `Ctrl+P` in Insert mode to open the command palette. The palette provides read-only agent management commands for inspecting runtime state without leaving the TUI. + +| Key | Action | +|-----|--------| +| Any character | Filter commands by substring match | +| `Up` / `Down` | Navigate the command list | +| `Enter` | Execute selected command | +| `Backspace` | Remove last query character | +| `Escape` | Close palette without executing | + +Available commands: + +| Command | Description | +|---------|-------------| +| `skill:list` | List loaded skills | +| `mcp:list` | List MCP servers and tools | +| `memory:stats` | Show memory statistics | +| `view:cost` | Show cost breakdown | +| `view:tools` | List available tools | +| `view:config` | Show active configuration | +| `view:autonomy` | Show autonomy/trust level | + +All commands are read-only and do not modify agent state. Results are displayed as system messages in the chat panel. + ### Confirmation Modal When a destructive command requires confirmation, a modal overlay appears: From 5241088c2c45de5cafadb3d93a258fb3aae2f799 Mon Sep 17 00:00:00 2001 From: "Andrei G." Date: Thu, 19 Feb 2026 17:37:02 +0100 Subject: [PATCH 2/2] docs: restructure documentation around user journey Reorganize docs from feature-oriented flat list to progressive disclosure structure (attract -> onboard -> deepen -> master). 
New pages: - introduction.md: value proposition instead of feature dump - why-zeph.md: differentiators and advantages - first-conversation.md: scenario-driven onboarding - wizard.md: focused zeph init walkthrough - concepts/: skills, memory, providers, tools overviews - guides/: cloud-provider, telegram, custom-skills Structure changes: - guide/ (26 flat pages) -> advanced/, guides/, concepts/, reference/ - Merged: audio-input+vision -> multimodal, daemon+scheduler -> daemon - Split: configuration -> wizard + reference, channels -> telegram guide + full reference - All internal cross-references updated --- docs/src/SUMMARY.md | 82 ++--- docs/src/{guide => advanced}/a2a.md | 0 docs/src/{guide => advanced}/candle.md | 0 docs/src/advanced/channels.md | 95 ++++++ docs/src/{guide => advanced}/code-indexing.md | 0 docs/src/{guide => advanced}/context.md | 0 docs/src/advanced/daemon.md | 98 ++++++ .../{guide => advanced}/document-loaders.md | 0 docs/src/{guide => advanced}/gateway.md | 0 docs/src/advanced/multimodal.md | 99 ++++++ docs/src/{guide => advanced}/observability.md | 0 docs/src/{guide => advanced}/orchestrator.md | 0 docs/src/{guide => advanced}/pipeline.md | 0 docs/src/{guide => advanced}/self-learning.md | 0 docs/src/{guide => advanced}/skill-trust.md | 0 docs/src/{guide => advanced}/tools.md | 2 +- docs/src/{guide => advanced}/tui.md | 0 docs/src/architecture/token-efficiency.md | 6 +- docs/src/concepts/memory.md | 51 ++++ docs/src/concepts/providers.md | 52 ++++ docs/src/concepts/skills.md | 45 +++ docs/src/concepts/tools.md | 53 ++++ .../src/getting-started/first-conversation.md | 67 +++++ docs/src/getting-started/installation.md | 2 +- docs/src/getting-started/quick-start.md | 61 ---- docs/src/getting-started/wizard.md | 62 ++++ docs/src/guide/audio-input.md | 127 -------- docs/src/guide/channels.md | 284 ------------------ docs/src/guide/daemon.md | 51 ---- docs/src/guide/openai.md | 48 --- docs/src/guide/scheduler.md | 109 ------- 
docs/src/guide/skills.md | 154 ---------- docs/src/guide/summarization.md | 29 -- docs/src/guide/vault.md | 110 ------- docs/src/guide/vision.md | 73 ----- docs/src/guides/cloud-provider.md | 85 ++++++ docs/src/guides/custom-skills.md | 80 +++++ docs/src/{guide => guides}/docker.md | 0 docs/src/{guide => guides}/mcp.md | 0 docs/src/{guide => guides}/semantic-memory.md | 2 +- docs/src/guides/telegram.md | 56 ++++ docs/src/introduction.md | 43 ++- docs/src/{guide => reference}/cli.md | 6 +- .../configuration.md | 33 +- docs/src/{ => reference}/feature-flags.md | 18 +- docs/src/{ => reference}/security.md | 2 +- docs/src/{ => reference}/security/mcp.md | 2 +- docs/src/why-zeph.md | 25 ++ 48 files changed, 957 insertions(+), 1155 deletions(-) rename docs/src/{guide => advanced}/a2a.md (100%) rename docs/src/{guide => advanced}/candle.md (100%) create mode 100644 docs/src/advanced/channels.md rename docs/src/{guide => advanced}/code-indexing.md (100%) rename docs/src/{guide => advanced}/context.md (100%) create mode 100644 docs/src/advanced/daemon.md rename docs/src/{guide => advanced}/document-loaders.md (100%) rename docs/src/{guide => advanced}/gateway.md (100%) create mode 100644 docs/src/advanced/multimodal.md rename docs/src/{guide => advanced}/observability.md (100%) rename docs/src/{guide => advanced}/orchestrator.md (100%) rename docs/src/{guide => advanced}/pipeline.md (100%) rename docs/src/{guide => advanced}/self-learning.md (100%) rename docs/src/{guide => advanced}/skill-trust.md (100%) rename docs/src/{guide => advanced}/tools.md (98%) rename docs/src/{guide => advanced}/tui.md (100%) create mode 100644 docs/src/concepts/memory.md create mode 100644 docs/src/concepts/providers.md create mode 100644 docs/src/concepts/skills.md create mode 100644 docs/src/concepts/tools.md create mode 100644 docs/src/getting-started/first-conversation.md delete mode 100644 docs/src/getting-started/quick-start.md create mode 100644 docs/src/getting-started/wizard.md delete 
mode 100644 docs/src/guide/audio-input.md delete mode 100644 docs/src/guide/channels.md delete mode 100644 docs/src/guide/daemon.md delete mode 100644 docs/src/guide/openai.md delete mode 100644 docs/src/guide/scheduler.md delete mode 100644 docs/src/guide/skills.md delete mode 100644 docs/src/guide/summarization.md delete mode 100644 docs/src/guide/vault.md delete mode 100644 docs/src/guide/vision.md create mode 100644 docs/src/guides/cloud-provider.md create mode 100644 docs/src/guides/custom-skills.md rename docs/src/{guide => guides}/docker.md (100%) rename docs/src/{guide => guides}/mcp.md (100%) rename docs/src/{guide => guides}/semantic-memory.md (95%) create mode 100644 docs/src/guides/telegram.md rename docs/src/{guide => reference}/cli.md (90%) rename docs/src/{getting-started => reference}/configuration.md (82%) rename docs/src/{ => reference}/feature-flags.md (84%) rename docs/src/{ => reference}/security.md (98%) rename docs/src/{ => reference}/security/mcp.md (97%) create mode 100644 docs/src/why-zeph.md diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 6232059..1e33c8c 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -1,49 +1,64 @@ # Summary [Introduction](introduction.md) +[Why Zeph?](why-zeph.md) # Getting Started - [Installation](getting-started/installation.md) -- [Quick Start](getting-started/quick-start.md) -- [Configuration](getting-started/configuration.md) - -# Guide - -- [CLI Reference](guide/cli.md) -- [Skills](guide/skills.md) -- [Semantic Memory](guide/semantic-memory.md) -- [Context Engineering](guide/context.md) -- [Conversation Summarization](guide/summarization.md) -- [Docker Deployment](guide/docker.md) -- [MCP Integration](guide/mcp.md) -- [OpenAI Provider](guide/openai.md) -- [Local Inference (Candle)](guide/candle.md) -- [Model Orchestrator](guide/orchestrator.md) -- [Self-Learning Skills](guide/self-learning.md) -- [Skill Trust Levels](guide/skill-trust.md) -- [A2A Protocol](guide/a2a.md) -- [Secrets 
Management](guide/vault.md) -- [Channels (CLI, Telegram, TUI)](guide/channels.md) -- [Tool System](guide/tools.md) -- [Pipeline API](guide/pipeline.md) -- [Audio Input](guide/audio-input.md) -- [Vision (Image Input)](guide/vision.md) -- [TUI Dashboard](guide/tui.md) -- [Observability & Cost](guide/observability.md) -- [Code Indexing](guide/code-indexing.md) -- [HTTP Gateway](guide/gateway.md) -- [Daemon Supervisor](guide/daemon.md) -- [Cron Scheduler](guide/scheduler.md) -- [Document Loaders](guide/document-loaders.md) +- [First Conversation](getting-started/first-conversation.md) +- [Configuration Wizard](getting-started/wizard.md) + +# Core Concepts + +- [Skills](concepts/skills.md) +- [Memory & Context](concepts/memory.md) +- [LLM Providers](concepts/providers.md) +- [Tools](concepts/tools.md) + +# Guides + +- [Use a Cloud Provider](guides/cloud-provider.md) +- [Run via Telegram](guides/telegram.md) +- [Add Custom Skills](guides/custom-skills.md) +- [Connect MCP Servers](guides/mcp.md) +- [Set Up Semantic Memory](guides/semantic-memory.md) +- [Deploy with Docker](guides/docker.md) + +# Advanced + +- [Model Orchestrator](advanced/orchestrator.md) +- [Self-Learning Skills](advanced/self-learning.md) +- [Skill Trust & Security](advanced/skill-trust.md) +- [A2A Protocol](advanced/a2a.md) +- [Code Indexing](advanced/code-indexing.md) +- [Pipeline API](advanced/pipeline.md) +- [Context Engineering](advanced/context.md) +- [Audio & Vision](advanced/multimodal.md) +- [TUI Dashboard](advanced/tui.md) +- [HTTP Gateway](advanced/gateway.md) +- [Daemon & Scheduler](advanced/daemon.md) +- [Document Loaders](advanced/document-loaders.md) +- [Observability & Cost](advanced/observability.md) +- [Channels](advanced/channels.md) +- [Tool System](advanced/tools.md) +- [Local Inference (Candle)](advanced/candle.md) # Architecture - [Overview](architecture/overview.md) -- [Crates](architecture/crates.md) +- [Crate Map](architecture/crates.md) - [Token 
Efficiency](architecture/token-efficiency.md) - [Performance](architecture/performance.md) +# Reference + +- [CLI Reference](reference/cli.md) +- [Configuration](reference/configuration.md) +- [Feature Flags](reference/feature-flags.md) +- [Security](reference/security.md) + - [MCP Security](reference/security/mcp.md) + # Development - [sccache](development/sccache.md) @@ -51,8 +66,5 @@ --- -- [Security](security.md) - - [MCP Security](security/mcp.md) -- [Feature Flags](feature-flags.md) - [Contributing](contributing.md) - [Changelog](changelog.md) diff --git a/docs/src/guide/a2a.md b/docs/src/advanced/a2a.md similarity index 100% rename from docs/src/guide/a2a.md rename to docs/src/advanced/a2a.md diff --git a/docs/src/guide/candle.md b/docs/src/advanced/candle.md similarity index 100% rename from docs/src/guide/candle.md rename to docs/src/advanced/candle.md diff --git a/docs/src/advanced/channels.md b/docs/src/advanced/channels.md new file mode 100644 index 0000000..a74f93a --- /dev/null +++ b/docs/src/advanced/channels.md @@ -0,0 +1,95 @@ +# Channels + +Zeph supports five I/O channels. Each implements the `Channel` trait and can be selected at runtime. + +## Overview + +| Channel | Activation | Streaming | Confirmation | +|---------|-----------|-----------|--------------| +| CLI | Default | Token-by-token to stdout | y/N prompt | +| Discord | `ZEPH_DISCORD_TOKEN` (requires `discord` feature) | Edit-in-place every 1.5s | Reply "yes" | +| Slack | `ZEPH_SLACK_BOT_TOKEN` (requires `slack` feature) | `chat.update` every 2s | Reply "yes" | +| Telegram | `ZEPH_TELEGRAM_TOKEN` | Edit-in-place every 10s | Reply "yes" | +| TUI | `--tui` flag (requires `tui` feature) | Real-time in chat panel | Auto-confirm | + +## CLI Channel + +Default channel. Reads from stdin, writes to stdout with immediate streaming. Persistent input history (rustyline): arrow keys to navigate, prefix search, Emacs keybindings (Ctrl+A/E, Alt+B/F, Ctrl+W). History stored in SQLite across restarts. 
+ +## Telegram Channel + +See [Run via Telegram](../guides/telegram.md) for the setup guide. User whitelisting required (`allowed_users` must not be empty). MarkdownV2 formatting, voice/image support, 10s streaming throttle, 4096 char message splitting. + +## Discord Channel + +### Setup + +1. Create an application at the [Discord Developer Portal](https://discord.com/developers/applications) +2. Copy the bot token, select `bot` + `applications.commands` scopes +3. Configure: + +```bash +ZEPH_DISCORD_TOKEN="..." ZEPH_DISCORD_APP_ID="..." zeph +``` + +```toml +[discord] +allowed_user_ids = [] +allowed_role_ids = [] +allowed_channel_ids = [] +``` + +When all allowlists are empty, the bot accepts messages from all users. + +### Slash Commands + +| Command | Description | +|---------|-------------| +| `/ask <message>` | Send a message to the agent | +| `/clear` | Reset conversation context | + +Streaming: 1.5s throttle, messages split at 2000 chars. + +## Slack Channel + +### Setup + +1. Create a Slack app at [api.slack.com/apps](https://api.slack.com/apps) +2. Add `chat:write` scope, install to workspace, copy Bot User OAuth Token +3. Copy Signing Secret from Basic Information +4. Enable Event Subscriptions, set URL to `http://<host>:<port>/slack/events` +5. Subscribe to `message.channels` and `message.im` bot events + +```bash +ZEPH_SLACK_BOT_TOKEN="xoxb-..." ZEPH_SLACK_SIGNING_SECRET="..." zeph +``` + +Security: HMAC-SHA256 signature verification, 5-minute replay protection, 256 KB body limit. Self-message filtering via `auth.test` at startup. + +Streaming: 2s throttle via `chat.update`. + +## TUI Dashboard + +Rich terminal interface based on ratatui. See [TUI Dashboard](tui.md) for full documentation. + +```bash +zeph --tui +``` + +## Channel Selection Priority + +1. `--tui` flag or `ZEPH_TUI=true` → TUI +2. Discord config with token → Discord +3. Slack config with bot_token → Slack +4. `ZEPH_TELEGRAM_TOKEN` set → Telegram +5. Default → CLI + +Only one channel is active per session. 
+ +## Message Queueing + +Bounded FIFO queue (max 10 messages) handles input received during model inference. Consecutive messages within 500ms are merged. CLI is blocking (no queue). TUI shows a `[+N queued]` badge; press `Ctrl+K` to clear. + +## Attachments + +Audio and image attachments are supported on Telegram, Slack, CLI/TUI (via `/image`). See [Audio & Vision](multimodal.md). diff --git a/docs/src/guide/code-indexing.md b/docs/src/advanced/code-indexing.md similarity index 100% rename from docs/src/guide/code-indexing.md rename to docs/src/advanced/code-indexing.md diff --git a/docs/src/guide/context.md b/docs/src/advanced/context.md similarity index 100% rename from docs/src/guide/context.md rename to docs/src/advanced/context.md diff --git a/docs/src/advanced/daemon.md b/docs/src/advanced/daemon.md new file mode 100644 index 0000000..f1630ed --- /dev/null +++ b/docs/src/advanced/daemon.md @@ -0,0 +1,98 @@ +# Daemon and Scheduler + +Run Zeph as a long-running process with component supervision and cron-based periodic tasks. + +## Daemon Supervisor + +The daemon manages component lifecycles (gateway, scheduler, A2A server), monitors for unexpected exits, and tracks restart counts. + +### Feature Flag + +```bash +cargo build --release --features daemon +``` + +### Configuration + +```toml +[daemon] +enabled = true +pid_file = "~/.zeph/zeph.pid" +health_interval_secs = 30 +max_restart_backoff_secs = 60 +``` + +### Component Lifecycle + +Each registered component is tracked with a status (`Running`, `Failed(reason)`, or `Stopped`) and a restart counter. The supervisor polls all components at `health_interval_secs` intervals. + +### PID File + +Written on startup for instance detection and stop signals. Tilde (`~`) expands to `$HOME`. Parent directory is created automatically. + +## Cron Scheduler + +Run periodic tasks on cron schedules with SQLite-backed persistence. 
+ +### Feature Flag + +```bash +cargo build --release --features scheduler +``` + +### Configuration + +```toml +[scheduler] +enabled = true + +[[scheduler.tasks]] +name = "memory_cleanup" +cron = "0 0 0 * * *" # daily at midnight +kind = "memory_cleanup" +config = { max_age_days = 90 } + +[[scheduler.tasks]] +name = "health_check" +cron = "0 */5 * * * *" # every 5 minutes +kind = "health_check" +``` + +Cron expressions use 6 fields: `sec min hour day month weekday`. Standard features supported: ranges (`1-5`), lists (`1,3,5`), steps (`*/5`), wildcards (`*`). + +### Built-in Tasks + +| Kind | Description | +|------|-------------| +| `memory_cleanup` | Remove old conversation history entries | +| `skill_refresh` | Re-scan skill directories for changes | +| `health_check` | Internal health verification | +| `update_check` | Query GitHub Releases API for newer versions | + +### Update Check + +Controlled by `auto_update_check` in `[agent]` (default: true): + +- **With scheduler**: runs daily at 09:00 UTC via cron task +- **Without scheduler**: single one-shot check at startup + +### Custom Tasks + +Implement the `TaskHandler` trait: + +```rust +pub trait TaskHandler: Send + Sync { + fn execute( + &self, + config: &serde_json::Value, + ) -> Pin<Box<dyn Future<Output = Result<()>> + Send + '_>>; +} +``` + +### Persistence + +Job metadata is stored in a `scheduled_jobs` SQLite table. The scheduler ticks every 60 seconds and checks whether each task is due based on `last_run` and the cron expression. + +## Shutdown + +Both daemon and scheduler listen on the global shutdown signal and exit gracefully. 
diff --git a/docs/src/guide/document-loaders.md b/docs/src/advanced/document-loaders.md similarity index 100% rename from docs/src/guide/document-loaders.md rename to docs/src/advanced/document-loaders.md diff --git a/docs/src/guide/gateway.md b/docs/src/advanced/gateway.md similarity index 100% rename from docs/src/guide/gateway.md rename to docs/src/advanced/gateway.md diff --git a/docs/src/advanced/multimodal.md b/docs/src/advanced/multimodal.md new file mode 100644 index 0000000..2a04ab1 --- /dev/null +++ b/docs/src/advanced/multimodal.md @@ -0,0 +1,99 @@ +# Audio and Vision + +Zeph supports audio transcription and image input across all channels. + +## Audio Input + +Pipeline: Audio attachment → STT provider → Transcribed text → Agent loop + +### Configuration + +Enable the `stt` feature flag: + +```bash +cargo build --release --features stt +``` + +```toml +[llm.stt] +provider = "whisper" +model = "whisper-1" +``` + +The Whisper provider inherits the OpenAI API key from `[llm.openai]` or `ZEPH_OPENAI_API_KEY`. Environment variable overrides: `ZEPH_STT_PROVIDER`, `ZEPH_STT_MODEL`. + +### Backends + +| Backend | Provider | Feature | Description | +|---------|----------|---------|-------------| +| OpenAI Whisper API | `whisper` | `stt` | Cloud-based transcription | +| Local Whisper | `candle-whisper` | `candle` | Fully offline via candle | + +### Local Whisper (Candle) + +```bash +cargo build --release --features candle # CPU +cargo build --release --features metal # macOS Metal GPU +cargo build --release --features cuda # NVIDIA GPU +``` + +```toml +[llm.stt] +provider = "candle-whisper" +model = "openai/whisper-tiny" +``` + +| Model | Parameters | Disk | +|-------|------------|------| +| `openai/whisper-tiny` | 39M | ~150 MB | +| `openai/whisper-base` | 74M | ~290 MB | +| `openai/whisper-small` | 244M | ~950 MB | + +Models are downloaded from HuggingFace on first use. Device auto-detection: Metal → CUDA → CPU. 
+ +### Channel Support + +- **Telegram**: voice notes and audio files downloaded automatically +- **Slack**: audio uploads detected, downloaded via `url_private_download` (25 MB limit, `.slack.com` host validation). Requires `files:read` OAuth scope +- **CLI/TUI**: no audio input mechanism + +### Limits + +- 5-minute audio duration guard (candle backend) +- 25 MB file size limit +- No streaming transcription — entire file processed in one pass +- One audio attachment per message + +## Image Input + +Pipeline: Image attachment → MessagePart::Image → LLM provider (base64) → Response + +### Provider Support + +| Provider | Vision | Notes | +|----------|--------|-------| +| Claude | Yes | Anthropic image content block | +| OpenAI | Yes | image_url data-URI | +| Ollama | Yes | Optional `vision_model` routing | +| Candle | No | Text-only | + +### Ollama Vision Model + +Route image requests to a dedicated model while keeping a smaller text model for regular queries: + +```toml +[llm] +model = "mistral:7b" +vision_model = "llava:13b" +``` + +### Sending Images + +- **CLI/TUI**: `/image /path/to/screenshot.png What is shown in this image?` +- **Telegram**: send a photo directly; the caption becomes the prompt + +### Limits + +- 20 MB maximum image size +- One image per message +- No image generation (input only) diff --git a/docs/src/guide/observability.md b/docs/src/advanced/observability.md similarity index 100% rename from docs/src/guide/observability.md rename to docs/src/advanced/observability.md diff --git a/docs/src/guide/orchestrator.md b/docs/src/advanced/orchestrator.md similarity index 100% rename from docs/src/guide/orchestrator.md rename to docs/src/advanced/orchestrator.md diff --git a/docs/src/guide/pipeline.md b/docs/src/advanced/pipeline.md similarity index 100% rename from docs/src/guide/pipeline.md rename to docs/src/advanced/pipeline.md diff --git a/docs/src/guide/self-learning.md b/docs/src/advanced/self-learning.md similarity index 100% rename from 
docs/src/guide/self-learning.md rename to docs/src/advanced/self-learning.md diff --git a/docs/src/guide/skill-trust.md b/docs/src/advanced/skill-trust.md similarity index 100% rename from docs/src/guide/skill-trust.md rename to docs/src/advanced/skill-trust.md diff --git a/docs/src/guide/tools.md b/docs/src/advanced/tools.md similarity index 98% rename from docs/src/guide/tools.md rename to docs/src/advanced/tools.md index c3645a9..493b040 100644 --- a/docs/src/guide/tools.md +++ b/docs/src/advanced/tools.md @@ -25,7 +25,7 @@ Each tool executor declares its definitions via `tool_definitions()`. On every L - `glob` results are filtered post-match to exclude files outside the sandbox. - `grep` validates the search directory before scanning. -See [Security](../security.md#file-executor-sandbox) for details on the path validation mechanism. +See [Security](../reference/security.md#file-executor-sandbox) for details on the path validation mechanism. ## Native Tool Use diff --git a/docs/src/guide/tui.md b/docs/src/advanced/tui.md similarity index 100% rename from docs/src/guide/tui.md rename to docs/src/advanced/tui.md diff --git a/docs/src/architecture/token-efficiency.md b/docs/src/architecture/token-efficiency.md index 0b57322..8b09f02 100644 --- a/docs/src/architecture/token-efficiency.md +++ b/docs/src/architecture/token-efficiency.md @@ -67,7 +67,7 @@ Typical savings by command type: | `cargo clippy` (many warnings) | ~200 | ~50 | ~75% | | `git log --oneline -50` | 50 | 20 | 60% | -After each filtered execution, CLI mode prints a one-line stats summary and TUI mode accumulates the savings in the Resources panel. See [Tool System — Output Filter Pipeline](../guide/tools.md#output-filter-pipeline) for configuration details. +After each filtered execution, CLI mode prints a one-line stats summary and TUI mode accumulates the savings in the Resources panel. 
See [Tool System — Output Filter Pipeline](../advanced/tools.md#output-filter-pipeline) for configuration details. ### Token Savings Tracking @@ -77,11 +77,11 @@ After each filtered execution, CLI mode prints a one-line stats summary and TUI - `filter_total_commands` / `filter_filtered_commands` — hit rate denominator/numerator - `filter_confidence_full/partial/fallback` — distribution of filter confidence levels -These feed into the [TUI filter metrics display](../guide/tui.md#filter-metrics) and are emitted as `tracing::debug!` every 50 commands. +These feed into the [TUI filter metrics display](../advanced/tui.md#filter-metrics) and are emitted as `tracing::debug!` every 50 commands. ### Two-Tier Context Pruning -Long conversations accumulate tool outputs that consume significant context space. Zeph uses a two-tier strategy: Tier 1 selectively prunes old tool outputs (cheap, no LLM call), and Tier 2 falls back to full LLM compaction only when Tier 1 is insufficient. See [Context Engineering](../guide/context.md) for details. +Long conversations accumulate tool outputs that consume significant context space. Zeph uses a two-tier strategy: Tier 1 selectively prunes old tool outputs (cheap, no LLM call), and Tier 2 falls back to full LLM compaction only when Tier 1 is insufficient. See [Context Engineering](../advanced/context.md) for details. ## Configuration diff --git a/docs/src/concepts/memory.md b/docs/src/concepts/memory.md new file mode 100644 index 0000000..61c8b65 --- /dev/null +++ b/docs/src/concepts/memory.md @@ -0,0 +1,51 @@ +# Memory and Context + +Zeph uses a dual-store memory system: SQLite for structured conversation history and Qdrant for semantic search across past sessions. + +## Conversation History + +All messages are stored in SQLite. The CLI channel provides persistent input history with arrow-key navigation, prefix search, and Emacs keybindings. History persists across restarts. 
+ +When conversations grow long, Zeph generates summaries automatically (triggered after `summarization_threshold` messages, default: 100). Summaries are stored in SQLite and injected into the context window to preserve long-term continuity. + +## Semantic Memory + +With Qdrant enabled, messages are embedded as vectors for semantic search. Ask "what did we discuss about the API yesterday?" and Zeph retrieves relevant context from past sessions automatically. + +Semantic memory uses hybrid search — vector similarity combined with SQLite FTS5 keyword search — to improve recall quality. When Qdrant is unavailable, Zeph falls back to keyword-only search. + +Setup requires a running Qdrant instance and a config change: + +```toml +[memory.semantic] +enabled = true +recall_limit = 5 +``` + +See [Set Up Semantic Memory](../guides/semantic-memory.md) for the full setup guide. + +## Context Engineering + +When `context_budget_tokens` is set (default: 0 = unlimited), Zeph allocates the context window proportionally: + +| Allocation | Share | Purpose | +|-----------|-------|---------| +| Summaries | 15% | Compressed conversation history | +| Semantic recall | 25% | Relevant messages from past sessions | +| Recent history | 60% | Most recent messages in current conversation | + +A two-tier pruning system manages overflow: + +1. **Tool output pruning** (cheap) — replaces old tool outputs with short placeholders +2. **LLM compaction** (fallback) — summarizes middle messages when pruning is not enough + +Both tiers run automatically. See [Context Engineering](../advanced/context.md) for tuning options. + +## Project Context + +Drop a `ZEPH.md` file in your project root and Zeph discovers it automatically. Project-specific instructions are included in every prompt as a `<project_context>` block. Zeph walks up the directory tree looking for `ZEPH.md`, `ZEPH.local.md`, or `.zeph/config.md`. 
+ +## Deep Dives + +- [Set Up Semantic Memory](../guides/semantic-memory.md) — Qdrant setup guide +- [Context Engineering](../advanced/context.md) — budget allocation, compaction, recall tuning diff --git a/docs/src/concepts/providers.md b/docs/src/concepts/providers.md new file mode 100644 index 0000000..80ad9f5 --- /dev/null +++ b/docs/src/concepts/providers.md @@ -0,0 +1,52 @@ +# LLM Providers + +Zeph supports multiple LLM backends. Choose based on your needs: + +| Provider | Type | Embeddings | Vision | Best For | +|----------|------|-----------|--------|----------| +| Ollama | Local | Yes | Yes | Privacy, free, offline | +| Claude | Cloud | No | Yes | Quality, reasoning | +| OpenAI | Cloud | Yes | Yes | Ecosystem, compatibility | +| Compatible | Cloud | Varies | Varies | Together AI, Groq, Fireworks | +| Candle | Local | No | No | Minimal footprint | + +Claude does not support embeddings natively. Use the [orchestrator](../advanced/orchestrator.md) to combine Claude chat with Ollama embeddings. + +## Quick Setup + +**Ollama** (default — no API key needed): + +```bash +ollama pull mistral:7b +ollama pull qwen3-embedding +zeph +``` + +**Claude**: + +```bash +ZEPH_CLAUDE_API_KEY=sk-ant-... zeph +``` + +**OpenAI**: + +```bash +ZEPH_LLM_PROVIDER=openai ZEPH_OPENAI_API_KEY=sk-... zeph +``` + +## Switching Providers + +One config change: set `provider` in `[llm]`. All skills, memory, and tools work the same regardless of which provider is active. + +```toml +[llm] +provider = "claude" # ollama, claude, openai, candle, compatible, orchestrator, router +``` + +Or via environment variable: `ZEPH_LLM_PROVIDER`. 
+ +## Deep Dives + +- [Use a Cloud Provider](../guides/cloud-provider.md) — Claude, OpenAI, and compatible API setup +- [Model Orchestrator](../advanced/orchestrator.md) — multi-provider routing with fallback chains +- [Local Inference (Candle)](../advanced/candle.md) — HuggingFace GGUF models diff --git a/docs/src/concepts/skills.md b/docs/src/concepts/skills.md new file mode 100644 index 0000000..e13daa3 --- /dev/null +++ b/docs/src/concepts/skills.md @@ -0,0 +1,45 @@ +# Skills + +Skills give Zeph specialized knowledge for specific tasks. Each skill is a markdown file (`SKILL.md`) containing instructions and examples that are injected into the LLM prompt when relevant. + +Instead of loading all skills into every prompt, Zeph selects only the top-K most relevant (default: 5) via embedding similarity. This keeps prompt size constant regardless of how many skills are installed. + +## How Matching Works + +1. You send a message — for example, "check disk usage on this server" +2. Zeph embeds your query using the configured embedding model +3. The top 5 most relevant skills are selected by cosine similarity +4. Selected skills are injected into the system prompt +5. Zeph responds using the matched skills + +This happens automatically on every message. You never activate skills manually. 
+ +## Bundled Skills + +| Skill | Description | +|-------|-------------| +| `api-request` | HTTP API requests using curl | +| `docker` | Docker container operations | +| `file-ops` | File system operations — list, search, read, analyze | +| `git` | Git version control — status, log, diff, commit, branch | +| `mcp-generate` | Generate MCP-to-skill bridges | +| `setup-guide` | Configuration reference | +| `skill-audit` | Spec compliance and security review | +| `skill-creator` | Create new skills | +| `system-info` | System diagnostics — OS, disk, memory, processes | +| `web-scrape` | Extract data from web pages | +| `web-search` | Search the internet | + +Use `/skills` in chat to see active skills and their usage statistics. + +## Key Properties + +- **Progressive loading**: only metadata (~100 tokens per skill) is loaded at startup. Full body is loaded on first activation and cached +- **Hot-reload**: edit a `SKILL.md` file, changes apply without restart +- **Two matching backends**: in-memory (default) or Qdrant (faster startup with many skills, delta sync via BLAKE3 hash) + +## Deep Dives + +- [Add Custom Skills](../guides/custom-skills.md) — create your own skills +- [Self-Learning Skills](../advanced/self-learning.md) — how skills evolve through failure detection +- [Skill Trust Levels](../advanced/skill-trust.md) — security model for imported skills diff --git a/docs/src/concepts/tools.md b/docs/src/concepts/tools.md new file mode 100644 index 0000000..c964fb6 --- /dev/null +++ b/docs/src/concepts/tools.md @@ -0,0 +1,53 @@ +# Tools + +Tools give Zeph the ability to interact with the outside world. Three built-in tool types cover most use cases, with MCP providing extensibility. + +## Shell + +Execute any shell command via the `bash` tool. 
Commands are sandboxed: + +- **Path restrictions**: configure allowed directories (default: current working directory only) +- **Network control**: block `curl`, `wget`, `nc` with `allow_network = false` +- **Confirmation**: destructive commands (`rm`, `git push -f`, `drop table`) require a y/N prompt +- **Output filtering**: test results, git diffs, and clippy output are automatically stripped of noise to reduce token usage + +## File Operations + +Five file tools (`read`, `write`, `edit`, `glob`, `grep`) provide structured access to the filesystem. All paths are validated against an allowlist. Directory traversal is prevented via canonical path resolution. + +## Web Scraping + +The `web_scrape` tool extracts data from web pages using CSS selectors. Configurable timeout (default: 15s) and body size limit (default: 1 MB). + +## MCP Tools + +Connect external tool servers via [Model Context Protocol](https://modelcontextprotocol.io/). MCP tools are embedded and matched alongside skills using the same cosine similarity pipeline — adding more servers does not inflate prompt size. See [Connect MCP Servers](../guides/mcp.md). + +## Permissions + +Three permission levels control tool access: + +| Action | Behavior | +|--------|----------| +| `allow` | Execute without confirmation | +| `ask` | Prompt user before execution | +| `deny` | Block execution entirely | + +Configure per-tool pattern rules in `[tools.permissions]`: + +```toml +[[tools.permissions.bash]] +pattern = "cargo *" +action = "allow" + +[[tools.permissions.bash]] +pattern = "*sudo*" +action = "deny" +``` + +First matching rule wins. Default: `ask`. 
+ +## Deep Dives + +- [Tool System](../advanced/tools.md) — full reference with filter pipeline, native tool use, iteration control +- [Security](../reference/security.md) — sandboxing and path validation details diff --git a/docs/src/getting-started/first-conversation.md b/docs/src/getting-started/first-conversation.md new file mode 100644 index 0000000..2832c79 --- /dev/null +++ b/docs/src/getting-started/first-conversation.md @@ -0,0 +1,67 @@ +# First Conversation + +This guide takes you from a fresh install to your first productive interaction with Zeph. + +## Prerequisites + +- Zeph [installed](installation.md) and `zeph init` completed +- Either Ollama running locally (`ollama serve`), or a Claude/OpenAI API key configured + +## Start the Agent + +```bash +zeph +``` + +You see a `You:` prompt. Type a message and press Enter. + +## Ask About Files + +``` +You: What files are in the current directory? +``` + +Behind the scenes: +1. Zeph embeds your query and matches the `file-ops` skill (ranked by cosine similarity) +2. The skill's instructions are injected into the prompt +3. The agent calls the `glob` tool to list files +4. You get a structured answer with the directory listing + +You did not tell Zeph which skill to use — it figured it out from context. + +## Run a Command + +``` +You: Check disk usage on this machine +``` + +Zeph matches the `system-info` skill and runs `df -h` via the `bash` tool. If a command is potentially destructive (like `rm` or `git push --force`), Zeph asks for confirmation first: + +``` +Execute: rm -rf /tmp/old-cache? [y/N] +``` + +## See Memory in Action + +``` +You: What files did we just look at? +``` + +Zeph remembers the full conversation. It answers from context without re-running any commands. With [semantic memory](../guides/semantic-memory.md) enabled (Qdrant), Zeph can also recall relevant context from past sessions. 
+ +## Useful Slash Commands + +| Command | Description | +|---------|-------------| +| `/skills` | Show active skills and usage statistics | +| `/mcp` | List connected MCP tool servers | +| `/reset` | Clear conversation context | +| `/image <path>` | Attach an image for visual analysis | + +Type `exit`, `quit`, or press Ctrl-D to stop the agent. + +## Next Steps + +- [Configuration Wizard](wizard.md) — customize providers, memory, and channels +- [Skills](../concepts/skills.md) — understand how skill matching works +- [Tools](../concepts/tools.md) — what the agent can do with shell, files, and web diff --git a/docs/src/getting-started/installation.md b/docs/src/getting-started/installation.md index 935aa9a..48bfbcf 100644 --- a/docs/src/getting-started/installation.md +++ b/docs/src/getting-started/installation.md @@ -66,4 +66,4 @@ docker pull ghcr.io/bug-ops/zeph:v0.9.8 Images are scanned with [Trivy](https://trivy.dev/) in CI/CD and use Oracle Linux 9 Slim base with **0 HIGH/CRITICAL CVEs**. Multi-platform: linux/amd64, linux/arm64. -See [Docker Deployment](../guide/docker.md) for full deployment options including GPU support and age vault. +See [Docker Deployment](../guides/docker.md) for full deployment options including GPU support and age vault. diff --git a/docs/src/getting-started/quick-start.md b/docs/src/getting-started/quick-start.md deleted file mode 100644 index cf74979..0000000 --- a/docs/src/getting-started/quick-start.md +++ /dev/null @@ -1,61 +0,0 @@ -# Quick Start - -Run Zeph after building and interact via CLI, Telegram, or a cloud provider. - -## CLI Mode (default) - -**Unix (Linux/macOS):** -```bash -./target/release/zeph -``` - -**Windows:** -```powershell -.\target\release\zeph.exe -``` - -Type messages at the `You:` prompt. Type `exit`, `quit`, or press Ctrl-D to stop. 
- -## Telegram Mode - -**Unix (Linux/macOS):** -```bash -ZEPH_TELEGRAM_TOKEN="123:ABC" ./target/release/zeph -``` - -**Windows:** -```powershell -$env:ZEPH_TELEGRAM_TOKEN="123:ABC"; .\target\release\zeph.exe -``` - -Restrict access by setting `telegram.allowed_users` in the [config file](configuration.md): - -```toml -[telegram] -allowed_users = ["your_username"] -``` - -## Ollama Setup - -When using Ollama (default provider), ensure both the LLM model and embedding model are pulled: - -```bash -ollama pull mistral:7b -ollama pull qwen3-embedding -``` - -The default configuration uses `mistral:7b` for text generation and `qwen3-embedding` for vector embeddings. - -## Cloud Providers - -For Claude: -```bash -ZEPH_CLAUDE_API_KEY=sk-ant-... ./target/release/zeph -``` - -For OpenAI: -```bash -ZEPH_LLM_PROVIDER=openai ZEPH_OPENAI_API_KEY=sk-... ./target/release/zeph -``` - -See [Configuration](configuration.md) for the full reference. diff --git a/docs/src/getting-started/wizard.md b/docs/src/getting-started/wizard.md new file mode 100644 index 0000000..2b18942 --- /dev/null +++ b/docs/src/getting-started/wizard.md @@ -0,0 +1,62 @@ +# Configuration Wizard + +Run `zeph init` to generate a `config.toml` through a guided 6-step wizard. This is the fastest way to get a working configuration. + +```bash +zeph init +zeph init --output ~/.zeph/config.toml # custom output path +``` + +## Step 1: Secrets Backend + +Choose how API keys and tokens are stored: + +- **env** (default) — read secrets from environment variables +- **age** — encrypt secrets in an age-encrypted vault file (recommended for production) + +When `age` is selected, API key prompts in subsequent steps are skipped since secrets are stored via `zeph vault set` instead. + +## Step 2: LLM Provider + +Select your inference backend: + +- **Ollama** — local, free, default. Provide model name (default: `mistral:7b`) +- **Claude** — Anthropic API. Provide API key +- **OpenAI** — OpenAI or compatible API. 
Provide base URL, model, API key +- **Orchestrator** — multi-model routing. Select a primary and fallback provider +- **Compatible** — any OpenAI-compatible endpoint + +Choose an embedding model for skill matching and semantic memory (default: `qwen3-embedding`). + +## Step 3: Memory + +Set the SQLite database path and optionally enable semantic memory with Qdrant. Qdrant requires a running instance (e.g., via Docker). + +## Step 4: Channel + +Pick the I/O channel: + +- **CLI** (default) — terminal interaction, no setup needed +- **Telegram** — provide bot token, set allowed usernames +- **Discord** — provide bot token and application ID (requires `discord` feature) +- **Slack** — provide bot token and signing secret (requires `slack` feature) + +## Step 5: Update Check + +Enable or disable automatic version checks against GitHub Releases (default: enabled). + +## Step 6: Review and Save + +Inspect the generated TOML, confirm the output path, and save. If the file already exists, the wizard asks before overwriting. + +## After the Wizard + +The wizard prints the secrets you need to configure: + +- **env backend**: `export ZEPH_CLAUDE_API_KEY=...` commands to add to your shell profile +- **age backend**: `zeph vault init` and `zeph vault set` commands to run + +## Further Reading + +- [Configuration Reference](../reference/configuration.md) — full config file and environment variables +- [Secrets Management](../reference/security.md#age-vault) — vault setup and Docker integration diff --git a/docs/src/guide/audio-input.md b/docs/src/guide/audio-input.md deleted file mode 100644 index e6d4dab..0000000 --- a/docs/src/guide/audio-input.md +++ /dev/null @@ -1,127 +0,0 @@ -# Audio Input - -Zeph supports speech-to-text transcription, allowing users to send voice messages that are automatically converted to text before entering the agent loop. 
- -## Pipeline - -``` -Audio attachment → SpeechToText provider → Transcription text → Agent loop -``` - -When a `ChannelMessage` contains an audio `Attachment`, the agent calls `resolve_message_text()` which detects the attachment, sends it to the configured STT provider, and replaces the message body with the transcribed text. The rest of the agent loop processes it as a normal text message. - -## Configuration - -Enable the `stt` feature flag: - -```bash -cargo build --release --features stt -``` - -Add the STT section to your config: - -```toml -[llm.stt] -provider = "whisper" -model = "whisper-1" -``` - -The Whisper provider inherits the OpenAI API key from the `[llm.openai]` section (or `ZEPH_OPENAI_API_KEY` env var). No separate key is needed. - -### Environment Variable Overrides - -STT settings can be configured entirely via environment variables, without a config file: - -```bash -ZEPH_STT_PROVIDER=whisper ZEPH_STT_MODEL=whisper-1 zeph -``` - -| Variable | Description | Default | -|----------|-------------|---------| -| `ZEPH_STT_PROVIDER` | `whisper` (OpenAI API) or `candle-whisper` (local) | `whisper` | -| `ZEPH_STT_MODEL` | Model identifier | `whisper-1` | - -Setting either variable automatically enables the STT config section. - -## Supported Backends - -| Backend | Provider | Feature | Status | -|---------|----------|---------|--------| -| OpenAI Whisper API | `whisper` | `stt` | Available | -| Local Whisper (candle) | `candle-whisper` | `candle` | Available | - -## Local Whisper (Candle) - -The `candle-whisper` backend runs Whisper inference locally via [candle](https://github.com/huggingface/candle) — no network calls, fully offline after the initial model download. 
- -### Requirements - -Enable the `candle` feature flag: - -```bash -cargo build --release --features candle # CPU -cargo build --release --features metal # macOS Metal GPU (implies candle) -cargo build --release --features cuda # Linux NVIDIA GPU (implies candle) -``` - -### Configuration - -```toml -[llm.stt] -provider = "candle-whisper" -model = "openai/whisper-tiny" -``` - -### Model Options - -Models are downloaded from HuggingFace on first use and cached locally. - -| Model | HuggingFace ID | Parameters | Disk | -|-------|---------------|------------|------| -| Tiny | `openai/whisper-tiny` | 39M | ~150 MB | -| Base | `openai/whisper-base` | 74M | ~290 MB | -| Small | `openai/whisper-small` | 244M | ~950 MB | - -Smaller models are faster but less accurate. `whisper-tiny` is a good starting point for low-latency use cases. - -### Device Auto-Detection - -The backend automatically selects the best available compute device: - -1. **Metal** — if `metal` feature is enabled and running on macOS -2. **CUDA** — if `cuda` feature is enabled and an NVIDIA GPU is available -3. **CPU** — fallback - -### Audio Pipeline - -Incoming audio is processed through: symphonia decode, rubato resample to 16 kHz mono, mel spectrogram extraction, then candle Whisper inference. - -### Limitations - -- **5-minute audio duration guard** — recordings longer than 5 minutes are rejected. -- **No streaming** — the entire file is decoded and transcribed in one pass. - -## Telegram Voice Messages - -The Telegram channel automatically detects voice and audio messages. When a user sends a voice note or audio file, the adapter downloads the file bytes via the Telegram Bot API and wraps them as an `Attachment` with `AttachmentKind::Audio`. The attachment then follows the standard transcription pipeline described above. 
- -Download failures (network errors, expired file links) are logged at `warn` level and gracefully skipped — the message is delivered without an attachment rather than causing an error. - -Bootstrap wiring is automatic: when `[llm.stt]` is present in the config and the `stt` feature is enabled, `main.rs` creates a `WhisperProvider` and injects it into the agent via `with_stt()`. No additional setup is needed beyond the configuration shown above. - -## Slack Audio Files - -The Slack channel automatically detects audio file uploads and voice messages in incoming events. When a message contains files with audio MIME types (`audio/*`) or `video/webm` (commonly used for voice recordings), the adapter downloads the file and wraps it as an `Attachment` with `AttachmentKind::Audio`. The attachment then follows the standard transcription pipeline. - -Files are downloaded via `url_private_download` using Bearer token authentication with the bot token. For security, the adapter validates that the download URL host ends with `.slack.com` before making the request. Files exceeding 25 MB are skipped. - -Download failures (network errors, host validation rejection, oversized files) are logged at `warn` level and gracefully skipped — the message is delivered without an attachment. - -To enable Slack audio transcription, ensure both the `slack` and `stt` features are active and `[llm.stt]` is configured. Add the `files:read` OAuth scope to your Slack app so the bot can access uploaded files. - -## Limitations - -- **25 MB file size limit** — audio files exceeding this are rejected before upload. -- **No streaming transcription** — the entire file is sent and transcribed in one request. -- **No TTS** — text-to-speech output is not yet supported. -- **Batch only** — one audio attachment per message; additional attachments are ignored. 
diff --git a/docs/src/guide/channels.md b/docs/src/guide/channels.md deleted file mode 100644 index 777ad2c..0000000 --- a/docs/src/guide/channels.md +++ /dev/null @@ -1,284 +0,0 @@ -# Channels - -Zeph supports multiple I/O channels for interacting with the agent. Each channel implements the `Channel` trait (returning `Result<_, ChannelError>` with typed variants for I/O, closed-channel, and cancellation errors) and can be selected at runtime based on configuration or CLI flags. - -## Available Channels - -| Channel | Activation | Streaming | Confirmation | -|---------|-----------|-----------|--------------| -| **CLI** | Default (no config needed) | Token-by-token to stdout | y/N prompt | -| **Discord** | `ZEPH_DISCORD_TOKEN` env var or `[discord]` config (requires `discord` feature) | Edit-in-place every 1.5s | Reply "yes" to confirm | -| **Slack** | `ZEPH_SLACK_BOT_TOKEN` env var or `[slack]` config (requires `slack` feature) | `chat.update` every 2s | Reply "yes" to confirm | -| **Telegram** | `ZEPH_TELEGRAM_TOKEN` env var or `[telegram]` config | Edit-in-place every 10s | Reply "yes" to confirm | -| **TUI** | `--tui` flag or `ZEPH_TUI=true` (requires `tui` feature) | Real-time in chat panel | Auto-confirm (Phase 1) | - -## CLI Channel - -The default channel. Reads from stdin, writes to stdout with immediate streaming output. - -```bash -./zeph -``` - -No configuration required. Supports all slash commands (`/skills`, `/mcp`, `/reset`). - -### Input History - -CLI channel provides persistent input history powered by rustyline: - -- **Arrow keys** — Up/Down to navigate previous inputs -- **Prefix search** — type partial text and press Up to find the most recent matching entry -- **Line editing** — Emacs keybindings (Ctrl+A/E, Alt+B/F, Ctrl+W, etc.) -- **Persistence** — history is stored in SQLite and available across restarts - -History is loaded automatically from the database on startup. Duplicate consecutive entries are suppressed. 
- -## Telegram Channel - -Run Zeph as a Telegram bot with streaming responses, MarkdownV2 formatting, and user whitelisting. - -### Setup - -1. Create a bot via [@BotFather](https://t.me/BotFather): - - Send `/newbot` and follow the prompts - - Copy the bot token (e.g., `123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11`) - -2. Configure the token via environment variable or config file: - - ```bash - # Environment variable - ZEPH_TELEGRAM_TOKEN="123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11" ./zeph - ``` - - Or in `config/default.toml`: - - ```toml - [telegram] - allowed_users = ["your_username"] - ``` - - The token can also be stored in the age-encrypted vault: - - ```bash - # Store in vault - ZEPH_TELEGRAM_TOKEN=your-token - ``` - -> The token is resolved via the vault provider (`ZEPH_TELEGRAM_TOKEN` secret). When using the `env` vault backend (default), set the environment variable directly. With the `age` backend, store it in the encrypted vault file. - -### User Whitelisting - -Restrict bot access to specific Telegram usernames: - -```toml -[telegram] -allowed_users = ["alice", "bob"] -``` - -The `allowed_users` list **must not be empty**. The Telegram channel refuses to start without at least one allowed username to prevent accidentally exposing the bot to all users. Messages from unauthorized users are silently rejected with a warning log. 
- -### Bot Commands - -| Command | Description | -|---------|-------------| -| `/start` | Welcome message | -| `/reset` | Reset conversation context | -| `/skills` | List loaded skills | - -### Streaming Behavior - -Telegram has API rate limits, so streaming works differently from CLI: - -- First chunk sends a new message immediately -- Subsequent chunks edit the existing message in-place -- Updates are throttled to one edit per 10 seconds to respect Telegram rate limits -- On flush, a final edit delivers the complete response -- Long messages (>4096 chars) are automatically split into multiple messages - -### MarkdownV2 Formatting - -LLM responses are automatically converted from standard Markdown to Telegram's MarkdownV2 format. Code blocks, bold, italic, and inline code are preserved. Special characters are escaped to prevent formatting errors. - -### Confirmation Prompts - -When the agent needs user confirmation (e.g., destructive shell commands), Telegram sends a text prompt asking the user to reply "yes" to confirm. - -## Discord Channel - -Run Zeph as a Discord bot with Gateway v10 WebSocket, slash commands, and edit-in-place streaming. Requires the `discord` feature flag. - -```bash -cargo build --release --features discord -``` - -### Setup - -1. Create an application at the [Discord Developer Portal](https://discord.com/developers/applications). -2. Under **Bot**, copy the bot token. -3. Under **OAuth2 > URL Generator**, select `bot` and `applications.commands` scopes, then invite the bot to your server. - -4. Configure the token and application ID: - - ```bash - ZEPH_DISCORD_TOKEN="your-bot-token" ZEPH_DISCORD_APP_ID="123456789" ./zeph - ``` - - Or in `config/default.toml`: - - ```toml - [discord] - token = "your-bot-token" - application_id = "123456789" - allowed_user_ids = [] - allowed_role_ids = [] - allowed_channel_ids = [] - ``` - -> Tokens are resolved via the vault provider (`ZEPH_DISCORD_TOKEN` and `ZEPH_DISCORD_APP_ID` secrets). 
-### Allowlists - -Restrict access by Discord user IDs, role IDs, or channel IDs: - -```toml -[discord] -allowed_user_ids = ["123456789012345678"] -allowed_role_ids = ["987654321098765432"] -allowed_channel_ids = ["111222333444555666"] -``` - -When all allowlists are empty, the bot accepts messages from all users in all channels. - -### Slash Commands - -Zeph registers two slash commands on startup via the Discord REST API: - -| Command | Description | -|---------|-------------| -| `/ask <message>` | Send a message to the agent | -| `/clear` | Reset conversation context | - -### Streaming Behavior - -Discord enforces a rate limit of 5 message edits per 5 seconds. Streaming uses edit-in-place with a 1.5-second throttle: - -- First chunk sends a new message immediately -- Subsequent chunks edit the existing message in-place (throttled to 1.5s intervals) -- On flush, a final edit delivers the complete response -- Long messages (>2000 chars) are automatically split - -## Slack Channel - -Run Zeph as a Slack bot with Events API webhook, HMAC-SHA256 signature verification, and streaming via message updates. Requires the `slack` feature flag. - -```bash -cargo build --release --features slack -``` - -### Setup - -1. Create a Slack app at [api.slack.com/apps](https://api.slack.com/apps). -2. Under **OAuth & Permissions**, add the `chat:write` scope and install to your workspace. Copy the Bot User OAuth Token. -3. Under **Basic Information**, copy the Signing Secret. -4. Under **Event Subscriptions**, enable events and set the Request URL to `http://<host>:<port>/slack/events`. -5. Subscribe to the `message.channels` and `message.im` bot events. - -6. Configure the tokens: - -   ```bash -   ZEPH_SLACK_BOT_TOKEN="xoxb-..." ZEPH_SLACK_SIGNING_SECRET="..." ./zeph -   ``` - -   Or in `config/default.toml`: - -   ```toml -   [slack] -   bot_token = "xoxb-..." -   signing_secret = "..." 
- port = 3000 - webhook_host = "127.0.0.1" - allowed_user_ids = [] - allowed_channel_ids = [] - ``` - -> Tokens are resolved via the vault provider (`ZEPH_SLACK_BOT_TOKEN` and `ZEPH_SLACK_SIGNING_SECRET` secrets). - -### Allowlists - -Restrict access by Slack user IDs or channel IDs: - -```toml -[slack] -allowed_user_ids = ["U01ABC123"] -allowed_channel_ids = ["C01XYZ456"] -``` - -When allowlists are empty, the bot accepts messages from all users in all channels. - -### Security - -- All incoming webhook requests are verified using HMAC-SHA256 with the signing secret (constant-time comparison) -- Requests with timestamps older than 5 minutes are rejected (replay protection) -- Request body size is limited to 256KB -- The bot filters its own messages to prevent infinite feedback loops (via `auth.test` at startup) - -### Streaming Behavior - -Slack enforces rate limits on `chat.update`. Streaming uses message updates with a 2-second throttle: - -- First chunk posts a new message via `chat.postMessage` -- Subsequent chunks update the message via `chat.update` (throttled to 2s intervals) -- On flush, a final update delivers the complete response - -## TUI Dashboard - -A rich terminal interface based on ratatui with real-time agent metrics. Requires the `tui` feature flag. - -```bash -cargo build --release --features tui -./zeph --tui -``` - -See [TUI Dashboard](tui.md) for full documentation including keybindings, layout, and architecture. - -## Message Queueing - -Zeph maintains a bounded FIFO message queue (maximum 10 messages) to handle user input received during model inference. Queue behavior varies by channel: - -### CLI Channel - -Blocking stdin read — the queue is always empty. CLI users cannot send messages while the agent is responding. - -### Telegram Channel - -New messages are queued via an internal mpsc channel. Consecutive messages arriving within 500ms are automatically merged with a newline separator to reduce context fragmentation. 
- -Use `/clear-queue` to discard queued messages. - -### TUI Channel - -The input line remains interactive during model inference. Messages are queued in-order and drained after each response completes. - -- **Queue badge:** `[+N queued]` appears in the input area when messages are pending -- **Clear queue:** Press `Ctrl+K` to discard all queued messages -- **Merging:** Consecutive messages within 500ms are merged by newline - -When the queue is full (10 messages), new input is silently dropped until space becomes available. - -## Attachments - -`ChannelMessage` supports an optional `attachments` field carrying `Attachment` values with typed `AttachmentKind` variants (Audio, Image, Video, File). When the `stt` feature is enabled, audio attachments are automatically transcribed before entering the agent loop. The Telegram channel automatically downloads voice and audio messages and delivers them as attachments. The Slack channel detects audio file uploads and voice messages (`audio/*`, `video/webm`), downloads them via `url_private_download` with host validation (`.slack.com` only) and a 25 MB size limit, and delivers them as audio attachments. See [Audio Input](audio-input.md) for details. - -Image attachments are forwarded directly to the LLM as `MessagePart::Image` content. In CLI and TUI, use the `/image ` command to attach an image. In Telegram, send a photo directly. Images are subject to a 20 MB size limit. See [Vision](vision.md) for details. - -## Channel Selection Logic - -Zeph selects the channel at startup based on the following priority: - -1. `--tui` flag or `ZEPH_TUI=true` → TUI channel (requires `tui` feature) -2. `[discord]` config with token → Discord channel (requires `discord` feature) -3. `[slack]` config with bot_token → Slack channel (requires `slack` feature) -4. `ZEPH_TELEGRAM_TOKEN` set → Telegram channel -5. Otherwise → CLI channel - -Only one channel is active per session. 
diff --git a/docs/src/guide/daemon.md b/docs/src/guide/daemon.md deleted file mode 100644 index 8007955..0000000 --- a/docs/src/guide/daemon.md +++ /dev/null @@ -1,51 +0,0 @@ -# Daemon Supervisor - -The daemon supervisor manages component lifecycles within a long-running Zeph process. It monitors registered components (gateway, scheduler, A2A server) for unexpected exits and tracks restart counts. - -## Feature Flag - -Enable with `--features daemon` at build time: - -```bash -cargo build --release --features daemon -``` - -## Configuration - -Add the `[daemon]` section to `config/default.toml`: - -```toml -[daemon] -enabled = true -pid_file = "~/.zeph/zeph.pid" -health_interval_secs = 30 -max_restart_backoff_secs = 60 -``` - -### PID File - -The daemon writes its process ID to `pid_file` on startup. This file is used to detect running instances and to send stop signals. Tilde (`~`) expands to `$HOME`. The parent directory is created automatically if it does not exist. - -## Component Lifecycle - -Each registered component is wrapped in a `ComponentHandle` that tracks: - -- **name** -- human-readable identifier (e.g., `"gateway"`, `"scheduler"`) -- **status** -- `Running`, `Failed(reason)`, or `Stopped` -- **restart_count** -- number of unexpected exits detected - -The supervisor polls all components at `health_interval_secs` intervals. When a running component's task handle reports completion (unexpected exit), the supervisor marks it as `Failed` and increments its restart counter. - -## Shutdown - -The supervisor listens on the global shutdown signal (`watch::Receiver`). When the signal fires, the health loop exits and all component handles are dropped. 
- -## PID File Utilities - -The `daemon` module provides three standalone functions for PID file management: - -| Function | Description | -|----------|-------------| -| `write_pid_file(path)` | Write current process ID to file | -| `read_pid_file(path)` | Read PID from file | -| `remove_pid_file(path)` | Remove PID file (no-op if missing) | diff --git a/docs/src/guide/openai.md b/docs/src/guide/openai.md deleted file mode 100644 index 37faa6a..0000000 --- a/docs/src/guide/openai.md +++ /dev/null @@ -1,48 +0,0 @@ -# OpenAI Provider - -Use the OpenAI provider to connect to OpenAI API or any OpenAI-compatible service (Together AI, Groq, Fireworks, Perplexity). - -```bash -ZEPH_LLM_PROVIDER=openai ZEPH_OPENAI_API_KEY=sk-... ./target/release/zeph -``` - -## Configuration - -```toml -[llm] -provider = "openai" - -[llm.openai] -base_url = "https://api.openai.com/v1" -model = "gpt-5.2" -max_tokens = 4096 -embedding_model = "text-embedding-3-small" # optional, enables vector embeddings -reasoning_effort = "medium" # optional: low, medium, high (for o3, etc.) -``` - -## Compatible APIs - -Change `base_url` to point to any OpenAI-compatible API: - -```toml -# Together AI -base_url = "https://api.together.xyz/v1" - -# Groq -base_url = "https://api.groq.com/openai/v1" - -# Fireworks -base_url = "https://api.fireworks.ai/inference/v1" -``` - -## Embeddings - -When `embedding_model` is set, Qdrant subsystems automatically use it for skill matching and semantic memory instead of the global `llm.embedding_model`. 
- -## Reasoning Models - -Set `reasoning_effort` to control token budget for reasoning models like `o3`: - -- `low` — fast responses, less reasoning -- `medium` — balanced -- `high` — thorough reasoning, more tokens diff --git a/docs/src/guide/scheduler.md b/docs/src/guide/scheduler.md deleted file mode 100644 index 845730f..0000000 --- a/docs/src/guide/scheduler.md +++ /dev/null @@ -1,109 +0,0 @@ -# Cron Scheduler - -The scheduler runs periodic tasks on cron schedules with SQLite-backed persistence. It tracks last execution times to avoid duplicate runs and supports built-in and custom task kinds. - -## Feature Flag - -Enable with `--features scheduler` at build time: - -```bash -cargo build --release --features scheduler -``` - -## Configuration - -Define tasks in the `[scheduler]` section of `config/default.toml`: - -```toml -[scheduler] -enabled = true - -[[scheduler.tasks]] -name = "memory_cleanup" -cron = "0 0 0 * * *" # daily at midnight -kind = "memory_cleanup" -config = { max_age_days = 90 } - -[[scheduler.tasks]] -name = "health_check" -cron = "0 */5 * * * *" # every 5 minutes -kind = "health_check" -``` - -### Cron Expression Format - -The scheduler uses 6-field cron expressions (seconds included): - -``` -sec min hour day month weekday - 0 0 0 * * * -``` - -Standard cron features are supported: ranges (`1-5`), lists (`1,3,5`), steps (`*/5`), and wildcards (`*`). - -## Built-in Task Kinds - -| Kind | Description | -|------|-------------| -| `memory_cleanup` | Remove old conversation history entries | -| `skill_refresh` | Re-scan skill directories for changes | -| `health_check` | Internal health verification | -| `update_check` | Query GitHub Releases API for a newer version | - -Custom kinds are also supported. Register a handler implementing the `TaskHandler` trait for any custom `kind` string. 
- -## Update Check - -The `update_check` task uses `UpdateCheckHandler` to query the GitHub Releases API and compare the running version against the latest release. When a newer version is detected, a notification message is emitted to the agent channel. - -The update check is controlled by `auto_update_check` in `[agent]` (default: `true`). It is independent of the scheduler feature flag: - -- **With `scheduler` feature enabled**: the check runs daily at 09:00 UTC via a cron task (`0 0 9 * * *`). -- **Without `scheduler` feature**: a single one-shot check is performed at startup. - -To add the update check to the scheduler task list explicitly: - -```toml -[agent] -auto_update_check = true # default; set to false to disable entirely - -[[scheduler.tasks]] -name = "update_check" -cron = "0 0 9 * * *" # daily at 09:00 UTC -kind = "update_check" -``` - -The handler uses a 10-second HTTP timeout and caps the response body at 64 KB. Network errors and non-2xx responses are logged as warnings and treated as no-ops, so a failed check never interrupts normal agent operation. - -## TaskHandler Trait - -Implement `TaskHandler` to define custom task logic: - -```rust -pub trait TaskHandler: Send + Sync { - fn execute( - &self, - config: &serde_json::Value, - ) -> Pin> + Send + '_>>; -} -``` - -The `config` parameter receives the `config` value from the task definition in TOML. - -## Persistence - -The scheduler stores job metadata in a `scheduled_jobs` SQLite table: - -| Column | Type | Description | -|--------|------|-------------| -| `name` | TEXT | Unique task identifier | -| `cron_expr` | TEXT | Cron schedule expression | -| `kind` | TEXT | Task kind string | -| `last_run` | TEXT | ISO 8601 timestamp of last execution | -| `status` | TEXT | Current status (`pending`, `completed`) | - -On startup, the scheduler upserts all configured tasks into the table. Each tick (every 60 seconds), it checks whether each task is due based on `last_run` and the cron expression. 
- -## Shutdown - -The scheduler listens on the global shutdown signal and exits its tick loop gracefully. diff --git a/docs/src/guide/skills.md b/docs/src/guide/skills.md deleted file mode 100644 index 715df38..0000000 --- a/docs/src/guide/skills.md +++ /dev/null @@ -1,154 +0,0 @@ -# Skills - -Zeph uses an embedding-based skill system that dramatically reduces token consumption: instead of injecting all skills into every prompt, only the top-K most relevant (default: 5) are selected per query via cosine similarity of vector embeddings. Combined with progressive loading (metadata at startup, bodies on activation, resources on demand), this keeps prompt size constant regardless of how many skills are installed. - -## How It Works - -1. **You send a message** — for example, "check disk usage on this server" -2. **Zeph embeds your query** using the configured embedding model -3. **Top matching skills are selected** — by default, the 5 most relevant ones ranked by vector similarity -4. **Selected skills are injected** into the system prompt, giving Zeph specific instructions and examples for the task -5. **Zeph responds** using the knowledge from matched skills - -This happens automatically on every message. You don't need to activate skills manually. - -## Matching Backends - -Zeph supports two skill matching backends: - -- **In-memory** (default) — embeddings are computed on startup and matched via cosine similarity. No external dependencies required. -- **Qdrant** — when semantic memory is enabled and Qdrant is reachable, skill embeddings are persisted in a `zeph_skills` collection. On startup, only changed skills are re-embedded using BLAKE3 content hash comparison. If Qdrant becomes unavailable, Zeph falls back to in-memory matching automatically. - -> The Qdrant backend significantly reduces startup time when you have many skills, since unchanged skills skip the embedding step entirely. 
- -## Bundled Skills - -| Skill | Description | -|-------|-------------| -| `api-request` | HTTP API requests using curl — GET, POST, PUT, DELETE with headers and JSON | -| `docker` | Docker container operations — build, run, ps, logs, compose | -| `file-ops` | File system operations — list, search, read, and analyze files | -| `git` | Git version control — status, log, diff, commit, branch management | -| `mcp-generate` | Generate MCP-to-skill bridges for external tool servers | -| `setup-guide` | Configuration reference — LLM providers, memory, tools, and operating modes | -| `skill-audit` | Spec compliance and security review of installed skills | -| `skill-creator` | Create new skills following the agentskills.io specification | -| `system-info` | System diagnostics — OS, disk, memory, processes, uptime | -| `web-scrape` | Extract structured data from web pages using CSS selectors | -| `web-search` | Search the internet for current information | - -Use `/skills` in chat to see all available skills and their usage statistics. - -## Creating Custom Skills - -A skill is a single `SKILL.md` file inside a named directory: - -```text -skills/ -└── my-skill/ - └── SKILL.md -``` - -### SKILL.md Format - -Each file has two parts: a YAML header and a markdown body. - -```markdown ---- -name: my-skill -description: Short description of what this skill does. ---- -# My Skill - -Instructions and examples go here. 
-``` - -**Header fields:** - -| Field | Required | Description | -|-------|----------|-------------| -| `name` | Yes | Unique identifier (1-64 chars, lowercase, hyphens allowed) | -| `description` | Yes | Used for embedding-based matching against user queries | -| `compatibility` | No | Runtime requirements (e.g., "requires curl") | -| `license` | No | Skill license | -| `allowed-tools` | No | Comma-separated tool names this skill can use | -| `metadata` | No | Arbitrary key-value pairs for forward compatibility | - -**Body:** markdown with instructions, code examples, or reference material. Injected verbatim into the LLM context when the skill is selected. - -### Skill Resources - -Skills can include additional resource directories: - -```text -skills/ -└── system-info/ - ├── SKILL.md - └── references/ - ├── linux.md - ├── macos.md - └── windows.md -``` - -Resources in `scripts/`, `references/`, and `assets/` are loaded on demand with path traversal protection. Skill directories are validated via canonical path resolution to ensure they reside under a configured skill root, preventing symlink-based escapes. OS-specific reference files (named `linux.md`, `macos.md`, `windows.md`) are automatically filtered by the current platform. - -### Name Validation - -Skill names must be 1-64 characters, lowercase letters/numbers/hyphens only, no leading/trailing/consecutive hyphens, and must match the directory name. - -## Configuration - -### Skill Paths - -By default, Zeph scans `./skills` for skill directories. Add more paths in config: - -```toml -[skills] -paths = ["./skills", "/home/user/my-skills"] -``` - -If a skill with the same name appears in multiple paths, the first one found takes priority. - -### Max Active Skills - -Control how many skills are injected per query: - -```toml -[skills] -max_active_skills = 5 -``` - -Or via environment variable: - -```bash -export ZEPH_SKILLS_MAX_ACTIVE=5 -``` - -Lower values reduce prompt size but may miss relevant skills. 
Higher values include more context but use more tokens. - -### Disambiguation Threshold - -When the top two candidate skills have cosine similarity scores within `disambiguation_threshold` of each other, the agent calls the LLM with a structured prompt to clarify intent. The LLM returns a typed `IntentClassification` (skill name, confidence 0-1, extracted parameters) via `chat_typed`, and the result reorders the candidate list so the best-matching skill is injected first. - -```toml -[skills] -disambiguation_threshold = 0.05 -``` - -Set to `0.0` to disable disambiguation entirely (always use ranking order). Higher values cause disambiguation to trigger more aggressively on ambiguous queries. - -## Progressive Loading - -Only metadata (~100 tokens per skill) is loaded at startup for embedding and matching. Full body (<5000 tokens) is loaded lazily on first activation and cached via `OnceLock`. Resource files are loaded on demand. - -With 50+ skills installed, a typical prompt still contains only 5 — saving thousands of tokens per request compared to naive full-injection approaches. - -## Trust Levels - -Every skill is assigned a trust level (`trusted`, `verified`, `quarantined`, `blocked`) that controls which tools it can invoke. Local skills default to `trusted`; newly imported or hash-mismatch skills start as `quarantined` with restricted tool access. See [Skill Trust Levels](skill-trust.md) for details. - -## Hot Reload - -SKILL.md file changes are detected via filesystem watcher (500ms debounce) and re-embedded without restart. Cached bodies are invalidated on reload. If the BLAKE3 content hash changes, the skill's trust level may be downgraded according to the `hash_mismatch_level` configuration. - -With the Qdrant backend, hot-reload triggers a delta sync — only modified skills are re-embedded and updated in the collection. 
diff --git a/docs/src/guide/summarization.md b/docs/src/guide/summarization.md deleted file mode 100644 index d1fa5a2..0000000 --- a/docs/src/guide/summarization.md +++ /dev/null @@ -1,29 +0,0 @@ -# Conversation Summarization - -Automatically compress long conversation histories using LLM-based summarization to stay within context budget limits. - -Requires an LLM provider (Ollama or Claude). Set `context_budget_tokens = 0` to disable proportional allocation and use unlimited context. - -> For the full context management pipeline (semantic recall, message trimming, compaction, tool output management), see [Context Engineering](context.md). - -## Configuration - -```toml -[memory] -summarization_threshold = 100 -context_budget_tokens = 8000 # Set to LLM context window size (0 = unlimited) -``` - -## How It Works - -- Triggered when message count exceeds `summarization_threshold` (default: 100) -- Summaries stored in SQLite with token estimates -- Batch size = threshold/2 to balance summary quality with LLM call frequency -- Context builder allocates proportional token budget: - - **15%** for summaries - - **25%** for semantic recall (if enabled) - - **60%** for recent message history - -## Token Estimation - -Token counts are estimated using a chars/4 heuristic (100x faster than tiktoken, ±25% accuracy). This is sufficient for proportional budget allocation where exact counts are not critical. diff --git a/docs/src/guide/vault.md b/docs/src/guide/vault.md deleted file mode 100644 index 72b39ed..0000000 --- a/docs/src/guide/vault.md +++ /dev/null @@ -1,110 +0,0 @@ -# Secrets Management - -Zeph resolves secrets (`ZEPH_CLAUDE_API_KEY`, `ZEPH_OPENAI_API_KEY`, `ZEPH_TELEGRAM_TOKEN`, `ZEPH_A2A_AUTH_TOKEN`) through a pluggable `VaultProvider` with redacted debug output via the `Secret` newtype. - -> Never commit secrets to version control. Use environment variables or age-encrypted vault files. 
- -## Backend Selection - -The vault backend is determined by the following priority (highest to lowest): - -1. **CLI flag:** `--vault env` or `--vault age` -2. **Environment variable:** `ZEPH_VAULT_BACKEND` -3. **Config file:** `vault.backend` in TOML config -4. **Default:** `"env"` - -Key and vault file paths follow the same priority: - -1. **CLI flags:** `--vault-key `, `--vault-path ` -2. **Environment variables:** `ZEPH_VAULT_KEY`, `ZEPH_VAULT_PATH` - -## Backends - -| Backend | Description | Activation | -|---------|-------------|------------| -| `env` (default) | Read secrets from environment variables | `--vault env` or omit | -| `age` | Decrypt age-encrypted JSON vault file at startup | `--vault age --vault-key --vault-path ` | - -## Environment Variables (default) - -Export secrets as environment variables: - -```bash -export ZEPH_CLAUDE_API_KEY=sk-ant-... -export ZEPH_TELEGRAM_TOKEN=123:ABC -./target/release/zeph -``` - -## Age Vault - -For production deployments, encrypt secrets with [age](https://age-encryption.org/). - -### Using `zeph vault` CLI (recommended) - -The built-in vault CLI manages the keypair and encrypted file so you do not need the `age` binary: - -```bash -# Initialize keypair and empty vault -zeph vault init - -# Store secrets -zeph vault set ZEPH_CLAUDE_API_KEY sk-ant-... -zeph vault set ZEPH_TELEGRAM_TOKEN 123:ABC - -# Verify -zeph vault list -zeph vault get ZEPH_CLAUDE_API_KEY - -# Remove a secret -zeph vault rm ZEPH_TELEGRAM_TOKEN - -# Run the agent (default paths are used automatically) -zeph --vault age -``` - -Default file locations (created by `vault init`): - -| File | Default path | -|------|-------------| -| Identity (private key) | `~/.config/zeph/vault-key.txt` | -| Encrypted secrets | `~/.config/zeph/secrets.age` | - -Override with `--vault-key` and `--vault-path`: - -```bash -zeph vault set ZEPH_CLAUDE_API_KEY sk-ant-... 
--vault-key /custom/key.txt --vault-path /custom/secrets.age -zeph --vault age --vault-key /custom/key.txt --vault-path /custom/secrets.age -``` - -### Manual setup with `age` CLI - -Alternatively, use the `age` binary directly: - -```bash -# Generate an age identity key -age-keygen -o key.txt - -# Create a JSON secrets file and encrypt it -echo '{"ZEPH_CLAUDE_API_KEY":"sk-...","ZEPH_TELEGRAM_TOKEN":"123:ABC"}' | \ - age -r $(grep 'public key' key.txt | awk '{print $NF}') -o secrets.age - -# Run with age vault -./target/release/zeph --vault age --vault-key key.txt --vault-path secrets.age -``` - -> The `vault-age` feature flag is enabled by default. When building with `--no-default-features`, add `vault-age` explicitly if needed. - -## Docker - -Mount key and vault files into the container: - -```bash -docker compose -f docker/docker-compose.yml -f docker/docker-compose.vault.yml up -``` - -Override paths: - -```bash -ZEPH_VAULT_KEY=./my-key.txt ZEPH_VAULT_PATH=./my-secrets.age \ - docker compose -f docker/docker-compose.yml -f docker/docker-compose.vault.yml up -``` diff --git a/docs/src/guide/vision.md b/docs/src/guide/vision.md deleted file mode 100644 index f0f281d..0000000 --- a/docs/src/guide/vision.md +++ /dev/null @@ -1,73 +0,0 @@ -# Vision (Image Input) - -Zeph supports image input across all channels. Images are sent to the LLM as inline content parts alongside the text prompt, enabling visual reasoning tasks such as screenshot analysis, diagram interpretation, and document extraction. - -## Pipeline - -``` -Image attachment → MessagePart::Image → LLM provider (base64) → Response -``` - -When a `ChannelMessage` contains an image `Attachment`, the agent converts it to a `MessagePart::Image` (raw bytes + MIME type). The active LLM provider encodes the image into its native API format and sends it as part of the chat request. - -## Provider Support - -Not all providers support vision. 
The `LlmProvider::supports_vision()` method indicates capability at runtime. - -| Provider | Vision | Format | -|----------|--------|--------| -| Claude | Yes | `AnthropicContentBlock::Image` (base64 source) | -| OpenAI | Yes | Array content with `image_url` data-URI | -| Ollama | Yes | `with_images()` API; optional `vision_model` routing | -| Candle | No | Text-only | - -### Ollama Vision Model Routing - -Ollama can route image requests to a dedicated vision model (e.g., `llava`, `bakllava`) while keeping a smaller text model for regular queries. Set the `vision_model` field: - -```toml -[llm] -provider = "ollama" -model = "mistral:7b" -vision_model = "llava:13b" -``` - -When `vision_model` is set and the message contains an image, Ollama uses the vision model for that request. When unset, images are sent to the default model (which must support vision). - -## Sending Images - -### CLI and TUI - -Use the `/image` slash command followed by a file path: - -``` -/image /path/to/screenshot.png What is shown in this image? -``` - -The path can be absolute or relative to the working directory. Supported formats: JPEG, PNG, GIF, WebP. - -### Telegram - -Send a photo directly in the chat. The Telegram channel downloads the image via the Bot API (using the largest available photo size) and delivers it as an `Attachment` with `AttachmentKind::Image`. The text caption, if present, is used as the accompanying prompt. - -A pre-download size guard rejects images exceeding 20 MB before the download begins. - -## Configuration - -```toml -[llm] -vision_model = "llava:13b" # Ollama only: dedicated model for image requests -``` - -| Variable | Description | Default | -|----------|-------------|---------| -| `ZEPH_LLM_VISION_MODEL` | Vision model name for Ollama | (none) | - -The `zeph init` wizard includes a prompt for the vision model when configuring the Ollama provider. - -## Limits - -- **20 MB maximum image size** -- images exceeding this limit are rejected. 
-- **Path traversal protection** -- the `/image` command validates file paths to prevent directory traversal attacks. -- **One image per message** -- additional image attachments in the same message are ignored. -- **No image generation** -- only image input (vision) is supported; image output is not. diff --git a/docs/src/guides/cloud-provider.md b/docs/src/guides/cloud-provider.md new file mode 100644 index 0000000..ca7bf11 --- /dev/null +++ b/docs/src/guides/cloud-provider.md @@ -0,0 +1,85 @@ +# Use a Cloud Provider + +Connect Zeph to Claude, OpenAI, or any OpenAI-compatible API instead of local Ollama. + +## Claude + +```bash +ZEPH_CLAUDE_API_KEY=sk-ant-... zeph +``` + +Or in config: + +```toml +[llm] +provider = "claude" + +[llm.cloud] +model = "claude-sonnet-4-5-20250929" +max_tokens = 4096 +``` + +Claude does not support embeddings. Use the [orchestrator](../advanced/orchestrator.md) to combine Claude chat with Ollama embeddings, or use OpenAI embeddings. + +## OpenAI + +```bash +ZEPH_LLM_PROVIDER=openai ZEPH_OPENAI_API_KEY=sk-... zeph +``` + +```toml +[llm] +provider = "openai" + +[llm.openai] +base_url = "https://api.openai.com/v1" +model = "gpt-5.2" +max_tokens = 4096 +embedding_model = "text-embedding-3-small" +reasoning_effort = "medium" # optional: low, medium, high (for o3, etc.) +``` + +When `embedding_model` is set, Qdrant subsystems use it automatically for skill matching and semantic memory. 
+ +## Compatible APIs + +Change `base_url` to point to any OpenAI-compatible endpoint: + +```toml +# Together AI +base_url = "https://api.together.xyz/v1" + +# Groq +base_url = "https://api.groq.com/openai/v1" + +# Fireworks +base_url = "https://api.fireworks.ai/inference/v1" +``` + +## Hybrid Setup + +Embeddings via free local Ollama, chat via paid Claude API: + +```toml +[llm] +provider = "orchestrator" + +[llm.orchestrator] +default = "claude" +embed = "ollama" + +[llm.orchestrator.providers.ollama] +provider_type = "ollama" + +[llm.orchestrator.providers.claude] +provider_type = "claude" + +[llm.orchestrator.routes] +general = ["claude"] +``` + +See [Model Orchestrator](../advanced/orchestrator.md) for task classification and fallback chain options. + +## Interactive Setup + +Run `zeph init` and select your provider in Step 2. The wizard handles model names, base URLs, and API keys. See [Configuration Wizard](../getting-started/wizard.md). diff --git a/docs/src/guides/custom-skills.md b/docs/src/guides/custom-skills.md new file mode 100644 index 0000000..cd69577 --- /dev/null +++ b/docs/src/guides/custom-skills.md @@ -0,0 +1,80 @@ +# Add Custom Skills + +Create your own skills to teach Zeph new capabilities. A skill is a single `SKILL.md` file inside a named directory. + +## Skill Structure + +```text +skills/ +└── my-skill/ + └── SKILL.md +``` + +## SKILL.md Format + +Two parts: a YAML header and a markdown body. + +```markdown +--- +name: my-skill +description: Short description of what this skill does. +--- +# My Skill + +Instructions and examples go here. This content is injected verbatim +into the LLM context when the skill is matched. 
+``` + +### Header Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `name` | Yes | Unique identifier (1-64 chars, lowercase, hyphens allowed) | +| `description` | Yes | Used for embedding-based matching against user queries | +| `compatibility` | No | Runtime requirements (e.g., "requires curl") | +| `allowed-tools` | No | Comma-separated tool names this skill can use | + +### Name Rules + +Lowercase letters, numbers, and hyphens only. No leading, trailing, or consecutive hyphens. Must match the directory name. + +## Skill Resources + +Add reference files alongside `SKILL.md`: + +```text +skills/ +└── system-info/ + ├── SKILL.md + └── references/ + ├── linux.md + ├── macos.md + └── windows.md +``` + +Resources in `scripts/`, `references/`, and `assets/` are loaded on demand. OS-specific files (`linux.md`, `macos.md`, `windows.md`) are filtered by platform automatically. + +## Configuration + +```toml +[skills] +paths = ["./skills", "/home/user/my-skills"] +max_active_skills = 5 +``` + +Skills from multiple paths are scanned. If a skill with the same name appears in multiple paths, the first one found takes priority. + +## Testing Your Skill + +1. Place the skill directory under `./skills/` +2. Start Zeph — the skill is loaded automatically +3. Send a message that should match your skill's description +4. Run `/skills` to verify it was selected + +Changes to `SKILL.md` are hot-reloaded without restart (500ms debounce). 
+ +## Deep Dives + +- [Skills](../concepts/skills.md) — how embedding-based matching works +- [Self-Learning Skills](../advanced/self-learning.md) — automatic skill evolution +- [Skill Trust Levels](../advanced/skill-trust.md) — security model for imported skills diff --git a/docs/src/guide/docker.md b/docs/src/guides/docker.md similarity index 100% rename from docs/src/guide/docker.md rename to docs/src/guides/docker.md diff --git a/docs/src/guide/mcp.md b/docs/src/guides/mcp.md similarity index 100% rename from docs/src/guide/mcp.md rename to docs/src/guides/mcp.md diff --git a/docs/src/guide/semantic-memory.md b/docs/src/guides/semantic-memory.md similarity index 95% rename from docs/src/guide/semantic-memory.md rename to docs/src/guides/semantic-memory.md index a7badb9..a834a65 100644 --- a/docs/src/guide/semantic-memory.md +++ b/docs/src/guides/semantic-memory.md @@ -2,7 +2,7 @@ Enable semantic search to retrieve contextually relevant messages from conversation history using vector similarity. -Requires an embedding model. Ollama with `qwen3-embedding` is the default. Claude API does not support embeddings natively — use the [orchestrator](orchestrator.md) to route embeddings through Ollama while using Claude for chat. +Requires an embedding model. Ollama with `qwen3-embedding` is the default. Claude API does not support embeddings natively — use the [orchestrator](../advanced/orchestrator.md) to route embeddings through Ollama while using Claude for chat. ## Setup diff --git a/docs/src/guides/telegram.md b/docs/src/guides/telegram.md new file mode 100644 index 0000000..9ed3f1b --- /dev/null +++ b/docs/src/guides/telegram.md @@ -0,0 +1,56 @@ +# Run via Telegram + +Deploy Zeph as a Telegram bot with streaming responses, MarkdownV2 formatting, and user whitelisting. + +## Setup + +1. Create a bot via [@BotFather](https://t.me/BotFather) — send `/newbot` and copy the token. + +2. 
Configure the token: + + ```bash + ZEPH_TELEGRAM_TOKEN="123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11" zeph + ``` + + Or store in the age vault: + + ```bash + zeph vault set ZEPH_TELEGRAM_TOKEN "123456:ABC..." + zeph --vault age + ``` + +3. **Required** — restrict access to specific usernames: + + ```toml + [telegram] + allowed_users = ["your_username"] + ``` + + The bot refuses to start without at least one allowed user. Messages from unauthorized users are silently rejected. + +## Bot Commands + +| Command | Description | +|---------|-------------| +| `/start` | Welcome message | +| `/reset` | Reset conversation context | +| `/skills` | List loaded skills | + +## Streaming + +Telegram has API rate limits, so streaming works differently from CLI: + +- First chunk sends a new message immediately +- Subsequent chunks edit the existing message in-place (throttled to one edit per 10 seconds) +- Long messages (>4096 chars) are automatically split +- MarkdownV2 formatting is applied automatically + +## Voice and Image Support + +- **Voice notes**: automatically transcribed via STT when `stt` feature is enabled +- **Photos**: forwarded to the LLM for visual reasoning (requires vision-capable model) +- See [Audio & Vision](../advanced/multimodal.md) for backend configuration + +## Other Channels + +Zeph also supports Discord, Slack, CLI, and TUI. See [Channels](../advanced/channels.md) for the full reference. diff --git a/docs/src/introduction.md b/docs/src/introduction.md index e4bfae6..a2a1cdb 100644 --- a/docs/src/introduction.md +++ b/docs/src/introduction.md @@ -1,38 +1,31 @@ # Zeph -Lightweight AI agent with hybrid inference (Ollama / Claude / OpenAI / HuggingFace via candle), skills-first architecture, semantic memory with Qdrant, MCP client, A2A protocol support, multi-model orchestration, self-learning skill evolution, and multi-channel I/O. +You have an LLM. You want it to actually do things — run commands, search files, remember context, learn new skills. 
But wiring all that together means dealing with token bloat, provider lock-in, and context that evaporates between sessions. -Only relevant skills and MCP tools are injected into each prompt via vector similarity — keeping token usage minimal regardless of how many are installed. +Zeph is a lightweight AI agent written in Rust that connects to any LLM provider (local Ollama, Claude, OpenAI, or HuggingFace models), equips it with tools and skills, and manages conversation memory — all while keeping prompt size minimal. Only the skills relevant to your current query are loaded, so adding more capabilities never inflates your token bill. -**Cross-platform**: Linux, macOS, Windows (x86_64 + ARM64). +## What You Can Do with Zeph + +**Development assistant.** Point Zeph at your project directory, and it reads files, runs shell commands, searches code, and answers questions with full context. Drop a `ZEPH.md` file in your repo to give it project-specific instructions. -## Key Features +**Chat bot.** Deploy Zeph as a Telegram, Discord, or Slack bot with streaming responses, user whitelisting, and voice message transcription. Your team gets an AI assistant in the channels they already use. 
-- **Hybrid inference** — Ollama (local), Claude (Anthropic), OpenAI (GPT + compatible APIs), Candle (HuggingFace GGUF) -- **Skills-first architecture** — embedding-based skill matching selects only top-K relevant skills per query, not all -- **Semantic memory** — SQLite for structured data + Qdrant for vector similarity search -- **MCP client** — connect external tool servers via Model Context Protocol (stdio + HTTP transport) -- **A2A protocol** — agent-to-agent communication via JSON-RPC 2.0 with SSE streaming -- **Model orchestrator** — route tasks to different providers with automatic fallback chains -- **Self-learning** — skills evolve through failure detection, self-reflection, and LLM-generated improvements -- **Code indexing** — AST-based code RAG with tree-sitter, hybrid retrieval (semantic + grep routing), repo map -- **Context engineering** — proportional budget allocation, semantic recall injection, runtime compaction, smart tool output summarization, ZEPH.md project config -- **Multi-channel I/O** — CLI, Telegram, and TUI with streaming support -- **Token-efficient** — prompt size is O(K) not O(N), where K is max active skills and N is total installed +**Self-hosted agent.** Run fully local with Ollama — no data leaves your machine. Encrypt API keys with age vault. Sandbox tool access with path restrictions and command confirmation. You control everything. -## Quick Start +## Get Started ```bash -git clone https://github.com/bug-ops/zeph -cd zeph -cargo build --release -./target/release/zeph +curl -fsSL https://github.com/bug-ops/zeph/releases/latest/download/install.sh | sh +zeph init +zeph ``` -See [Installation](getting-started/installation.md) for pre-built binaries and Docker options. +Three commands: install the binary, generate a config, start talking. + +**Cross-platform**: Linux, macOS, Windows (x86_64 + ARM64). 
-## Requirements +## Next Steps -- Rust 1.88+ (Edition 2024) -- Ollama (for local inference and embeddings) or cloud API key (Claude / OpenAI) -- Docker (optional, for Qdrant semantic memory and containerized deployment) +- [Why Zeph?](why-zeph.md) — what sets Zeph apart from other LLM wrappers +- [First Conversation](getting-started/first-conversation.md) — from zero to "aha moment" in 5 minutes +- [Installation](getting-started/installation.md) — all installation methods (source, binaries, Docker) diff --git a/docs/src/guide/cli.md b/docs/src/reference/cli.md similarity index 90% rename from docs/src/guide/cli.md rename to docs/src/reference/cli.md index 2a12114..d5b51be 100644 --- a/docs/src/guide/cli.md +++ b/docs/src/reference/cli.md @@ -12,10 +12,10 @@ zeph [OPTIONS] [COMMAND] | Command | Description | |---------|-------------| -| `init` | Interactive configuration wizard (see [Configuration](../getting-started/configuration.md)) | -| `vault` | Manage the age-encrypted secrets vault (see [Secrets Management](vault.md)) | +| `init` | Interactive configuration wizard (see [Configuration Wizard](../getting-started/wizard.md)) | +| `vault` | Manage the age-encrypted secrets vault (see [Secrets Management](security.md#age-vault)) | -When no subcommand is given, Zeph starts the agent loop with persistent input history (arrow keys to navigate, prefix search with Ctrl+R, line editing via rustyline). History is stored in SQLite and persists across restarts. +When no subcommand is given, Zeph starts the agent loop. 
### `zeph init` diff --git a/docs/src/getting-started/configuration.md b/docs/src/reference/configuration.md similarity index 82% rename from docs/src/getting-started/configuration.md rename to docs/src/reference/configuration.md index a2b7ada..2ed435c 100644 --- a/docs/src/getting-started/configuration.md +++ b/docs/src/reference/configuration.md @@ -1,32 +1,11 @@ -# Configuration +# Configuration Reference -## Configuration Wizard - -Run `zeph init` to generate a `config.toml` interactively. The wizard walks through six steps: - -1. **Secrets backend** -- choose `env` (environment variables) or `age` (encrypted file). When `age` is selected, API key prompts are skipped in subsequent steps since secrets are stored via `zeph vault set` instead. -2. **LLM Provider** -- select Ollama (local), Claude, OpenAI, Orchestrator (multi-model routing), or a compatible endpoint. Orchestrator prompts for a primary and fallback provider, enabling automatic failover. Provide the base URL, model name, and API key as needed (skipped for age backend). Choose an embedding model (default: `qwen3-embedding`). -3. **Memory** -- set the SQLite database path and optionally enable semantic memory with Qdrant. -4. **Channel** -- pick CLI (default), Telegram, Discord, or Slack. Provide tokens and credentials for the selected channel (token prompts skipped for age backend). -5. **Update check** -- choose whether to enable automatic version checks against GitHub Releases (default: enabled). -6. **Review and write** -- inspect the generated TOML, confirm the output path, and save. - -Specify the output path directly: - -```bash -zeph init --output ~/.zeph/config.toml -``` - -If the target file already exists, the wizard asks before overwriting. - -After writing, the wizard prints the secrets you need to configure. For the `env` backend it shows `export` commands; for `age` it prints the corresponding `zeph vault init` and `zeph vault set` commands. 
+Complete reference for the Zeph configuration file and environment variables. For the interactive setup wizard, see [Configuration Wizard](../getting-started/wizard.md). ## Config File Resolution Zeph loads `config/default.toml` at startup and applies environment variable overrides. -The config path can be overridden via CLI argument or environment variable: - ```bash # CLI argument (highest priority) zeph --config /path/to/custom.toml @@ -54,9 +33,9 @@ Priority: `--config` > `ZEPH_CONFIG` > `config/default.toml`. ## Hot-Reload -Zeph watches the config file for changes and applies runtime-safe fields without restart. The file watcher uses 500ms debounce to avoid redundant reloads. +Zeph watches the config file for changes and applies runtime-safe fields without restart (500ms debounce). -**Reloadable fields** (applied immediately): +**Reloadable fields:** | Section | Fields | |---------|--------| @@ -70,8 +49,6 @@ Zeph watches the config file for changes and applies runtime-safe fields without **Not reloadable** (require restart): LLM provider/model, SQLite path, Qdrant URL, Telegram token, MCP servers, A2A config, skill paths. -Check for `config reloaded` in the log to confirm a successful reload. - ## Configuration File ```toml @@ -185,8 +162,6 @@ port = 8080 rate_limit = 60 ``` -> Shell commands are sandboxed with path restrictions, network control, and destructive command confirmation. See [Security](../security.md) for details. - ## Environment Variables | Variable | Description | diff --git a/docs/src/feature-flags.md b/docs/src/reference/feature-flags.md similarity index 84% rename from docs/src/feature-flags.md rename to docs/src/reference/feature-flags.md index 86cda63..a563ec0 100644 --- a/docs/src/feature-flags.md +++ b/docs/src/reference/feature-flags.md @@ -20,18 +20,18 @@ Zeph uses Cargo feature flags to control optional functionality. 
As of M26, eigh | Feature | Description | |---------|-------------| | `tui` | ratatui-based TUI dashboard with real-time agent metrics | -| `candle` | Local HuggingFace model inference via [candle](https://github.com/huggingface/candle) (GGUF quantized models) and local Whisper STT ([guide](guide/audio-input.md#local-whisper-candle)) | +| `candle` | Local HuggingFace model inference via [candle](https://github.com/huggingface/candle) (GGUF quantized models) and local Whisper STT ([guide](../advanced/multimodal.md#local-whisper-candle)) | | `metal` | Metal GPU acceleration for candle on macOS (implies `candle`) | | `cuda` | CUDA GPU acceleration for candle on Linux (implies `candle`) | -| `discord` | Discord channel adapter with Gateway v10 WebSocket and slash commands ([guide](guide/channels.md#discord-channel)) | -| `slack` | Slack channel adapter with Events API webhook and HMAC-SHA256 verification ([guide](guide/channels.md#slack-channel)) | +| `discord` | Discord channel adapter with Gateway v10 WebSocket and slash commands ([guide](../advanced/channels.md#discord-channel)) | +| `slack` | Slack channel adapter with Events API webhook and HMAC-SHA256 verification ([guide](../advanced/channels.md#slack-channel)) | | `a2a` | [A2A protocol](https://github.com/a2aproject/A2A) client and server for agent-to-agent communication | -| `index` | AST-based code indexing and semantic retrieval via tree-sitter ([guide](guide/code-indexing.md)) | -| `gateway` | HTTP gateway for webhook ingestion with bearer auth and rate limiting ([guide](guide/gateway.md)) | -| `daemon` | Daemon supervisor with component lifecycle, PID file, and health monitoring ([guide](guide/daemon.md)) | -| `scheduler` | Cron-based periodic task scheduler with SQLite persistence, including the `update_check` handler for automatic version notifications ([guide](guide/scheduler.md)) | -| `stt` | Speech-to-text transcription via OpenAI Whisper API ([guide](guide/audio-input.md)) | -| `otel` | OpenTelemetry 
tracing export via OTLP/gRPC ([guide](guide/observability.md)) | +| `index` | AST-based code indexing and semantic retrieval via tree-sitter ([guide](../advanced/code-indexing.md)) | +| `gateway` | HTTP gateway for webhook ingestion with bearer auth and rate limiting ([guide](../advanced/gateway.md)) | +| `daemon` | Daemon supervisor with component lifecycle, PID file, and health monitoring ([guide](../advanced/daemon.md)) | +| `scheduler` | Cron-based periodic task scheduler with SQLite persistence, including the `update_check` handler for automatic version notifications ([guide](../advanced/daemon.md#cron-scheduler)) | +| `stt` | Speech-to-text transcription via OpenAI Whisper API ([guide](../advanced/multimodal.md#audio-input)) | +| `otel` | OpenTelemetry tracing export via OTLP/gRPC ([guide](../advanced/observability.md)) | | `pdf` | PDF document loading via [pdf-extract](https://crates.io/crates/pdf-extract) for the document ingestion pipeline | | `mock` | Mock providers and channels for testing | diff --git a/docs/src/security.md b/docs/src/reference/security.md similarity index 98% rename from docs/src/security.md rename to docs/src/reference/security.md index f18e3b3..2a464af 100644 --- a/docs/src/security.md +++ b/docs/src/reference/security.md @@ -86,7 +86,7 @@ autonomy_level = "supervised" # readonly, supervised, full ## Permission Policy -The `[tools.permissions]` config section provides fine-grained, pattern-based access control for each tool. Rules are evaluated in order (first match wins) using case-insensitive glob patterns against the tool input. See [Tool System — Permissions](guide/tools.md#permissions) for configuration details. +The `[tools.permissions]` config section provides fine-grained, pattern-based access control for each tool. Rules are evaluated in order (first match wins) using case-insensitive glob patterns against the tool input. See [Tool System — Permissions](../advanced/tools.md#permissions) for configuration details. 
Key security properties: - Tools with all-deny rules are excluded from the LLM system prompt, preventing the model from attempting to use them diff --git a/docs/src/security/mcp.md b/docs/src/reference/security/mcp.md similarity index 97% rename from docs/src/security/mcp.md rename to docs/src/reference/security/mcp.md index 7bfc3cc..b028d63 100644 --- a/docs/src/security/mcp.md +++ b/docs/src/reference/security/mcp.md @@ -67,7 +67,7 @@ For production deployments, consider restricting which MCP tools can be invoked. ## Environment Variables -MCP servers inherit environment variables from their configuration. Never store secrets directly in `config.toml` — use the [Vault](../guide/vault.md) integration instead: +MCP servers inherit environment variables from their configuration. Never store secrets directly in `config.toml` — use the [Vault](../security.md#age-vault) integration instead: ```toml [[mcp.servers]] diff --git a/docs/src/why-zeph.md b/docs/src/why-zeph.md new file mode 100644 index 0000000..c294d71 --- /dev/null +++ b/docs/src/why-zeph.md @@ -0,0 +1,25 @@ +# Why Zeph? + +## Token Efficiency + +Most agent frameworks inject all available tools and instructions into every prompt. Zeph selects only the top-K most relevant skills per query (default: 5) via embedding similarity. Prompt size is O(K), not O(N) — with 50 skills installed, a typical prompt contains ~2,500 tokens of skill context instead of ~50,000. Skills use progressive loading: only metadata (~100 tokens each) is loaded at startup, full body is loaded on first activation, and resource files are fetched on demand. + +## Hybrid Inference + +Mix local and cloud models in a single setup. Run embeddings through free local Ollama while routing chat to Claude or OpenAI. The orchestrator classifies tasks and routes them to the best provider with automatic fallback chains — if the primary provider fails, the next one takes over. Switch providers with a single config change. 
Any OpenAI-compatible endpoint works out of the box (Together AI, Groq, Fireworks, and others). + +## Skills-First Architecture + +Skills are plain markdown files — easy to write, version control, and share. Zeph matches skills by embedding similarity, not keywords, so "check disk space" finds the `system-info` skill even without exact keyword overlap. Edit a `SKILL.md` file and changes apply immediately via hot-reload, no restart required. Skills can evolve autonomously: when the agent detects repeated failures, it reflects on the cause and generates improved skill versions. + +## Memory That Persists + +Conversation history lives in SQLite, with optional Qdrant for semantic search. Ask "what did we discuss about the API yesterday?" and Zeph retrieves relevant context from past sessions automatically. Long conversations are summarized to stay within the context budget. A two-tier pruning system (tool output pruning first, LLM compaction as fallback) manages memory without manual intervention. Place a `ZEPH.md` in your project root to inject project-specific instructions into every prompt. + +## Privacy and Security + +Run fully local with Ollama — no API calls, no data leaves your machine. Store API keys in an age-encrypted vault instead of plaintext environment variables. Tools are sandboxed: configure allowed directories, block network access from shell commands, require confirmation for destructive operations like `rm` or `git push --force`. Imported skills start in quarantine with restricted tool access until explicitly trusted. + +## Lightweight and Fast + +Zeph compiles to a single Rust binary (~15 MB). No Python runtime, no Node.js, no JVM dependency. Native async throughout with no garbage collector overhead. Builds and runs on Linux, macOS, and Windows across x86_64 and ARM64 architectures.