Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [Unreleased]

### Added
- SQLite-backed `SqliteVectorStore` as an embedded alternative to Qdrant for zero-dependency vector search (#741)
- `vector_backend` config option to select between `qdrant` and `sqlite` vector backends
- Credential scrubbing in LLM context pipeline via `scrub_content()` — redacts secrets and paths before LLM calls (#743)
- `redact_credentials` config option (default: true) to toggle context scrubbing

### Changed
- Token estimation uses `chars/4` heuristic instead of `bytes/3` for better accuracy on multi-byte text (#742)

## [0.11.5] - 2026-02-22

### Added
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ anyhow = "1.0"
axum = "0.8"
base64 = "0.22"
blake3 = "1.8"
bytemuck = "1.25"
candle-core = { version = "0.9", default-features = false }
candle-nn = { version = "0.9", default-features = false }
candle-transformers = { version = "0.9", default-features = false }
Expand Down Expand Up @@ -182,3 +183,11 @@ zeph-skills.workspace = true

[lints]
workspace = true

[profile.release]
lto = true
codegen-units = 1
strip = true

[profile.bench]
debug = true
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ zeph --tui # run with TUI dashboard
|---|---|
| **Hybrid inference** | Ollama, Claude, OpenAI, Candle (GGUF), any OpenAI-compatible API. Multi-model orchestrator with fallback chains |
| **Skills-first architecture** | YAML+Markdown skill files with semantic matching, self-learning evolution, and 4-tier trust model |
| **Semantic memory** | SQLite + Qdrant with summarization, cross-session recall, and vector retrieval |
| **Semantic memory** | SQLite + Qdrant (or embedded SQLite vector search) with summarization, credential scrubbing, cross-session recall, and vector retrieval |
| **Multi-channel I/O** | CLI, Telegram, Discord, Slack, TUI — all with streaming. Vision and speech-to-text input |
| **Protocols** | MCP client (stdio + HTTP), A2A agent-to-agent communication, sub-agent orchestration |
| **Defense-in-depth** | Shell sandbox, tool permissions, secret redaction, SSRF protection, skill trust quarantine, audit logging |
Expand Down
21 changes: 14 additions & 7 deletions crates/zeph-core/src/agent/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,13 @@ impl<C: Channel> Agent<C> {
recall_limit: usize,
summarization_threshold: usize,
) -> Self {
let has_qdrant = memory.has_qdrant();
self.memory_state.memory = Some(memory);
self.memory_state.conversation_id = Some(conversation_id);
self.memory_state.history_limit = history_limit;
self.memory_state.recall_limit = recall_limit;
self.memory_state.summarization_threshold = summarization_threshold;
self.update_metrics(|m| {
m.qdrant_available = has_qdrant;
m.qdrant_available = false;
m.sqlite_conversation_id = Some(conversation_id);
});
self
Expand Down Expand Up @@ -138,6 +137,18 @@ impl<C: Channel> Agent<C> {
self
}

/// Enable or disable credential scrubbing of message content before it is
/// handed to the LLM context pipeline (applied via `scrub_content` during
/// `prepare_context`). Defaults to `true`.
#[must_use]
pub fn with_redact_credentials(mut self, enabled: bool) -> Self {
    self.runtime.redact_credentials = enabled;
    self
}

/// Set the safety multiplier applied to per-message token estimates when
/// deciding whether the context needs compaction (`1.0` = raw estimates,
/// `>1.0` compacts earlier, `0.0` disables compaction).
///
/// The value is clamped to be non-negative: a negative margin would
/// otherwise produce negative per-message counts that only behave like
/// zero through float-to-integer saturation, so the intent is made
/// explicit here instead.
#[must_use]
pub fn with_token_safety_margin(mut self, margin: f32) -> Self {
    // `f32::max` returns the non-NaN operand, so a NaN margin also
    // collapses to 0.0 rather than propagating into token math.
    self.runtime.token_safety_margin = margin.max(0.0);
    self
}

#[must_use]
pub fn with_tool_summarization(mut self, enabled: bool) -> Self {
self.runtime.summarize_tool_output_enabled = enabled;
Expand Down Expand Up @@ -215,11 +226,7 @@ impl<C: Channel> Agent<C> {
let provider_name = self.provider.name().to_string();
let model_name = self.runtime.model_name.clone();
let total_skills = self.skill_state.registry.all_meta().len();
let qdrant_available = self
.memory_state
.memory
.as_ref()
.is_some_and(zeph_memory::semantic::SemanticMemory::has_qdrant);
let qdrant_available = false;
let conversation_id = self.memory_state.conversation_id;
let prompt_estimate = self
.messages
Expand Down
140 changes: 139 additions & 1 deletion crates/zeph-core/src/agent/context.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::fmt::Write;

use zeph_llm::provider::MessagePart;
Expand All @@ -6,6 +7,8 @@ use zeph_skills::ScoredMatch;
use zeph_skills::loader::SkillMeta;
use zeph_skills::prompt::format_skills_catalog;

use crate::redact::scrub_content;

use super::{
Agent, CODE_CONTEXT_PREFIX, CROSS_SESSION_PREFIX, Channel, ContextBudget, EnvironmentContext,
LlmProvider, Message, RECALL_PREFIX, Role, SUMMARY_PREFIX, Skill, build_system_prompt,
Expand All @@ -22,10 +25,11 @@ impl<C: Channel> Agent<C> {
let Some(ref budget) = self.context_state.budget else {
return false;
};
let margin = self.runtime.token_safety_margin;
let total_tokens: usize = self
.messages
.iter()
.map(|m| estimate_tokens(&m.content))
.map(|m| (estimate_tokens(&m.content) as f64 * f64::from(margin)) as usize)
.sum();
let threshold =
(budget.max_tokens() as f32 * self.context_state.compaction_threshold) as usize;
Expand Down Expand Up @@ -644,6 +648,15 @@ impl<C: Channel> Agent<C> {
}

self.trim_messages_to_budget(alloc.recent_history);

if self.runtime.redact_credentials {
for msg in &mut self.messages {
if let Cow::Owned(s) = scrub_content(&msg.content) {
msg.content = s;
}
}
}

self.recompute_prompt_tokens();
let _ = self.channel.send_status("").await;

Expand Down Expand Up @@ -1789,6 +1802,131 @@ mod tests {
}
}

#[tokio::test]
async fn test_prepare_context_scrubs_secrets_when_redact_enabled() {
    // Agent with redaction enabled and a context budget so prepare_context
    // runs the full pipeline, including the scrub pass.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    )
    .with_context_budget(4096, 0.20, 0.80, 4, 0)
    .with_redact_credentials(true);

    // One user message carrying both an API-key-shaped secret and a path.
    agent.messages.push(Message {
        role: Role::User,
        content: "my key is sk-abc123xyz and lives at /Users/dev/config.toml".into(),
        parts: vec![],
    });

    agent.prepare_context("test").await.unwrap();

    let scrubbed = agent
        .messages
        .iter()
        .find(|m| m.role == Role::User)
        .unwrap();

    // The raw values are gone and the placeholder tokens took their place.
    assert!(
        !scrubbed.content.contains("sk-abc123xyz"),
        "secret must be redacted"
    );
    assert!(
        !scrubbed.content.contains("/Users/dev/"),
        "path must be redacted"
    );
    assert!(
        scrubbed.content.contains("[REDACTED]"),
        "secret replaced with [REDACTED]"
    );
    assert!(
        scrubbed.content.contains("[PATH]"),
        "path replaced with [PATH]"
    );
}

#[tokio::test]
async fn test_prepare_context_no_scrub_when_redact_disabled() {
    // Same pipeline as the scrubbing test, but with redaction switched off.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    )
    .with_context_budget(4096, 0.20, 0.80, 4, 0)
    .with_redact_credentials(false);

    // Keep a copy so we can prove the message survives byte-for-byte.
    let original = "key sk-abc123xyz at /Users/dev/file.rs".to_string();
    agent.messages.push(Message {
        role: Role::User,
        content: original.clone(),
        parts: vec![],
    });

    agent.prepare_context("test").await.unwrap();

    let untouched = agent
        .messages
        .iter()
        .find(|m| m.role == Role::User)
        .unwrap();
    assert_eq!(
        untouched.content, original,
        "content must be unchanged when redact disabled"
    );
}

#[test]
fn token_safety_margin_above_one_inflates_token_count() {
    // A huge margin multiplies every per-message token estimate, so a
    // handful of short messages is enough to cross the compaction
    // threshold of a 1000-token budget.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    )
    .with_context_budget(1000, 0.20, 0.75, 4, 0)
    .with_token_safety_margin(100.0);

    agent.messages.extend((0..5).map(|i| Message {
        role: Role::User,
        content: format!("message {i} with content"),
        parts: vec![],
    }));

    assert!(
        agent.should_compact(),
        "large margin must trigger compaction even with few messages"
    );
}

#[test]
fn token_safety_margin_zero_never_compacts() {
    // With margin 0.0 every per-message estimate collapses to zero, so
    // even 50 long messages against a tiny 10-token budget never reach
    // the compaction threshold.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    )
    .with_context_budget(10, 0.20, 0.75, 4, 0)
    .with_token_safety_margin(0.0);

    agent.messages.extend((0..50).map(|i| Message {
        role: Role::User,
        content: format!(
            "very long message content {i} repeated many times to fill context"
        ),
        parts: vec![],
    }));

    assert!(
        !agent.should_compact(),
        "margin=0.0 means zero token counts, must never compact"
    );
}

#[tokio::test]
async fn disambiguate_skills_reorders_on_match() {
let json = r#"{"skill_name":"beta_skill","confidence":0.9,"params":{}}"#;
Expand Down
6 changes: 6 additions & 0 deletions crates/zeph-core/src/agent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ pub(super) struct RuntimeConfig {
pub(super) max_tool_iterations: usize,
pub(super) summarize_tool_output_enabled: bool,
pub(super) permission_policy: zeph_tools::PermissionPolicy,
pub(super) redact_credentials: bool,
pub(super) token_safety_margin: f32,
}

pub struct Agent<C: Channel> {
Expand Down Expand Up @@ -227,6 +229,8 @@ impl<C: Channel> Agent<C> {
max_tool_iterations: 10,
summarize_tool_output_enabled: false,
permission_policy: zeph_tools::PermissionPolicy::default(),
redact_credentials: true,
token_safety_margin: 1.0,
},
learning_config: None,
reflection_used: false,
Expand Down Expand Up @@ -931,6 +935,8 @@ impl<C: Channel> Agent<C> {

self.runtime.security = config.security;
self.runtime.timeouts = config.timeouts;
self.runtime.redact_credentials = config.memory.redact_credentials;
self.runtime.token_safety_margin = config.memory.token_safety_margin;
self.memory_state.history_limit = config.memory.history_limit;
self.memory_state.recall_limit = config.memory.semantic.recall_limit;
self.memory_state.summarization_threshold = config.memory.summarization_threshold;
Expand Down
13 changes: 13 additions & 0 deletions crates/zeph-core/src/agent/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ use crate::channel::Channel;
use crate::metrics::MetricsSnapshot;

impl<C: Channel> Agent<C> {
/// Probe the configured vector store for connectivity and record the
/// result (plus the backend name) in the metrics snapshot.
pub async fn check_vector_store_health(&self, backend_name: &str) {
    // No memory configured means no vector store to reach.
    let mut connected = false;
    if let Some(mem) = self.memory_state.memory.as_ref() {
        connected = mem.is_vector_store_connected().await;
    }
    let backend = backend_name.to_owned();
    self.update_metrics(move |m| {
        m.qdrant_available = connected;
        m.vector_backend = backend;
    });
}

pub(super) fn update_metrics(&self, f: impl FnOnce(&mut MetricsSnapshot)) {
if let Some(ref tx) = self.metrics_tx {
let elapsed = self.start_time.elapsed().as_secs();
Expand Down
38 changes: 26 additions & 12 deletions crates/zeph-core/src/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,32 @@ impl AppBuilder {

pub async fn build_memory(&self, provider: &AnyProvider) -> anyhow::Result<SemanticMemory> {
let embed_model = self.embedding_model();
let memory = SemanticMemory::with_weights(
&self.config.memory.sqlite_path,
&self.config.memory.qdrant_url,
provider.clone(),
&embed_model,
self.config.memory.semantic.vector_weight,
self.config.memory.semantic.keyword_weight,
)
.await?;
let memory = match self.config.memory.vector_backend {
crate::config::VectorBackend::Sqlite => {
SemanticMemory::with_sqlite_backend(
&self.config.memory.sqlite_path,
provider.clone(),
&embed_model,
self.config.memory.semantic.vector_weight,
self.config.memory.semantic.keyword_weight,
)
.await?
}
crate::config::VectorBackend::Qdrant => {
SemanticMemory::with_weights(
&self.config.memory.sqlite_path,
&self.config.memory.qdrant_url,
provider.clone(),
&embed_model,
self.config.memory.semantic.vector_weight,
self.config.memory.semantic.keyword_weight,
)
.await?
}
};

if self.config.memory.semantic.enabled && memory.has_qdrant() {
tracing::info!("semantic memory enabled, Qdrant connected");
if self.config.memory.semantic.enabled && memory.is_vector_store_connected().await {
tracing::info!("semantic memory enabled, vector store connected");
match memory.embed_missing().await {
Ok(n) if n > 0 => tracing::info!("backfilled {n} missing embedding(s)"),
Ok(_) => {}
Expand Down Expand Up @@ -427,7 +441,7 @@ pub async fn create_skill_matcher(
) -> Option<SkillMatcherBackend> {
let embed_fn = provider.embed_fn();

if config.memory.semantic.enabled && memory.has_qdrant() {
if config.memory.semantic.enabled && memory.is_vector_store_connected().await {
match QdrantSkillMatcher::new(&config.memory.qdrant_url) {
Ok(mut qm) => match qm.sync(meta, embedding_model, &embed_fn).await {
Ok(_) => return Some(SkillMatcherBackend::Qdrant(qm)),
Expand Down
11 changes: 11 additions & 0 deletions crates/zeph-core/src/config/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ impl Config {
{
self.memory.prune_protect_tokens = tokens;
}
if let Ok(v) = std::env::var("ZEPH_MEMORY_VECTOR_BACKEND") {
match v.to_lowercase().as_str() {
"sqlite" => {
self.memory.vector_backend = super::VectorBackend::Sqlite;
}
"qdrant" => {
self.memory.vector_backend = super::VectorBackend::Qdrant;
}
_ => {}
}
}
if let Ok(v) = std::env::var("ZEPH_SKILLS_MAX_ACTIVE")
&& let Ok(n) = v.parse::<usize>()
{
Expand Down
Loading
Loading