diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b115a6..6a27ca7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Added +- SQLite-backed `SqliteVectorStore` as embedded alternative to Qdrant for zero-dependency vector search (#741) +- `vector_backend` config option to select between `qdrant` and `sqlite` vector backends +- Credential scrubbing in LLM context pipeline via `scrub_content()` — redacts secrets and paths before LLM calls (#743) +- `redact_credentials` config option (default: true) to toggle context scrubbing + +### Changed +- Token estimation uses `chars/4` heuristic instead of `bytes/3` for better accuracy on multi-byte text (#742) + ## [0.11.5] - 2026-02-22 ### Added diff --git a/Cargo.lock b/Cargo.lock index c51ac7d..cb0cf82 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9263,6 +9263,7 @@ name = "zeph-memory" version = "0.11.5" dependencies = [ "blake3", + "bytemuck", "criterion", "pdf-extract", "proptest", diff --git a/Cargo.toml b/Cargo.toml index 582e054..558a00d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ anyhow = "1.0" axum = "0.8" base64 = "0.22" blake3 = "1.8" +bytemuck = "1.25" candle-core = { version = "0.9", default-features = false } candle-nn = { version = "0.9", default-features = false } candle-transformers = { version = "0.9", default-features = false } @@ -182,3 +183,11 @@ zeph-skills.workspace = true [lints] workspace = true + +[profile.release] +lto = true +codegen-units = 1 +strip = true + +[profile.bench] +debug = true diff --git a/README.md b/README.md index 0cb9162..5bce8a0 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ zeph --tui # run with TUI dashboard |---|---| | **Hybrid inference** | Ollama, Claude, OpenAI, Candle (GGUF), any OpenAI-compatible API. 
Multi-model orchestrator with fallback chains | | **Skills-first architecture** | YAML+Markdown skill files with semantic matching, self-learning evolution, and 4-tier trust model | -| **Semantic memory** | SQLite + Qdrant with summarization, cross-session recall, and vector retrieval | +| **Semantic memory** | SQLite + Qdrant (or embedded SQLite vector search) with summarization, credential scrubbing, cross-session recall, and vector retrieval | | **Multi-channel I/O** | CLI, Telegram, Discord, Slack, TUI — all with streaming. Vision and speech-to-text input | | **Protocols** | MCP client (stdio + HTTP), A2A agent-to-agent communication, sub-agent orchestration | | **Defense-in-depth** | Shell sandbox, tool permissions, secret redaction, SSRF protection, skill trust quarantine, audit logging | diff --git a/crates/zeph-core/src/agent/builder.rs b/crates/zeph-core/src/agent/builder.rs index a03b07b..cf79c5b 100644 --- a/crates/zeph-core/src/agent/builder.rs +++ b/crates/zeph-core/src/agent/builder.rs @@ -43,14 +43,13 @@ impl Agent { recall_limit: usize, summarization_threshold: usize, ) -> Self { - let has_qdrant = memory.has_qdrant(); self.memory_state.memory = Some(memory); self.memory_state.conversation_id = Some(conversation_id); self.memory_state.history_limit = history_limit; self.memory_state.recall_limit = recall_limit; self.memory_state.summarization_threshold = summarization_threshold; self.update_metrics(|m| { - m.qdrant_available = has_qdrant; + m.qdrant_available = false; m.sqlite_conversation_id = Some(conversation_id); }); self @@ -138,6 +137,18 @@ impl Agent { self } + #[must_use] + pub fn with_redact_credentials(mut self, enabled: bool) -> Self { + self.runtime.redact_credentials = enabled; + self + } + + #[must_use] + pub fn with_token_safety_margin(mut self, margin: f32) -> Self { + self.runtime.token_safety_margin = margin; + self + } + #[must_use] pub fn with_tool_summarization(mut self, enabled: bool) -> Self { 
self.runtime.summarize_tool_output_enabled = enabled; @@ -215,11 +226,7 @@ impl Agent { let provider_name = self.provider.name().to_string(); let model_name = self.runtime.model_name.clone(); let total_skills = self.skill_state.registry.all_meta().len(); - let qdrant_available = self - .memory_state - .memory - .as_ref() - .is_some_and(zeph_memory::semantic::SemanticMemory::has_qdrant); + let qdrant_available = false; let conversation_id = self.memory_state.conversation_id; let prompt_estimate = self .messages diff --git a/crates/zeph-core/src/agent/context.rs b/crates/zeph-core/src/agent/context.rs index c277cb1..1224c94 100644 --- a/crates/zeph-core/src/agent/context.rs +++ b/crates/zeph-core/src/agent/context.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::fmt::Write; use zeph_llm::provider::MessagePart; @@ -6,6 +7,8 @@ use zeph_skills::ScoredMatch; use zeph_skills::loader::SkillMeta; use zeph_skills::prompt::format_skills_catalog; +use crate::redact::scrub_content; + use super::{ Agent, CODE_CONTEXT_PREFIX, CROSS_SESSION_PREFIX, Channel, ContextBudget, EnvironmentContext, LlmProvider, Message, RECALL_PREFIX, Role, SUMMARY_PREFIX, Skill, build_system_prompt, @@ -22,10 +25,11 @@ impl Agent { let Some(ref budget) = self.context_state.budget else { return false; }; + let margin = self.runtime.token_safety_margin; let total_tokens: usize = self .messages .iter() - .map(|m| estimate_tokens(&m.content)) + .map(|m| (estimate_tokens(&m.content) as f64 * f64::from(margin)) as usize) .sum(); let threshold = (budget.max_tokens() as f32 * self.context_state.compaction_threshold) as usize; @@ -644,6 +648,15 @@ impl Agent { } self.trim_messages_to_budget(alloc.recent_history); + + if self.runtime.redact_credentials { + for msg in &mut self.messages { + if let Cow::Owned(s) = scrub_content(&msg.content) { + msg.content = s; + } + } + } + self.recompute_prompt_tokens(); let _ = self.channel.send_status("").await; @@ -1789,6 +1802,131 @@ mod tests { } } + #[tokio::test] + 
async fn test_prepare_context_scrubs_secrets_when_redact_enabled() { + let provider = mock_provider(vec![]); + let channel = MockChannel::new(vec![]); + let registry = create_test_registry(); + let executor = MockToolExecutor::no_tools(); + + let mut agent = Agent::new(provider, channel, registry, None, 5, executor) + .with_context_budget(4096, 0.20, 0.80, 4, 0) + .with_redact_credentials(true); + + // Push a user message containing a secret and a path + agent.messages.push(Message { + role: Role::User, + content: "my key is sk-abc123xyz and lives at /Users/dev/config.toml".into(), + parts: vec![], + }); + + agent.prepare_context("test").await.unwrap(); + + let user_msg = agent + .messages + .iter() + .find(|m| m.role == Role::User) + .unwrap(); + assert!( + !user_msg.content.contains("sk-abc123xyz"), + "secret must be redacted" + ); + assert!( + !user_msg.content.contains("/Users/dev/"), + "path must be redacted" + ); + assert!( + user_msg.content.contains("[REDACTED]"), + "secret replaced with [REDACTED]" + ); + assert!( + user_msg.content.contains("[PATH]"), + "path replaced with [PATH]" + ); + } + + #[tokio::test] + async fn test_prepare_context_no_scrub_when_redact_disabled() { + let provider = mock_provider(vec![]); + let channel = MockChannel::new(vec![]); + let registry = create_test_registry(); + let executor = MockToolExecutor::no_tools(); + + let mut agent = Agent::new(provider, channel, registry, None, 5, executor) + .with_context_budget(4096, 0.20, 0.80, 4, 0) + .with_redact_credentials(false); + + let original = "key sk-abc123xyz at /Users/dev/file.rs".to_string(); + agent.messages.push(Message { + role: Role::User, + content: original.clone(), + parts: vec![], + }); + + agent.prepare_context("test").await.unwrap(); + + let user_msg = agent + .messages + .iter() + .find(|m| m.role == Role::User) + .unwrap(); + assert_eq!( + user_msg.content, original, + "content must be unchanged when redact disabled" + ); + } + + #[test] + fn 
token_safety_margin_above_one_inflates_token_count() { + let provider = mock_provider(vec![]); + let channel = MockChannel::new(vec![]); + let registry = create_test_registry(); + let executor = MockToolExecutor::no_tools(); + + // With a very large margin, token count is inflated and compaction triggers earlier + let mut agent = Agent::new(provider, channel, registry, None, 5, executor) + .with_context_budget(1000, 0.20, 0.75, 4, 0) + .with_token_safety_margin(100.0); + for i in 0..5 { + agent.messages.push(Message { + role: Role::User, + content: format!("message {i} with content"), + parts: vec![], + }); + } + + assert!( + agent.should_compact(), + "large margin must trigger compaction even with few messages" + ); + } + + #[test] + fn token_safety_margin_zero_never_compacts() { + let provider = mock_provider(vec![]); + let channel = MockChannel::new(vec![]); + let registry = create_test_registry(); + let executor = MockToolExecutor::no_tools(); + + // margin=0.0 makes all token counts 0, so compaction never triggers + let mut agent = Agent::new(provider, channel, registry, None, 5, executor) + .with_context_budget(10, 0.20, 0.75, 4, 0) + .with_token_safety_margin(0.0); + for i in 0..50 { + agent.messages.push(Message { + role: Role::User, + content: format!( + "very long message content {i} repeated many times to fill context" + ), + parts: vec![], + }); + } + assert!( + !agent.should_compact(), + "margin=0.0 means zero token counts, must never compact" + ); + } + #[tokio::test] async fn disambiguate_skills_reorders_on_match() { let json = r#"{"skill_name":"beta_skill","confidence":0.9,"params":{}}"#; diff --git a/crates/zeph-core/src/agent/mod.rs b/crates/zeph-core/src/agent/mod.rs index 73b8c88..723e94b 100644 --- a/crates/zeph-core/src/agent/mod.rs +++ b/crates/zeph-core/src/agent/mod.rs @@ -122,6 +122,8 @@ pub(super) struct RuntimeConfig { pub(super) max_tool_iterations: usize, pub(super) summarize_tool_output_enabled: bool, pub(super) permission_policy: 
zeph_tools::PermissionPolicy, + pub(super) redact_credentials: bool, + pub(super) token_safety_margin: f32, } pub struct Agent { @@ -227,6 +229,8 @@ impl Agent { max_tool_iterations: 10, summarize_tool_output_enabled: false, permission_policy: zeph_tools::PermissionPolicy::default(), + redact_credentials: true, + token_safety_margin: 1.0, }, learning_config: None, reflection_used: false, @@ -931,6 +935,8 @@ impl Agent { self.runtime.security = config.security; self.runtime.timeouts = config.timeouts; + self.runtime.redact_credentials = config.memory.redact_credentials; + self.runtime.token_safety_margin = config.memory.token_safety_margin; self.memory_state.history_limit = config.memory.history_limit; self.memory_state.recall_limit = config.memory.semantic.recall_limit; self.memory_state.summarization_threshold = config.memory.summarization_threshold; diff --git a/crates/zeph-core/src/agent/utils.rs b/crates/zeph-core/src/agent/utils.rs index c5ee39f..3b27fef 100644 --- a/crates/zeph-core/src/agent/utils.rs +++ b/crates/zeph-core/src/agent/utils.rs @@ -5,6 +5,19 @@ use crate::channel::Channel; use crate::metrics::MetricsSnapshot; impl Agent { + /// Perform a real health check on the vector store and update metrics. 
+ pub async fn check_vector_store_health(&self, backend_name: &str) { + let connected = match self.memory_state.memory.as_ref() { + Some(m) => m.is_vector_store_connected().await, + None => false, + }; + let name = backend_name.to_owned(); + self.update_metrics(|m| { + m.qdrant_available = connected; + m.vector_backend = name; + }); + } + pub(super) fn update_metrics(&self, f: impl FnOnce(&mut MetricsSnapshot)) { if let Some(ref tx) = self.metrics_tx { let elapsed = self.start_time.elapsed().as_secs(); diff --git a/crates/zeph-core/src/bootstrap.rs b/crates/zeph-core/src/bootstrap.rs index 02da983..542c4c1 100644 --- a/crates/zeph-core/src/bootstrap.rs +++ b/crates/zeph-core/src/bootstrap.rs @@ -148,18 +148,32 @@ impl AppBuilder { pub async fn build_memory(&self, provider: &AnyProvider) -> anyhow::Result { let embed_model = self.embedding_model(); - let memory = SemanticMemory::with_weights( - &self.config.memory.sqlite_path, - &self.config.memory.qdrant_url, - provider.clone(), - &embed_model, - self.config.memory.semantic.vector_weight, - self.config.memory.semantic.keyword_weight, - ) - .await?; + let memory = match self.config.memory.vector_backend { + crate::config::VectorBackend::Sqlite => { + SemanticMemory::with_sqlite_backend( + &self.config.memory.sqlite_path, + provider.clone(), + &embed_model, + self.config.memory.semantic.vector_weight, + self.config.memory.semantic.keyword_weight, + ) + .await? + } + crate::config::VectorBackend::Qdrant => { + SemanticMemory::with_weights( + &self.config.memory.sqlite_path, + &self.config.memory.qdrant_url, + provider.clone(), + &embed_model, + self.config.memory.semantic.vector_weight, + self.config.memory.semantic.keyword_weight, + ) + .await? 
+ } + }; - if self.config.memory.semantic.enabled && memory.has_qdrant() { - tracing::info!("semantic memory enabled, Qdrant connected"); + if self.config.memory.semantic.enabled && memory.is_vector_store_connected().await { + tracing::info!("semantic memory enabled, vector store connected"); match memory.embed_missing().await { Ok(n) if n > 0 => tracing::info!("backfilled {n} missing embedding(s)"), Ok(_) => {} @@ -427,7 +441,7 @@ pub async fn create_skill_matcher( ) -> Option { let embed_fn = provider.embed_fn(); - if config.memory.semantic.enabled && memory.has_qdrant() { + if config.memory.semantic.enabled && memory.is_vector_store_connected().await { match QdrantSkillMatcher::new(&config.memory.qdrant_url) { Ok(mut qm) => match qm.sync(meta, embedding_model, &embed_fn).await { Ok(_) => return Some(SkillMatcherBackend::Qdrant(qm)), diff --git a/crates/zeph-core/src/config/env.rs b/crates/zeph-core/src/config/env.rs index 82e5dd6..8b06f18 100644 --- a/crates/zeph-core/src/config/env.rs +++ b/crates/zeph-core/src/config/env.rs @@ -70,6 +70,17 @@ impl Config { { self.memory.prune_protect_tokens = tokens; } + if let Ok(v) = std::env::var("ZEPH_MEMORY_VECTOR_BACKEND") { + match v.to_lowercase().as_str() { + "sqlite" => { + self.memory.vector_backend = super::VectorBackend::Sqlite; + } + "qdrant" => { + self.memory.vector_backend = super::VectorBackend::Qdrant; + } + _ => {} + } + } if let Ok(v) = std::env::var("ZEPH_SKILLS_MAX_ACTIVE") && let Ok(n) = v.parse::() { diff --git a/crates/zeph-core/src/config/snapshots/zeph_core__config__types__tests__config_default_snapshot.snap b/crates/zeph-core/src/config/snapshots/zeph_core__config__types__tests__config_default_snapshot.snap index 71de458..51f9554 100644 --- a/crates/zeph-core/src/config/snapshots/zeph_core__config__types__tests__config_default_snapshot.snap +++ b/crates/zeph-core/src/config/snapshots/zeph_core__config__types__tests__config_default_snapshot.snap @@ -1,5 +1,6 @@ --- source: 
crates/zeph-core/src/config/types.rs +assertion_line: 1113 expression: toml_str --- [agent] @@ -44,6 +45,9 @@ compaction_preserve_tail = 6 auto_budget = true prune_protect_tokens = 40000 cross_session_score_threshold = 0.35 +vector_backend = "qdrant" +token_safety_margin = 1.0 +redact_credentials = true [memory.semantic] enabled = true diff --git a/crates/zeph-core/src/config/tests.rs b/crates/zeph-core/src/config/tests.rs index 69597c4..bf5019e 100644 --- a/crates/zeph-core/src/config/tests.rs +++ b/crates/zeph-core/src/config/tests.rs @@ -2490,3 +2490,44 @@ fn env_override_auto_update_check_invalid_ignored() { assert!(config.agent.auto_update_check); } + +#[test] +fn vector_backend_sqlite_roundtrip() { + let toml_str = r#" +sqlite_path = "zeph.db" +history_limit = 100 +vector_backend = "sqlite" +"#; + let memory: MemoryConfig = toml::from_str(toml_str).unwrap(); + assert_eq!(memory.vector_backend, VectorBackend::Sqlite); + + let serialized = toml::to_string(&memory).unwrap(); + let reparsed: MemoryConfig = toml::from_str(&serialized).unwrap(); + assert_eq!(reparsed.vector_backend, VectorBackend::Sqlite); +} + +#[test] +fn redact_credentials_false_parse() { + let toml_str = r#" +sqlite_path = "zeph.db" +history_limit = 100 +redact_credentials = false +"#; + let memory: MemoryConfig = toml::from_str(toml_str).unwrap(); + assert!(!memory.redact_credentials); +} + +#[test] +fn token_safety_margin_custom_parse() { + let toml_str = r#" +sqlite_path = "zeph.db" +history_limit = 100 +token_safety_margin = 1.15 +"#; + let memory: MemoryConfig = toml::from_str(toml_str).unwrap(); + assert!( + (memory.token_safety_margin - 1.15).abs() < 1e-5, + "token_safety_margin must parse to 1.15, got {}", + memory.token_safety_margin + ); +} diff --git a/crates/zeph-core/src/config/types.rs b/crates/zeph-core/src/config/types.rs index 9f427df..151dae6 100644 --- a/crates/zeph-core/src/config/types.rs +++ b/crates/zeph-core/src/config/types.rs @@ -409,6 +409,25 @@ fn 
default_cooldown_minutes() -> u64 { 60 } +/// Vector backend selector for embedding storage. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Deserialize, Serialize)] +#[serde(rename_all = "lowercase")] +pub enum VectorBackend { + #[default] + Qdrant, + Sqlite, +} + +impl VectorBackend { + #[must_use] + pub fn as_str(&self) -> &'static str { + match self { + Self::Qdrant => "qdrant", + Self::Sqlite => "sqlite", + } + } +} + #[derive(Debug, Deserialize, Serialize)] pub struct MemoryConfig { pub sqlite_path: String, @@ -431,6 +450,20 @@ pub struct MemoryConfig { pub prune_protect_tokens: usize, #[serde(default = "default_cross_session_score_threshold")] pub cross_session_score_threshold: f32, + #[serde(default)] + pub vector_backend: VectorBackend, + #[serde(default = "default_token_safety_margin")] + pub token_safety_margin: f32, + #[serde(default = "default_redact_credentials")] + pub redact_credentials: bool, +} + +fn default_token_safety_margin() -> f32 { + 1.0 +} + +fn default_redact_credentials() -> bool { + true } fn default_qdrant_url() -> String { @@ -1040,6 +1073,9 @@ impl Default for Config { auto_budget: default_auto_budget(), prune_protect_tokens: default_prune_protect_tokens(), cross_session_score_threshold: default_cross_session_score_threshold(), + vector_backend: VectorBackend::default(), + token_safety_margin: default_token_safety_margin(), + redact_credentials: default_redact_credentials(), }, telegram: None, discord: None, diff --git a/crates/zeph-core/src/context.rs b/crates/zeph-core/src/context.rs index 22b829c..65a6d4e 100644 --- a/crates/zeph-core/src/context.rs +++ b/crates/zeph-core/src/context.rs @@ -261,8 +261,10 @@ mod tests { #[test] fn estimate_tokens_basic() { - assert_eq!(estimate_tokens("Hello world"), 3); + // "Hello world" = 11 chars / 4 = 2 + assert_eq!(estimate_tokens("Hello world"), 2); assert_eq!(estimate_tokens(""), 0); + // "test" = 4 chars / 4 = 1 assert_eq!(estimate_tokens("test"), 1); } diff --git 
a/crates/zeph-core/src/metrics.rs b/crates/zeph-core/src/metrics.rs index 88c3ba0..9cc7f72 100644 --- a/crates/zeph-core/src/metrics.rs +++ b/crates/zeph-core/src/metrics.rs @@ -28,6 +28,7 @@ pub struct MetricsSnapshot { pub sqlite_message_count: u64, pub sqlite_conversation_id: Option, pub qdrant_available: bool, + pub vector_backend: String, pub embeddings_generated: u64, pub last_llm_latency_ms: u64, pub uptime_seconds: u64, diff --git a/crates/zeph-core/src/redact.rs b/crates/zeph-core/src/redact.rs index a8a59a2..1e1109e 100644 --- a/crates/zeph-core/src/redact.rs +++ b/crates/zeph-core/src/redact.rs @@ -1,6 +1,29 @@ use std::borrow::Cow; use std::sync::LazyLock; +/// Apply both secret redaction and path sanitization in a single pass. +/// +/// Returns `Cow::Borrowed` when no changes are needed (zero-allocation fast path). +#[must_use] +pub fn scrub_content(text: &str) -> Cow<'_, str> { + let after_secrets = match redact_secrets(text) { + Cow::Borrowed(_) => { + // No secrets found: only run path scan on original text + return match sanitize_paths(text) { + Cow::Owned(s) => Cow::Owned(s), + Cow::Borrowed(_) => Cow::Borrowed(text), + }; + } + Cow::Owned(s) => s, + }; + + // Second pass: path sanitization on already-modified string + match sanitize_paths(&after_secrets) { + Cow::Owned(s) => Cow::Owned(s), + Cow::Borrowed(_) => Cow::Owned(after_secrets), + } +} + use regex::Regex; const SECRET_PREFIXES: &[&str] = &[ @@ -329,6 +352,71 @@ mod tests { use proptest::prelude::*; + #[test] + fn scrub_no_match_passthrough() { + let text = "hello world, nothing sensitive here"; + let result = scrub_content(text); + assert!(matches!(result, Cow::Borrowed(_))); + assert_eq!(result.as_ref(), text); + } + + #[test] + fn scrub_only_secrets() { + let text = "key: sk-abc123def"; + let result = scrub_content(text); + assert!(result.contains("[REDACTED]")); + assert!(!result.contains("sk-abc123")); + assert!(!result.contains("/home/")); + } + + #[test] + fn scrub_only_paths() { + 
let text = "error at /Users/dev/project/src/main.rs:42"; + let result = scrub_content(text); + assert!(result.contains("[PATH]")); + assert!(!result.contains("/Users/dev/")); + } + + #[test] + fn scrub_secrets_and_paths_combined() { + let text = "token sk-abc123 found at /home/user/config.toml"; + let result = scrub_content(text); + assert!(result.contains("[REDACTED]")); + assert!(result.contains("[PATH]")); + assert!(!result.contains("sk-abc123")); + assert!(!result.contains("/home/user/")); + } + + #[test] + fn scrub_secrets_no_paths() { + // Secret found but no path → function returns Cow::Owned (modified string) + let text = "use sk-abc123 for auth"; + let result = scrub_content(text); + assert!( + matches!(result, Cow::Owned(_)), + "must return Cow::Owned when secret was found" + ); + assert!(result.contains("[REDACTED]")); + assert!(!result.contains("[PATH]")); + } + + #[test] + fn sanitize_paths_all_prefixes() { + let cases = [ + ("/root/secrets.toml", "/root/"), + ("/tmp/tmpfile.lock", "/tmp/"), + ("/var/log/app.log", "/var/"), + ]; + for (text, prefix) in cases { + let result = sanitize_paths(text); + assert!(result.contains("[PATH]"), "{prefix} must be sanitized"); + assert!( + !result.contains(prefix), + "{prefix} must be removed from output" + ); + } + } + proptest! 
{ #[test] fn redact_secrets_never_panics(s in ".*") { @@ -353,5 +441,25 @@ mod tests { assert_eq!(result.as_ref(), s.as_str()); } } + + #[test] + fn scrub_content_never_panics(s in ".*") { + let _ = scrub_content(&s); + } + + #[test] + fn scrub_content_result_never_contains_raw_secret(s in ".*") { + let result = scrub_content(&s); + let secret_prefixes = [ + "sk-", "sk_live_", "sk_test_", "AKIA", "ghp_", "gho_", + "xoxb-", "xoxp-", "AIza", "glpat-", "dckr_pat_", + ]; + for prefix in secret_prefixes { + assert!( + !result.contains(prefix), + "scrub_content must redact prefix: {prefix}" + ); + } + } } } diff --git a/crates/zeph-memory/Cargo.toml b/crates/zeph-memory/Cargo.toml index 4f432f3..1646dda 100644 --- a/crates/zeph-memory/Cargo.toml +++ b/crates/zeph-memory/Cargo.toml @@ -13,6 +13,7 @@ description = "Semantic memory with SQLite and Qdrant for Zeph agent" readme = "README.md" [dependencies] +bytemuck = { workspace = true } pdf-extract = { workspace = true, optional = true } qdrant-client = { workspace = true, features = ["serde"] } schemars.workspace = true diff --git a/crates/zeph-memory/migrations/011_vector_store.sql b/crates/zeph-memory/migrations/011_vector_store.sql new file mode 100644 index 0000000..af9e806 --- /dev/null +++ b/crates/zeph-memory/migrations/011_vector_store.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS vector_collections ( + name TEXT PRIMARY KEY +); + +CREATE TABLE IF NOT EXISTS vector_points ( + id TEXT NOT NULL, + collection TEXT NOT NULL REFERENCES vector_collections(name), + vector BLOB NOT NULL, + payload TEXT NOT NULL DEFAULT '{}', + PRIMARY KEY (collection, id) +); + +CREATE INDEX IF NOT EXISTS idx_vector_points_collection ON vector_points(collection); diff --git a/crates/zeph-memory/src/embedding_store.rs b/crates/zeph-memory/src/embedding_store.rs index 9cea2e2..198995b 100644 --- a/crates/zeph-memory/src/embedding_store.rs +++ b/crates/zeph-memory/src/embedding_store.rs @@ -3,6 +3,7 @@ use sqlx::SqlitePool; use 
crate::error::MemoryError; use crate::qdrant_ops::QdrantOps; +use crate::sqlite_vector_store::SqliteVectorStore; use crate::types::{ConversationId, MessageId}; use crate::vector_store::{FieldCondition, FieldValue, VectorFilter, VectorPoint, VectorStore}; @@ -83,6 +84,19 @@ impl EmbeddingStore { }) } + /// Create a new `EmbeddingStore` backed by `SQLite` for vector storage. + /// + /// Uses the same pool for both vector data and metadata. No external Qdrant required. + #[must_use] + pub fn new_sqlite(pool: SqlitePool) -> Self { + let ops = SqliteVectorStore::new(pool.clone()); + Self { + ops: Box::new(ops), + collection: COLLECTION_NAME.into(), + pool, + } + } + #[must_use] pub fn with_store(store: Box, pool: SqlitePool) -> Self { Self { @@ -92,6 +106,10 @@ impl EmbeddingStore { } } + pub async fn health_check(&self) -> bool { + self.ops.health_check().await.unwrap_or(false) + } + /// Ensure the collection exists in Qdrant with the given vector size. /// /// Idempotent: no-op if the collection already exists. 
diff --git a/crates/zeph-memory/src/in_memory_store.rs b/crates/zeph-memory/src/in_memory_store.rs index c8b41b7..cc0021c 100644 --- a/crates/zeph-memory/src/in_memory_store.rs +++ b/crates/zeph-memory/src/in_memory_store.rs @@ -248,6 +248,10 @@ impl VectorStore for InMemoryVectorStore { Ok(result) }) } + + fn health_check(&self) -> BoxFuture<'_, Result> { + Box::pin(async { Ok(true) }) + } } #[cfg(test)] diff --git a/crates/zeph-memory/src/lib.rs b/crates/zeph-memory/src/lib.rs index bf55430..9bca80b 100644 --- a/crates/zeph-memory/src/lib.rs +++ b/crates/zeph-memory/src/lib.rs @@ -9,6 +9,7 @@ pub mod in_memory_store; pub mod qdrant_ops; pub mod semantic; pub mod sqlite; +pub mod sqlite_vector_store; pub mod types; pub mod vector_store; diff --git a/crates/zeph-memory/src/qdrant_ops.rs b/crates/zeph-memory/src/qdrant_ops.rs index cc4d95f..ce68f35 100644 --- a/crates/zeph-memory/src/qdrant_ops.rs +++ b/crates/zeph-memory/src/qdrant_ops.rs @@ -343,6 +343,20 @@ impl crate::vector_store::VectorStore for QdrantOps { .map_err(|e| crate::VectorStoreError::Scroll(e.to_string())) }) } + + fn health_check( + &self, + ) -> std::pin::Pin< + Box> + Send + '_>, + > { + Box::pin(async move { + self.client + .health_check() + .await + .map(|_| true) + .map_err(|e| crate::VectorStoreError::Collection(e.to_string())) + }) + } } fn vector_filter_to_qdrant(filter: crate::VectorFilter) -> Filter { diff --git a/crates/zeph-memory/src/semantic.rs b/crates/zeph-memory/src/semantic.rs index a646eca..7ccc98c 100644 --- a/crates/zeph-memory/src/semantic.rs +++ b/crates/zeph-memory/src/semantic.rs @@ -40,10 +40,13 @@ pub struct SessionSummaryResult { pub conversation_id: ConversationId, } -/// Estimate token count using bytes/3 heuristic. +/// Estimate token count using chars/4 heuristic. +/// +/// Aligns better with `cl100k_base` tokenizer behavior for both ASCII and multi-byte +/// UTF-8 text (CJK, Cyrillic) compared to the previous byte-length approach. 
#[must_use] pub fn estimate_tokens(text: &str) -> usize { - text.len() / 3 + text.chars().count() / 4 } fn build_summarization_prompt(messages: &[(MessageId, String, String)]) -> String { @@ -124,6 +127,32 @@ impl SemanticMemory { }) } + /// Create a `SemanticMemory` using the `SQLite`-embedded vector backend. + /// + /// # Errors + /// + /// Returns an error if `SQLite` cannot be initialized. + pub async fn with_sqlite_backend( + sqlite_path: &str, + provider: AnyProvider, + embedding_model: &str, + vector_weight: f64, + keyword_weight: f64, + ) -> Result { + let sqlite = SqliteStore::new(sqlite_path).await?; + let pool = sqlite.pool().clone(); + let store = EmbeddingStore::new_sqlite(pool); + + Ok(Self { + sqlite, + qdrant: Some(store), + provider, + embedding_model: embedding_model.into(), + vector_weight, + keyword_weight, + }) + } + /// Save a message to `SQLite` and optionally embed and store in Qdrant. /// /// Returns the message ID assigned by `SQLite`. @@ -493,9 +522,20 @@ impl SemanticMemory { &self.sqlite } - /// Check if Qdrant is available for semantic search. + /// Check if the vector store backend is reachable. + /// + /// Performs a real health check (Qdrant gRPC ping or `SQLite` query) + /// instead of just checking whether the client was created. + pub async fn is_vector_store_connected(&self) -> bool { + match self.qdrant.as_ref() { + Some(store) => store.health_check().await, + None => false, + } + } + + /// Check if a vector store client is configured (may not be connected). 
#[must_use] - pub fn has_qdrant(&self) -> bool { + pub fn has_vector_store(&self) -> bool { self.qdrant.is_some() } @@ -877,9 +917,15 @@ mod tests { } #[tokio::test] - async fn has_qdrant_returns_false_when_unavailable() { + async fn has_vector_store_returns_false_when_unavailable() { + let memory = test_semantic_memory(false).await; + assert!(!memory.has_vector_store()); + } + + #[tokio::test] + async fn is_vector_store_connected_returns_false_when_unavailable() { let memory = test_semantic_memory(false).await; - assert!(!memory.has_qdrant()); + assert!(!memory.is_vector_store_connected().await); } #[tokio::test] @@ -907,14 +953,16 @@ mod tests { #[test] fn estimate_tokens_ascii() { + // "Hello, world!" = 13 chars / 4 = 3 let text = "Hello, world!"; - assert_eq!(estimate_tokens(text), 4); + assert_eq!(estimate_tokens(text), 3); } #[test] fn estimate_tokens_unicode() { + // "Привет мир" = 10 chars / 4 = 2 let text = "Привет мир"; - assert_eq!(estimate_tokens(text), 6); + assert_eq!(estimate_tokens(text), 2); } #[test] @@ -922,6 +970,13 @@ mod tests { assert_eq!(estimate_tokens(""), 0); } + #[test] + fn estimate_tokens_cjk() { + // 8 CJK chars / 4 = 2 + let text = "你好世界テスト日"; + assert_eq!(estimate_tokens(text), 2); + } + #[tokio::test] async fn message_count_empty_conversation() { let memory = test_semantic_memory(false).await; @@ -1211,13 +1266,15 @@ mod tests { #[test] fn estimate_tokens_short_text() { + // "ab" = 2 chars / 4 = 0 assert_eq!(estimate_tokens("ab"), 0); } #[test] fn estimate_tokens_longer_text() { + // 100 chars / 4 = 25 let text = "a".repeat(100); - assert_eq!(estimate_tokens(&text), 33); + assert_eq!(estimate_tokens(&text), 25); } #[tokio::test] @@ -1246,6 +1303,54 @@ mod tests { assert!(result.is_ok()); } + #[tokio::test] + async fn test_semantic_memory_sqlite_remember_recall_roundtrip() { + // Build SemanticMemory with EmbeddingStore backed by SQLite instead of Qdrant + let mut mock = MockProvider::default(); + mock.supports_embeddings = true; + 
// Provide deterministic embedding vectors: embed returns a fixed 4-element vector + // MockProvider.embed always returns the same vector, so cosine similarity = 1.0 + let provider = AnyProvider::Mock(mock); + + let sqlite = SqliteStore::new(":memory:").await.unwrap(); + let pool = sqlite.pool().clone(); + let qdrant = Some(crate::embedding_store::EmbeddingStore::new_sqlite(pool)); + + let memory = SemanticMemory { + sqlite, + qdrant, + provider, + embedding_model: "test-model".into(), + vector_weight: 0.7, + keyword_weight: 0.3, + }; + + let cid = memory.sqlite().create_conversation().await.unwrap(); + + // remember → stores in SQLite + SQLite vector store + let id1 = memory + .remember(cid, "user", "rust async programming") + .await + .unwrap(); + let id2 = memory + .remember(cid, "assistant", "use tokio for async") + .await + .unwrap(); + assert!(id1 < id2); + + // recall → should return results via FTS5 keyword search + let recalled = memory.recall("rust", 5, None).await.unwrap(); + assert!( + !recalled.is_empty(), + "recall must return at least one result" + ); + + // Verify history is accessible + let history = memory.sqlite().load_history(cid, 50).await.unwrap(); + assert_eq!(history.len(), 2); + assert_eq!(history[0].content, "rust async programming"); + } + #[tokio::test] async fn remember_with_embeddings_supported_but_no_qdrant() { let memory = test_semantic_memory(true).await; diff --git a/crates/zeph-memory/src/sqlite_vector_store.rs b/crates/zeph-memory/src/sqlite_vector_store.rs new file mode 100644 index 0000000..7648553 --- /dev/null +++ b/crates/zeph-memory/src/sqlite_vector_store.rs @@ -0,0 +1,734 @@ +use std::collections::HashMap; +use std::future::Future; +use std::pin::Pin; + +use sqlx::SqlitePool; + +use crate::vector_store::{ + FieldValue, ScoredVectorPoint, ScrollResult, VectorFilter, VectorPoint, VectorStore, + VectorStoreError, +}; + +pub struct SqliteVectorStore { + pool: SqlitePool, +} + +impl SqliteVectorStore { + #[must_use] + pub fn 
new(pool: SqlitePool) -> Self { + Self { pool } + } +} + +fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 { + if a.len() != b.len() || a.is_empty() { + return 0.0; + } + let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum(); + let norm_a: f32 = a.iter().map(|x| x * x).sum::().sqrt(); + let norm_b: f32 = b.iter().map(|x| x * x).sum::().sqrt(); + if norm_a == 0.0 || norm_b == 0.0 { + return 0.0; + } + dot / (norm_a * norm_b) +} + +fn matches_filter(payload: &HashMap, filter: &VectorFilter) -> bool { + for cond in &filter.must { + let Some(val) = payload.get(&cond.field) else { + return false; + }; + let matches = match &cond.value { + FieldValue::Integer(i) => val.as_i64().is_some_and(|v| v == *i), + FieldValue::Text(t) => val.as_str().is_some_and(|v| v == t.as_str()), + }; + if !matches { + return false; + } + } + for cond in &filter.must_not { + let Some(val) = payload.get(&cond.field) else { + continue; + }; + let matches = match &cond.value { + FieldValue::Integer(i) => val.as_i64().is_some_and(|v| v == *i), + FieldValue::Text(t) => val.as_str().is_some_and(|v| v == t.as_str()), + }; + if matches { + return false; + } + } + true +} + +type BoxFuture<'a, T> = Pin + Send + 'a>>; + +impl VectorStore for SqliteVectorStore { + fn ensure_collection( + &self, + collection: &str, + _vector_size: u64, + ) -> BoxFuture<'_, Result<(), VectorStoreError>> { + let collection = collection.to_owned(); + Box::pin(async move { + sqlx::query("INSERT OR IGNORE INTO vector_collections (name) VALUES (?)") + .bind(&collection) + .execute(&self.pool) + .await + .map_err(|e| VectorStoreError::Collection(e.to_string()))?; + Ok(()) + }) + } + + fn collection_exists(&self, collection: &str) -> BoxFuture<'_, Result> { + let collection = collection.to_owned(); + Box::pin(async move { + let row: (i64,) = + sqlx::query_as("SELECT COUNT(*) FROM vector_collections WHERE name = ?") + .bind(&collection) + .fetch_one(&self.pool) + .await + .map_err(|e| 
VectorStoreError::Connection(e.to_string()))?; + Ok(row.0 > 0) + }) + } + + fn delete_collection(&self, collection: &str) -> BoxFuture<'_, Result<(), VectorStoreError>> { + let collection = collection.to_owned(); + Box::pin(async move { + sqlx::query("DELETE FROM vector_points WHERE collection = ?") + .bind(&collection) + .execute(&self.pool) + .await + .map_err(|e| VectorStoreError::Delete(e.to_string()))?; + sqlx::query("DELETE FROM vector_collections WHERE name = ?") + .bind(&collection) + .execute(&self.pool) + .await + .map_err(|e| VectorStoreError::Delete(e.to_string()))?; + Ok(()) + }) + } + + fn upsert( + &self, + collection: &str, + points: Vec, + ) -> BoxFuture<'_, Result<(), VectorStoreError>> { + let collection = collection.to_owned(); + Box::pin(async move { + for point in points { + let vector_bytes: Vec = bytemuck::cast_slice(&point.vector).to_vec(); + let payload_json = serde_json::to_string(&point.payload) + .map_err(|e| VectorStoreError::Serialization(e.to_string()))?; + sqlx::query( + "INSERT INTO vector_points (id, collection, vector, payload) VALUES (?, ?, ?, ?) 
\ + ON CONFLICT(collection, id) DO UPDATE SET vector = excluded.vector, payload = excluded.payload", + ) + .bind(&point.id) + .bind(&collection) + .bind(&vector_bytes) + .bind(&payload_json) + .execute(&self.pool) + .await + .map_err(|e| VectorStoreError::Upsert(e.to_string()))?; + } + Ok(()) + }) + } + + fn search( + &self, + collection: &str, + vector: Vec, + limit: u64, + filter: Option, + ) -> BoxFuture<'_, Result, VectorStoreError>> { + let collection = collection.to_owned(); + Box::pin(async move { + let rows: Vec<(String, Vec, String)> = sqlx::query_as( + "SELECT id, vector, payload FROM vector_points WHERE collection = ?", + ) + .bind(&collection) + .fetch_all(&self.pool) + .await + .map_err(|e| VectorStoreError::Search(e.to_string()))?; + + let limit_usize = usize::try_from(limit).unwrap_or(usize::MAX); + let mut scored: Vec = rows + .into_iter() + .filter_map(|(id, blob, payload_str)| { + let Ok(stored) = bytemuck::try_cast_slice::(&blob) else { + return None; + }; + let payload: HashMap = + serde_json::from_str(&payload_str).unwrap_or_default(); + + if filter + .as_ref() + .is_some_and(|f| !matches_filter(&payload, f)) + { + return None; + } + + let score = cosine_similarity(&vector, stored); + Some(ScoredVectorPoint { id, score, payload }) + }) + .collect(); + + scored.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + scored.truncate(limit_usize); + Ok(scored) + }) + } + + fn delete_by_ids( + &self, + collection: &str, + ids: Vec, + ) -> BoxFuture<'_, Result<(), VectorStoreError>> { + let collection = collection.to_owned(); + Box::pin(async move { + for id in ids { + sqlx::query("DELETE FROM vector_points WHERE collection = ? 
AND id = ?") + .bind(&collection) + .bind(&id) + .execute(&self.pool) + .await + .map_err(|e| VectorStoreError::Delete(e.to_string()))?; + } + Ok(()) + }) + } + + fn scroll_all( + &self, + collection: &str, + key_field: &str, + ) -> BoxFuture<'_, Result> { + let collection = collection.to_owned(); + let key_field = key_field.to_owned(); + Box::pin(async move { + let rows: Vec<(String, String)> = + sqlx::query_as("SELECT id, payload FROM vector_points WHERE collection = ?") + .bind(&collection) + .fetch_all(&self.pool) + .await + .map_err(|e| VectorStoreError::Scroll(e.to_string()))?; + + let mut result = ScrollResult::new(); + for (id, payload_str) in rows { + let payload: HashMap = + serde_json::from_str(&payload_str).unwrap_or_default(); + if let Some(val) = payload.get(&key_field) { + let mut map = HashMap::new(); + map.insert( + key_field.clone(), + val.as_str().unwrap_or_default().to_owned(), + ); + result.insert(id, map); + } + } + Ok(result) + }) + } + + fn health_check(&self) -> BoxFuture<'_, Result> { + Box::pin(async move { + sqlx::query_scalar::<_, i32>("SELECT 1") + .fetch_one(&self.pool) + .await + .map(|_| true) + .map_err(|e| VectorStoreError::Collection(e.to_string())) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sqlite::SqliteStore; + use crate::vector_store::FieldCondition; + + async fn setup() -> (SqliteVectorStore, SqliteStore) { + let store = SqliteStore::new(":memory:").await.unwrap(); + let pool = store.pool().clone(); + let vs = SqliteVectorStore::new(pool); + (vs, store) + } + + #[tokio::test] + async fn ensure_and_exists() { + let (vs, _) = setup().await; + assert!(!vs.collection_exists("col1").await.unwrap()); + vs.ensure_collection("col1", 4).await.unwrap(); + assert!(vs.collection_exists("col1").await.unwrap()); + // idempotent + vs.ensure_collection("col1", 4).await.unwrap(); + assert!(vs.collection_exists("col1").await.unwrap()); + } + + #[tokio::test] + async fn delete_collection() { + let (vs, _) = 
setup().await; + vs.ensure_collection("col1", 4).await.unwrap(); + vs.upsert( + "col1", + vec![VectorPoint { + id: "p1".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::new(), + }], + ) + .await + .unwrap(); + vs.delete_collection("col1").await.unwrap(); + assert!(!vs.collection_exists("col1").await.unwrap()); + } + + #[tokio::test] + async fn upsert_and_search() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![ + VectorPoint { + id: "a".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("role".into(), serde_json::json!("user"))]), + }, + VectorPoint { + id: "b".into(), + vector: vec![0.0, 1.0, 0.0, 0.0], + payload: HashMap::from([("role".into(), serde_json::json!("assistant"))]), + }, + ], + ) + .await + .unwrap(); + + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 2, None) + .await + .unwrap(); + assert_eq!(results.len(), 2); + assert_eq!(results[0].id, "a"); + assert!((results[0].score - 1.0).abs() < 1e-5); + } + + #[tokio::test] + async fn search_with_filter() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![ + VectorPoint { + id: "a".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("role".into(), serde_json::json!("user"))]), + }, + VectorPoint { + id: "b".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("role".into(), serde_json::json!("assistant"))]), + }, + ], + ) + .await + .unwrap(); + + let filter = VectorFilter { + must: vec![FieldCondition { + field: "role".into(), + value: FieldValue::Text("user".into()), + }], + must_not: vec![], + }; + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, Some(filter)) + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "a"); + } + + #[tokio::test] + async fn delete_by_ids() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + 
vec![ + VectorPoint { + id: "a".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::new(), + }, + VectorPoint { + id: "b".into(), + vector: vec![0.0, 1.0, 0.0, 0.0], + payload: HashMap::new(), + }, + ], + ) + .await + .unwrap(); + vs.delete_by_ids("c", vec!["a".into()]).await.unwrap(); + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, None) + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "b"); + } + + #[tokio::test] + async fn scroll_all() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![VectorPoint { + id: "p1".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("text".into(), serde_json::json!("hello"))]), + }], + ) + .await + .unwrap(); + let result = vs.scroll_all("c", "text").await.unwrap(); + assert_eq!(result.len(), 1); + assert_eq!(result["p1"]["text"], "hello"); + } + + #[tokio::test] + async fn upsert_updates_existing() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![VectorPoint { + id: "p1".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("v".into(), serde_json::json!(1))]), + }], + ) + .await + .unwrap(); + vs.upsert( + "c", + vec![VectorPoint { + id: "p1".into(), + vector: vec![0.0, 1.0, 0.0, 0.0], + payload: HashMap::from([("v".into(), serde_json::json!(2))]), + }], + ) + .await + .unwrap(); + let results = vs + .search("c", vec![0.0, 1.0, 0.0, 0.0], 1, None) + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert!((results[0].score - 1.0).abs() < 1e-5); + } + + #[test] + fn cosine_similarity_orthogonal() { + assert_eq!(cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]), 0.0); + } + + #[test] + fn cosine_similarity_identical() { + let v = vec![3.0, 4.0]; + assert!((cosine_similarity(&v, &v) - 1.0).abs() < 1e-6); + } + + #[test] + fn cosine_similarity_zero_vector() { + assert_eq!(cosine_similarity(&[0.0, 0.0], &[1.0, 0.0]), 0.0); + } + + 
#[test] + fn cosine_similarity_length_mismatch() { + assert_eq!(cosine_similarity(&[1.0], &[1.0, 0.0]), 0.0); + } + + #[tokio::test] + async fn search_with_must_not_filter() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![ + VectorPoint { + id: "a".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("role".into(), serde_json::json!("user"))]), + }, + VectorPoint { + id: "b".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("role".into(), serde_json::json!("system"))]), + }, + ], + ) + .await + .unwrap(); + + let filter = VectorFilter { + must: vec![], + must_not: vec![FieldCondition { + field: "role".into(), + value: FieldValue::Text("system".into()), + }], + }; + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, Some(filter)) + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "a"); + } + + #[tokio::test] + async fn search_with_integer_filter() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![ + VectorPoint { + id: "a".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("conv_id".into(), serde_json::json!(1))]), + }, + VectorPoint { + id: "b".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("conv_id".into(), serde_json::json!(2))]), + }, + ], + ) + .await + .unwrap(); + + let filter = VectorFilter { + must: vec![FieldCondition { + field: "conv_id".into(), + value: FieldValue::Integer(1), + }], + must_not: vec![], + }; + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, Some(filter)) + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "a"); + } + + #[tokio::test] + async fn search_empty_collection() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, None) + .await + .unwrap(); + 
assert!(results.is_empty()); + } + + #[tokio::test] + async fn search_with_must_not_integer_filter() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![ + VectorPoint { + id: "a".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("conv_id".into(), serde_json::json!(1))]), + }, + VectorPoint { + id: "b".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("conv_id".into(), serde_json::json!(2))]), + }, + ], + ) + .await + .unwrap(); + + let filter = VectorFilter { + must: vec![], + must_not: vec![FieldCondition { + field: "conv_id".into(), + value: FieldValue::Integer(1), + }], + }; + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, Some(filter)) + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "b"); + } + + #[tokio::test] + async fn search_with_combined_must_and_must_not() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![ + VectorPoint { + id: "a".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([ + ("role".into(), serde_json::json!("user")), + ("conv_id".into(), serde_json::json!(1)), + ]), + }, + VectorPoint { + id: "b".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([ + ("role".into(), serde_json::json!("user")), + ("conv_id".into(), serde_json::json!(2)), + ]), + }, + VectorPoint { + id: "c".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([ + ("role".into(), serde_json::json!("assistant")), + ("conv_id".into(), serde_json::json!(1)), + ]), + }, + ], + ) + .await + .unwrap(); + + let filter = VectorFilter { + must: vec![FieldCondition { + field: "role".into(), + value: FieldValue::Text("user".into()), + }], + must_not: vec![FieldCondition { + field: "conv_id".into(), + value: FieldValue::Integer(2), + }], + }; + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, Some(filter)) + .await + 
.unwrap(); + // Only "a": role=user AND conv_id != 2 + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "a"); + } + + #[tokio::test] + async fn scroll_all_missing_key_field() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![VectorPoint { + id: "p1".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::from([("other".into(), serde_json::json!("value"))]), + }], + ) + .await + .unwrap(); + // key_field "text" doesn't exist in payload → point excluded from result + let result = vs.scroll_all("c", "text").await.unwrap(); + assert!( + result.is_empty(), + "points without the key field must not appear in scroll result" + ); + } + + #[tokio::test] + async fn delete_by_ids_empty_and_nonexistent() { + let (vs, _) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + vs.upsert( + "c", + vec![VectorPoint { + id: "a".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::new(), + }], + ) + .await + .unwrap(); + + // Empty list: no-op, must succeed + vs.delete_by_ids("c", vec![]).await.unwrap(); + + // Non-existent id: must succeed (idempotent) + vs.delete_by_ids("c", vec!["nonexistent".into()]) + .await + .unwrap(); + + // Original point still present + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, None) + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "a"); + } + + #[tokio::test] + async fn search_corrupt_blob_skipped() { + let (vs, store) = setup().await; + vs.ensure_collection("c", 4).await.unwrap(); + + // Insert a valid point first + vs.upsert( + "c", + vec![VectorPoint { + id: "valid".into(), + vector: vec![1.0, 0.0, 0.0, 0.0], + payload: HashMap::new(), + }], + ) + .await + .unwrap(); + + // Insert raw invalid bytes directly into vector_points table + // 3 bytes cannot be cast to f32 (needs multiples of 4) + let corrupt_blob: Vec = vec![0xFF, 0xFE, 0xFD]; + let payload_json = r#"{}"#; + sqlx::query( + "INSERT INTO 
vector_points (id, collection, vector, payload) VALUES (?, ?, ?, ?)", + ) + .bind("corrupt") + .bind("c") + .bind(&corrupt_blob) + .bind(payload_json) + .execute(store.pool()) + .await + .unwrap(); + + // Search must not panic and must skip the corrupt point + let results = vs + .search("c", vec![1.0, 0.0, 0.0, 0.0], 10, None) + .await + .unwrap(); + assert_eq!(results.len(), 1); + assert_eq!(results[0].id, "valid"); + } +} diff --git a/crates/zeph-memory/src/vector_store.rs b/crates/zeph-memory/src/vector_store.rs index 6964f68..314de20 100644 --- a/crates/zeph-memory/src/vector_store.rs +++ b/crates/zeph-memory/src/vector_store.rs @@ -92,4 +92,6 @@ pub trait VectorStore: Send + Sync { collection: &str, key_field: &str, ) -> BoxFuture<'_, Result>; + + fn health_check(&self) -> BoxFuture<'_, Result>; } diff --git a/crates/zeph-tui/src/app.rs b/crates/zeph-tui/src/app.rs index 41b221b..ae22173 100644 --- a/crates/zeph-tui/src/app.rs +++ b/crates/zeph-tui/src/app.rs @@ -753,11 +753,16 @@ impl App { self.push_system_message(tools); } TuiCommand::MemoryStats => { + let vector_status = if self.metrics.qdrant_available { + format!("{} (connected)", self.metrics.vector_backend) + } else if !self.metrics.vector_backend.is_empty() { + format!("{} (offline)", self.metrics.vector_backend) + } else { + "none".into() + }; let msg = format!( - "Memory stats:\n SQLite messages: {}\n Qdrant available: {}\n Embeddings generated: {}", - self.metrics.sqlite_message_count, - self.metrics.qdrant_available, - self.metrics.embeddings_generated, + "Memory stats:\n SQLite messages: {}\n Vector store: {vector_status}\n Embeddings generated: {}", + self.metrics.sqlite_message_count, self.metrics.embeddings_generated, ); self.push_system_message(msg); } diff --git a/crates/zeph-tui/src/widgets/memory.rs b/crates/zeph-tui/src/widgets/memory.rs index f035c29..381d747 100644 --- a/crates/zeph-tui/src/widgets/memory.rs +++ b/crates/zeph-tui/src/widgets/memory.rs @@ -9,24 +9,31 @@ use 
crate::theme::Theme; pub fn render(metrics: &MetricsSnapshot, frame: &mut Frame, area: Rect) { let theme = Theme::default(); - let mem_lines = vec![ - Line::from(format!(" SQLite: {} msgs", metrics.sqlite_message_count)), - Line::from(format!( - " Qdrant: {}", - if metrics.qdrant_available { - "connected" - } else { - "---" - } - )), - Line::from(format!( - " Conv ID: {}", - metrics - .sqlite_conversation_id - .map_or_else(|| "---".to_string(), |id| id.to_string()) - )), - Line::from(format!(" Embeddings: {}", metrics.embeddings_generated)), - ]; + let mut mem_lines = vec![Line::from(format!( + " SQLite: {} msgs", + metrics.sqlite_message_count + ))]; + if metrics.qdrant_available { + mem_lines.push(Line::from(format!( + " Vector: {} (connected)", + metrics.vector_backend + ))); + } else if !metrics.vector_backend.is_empty() { + mem_lines.push(Line::from(format!( + " Vector: {} (offline)", + metrics.vector_backend + ))); + } + mem_lines.push(Line::from(format!( + " Conv ID: {}", + metrics + .sqlite_conversation_id + .map_or_else(|| "---".to_string(), |id| id.to_string()) + ))); + mem_lines.push(Line::from(format!( + " Embeddings: {}", + metrics.embeddings_generated + ))); let memory = Paragraph::new(mem_lines).block( Block::default() .borders(Borders::ALL) @@ -48,6 +55,7 @@ mod tests { let mut metrics = MetricsSnapshot::default(); metrics.sqlite_message_count = 42; metrics.qdrant_available = true; + metrics.vector_backend = "qdrant".into(); metrics.embeddings_generated = 10; let output = render_to_string(30, 8, |frame, area| { diff --git a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__memory_with_stats.snap b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__memory_with_stats.snap index 4bda1b4..6970916 100644 --- a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__memory_with_stats.snap +++ b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__memory__tests__memory_with_stats.snap @@ -4,7 
+4,7 @@ expression: output --- ┌ Memory ────────────────────┐ │ SQLite: 42 msgs │ -│ Qdrant: connected │ +│ Vector: qdrant (connected)│ │ Conv ID: --- │ │ Embeddings: 10 │ │ │ diff --git a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__status__tests__status_bar_snapshot.snap b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__status__tests__status_bar_snapshot.snap index f57f394..b54a56a 100644 --- a/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__status__tests__status_bar_snapshot.snap +++ b/crates/zeph-tui/src/widgets/snapshots/zeph_tui__widgets__status__tests__status_bar_snapshot.snap @@ -2,4 +2,4 @@ source: crates/zeph-tui/src/widgets/status.rs expression: output --- - [Insert] | Panel: ON | Skills: 2/5 | Tokens: 4.2k | Qdrant: OK | API: 12 | 2m 15s + [Insert] | Panel: ON | Skills: 2/5 | Tokens: 4.2k | qdrant: OK | API: 12 | 2m 15s diff --git a/crates/zeph-tui/src/widgets/status.rs b/crates/zeph-tui/src/widgets/status.rs index 6c6b434..f60099b 100644 --- a/crates/zeph-tui/src/widgets/status.rs +++ b/crates/zeph-tui/src/widgets/status.rs @@ -15,8 +15,6 @@ pub fn render(app: &App, metrics: &MetricsSnapshot, frame: &mut Frame, area: Rec InputMode::Insert => "Insert", }; - let qdrant = if metrics.qdrant_available { "OK" } else { "--" }; - let uptime = format_uptime(metrics.uptime_seconds); let panel = if app.show_side_panels() { "ON" } else { "OFF" }; @@ -27,8 +25,14 @@ pub fn render(app: &App, metrics: &MetricsSnapshot, frame: &mut Frame, area: Rec "" }; + let qdrant_segment = if metrics.qdrant_available { + format!(" | {}: OK", metrics.vector_backend) + } else { + String::new() + }; + let text = format!( - " [{mode}] | Panel: {panel} | Skills: {active}/{total} | Tokens: {tok} | Qdrant: {qdrant} | API: {api} | {uptime}{cancel_hint}", + " [{mode}] | Panel: {panel} | Skills: {active}/{total} | Tokens: {tok}{qdrant_segment} | API: {api} | {uptime}{cancel_hint}", active = metrics.active_skills.len(), total = metrics.total_skills, tok = 
format_tokens(metrics.total_tokens), @@ -108,6 +112,7 @@ mod tests { metrics.active_skills = vec!["web".into(), "code".into()]; metrics.total_skills = 5; metrics.qdrant_available = true; + metrics.vector_backend = "qdrant".into(); metrics.uptime_seconds = 135; let output = render_to_string(100, 1, |frame, area| { diff --git a/docs/src/architecture/token-efficiency.md b/docs/src/architecture/token-efficiency.md index 8b09f02..009fb11 100644 --- a/docs/src/architecture/token-efficiency.md +++ b/docs/src/architecture/token-efficiency.md @@ -79,6 +79,10 @@ After each filtered execution, CLI mode prints a one-line stats summary and TUI These feed into the [TUI filter metrics display](../advanced/tui.md#filter-metrics) and are emitted as `tracing::debug!` every 50 commands. +### Token Estimation + +Zeph estimates token counts using a `chars / 4` heuristic instead of the naive `bytes / 3` approach. Character-based estimation handles multi-byte scripts (Cyrillic, CJK, Arabic) more accurately, preventing budget overallocation for non-ASCII content. The `token_safety_margin` config multiplier (default: 1.0) allows tuning the estimate conservatively when precision matters. + ### Two-Tier Context Pruning Long conversations accumulate tool outputs that consume significant context space. Zeph uses a two-tier strategy: Tier 1 selectively prunes old tool outputs (cheap, no LLM call), and Tier 2 falls back to full LLM compaction only when Tier 1 is insufficient. See [Context Engineering](../advanced/context.md) for details. diff --git a/docs/src/concepts/memory.md b/docs/src/concepts/memory.md index 836b9ae..1f3885a 100644 --- a/docs/src/concepts/memory.md +++ b/docs/src/concepts/memory.md @@ -1,6 +1,6 @@ # Memory and Context -Zeph uses a dual-store memory system: SQLite for structured conversation history and Qdrant for semantic search across past sessions. 
+Zeph uses a dual-store memory system: SQLite for structured conversation history and a configurable vector backend (Qdrant or embedded SQLite) for semantic search across past sessions. ## Conversation History @@ -10,13 +10,34 @@ When conversations grow long, Zeph generates summaries automatically (triggered ## Semantic Memory -With Qdrant enabled, messages are embedded as vectors for semantic search. Ask "what did we discuss about the API yesterday?" and Zeph retrieves relevant context from past sessions automatically. +With semantic memory enabled, messages are embedded as vectors for similarity search. Ask "what did we discuss about the API yesterday?" and Zeph retrieves relevant context from past sessions automatically. -Semantic memory uses hybrid search — vector similarity combined with SQLite FTS5 keyword search — to improve recall quality. When Qdrant is unavailable, Zeph falls back to keyword-only search. +Two vector backends are available: -Setup requires a running Qdrant instance and a config change: +| Backend | Use case | Dependency | +|---------|----------|------------| +| `qdrant` (default) | Production, large datasets | External Qdrant server | +| `sqlite` | Development, single-user, offline | None (embedded) | + +Semantic memory uses hybrid search — vector similarity combined with SQLite FTS5 keyword search — to improve recall quality. When the vector backend is unavailable, Zeph falls back to keyword-only search. + +Setup with embedded SQLite vectors (no external dependencies): ```toml +[memory] +vector_backend = "sqlite" + +[memory.semantic] +enabled = true +recall_limit = 5 +``` + +For Qdrant (production): + +```toml +[memory] +vector_backend = "qdrant" # default + [memory.semantic] enabled = true recall_limit = 5 @@ -49,6 +70,10 @@ Drop a `ZEPH.md` file in your project root and Zeph discovers it automatically. The `Embeddable` trait provides a generic interface for any type that can be embedded in Qdrant. 
It requires `id()`, `content_for_embedding()`, `content_hash()`, and `to_payload()` methods. `EmbeddingRegistry` is a generic sync/search engine that delta-syncs items by BLAKE3 content hash and performs cosine similarity search. This pattern is used internally by skill matching, MCP tool registry, and code indexing. +## Credential Scrubbing + +When `memory.redact_credentials` is enabled (default: `true`), Zeph scrubs credential patterns from message content before sending it to the LLM context pipeline. This prevents accidental leakage of API keys, tokens, and passwords stored in conversation history. The scrubbing runs via `scrub_content()` in the context builder and covers the same patterns as the output redaction system (see [Security — Secret Redaction](../reference/security.md#secret-redaction)). + ## Deep Dives - [Set Up Semantic Memory](../guides/semantic-memory.md) — Qdrant setup guide diff --git a/docs/src/guides/semantic-memory.md b/docs/src/guides/semantic-memory.md index a834a65..a641d53 100644 --- a/docs/src/guides/semantic-memory.md +++ b/docs/src/guides/semantic-memory.md @@ -4,7 +4,33 @@ Enable semantic search to retrieve contextually relevant messages from conversat Requires an embedding model. Ollama with `qwen3-embedding` is the default. Claude API does not support embeddings natively — use the [orchestrator](../advanced/orchestrator.md) to route embeddings through Ollama while using Claude for chat. -## Setup +## Vector Backend + +Zeph supports two vector backends for storing embeddings: + +| Backend | Best for | External dependencies | +|---------|----------|----------------------| +| `qdrant` (default) | Production, multi-user, large datasets | Qdrant server | +| `sqlite` | Development, single-user, offline, quick setup | None | + +The `sqlite` backend stores vectors in the same SQLite database as conversation history and performs cosine similarity search in-process. 
It requires no external services, making it ideal for local development and single-user deployments. + +## Setup with SQLite Backend (Quickstart) + +No external services needed: + +```toml +[memory] +vector_backend = "sqlite" + +[memory.semantic] +enabled = true +recall_limit = 5 +``` + +The vector tables are created automatically via migration `011_vector_store.sql`. + +## Setup with Qdrant Backend 1. **Start Qdrant:** @@ -15,6 +41,9 @@ Requires an embedding model. Ollama with `qwen3-embedding` is the default. Claud 2. **Enable semantic memory in config:** ```toml + [memory] + vector_backend = "qdrant" # default, can be omitted + [memory.semantic] enabled = true recall_limit = 5 @@ -49,6 +78,6 @@ When Qdrant is unavailable, only keyword search runs (effectively `keyword_weigh | Store | Purpose | |-------|---------| | SQLite | Source of truth for message text, conversations, summaries, skill usage | -| Qdrant | Vector index for semantic similarity search (embeddings only) | +| Qdrant or SQLite vectors | Vector index for semantic similarity search (embeddings only) | -Both stores work together: SQLite holds the data, Qdrant enables vector search over it. The `embeddings_metadata` table in SQLite maps message IDs to Qdrant point IDs. +Both stores work together: SQLite holds the data, the vector backend enables similarity search over it. With the Qdrant backend, the `embeddings_metadata` table in SQLite maps message IDs to Qdrant point IDs. With the SQLite backend, vectors are stored directly in `vector_points` and `vector_point_payloads` tables. 
diff --git a/docs/src/reference/configuration.md b/docs/src/reference/configuration.md index f2e4fe8..894bc53 100644 --- a/docs/src/reference/configuration.md +++ b/docs/src/reference/configuration.md @@ -47,7 +47,7 @@ Zeph watches the config file for changes and applies runtime-safe fields without | `[agent]` | `max_tool_iterations` | | `[skills]` | `max_active_skills` | -**Not reloadable** (require restart): LLM provider/model, SQLite path, Qdrant URL, Telegram token, MCP servers, A2A config, skill paths. +**Not reloadable** (require restart): LLM provider/model, SQLite path, Qdrant URL, vector backend, Telegram token, MCP servers, A2A config, skill paths. ## Configuration File @@ -97,6 +97,9 @@ compaction_threshold = 0.75 # Compact when context usage exceeds this fractio compaction_preserve_tail = 4 # Keep last N messages during compaction prune_protect_tokens = 40000 # Protect recent N tokens from tool output pruning cross_session_score_threshold = 0.35 # Minimum relevance for cross-session results +vector_backend = "qdrant" # Vector store: "qdrant" (default) or "sqlite" (embedded) +token_safety_margin = 1.0 # Multiplier for token budget safety margin (default: 1.0) +redact_credentials = true # Scrub credential patterns from LLM context (default: true) [memory.semantic] enabled = false # Enable semantic search via Qdrant @@ -241,7 +244,10 @@ Field resolution: per-provider value → parent section (`[llm]`, `[llm.cloud]`) | `ZEPH_MEMORY_COMPACTION_PRESERVE_TAIL` | Messages preserved during compaction (default: 4) | | `ZEPH_MEMORY_PRUNE_PROTECT_TOKENS` | Tokens protected from Tier 1 tool output pruning (default: 40000) | | `ZEPH_MEMORY_CROSS_SESSION_SCORE_THRESHOLD` | Minimum relevance score for cross-session memory (default: 0.35) | -| `ZEPH_MEMORY_SEMANTIC_ENABLED` | Enable semantic memory with Qdrant (default: false) | +| `ZEPH_MEMORY_VECTOR_BACKEND` | Vector backend: `qdrant` or `sqlite` (default: `qdrant`) | +| `ZEPH_MEMORY_TOKEN_SAFETY_MARGIN` | Token budget 
safety margin multiplier (default: 1.0) |
+| `ZEPH_MEMORY_REDACT_CREDENTIALS` | Scrub credentials from LLM context (default: true) |
+| `ZEPH_MEMORY_SEMANTIC_ENABLED` | Enable semantic memory (default: false) |
 | `ZEPH_MEMORY_RECALL_LIMIT` | Max semantically relevant messages to recall (default: 5) |
 | `ZEPH_SKILLS_MAX_ACTIVE` | Max skills per query via embedding match (default: 5) |
 | `ZEPH_AGENT_MAX_TOOL_ITERATIONS` | Max tool loop iterations per response (default: 10) |
diff --git a/docs/src/reference/security.md b/docs/src/reference/security.md
index 44c0c57..241dc7d 100644
--- a/docs/src/reference/security.md
+++ b/docs/src/reference/security.md
@@ -169,6 +169,17 @@ LLM responses are scanned for secret patterns using compiled regexes before disp
 - Regex-based matching replaces detected secrets with `[REDACTED]`, preserving original whitespace formatting
 - Enabled by default (`security.redact_secrets = true`), applied to both streaming and non-streaming responses
 
+## Credential Scrubbing in Context
+
+In addition to output redaction, Zeph scrubs credential patterns from conversation history **before** injecting it into the LLM context window. The `scrub_content()` function in the context builder detects the same secret patterns and replaces the matched secrets with `[REDACTED]`. This prevents credentials that appeared in past messages from leaking into future LLM prompts.
+
+```toml
+[memory]
+redact_credentials = true  # default: true
+```
+
+This is independent of `security.redact_secrets` — output redaction sanitizes LLM *responses*, while credential scrubbing sanitizes LLM *inputs* from stored history. 
+ ## Config Validation `Config::validate()` enforces upper bounds at startup to catch configuration errors early: diff --git a/src/main.rs b/src/main.rs index 0703138..e67b2ef 100644 --- a/src/main.rs +++ b/src/main.rs @@ -490,6 +490,7 @@ async fn main() -> anyhow::Result<()> { ) .with_shutdown(shutdown_rx.clone()) .with_security(config.security, config.timeouts) + .with_redact_credentials(config.memory.redact_credentials) .with_tool_summarization(config.tools.summarize_output) .with_permission_policy(permission_policy.clone()) .with_config_reload(config_path, config_reload_rx) @@ -664,6 +665,9 @@ async fn main() -> anyhow::Result<()> { }; let mut agent = agent; + agent + .check_vector_store_health(config.memory.vector_backend.as_str()) + .await; // Double Ctrl+C: first cancels current operation, second within 2s shuts down let cancel_signal = agent.cancel_signal(); @@ -1464,6 +1468,7 @@ async fn run_daemon( ) .with_shutdown(shutdown_rx.clone()) .with_security(config.security, config.timeouts) + .with_redact_credentials(config.memory.redact_credentials) .with_tool_summarization(config.tools.summarize_output) .with_permission_policy(permission_policy) .with_config_reload(config_path_owned, config_reload_rx) @@ -1493,6 +1498,9 @@ async fn run_daemon( }; agent.load_history().await?; + agent + .check_vector_store_health(config.memory.vector_backend.as_str()) + .await; spawn_a2a_server(config, shutdown_rx.clone(), loopback_handle);