Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).

## [Unreleased]

### Added
- SQLite-backed `SqliteVectorStore` as an embedded alternative to Qdrant for zero-dependency vector search (#741)
- `vector_backend` config option to select between `qdrant` and `sqlite` vector backends
- Credential scrubbing in LLM context pipeline via `scrub_content()` — redacts secrets and paths before LLM calls (#743)
- `redact_credentials` config option (default: true) to toggle context scrubbing

### Changed
- Token estimation uses `chars/4` heuristic instead of `bytes/3` for better accuracy on multi-byte text (#742)

## [0.11.5] - 2026-02-22

### Added
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ anyhow = "1.0"
axum = "0.8"
base64 = "0.22"
blake3 = "1.8"
bytemuck = "1.25"
candle-core = { version = "0.9", default-features = false }
candle-nn = { version = "0.9", default-features = false }
candle-transformers = { version = "0.9", default-features = false }
Expand Down Expand Up @@ -182,3 +183,11 @@ zeph-skills.workspace = true

[lints]
workspace = true

[profile.release]
lto = true
codegen-units = 1
strip = true

[profile.bench]
debug = true
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ zeph --tui # run with TUI dashboard
|---|---|
| **Hybrid inference** | Ollama, Claude, OpenAI, Candle (GGUF), any OpenAI-compatible API. Multi-model orchestrator with fallback chains |
| **Skills-first architecture** | YAML+Markdown skill files with semantic matching, self-learning evolution, and 4-tier trust model |
| **Semantic memory** | SQLite + Qdrant with summarization, cross-session recall, and vector retrieval |
| **Semantic memory** | SQLite + Qdrant (or embedded SQLite vector search) with summarization, credential scrubbing, cross-session recall, and vector retrieval |
| **Multi-channel I/O** | CLI, Telegram, Discord, Slack, TUI — all with streaming. Vision and speech-to-text input |
| **Protocols** | MCP client (stdio + HTTP), A2A agent-to-agent communication, sub-agent orchestration |
| **Defense-in-depth** | Shell sandbox, tool permissions, secret redaction, SSRF protection, skill trust quarantine, audit logging |
Expand Down
21 changes: 14 additions & 7 deletions crates/zeph-core/src/agent/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,13 @@ impl<C: Channel> Agent<C> {
recall_limit: usize,
summarization_threshold: usize,
) -> Self {
let has_qdrant = memory.has_qdrant();
self.memory_state.memory = Some(memory);
self.memory_state.conversation_id = Some(conversation_id);
self.memory_state.history_limit = history_limit;
self.memory_state.recall_limit = recall_limit;
self.memory_state.summarization_threshold = summarization_threshold;
self.update_metrics(|m| {
m.qdrant_available = has_qdrant;
m.qdrant_available = false;
m.sqlite_conversation_id = Some(conversation_id);
});
self
Expand Down Expand Up @@ -138,6 +137,18 @@ impl<C: Channel> Agent<C> {
self
}

/// Enable or disable credential scrubbing of message content before it is
/// handed to the LLM context pipeline (applied via `scrub_content` during
/// `prepare_context`). Defaults to `true`.
#[must_use]
pub fn with_redact_credentials(mut self, enabled: bool) -> Self {
    self.runtime.redact_credentials = enabled;
    self
}

/// Set the safety multiplier applied to per-message token estimates when
/// deciding whether the context needs compaction (`1.0` = raw estimates,
/// `>1.0` compacts earlier, `0.0` disables compaction).
///
/// The value is clamped to be non-negative: a negative margin would
/// otherwise produce negative per-message counts that only behave like
/// zero through float-to-integer saturation, so the intent is made
/// explicit here instead.
#[must_use]
pub fn with_token_safety_margin(mut self, margin: f32) -> Self {
    // `f32::max` returns the non-NaN operand, so a NaN margin also
    // collapses to 0.0 rather than propagating into token math.
    self.runtime.token_safety_margin = margin.max(0.0);
    self
}

#[must_use]
pub fn with_tool_summarization(mut self, enabled: bool) -> Self {
self.runtime.summarize_tool_output_enabled = enabled;
Expand Down Expand Up @@ -215,11 +226,7 @@ impl<C: Channel> Agent<C> {
let provider_name = self.provider.name().to_string();
let model_name = self.runtime.model_name.clone();
let total_skills = self.skill_state.registry.all_meta().len();
let qdrant_available = self
.memory_state
.memory
.as_ref()
.is_some_and(zeph_memory::semantic::SemanticMemory::has_qdrant);
let qdrant_available = false;
let conversation_id = self.memory_state.conversation_id;
let prompt_estimate = self
.messages
Expand Down
140 changes: 139 additions & 1 deletion crates/zeph-core/src/agent/context.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::borrow::Cow;
use std::fmt::Write;

use zeph_llm::provider::MessagePart;
Expand All @@ -6,6 +7,8 @@ use zeph_skills::ScoredMatch;
use zeph_skills::loader::SkillMeta;
use zeph_skills::prompt::format_skills_catalog;

use crate::redact::scrub_content;

use super::{
Agent, CODE_CONTEXT_PREFIX, CROSS_SESSION_PREFIX, Channel, ContextBudget, EnvironmentContext,
LlmProvider, Message, RECALL_PREFIX, Role, SUMMARY_PREFIX, Skill, build_system_prompt,
Expand All @@ -22,10 +25,11 @@ impl<C: Channel> Agent<C> {
let Some(ref budget) = self.context_state.budget else {
return false;
};
let margin = self.runtime.token_safety_margin;
let total_tokens: usize = self
.messages
.iter()
.map(|m| estimate_tokens(&m.content))
.map(|m| (estimate_tokens(&m.content) as f64 * f64::from(margin)) as usize)
.sum();
let threshold =
(budget.max_tokens() as f32 * self.context_state.compaction_threshold) as usize;
Expand Down Expand Up @@ -644,6 +648,15 @@ impl<C: Channel> Agent<C> {
}

self.trim_messages_to_budget(alloc.recent_history);

if self.runtime.redact_credentials {
for msg in &mut self.messages {
if let Cow::Owned(s) = scrub_content(&msg.content) {
msg.content = s;
}
}
}

self.recompute_prompt_tokens();
let _ = self.channel.send_status("").await;

Expand Down Expand Up @@ -1789,6 +1802,131 @@ mod tests {
}
}

#[tokio::test]
async fn test_prepare_context_scrubs_secrets_when_redact_enabled() {
    // Agent with redaction enabled and a context budget so prepare_context
    // runs the full pipeline, including the scrub pass.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    )
    .with_context_budget(4096, 0.20, 0.80, 4, 0)
    .with_redact_credentials(true);

    // One user message carrying both an API-key-shaped secret and a path.
    agent.messages.push(Message {
        role: Role::User,
        content: "my key is sk-abc123xyz and lives at /Users/dev/config.toml".into(),
        parts: vec![],
    });

    agent.prepare_context("test").await.unwrap();

    let scrubbed = agent
        .messages
        .iter()
        .find(|m| m.role == Role::User)
        .unwrap();

    // The raw values are gone and the placeholder tokens took their place.
    assert!(
        !scrubbed.content.contains("sk-abc123xyz"),
        "secret must be redacted"
    );
    assert!(
        !scrubbed.content.contains("/Users/dev/"),
        "path must be redacted"
    );
    assert!(
        scrubbed.content.contains("[REDACTED]"),
        "secret replaced with [REDACTED]"
    );
    assert!(
        scrubbed.content.contains("[PATH]"),
        "path replaced with [PATH]"
    );
}

#[tokio::test]
async fn test_prepare_context_no_scrub_when_redact_disabled() {
    // Same pipeline as the scrubbing test, but with redaction switched off.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    )
    .with_context_budget(4096, 0.20, 0.80, 4, 0)
    .with_redact_credentials(false);

    // Keep a copy so we can prove the message survives byte-for-byte.
    let original = "key sk-abc123xyz at /Users/dev/file.rs".to_string();
    agent.messages.push(Message {
        role: Role::User,
        content: original.clone(),
        parts: vec![],
    });

    agent.prepare_context("test").await.unwrap();

    let untouched = agent
        .messages
        .iter()
        .find(|m| m.role == Role::User)
        .unwrap();
    assert_eq!(
        untouched.content, original,
        "content must be unchanged when redact disabled"
    );
}

#[test]
fn token_safety_margin_above_one_inflates_token_count() {
    // A huge margin multiplies every per-message token estimate, so a
    // handful of short messages is enough to cross the compaction
    // threshold of a 1000-token budget.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    )
    .with_context_budget(1000, 0.20, 0.75, 4, 0)
    .with_token_safety_margin(100.0);

    agent.messages.extend((0..5).map(|i| Message {
        role: Role::User,
        content: format!("message {i} with content"),
        parts: vec![],
    }));

    assert!(
        agent.should_compact(),
        "large margin must trigger compaction even with few messages"
    );
}

#[test]
fn token_safety_margin_zero_never_compacts() {
    // With margin 0.0 every per-message estimate collapses to zero, so
    // even 50 long messages against a tiny 10-token budget never reach
    // the compaction threshold.
    let mut agent = Agent::new(
        mock_provider(vec![]),
        MockChannel::new(vec![]),
        create_test_registry(),
        None,
        5,
        MockToolExecutor::no_tools(),
    )
    .with_context_budget(10, 0.20, 0.75, 4, 0)
    .with_token_safety_margin(0.0);

    agent.messages.extend((0..50).map(|i| Message {
        role: Role::User,
        content: format!(
            "very long message content {i} repeated many times to fill context"
        ),
        parts: vec![],
    }));

    assert!(
        !agent.should_compact(),
        "margin=0.0 means zero token counts, must never compact"
    );
}

#[tokio::test]
async fn disambiguate_skills_reorders_on_match() {
let json = r#"{"skill_name":"beta_skill","confidence":0.9,"params":{}}"#;
Expand Down
6 changes: 6 additions & 0 deletions crates/zeph-core/src/agent/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ pub(super) struct RuntimeConfig {
pub(super) max_tool_iterations: usize,
pub(super) summarize_tool_output_enabled: bool,
pub(super) permission_policy: zeph_tools::PermissionPolicy,
pub(super) redact_credentials: bool,
pub(super) token_safety_margin: f32,
}

pub struct Agent<C: Channel> {
Expand Down Expand Up @@ -227,6 +229,8 @@ impl<C: Channel> Agent<C> {
max_tool_iterations: 10,
summarize_tool_output_enabled: false,
permission_policy: zeph_tools::PermissionPolicy::default(),
redact_credentials: true,
token_safety_margin: 1.0,
},
learning_config: None,
reflection_used: false,
Expand Down Expand Up @@ -931,6 +935,8 @@ impl<C: Channel> Agent<C> {

self.runtime.security = config.security;
self.runtime.timeouts = config.timeouts;
self.runtime.redact_credentials = config.memory.redact_credentials;
self.runtime.token_safety_margin = config.memory.token_safety_margin;
self.memory_state.history_limit = config.memory.history_limit;
self.memory_state.recall_limit = config.memory.semantic.recall_limit;
self.memory_state.summarization_threshold = config.memory.summarization_threshold;
Expand Down
13 changes: 13 additions & 0 deletions crates/zeph-core/src/agent/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ use crate::channel::Channel;
use crate::metrics::MetricsSnapshot;

impl<C: Channel> Agent<C> {
/// Probe the configured vector store for connectivity and record the
/// result (plus the backend name) in the metrics snapshot.
pub async fn check_vector_store_health(&self, backend_name: &str) {
    // No memory configured means no vector store to reach.
    let mut connected = false;
    if let Some(mem) = self.memory_state.memory.as_ref() {
        connected = mem.is_vector_store_connected().await;
    }
    let backend = backend_name.to_owned();
    self.update_metrics(move |m| {
        m.qdrant_available = connected;
        m.vector_backend = backend;
    });
}

pub(super) fn update_metrics(&self, f: impl FnOnce(&mut MetricsSnapshot)) {
if let Some(ref tx) = self.metrics_tx {
let elapsed = self.start_time.elapsed().as_secs();
Expand Down
38 changes: 26 additions & 12 deletions crates/zeph-core/src/bootstrap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,18 +148,32 @@ impl AppBuilder {

pub async fn build_memory(&self, provider: &AnyProvider) -> anyhow::Result<SemanticMemory> {
let embed_model = self.embedding_model();
let memory = SemanticMemory::with_weights(
&self.config.memory.sqlite_path,
&self.config.memory.qdrant_url,
provider.clone(),
&embed_model,
self.config.memory.semantic.vector_weight,
self.config.memory.semantic.keyword_weight,
)
.await?;
let memory = match self.config.memory.vector_backend {
crate::config::VectorBackend::Sqlite => {
SemanticMemory::with_sqlite_backend(
&self.config.memory.sqlite_path,
provider.clone(),
&embed_model,
self.config.memory.semantic.vector_weight,
self.config.memory.semantic.keyword_weight,
)
.await?
}
crate::config::VectorBackend::Qdrant => {
SemanticMemory::with_weights(
&self.config.memory.sqlite_path,
&self.config.memory.qdrant_url,
provider.clone(),
&embed_model,
self.config.memory.semantic.vector_weight,
self.config.memory.semantic.keyword_weight,
)
.await?
}
};

if self.config.memory.semantic.enabled && memory.has_qdrant() {
tracing::info!("semantic memory enabled, Qdrant connected");
if self.config.memory.semantic.enabled && memory.is_vector_store_connected().await {
tracing::info!("semantic memory enabled, vector store connected");
match memory.embed_missing().await {
Ok(n) if n > 0 => tracing::info!("backfilled {n} missing embedding(s)"),
Ok(_) => {}
Expand Down Expand Up @@ -427,7 +441,7 @@ pub async fn create_skill_matcher(
) -> Option<SkillMatcherBackend> {
let embed_fn = provider.embed_fn();

if config.memory.semantic.enabled && memory.has_qdrant() {
if config.memory.semantic.enabled && memory.is_vector_store_connected().await {
match QdrantSkillMatcher::new(&config.memory.qdrant_url) {
Ok(mut qm) => match qm.sync(meta, embedding_model, &embed_fn).await {
Ok(_) => return Some(SkillMatcherBackend::Qdrant(qm)),
Expand Down
11 changes: 11 additions & 0 deletions crates/zeph-core/src/config/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ impl Config {
{
self.memory.prune_protect_tokens = tokens;
}
if let Ok(v) = std::env::var("ZEPH_MEMORY_VECTOR_BACKEND") {
match v.to_lowercase().as_str() {
"sqlite" => {
self.memory.vector_backend = super::VectorBackend::Sqlite;
}
"qdrant" => {
self.memory.vector_backend = super::VectorBackend::Qdrant;
}
_ => {}
}
}
if let Ok(v) = std::env::var("ZEPH_SKILLS_MAX_ACTIVE")
&& let Ok(n) = v.parse::<usize>()
{
Expand Down
Loading
Loading