diff --git a/CHANGELOG.md b/CHANGELOG.md
index 09bbbcb..b3de69a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
## [Unreleased]
+### Added
+- `validate_skill_references(body, skill_dir)` in zeph-skills loader: parses Markdown links targeting `references/`, `scripts/`, or `assets/` subdirs, warns on missing files and symlink traversal attempts (#689)
+- `sanitize_skill_body(body)` in zeph-skills prompt: escapes XML structural tags (``, ``, ``) to prevent prompt injection (#689)
+- Body sanitization applied automatically to all non-`Trusted` skills in `format_skills_prompt()` (#689)
+
### Changed
- `allowed-tools` SKILL.md field now uses space-separated values per agentskills.io spec (was comma-separated) — **breaking change** for skills using comma-delimited allowed-tools (#686)
diff --git a/crates/zeph-skills/src/loader.rs b/crates/zeph-skills/src/loader.rs
index 307936b..dbabc2a 100644
--- a/crates/zeph-skills/src/loader.rs
+++ b/crates/zeph-skills/src/loader.rs
@@ -286,6 +286,43 @@ pub fn load_skill_body(meta: &SkillMeta) -> Result {
Ok(body.to_string())
}
+/// Parse Markdown link targets from skill body and warn about broken or out-of-bounds references.
+///
+/// Checks links whose targets start with `references/`, `scripts/`, or `assets/`.
+/// Missing files or paths escaping `skill_dir` are returned as warning strings.
+/// This does not block skill loading.
+#[must_use]
+pub fn validate_skill_references(body: &str, skill_dir: &Path) -> Vec {
+ let mut warnings = Vec::new();
+ // Match ](references/...), ](scripts/...), ](assets/...)
+ let mut rest = body;
+ while let Some(open) = rest.find("](") {
+ rest = &rest[open + 2..];
+ let Some(close) = rest.find(')') else {
+ break;
+ };
+ let target = &rest[..close];
+ rest = &rest[close + 1..];
+
+ if !target.starts_with("references/")
+ && !target.starts_with("scripts/")
+ && !target.starts_with("assets/")
+ {
+ continue;
+ }
+
+ let full = skill_dir.join(target);
+ if !full.exists() {
+ warnings.push(format!("broken reference: {target} does not exist"));
+ continue;
+ }
+ if let Err(e) = validate_path_within(&full, skill_dir) {
+ warnings.push(format!("unsafe reference {target}: {e}"));
+ }
+ }
+ warnings
+}
+
/// Load a skill from a SKILL.md file with YAML frontmatter.
///
/// # Errors
@@ -294,6 +331,11 @@ pub fn load_skill_body(meta: &SkillMeta) -> Result {
pub fn load_skill(path: &Path) -> Result {
let meta = load_skill_meta(path)?;
let body = load_skill_body(&meta)?;
+
+ for warning in validate_skill_references(&body, &meta.skill_dir) {
+ tracing::warn!(skill = %meta.name, "{warning}");
+ }
+
Ok(Skill { meta, body })
}
@@ -643,6 +685,90 @@ mod tests {
assert_eq!(meta.requires_secrets, vec!["key_a", "key_b"]);
}
+ #[test]
+ fn validate_references_valid() {
+ let dir = tempfile::tempdir().unwrap();
+ let refs = dir.path().join("references");
+ std::fs::create_dir_all(&refs).unwrap();
+ std::fs::write(refs.join("api.md"), "api docs").unwrap();
+
+ let body = "Use [api docs](references/api.md) for details.";
+ let warnings = validate_skill_references(body, dir.path());
+ assert!(
+ warnings.is_empty(),
+ "expected no warnings, got: {warnings:?}"
+ );
+ }
+
+ #[test]
+ fn validate_references_broken_link() {
+ let dir = tempfile::tempdir().unwrap();
+ let body = "See [missing](references/missing.md).";
+ let warnings = validate_skill_references(body, dir.path());
+ assert_eq!(warnings.len(), 1);
+ assert!(warnings[0].contains("broken reference"));
+ assert!(warnings[0].contains("references/missing.md"));
+ }
+
+ #[test]
+ fn validate_references_multiple_links_on_one_line() {
+ let dir = tempfile::tempdir().unwrap();
+ let refs = dir.path().join("references");
+ std::fs::create_dir_all(&refs).unwrap();
+ std::fs::write(refs.join("a.md"), "a").unwrap();
+ // b.md does not exist
+
+ let body = "See [a](references/a.md) and [b](references/b.md) on the same line.";
+ let warnings = validate_skill_references(body, dir.path());
+ assert_eq!(warnings.len(), 1);
+ assert!(warnings[0].contains("references/b.md"));
+ }
+
+ #[test]
+ fn validate_references_ignores_external_links() {
+ let dir = tempfile::tempdir().unwrap();
+ let body = "See [external](https://example.com) and [local](docs/guide.md).";
+ let warnings = validate_skill_references(body, dir.path());
+ assert!(warnings.is_empty());
+ }
+
+ #[test]
+ fn validate_references_scripts_and_assets() {
+ let dir = tempfile::tempdir().unwrap();
+ // scripts/run.sh exists, assets/logo.png does not
+ let scripts = dir.path().join("scripts");
+ std::fs::create_dir_all(&scripts).unwrap();
+ std::fs::write(scripts.join("run.sh"), "#!/bin/sh").unwrap();
+
+ let body = "Run [script](scripts/run.sh). See [logo](assets/logo.png).";
+ let warnings = validate_skill_references(body, dir.path());
+ assert_eq!(warnings.len(), 1);
+ assert!(warnings[0].contains("assets/logo.png"));
+ }
+
+ #[test]
+ #[cfg(unix)]
+ fn validate_references_rejects_traversal() {
+ let base = tempfile::tempdir().unwrap();
+ let outside = tempfile::tempdir().unwrap();
+ let outside_file = outside.path().join("secret.txt");
+ std::fs::write(&outside_file, "secret").unwrap();
+
+ let refs = base.path().join("references");
+ std::fs::create_dir_all(&refs).unwrap();
+ let link = refs.join("evil.md");
+ std::os::unix::fs::symlink(&outside_file, &link).unwrap();
+
+ let body = "See [evil](references/evil.md).";
+ let warnings = validate_skill_references(body, base.path());
+ assert_eq!(warnings.len(), 1);
+ assert!(
+ warnings[0].contains("unsafe reference"),
+ "expected traversal warning, got: {:?}",
+ warnings[0]
+ );
+ }
+
#[test]
fn requires_secrets_underscores_unchanged() {
let dir = tempfile::tempdir().unwrap();
diff --git a/crates/zeph-skills/src/prompt.rs b/crates/zeph-skills/src/prompt.rs
index e3fa315..5db4c0c 100644
--- a/crates/zeph-skills/src/prompt.rs
+++ b/crates/zeph-skills/src/prompt.rs
@@ -7,6 +7,49 @@ use crate::trust::TrustLevel;
const OS_NAMES: &[&str] = &["linux", "macos", "windows"];
+// XML tag patterns (lowercase) that could break prompt structure if injected verbatim.
+// Matching is case-insensitive; the replacement is always the canonical escaped form.
+const SANITIZE_PATTERNS: &[(&str, &str)] = &[
+ ("", "</skill>"),
+ ("", "</instructions>"),
+ ("", "</available_skills>"),
+ (" String {
+ let lower = src.to_ascii_lowercase();
+ let mut out = String::with_capacity(src.len());
+ let mut pos = 0;
+ while pos < src.len() {
+ if lower[pos..].starts_with(pattern) {
+ out.push_str(replacement);
+ pos += pattern.len();
+ } else {
+ // Safety: pos is always at a char boundary because ascii_lowercase preserves boundaries
+ let ch = src[pos..].chars().next().unwrap();
+ out.push(ch);
+ pos += ch.len_utf8();
+ }
+ }
+ out
+}
+
+/// Escape XML tags that could break prompt structure when emitted verbatim.
+///
+/// Matching is case-insensitive so mixed-case variants like `` are also escaped.
+/// Applied only to untrusted (non-`Trusted`) skill bodies before prompt injection.
+#[must_use]
+pub fn sanitize_skill_body(body: &str) -> String {
+ let mut out = body.to_string();
+ for (pattern, replacement) in SANITIZE_PATTERNS {
+ out = replace_case_insensitive(&out, pattern, replacement);
+ }
+ out
+}
+
fn should_include_reference(filename: &str, os_family: &str) -> bool {
let stem = filename.strip_suffix(".md").unwrap_or(filename);
if OS_NAMES.contains(&stem) {
@@ -33,10 +76,15 @@ pub fn format_skills_prompt(
.get(skill.name())
.copied()
.unwrap_or(TrustLevel::Trusted);
+ let raw_body = if trust == TrustLevel::Trusted {
+ skill.body.clone()
+ } else {
+ sanitize_skill_body(&skill.body)
+ };
let body = if trust == TrustLevel::Quarantined {
- wrap_quarantined(skill.name(), &skill.body)
+ wrap_quarantined(skill.name(), &raw_body)
} else {
- skill.body.clone()
+ raw_body
};
let _ = write!(
out,
@@ -245,6 +293,79 @@ mod tests {
assert!(output.contains("do stuff"));
}
+ #[test]
+ fn sanitize_case_insensitive() {
+ // Mixed-case variants must be escaped
+ let body = "Close and and .";
+ let sanitized = sanitize_skill_body(body);
+ assert!(!sanitized.contains(""));
+ assert!(!sanitized.contains(""));
+ assert!(!sanitized.contains(""));
+ assert!(sanitized.contains("</skill>"));
+ assert!(sanitized.contains("</instructions>"));
+ assert!(sanitized.contains("</available_skills>"));
+ }
+
+ #[test]
+ fn sanitize_escapes_xml_tags() {
+ let body = "Do not close or tags.";
+ let sanitized = sanitize_skill_body(body);
+ assert!(!sanitized.contains(""));
+ assert!(!sanitized.contains(""));
+ assert!(sanitized.contains("</skill>"));
+ assert!(sanitized.contains("</instructions>"));
+ }
+
+ #[test]
+ fn sanitize_escapes_opening_xml_tags() {
+ let body = "Inject and here.";
+ let sanitized = sanitize_skill_body(body);
+ assert!(!sanitized.contains("") || output.contains("</skill>"));
+ // Specifically, it must NOT have been sanitized (verbatim pass-through)
+ assert!(output.contains("Some content."));
+ }
+
+ #[test]
+ fn verified_skill_is_sanitized() {
+ let body = "Inject here.";
+ let skills = vec![make_skill("ver", "desc", body)];
+ let mut trust = HashMap::new();
+ trust.insert("ver".into(), TrustLevel::Verified);
+ let output = format_skills_prompt(&skills, "linux", &trust);
+ // Escaped tag must appear; raw body text must not appear verbatim
+ assert!(output.contains("</skill>"));
+ assert!(!output.contains("Inject here."));
+ }
+
+ #[test]
+ fn quarantined_skill_is_sanitized_and_wrapped() {
+ let body = "Inject and .";
+ let skills = vec![make_skill("evil", "desc", body)];
+ let mut trust = HashMap::new();
+ trust.insert("evil".into(), TrustLevel::Quarantined);
+ let output = format_skills_prompt(&skills, "linux", &trust);
+ assert!(output.contains("[QUARANTINED SKILL: evil]"));
+ // The injected XML tags must be escaped; the structural ones remain
+ assert!(output.contains("</instructions>"));
+ assert!(output.contains("</skill>"));
+ // Raw injected body should not appear verbatim
+ assert!(!output.contains("Inject "));
+ }
+
#[test]
fn format_skills_catalog_empty() {
let empty: &[Skill] = &[];