diff --git a/crates/goose/src/security/mod.rs b/crates/goose/src/security/mod.rs index 780a0a45c45c..88b5d21487f1 100644 --- a/crates/goose/src/security/mod.rs +++ b/crates/goose/src/security/mod.rs @@ -43,9 +43,15 @@ impl SecurityManager { fn is_ml_scanning_enabled(&self) -> bool { let config = Config::global(); - config + let prompt_enabled = config .get_param::<bool>("SECURITY_PROMPT_CLASSIFIER_ENABLED") - .unwrap_or(false) + .unwrap_or(false); + + let command_enabled = config + .get_param::<bool>("SECURITY_COMMAND_CLASSIFIER_ENABLED") + .unwrap_or(false); + + prompt_enabled || command_enabled } pub async fn analyze_tool_requests( diff --git a/crates/goose/src/security/scanner.rs b/crates/goose/src/security/scanner.rs index 2cb7612b23c5..dca32e1ed323 100644 --- a/crates/goose/src/security/scanner.rs +++ b/crates/goose/src/security/scanner.rs @@ -149,18 +149,19 @@ impl PromptInjectionScanner { tracing::info!( "Classifier Results - Command: {:.3}, Prompt: {:.3}, Threshold: {:.3}", tool_result.confidence, - context_result.confidence, + context_result.ml_confidence.unwrap_or(0.0), threshold ); let final_confidence = - self.combine_confidences(tool_result.confidence, context_result.confidence); + self.combine_confidences(tool_result.confidence, context_result.ml_confidence); tracing::info!( tool_confidence = %tool_result.confidence, - context_confidence = %context_result.confidence, + context_confidence = ?context_result.ml_confidence, final_confidence = %final_confidence, - has_ml = tool_result.ml_confidence.is_some(), + has_command_ml = tool_result.ml_confidence.is_some(), + has_prompt_ml = context_result.ml_confidence.is_some(), has_patterns = !tool_result.pattern_matches.is_empty(), threshold = %threshold, malicious = final_confidence >= threshold, @@ -239,7 +240,11 @@ impl PromptInjectionScanner { }) } - fn combine_confidences(&self, tool_confidence: f32, context_confidence: f32) -> f32 { + fn combine_confidences(&self, tool_confidence: f32, context_confidence: Option<f32>) ->
f32 { + let Some(context_confidence) = context_confidence else { + return tool_confidence; + }; + // If tool is safe, context is not taken into account if tool_confidence < 0.3 { return tool_confidence; diff --git a/ui/desktop/src/components/settings/security/SecurityToggle.tsx b/ui/desktop/src/components/settings/security/SecurityToggle.tsx index 42444ec31caf..58858f465f37 100644 --- a/ui/desktop/src/components/settings/security/SecurityToggle.tsx +++ b/ui/desktop/src/components/settings/security/SecurityToggle.tsx @@ -271,17 +271,73 @@ export const SecurityToggle = () => { /> - {/* ML Detection Toggle */} + {/* Command Injection Detection Toggle */}

- Enable ML-Based Detection + Enable Command Injection ML Detection +

+

+ Use ML models to detect malicious shell commands +

+
+
+ +
+
+ + {hasCommandModel ? ( + enabled && + effectiveCommandClassifierEnabled && ( +
+ ✓ Command classifier active (auto-configured from environment) +
+ ) + ) : ( +
+
+ +
+
+ )} +
+ + {/* Prompt Injection Detection Toggle */} +
+
+
+

+ Enable Prompt Injection ML Detection

- Use machine learning models for more accurate detection + Use ML models to detect potential prompt injection in your chat

@@ -348,61 +404,6 @@ export const SecurityToggle = () => {
- -
-
-
-

- Enable Command Injection ML Detection -

-

- Use ML models to detect malicious shell commands -

-
-
- -
-
- - {hasCommandModel ? ( - enabled && - effectiveCommandClassifierEnabled && ( -
- ✓ Command classifier active (auto-configured from environment) -
- ) - ) : ( -
-
- -
-
- )} -