
Commit 457cd48

feat: prevent reasoning traces from contaminating LLM prompt history (#1169)
This change addresses an issue where reasoning traces (like <think>...</think>) from previous bot messages were included in the prompt history for subsequent LLM calls when rails.output.apply_to_reasoning_traces=true was enabled. The fix adds event preprocessing in LLMTaskManager that strips reasoning traces from both BotMessage.text and StartUtteranceBotAction.script fields before prompt rendering, preventing the LLM from seeing its own internal reasoning from previous turns.

Comprehensive tests were added to verify that:

1. Preprocessing doesn't modify the original events.
2. Reasoning traces are removed from the prompt history.
3. The cleaned bot messages are still included in the rendered prompt.
1 parent 8215095 commit 457cd48
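The diff below contains the actual implementation, which relies on the repo's configured reasoning tokens and helper functions. As a rough, self-contained sketch of the same idea (hypothetical helper name `strip_reasoning_traces`, hard-coded `<think>`/`</think>` tokens, and a regex in place of the repo's trace-extraction helpers), stripping traces from a copied event list might look like this:

```python
# Minimal sketch, NOT this repo's implementation: strip a reasoning trace
# from bot-message events before they are rendered into the prompt history.
import copy
import re
from typing import List, Optional


def strip_reasoning_traces(
    events: Optional[List[dict]],
    start_token: str = "<think>",   # assumed tokens; the real code reads them from config
    end_token: str = "</think>",
) -> Optional[List[dict]]:
    if not events:
        return None

    # Work on a deep copy so the caller's original events stay untouched.
    processed = copy.deepcopy(events)
    pattern = re.compile(
        re.escape(start_token) + r".*?" + re.escape(end_token), re.DOTALL
    )

    for event in processed:
        if event.get("type") == "BotMessage" and "text" in event:
            event["text"] = pattern.sub("", event["text"]).strip()
        elif event.get("type") == "StartUtteranceBotAction" and "script" in event:
            event["script"] = pattern.sub("", event["script"]).strip()
    return processed


events = [
    {
        "type": "StartUtteranceBotAction",
        "script": "<think>internal reasoning</think>Hi there!",
    }
]
print(strip_reasoning_traces(events)[0]["script"])  # -> "Hi there!"
```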

File tree: 2 files changed, +231 -1 lines changed

nemoguardrails/llm/taskmanager.py

Lines changed: 69 additions & 1 deletion
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import logging
 import re
 from ast import literal_eval
@@ -22,6 +23,7 @@
 from jinja2 import meta
 from jinja2.sandbox import SandboxedEnvironment
 
+from nemoguardrails.actions.llm.utils import get_and_clear_reasoning_trace_contextvar
 from nemoguardrails.llm.filters import (
     co_v2,
     colang,
@@ -154,6 +156,70 @@ def _get_general_instructions(self):
 
         return text
 
+    def _preprocess_events_for_prompt(
+        self, events: Optional[List[dict]]
+    ) -> Optional[List[dict]]:
+        """Remove reasoning traces from bot messages before rendering them in prompts.
+
+        This prevents reasoning traces from being included in LLM prompt history when
+        rails.output.apply_to_reasoning_traces=true is enabled.
+
+        Args:
+            events: The list of events to preprocess
+
+        Returns:
+            A new list of preprocessed events, or None if events was None
+        """
+        if not events:
+            return None
+
+        processed_events = copy.deepcopy(events)
+
+        for event in processed_events:
+            if (
+                isinstance(event, dict)
+                and event.get("type") == "BotMessage"
+                and "text" in event
+            ):
+                bot_utterance = event["text"]
+                for task in Task:
+                    start_token, end_token = get_reasoning_token_tags(self.config, task)
+                    if (
+                        start_token
+                        and end_token
+                        and output_has_reasoning_traces(
+                            bot_utterance, start_token, end_token
+                        )
+                    ):
+                        result = extract_and_strip_trace(
+                            bot_utterance, start_token, end_token
+                        )
+                        event["text"] = result.text
+                        break
+
+            elif (
+                isinstance(event, dict)
+                and event.get("type") == "StartUtteranceBotAction"
+                and "script" in event
+            ):
+                bot_utterance = event["script"]
+                for task in Task:
+                    start_token, end_token = get_reasoning_token_tags(self.config, task)
+                    if (
+                        start_token
+                        and end_token
+                        and output_has_reasoning_traces(
+                            bot_utterance, start_token, end_token
+                        )
+                    ):
+                        result = extract_and_strip_trace(
+                            bot_utterance, start_token, end_token
+                        )
+                        event["script"] = result.text
+                        break
+
+        return processed_events
+
     def _render_string(
         self,
         template_str: str,
@@ -168,6 +234,8 @@ def _render_string(
         :return: The rendered template.
         :rtype: str.
         """
+        # Preprocess events to remove reasoning traces from BotMessage events
+        processed_events = self._preprocess_events_for_prompt(events)
 
         template = self.env.from_string(template_str)
 
@@ -176,7 +244,7 @@ def _render_string(
 
         # This is the context that will be passed to the template when rendering.
         render_context = {
-            "history": events,
+            "history": processed_events,
             "general_instructions": self._get_general_instructions(),
             "sample_conversation": self.config.sample_conversation,
             "sample_conversation_two_turns": self.config.sample_conversation,

tests/test_llm_task_manager.py

Lines changed: 162 additions & 0 deletions
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import textwrap
 
 import pytest
@@ -295,3 +296,164 @@ def test_stop_configuration_parameter():
     # Check if the stop tokens are correctly set in the rendered prompt
     for stop_token in expected_stop_tokens:
         assert stop_token in task_prompt.stop
+
+
+def test_preprocess_events_removes_reasoning_traces():
+    """Test that reasoning traces are removed from bot messages in rendered prompts."""
+    config = RailsConfig.from_content(
+        yaml_content=textwrap.dedent(
+            """
+            models:
+              - type: main
+                engine: openai
+                model: gpt-3.5-turbo-instruct
+                reasoning_config:
+                  start_token: "<think>"
+                  end_token: "</think>"
+            rails:
+              output:
+                apply_to_reasoning_traces: true
+            prompts:
+              - task: generate_user_intent
+                content: |-
+                  {% if examples %}{{ examples }}{% endif %}
+                  {{ history | colang }}
+                  user "{{ user_input }}"
+                  user intent:
+            """
+        )
+    )
+
+    llm_task_manager = LLMTaskManager(config)
+
+    events = [
+        {"type": "UtteranceUserActionFinished", "final_transcript": "Hello"},
+        {
+            "type": "StartUtteranceBotAction",
+            "script": "<think>Let me think how to respond some crazy COT</think>Hi there!",
+        },
+        {"type": "UtteranceUserActionFinished", "final_transcript": "How are you?"},
+    ]
+
+    rendered_prompt = llm_task_manager.render_task_prompt(
+        task=Task.GENERATE_USER_INTENT,
+        context={"user_input": "How are you?", "examples": ""},
+        events=events,
+    )
+
+    assert isinstance(rendered_prompt, str)
+
+    assert "<think>" not in rendered_prompt
+    assert "</think>" not in rendered_prompt
+    assert "Let me think how to respond..." not in rendered_prompt
+
+    assert "Hi there!" in rendered_prompt
+
+
+def test_preprocess_events_preserves_original_events():
+    """Test that _preprocess_events_for_prompt doesn't modify the original events."""
+    config = RailsConfig.from_content(
+        yaml_content=textwrap.dedent(
+            """
+            models:
+              - type: main
+                engine: openai
+                model: gpt-3.5-turbo-instruct
+                reasoning_config:
+                  start_token: "<think>"
+                  end_token: "</think>"
+            rails:
+              output:
+                apply_to_reasoning_traces: true
+            """
+        )
+    )
+
+    llm_task_manager = LLMTaskManager(config)
+
+    original_events = [
+        {"type": "UtteranceUserActionFinished", "final_transcript": "Hello"},
+        {
+            "type": "StartUtteranceBotAction",
+            "script": "<think>Let me think how to respond some crazy COT</think>Hi there!",
+        },
+        {"type": "UtteranceUserActionFinished", "final_transcript": "How are you?"},
+    ]
+
+    events_copy = copy.deepcopy(original_events)
+
+    processed_events = llm_task_manager._preprocess_events_for_prompt(events_copy)
+
+    assert events_copy == original_events
+
+    assert "<think>" not in processed_events[1]["script"]
+    assert "</think>" not in processed_events[1]["script"]
+    assert processed_events[1]["script"] == "Hi there!"
+
+
+def test_reasoning_traces_not_included_in_prompt_history():
+    """Test that reasoning traces don't get included in prompt history for subsequent LLM calls."""
+    config = RailsConfig.from_content(
+        yaml_content=textwrap.dedent(
+            """
+            models:
+              - type: main
+                engine: openai
+                model: gpt-3.5-turbo-instruct
+                reasoning_config:
+                  start_token: "<think>"
+                  end_token: "</think>"
+            rails:
+              output:
+                apply_to_reasoning_traces: true
+            prompts:
+              - task: generate_user_intent
+                content: |-
+                  {% if examples %}{{ examples }}{% endif %}
+                  Previous conversation:
+                  {{ history | colang }}
+
+                  Current user message:
+                  user "{{ user_input }}"
+                  user intent:
+            """
+        )
+    )
+
+    llm_task_manager = LLMTaskManager(config)
+
+    events = [
+        {"type": "UtteranceUserActionFinished", "final_transcript": "Hello"},
+        {
+            "type": "StartUtteranceBotAction",
+            "script": "<think>I should greet the user back.</think>Hi there!",
+        },
+        {
+            "type": "UtteranceUserActionFinished",
+            "final_transcript": "What's the weather like?",
+        },
+        {
+            "type": "StartUtteranceBotAction",
+            "script": "<think>I should explain I don't have real-time weather data.</think>I don't have access to real-time weather information.",
+        },
+        {"type": "UtteranceUserActionFinished", "final_transcript": "Tell me about AI"},
+    ]
+
+    rendered_prompt = llm_task_manager.render_task_prompt(
+        task=Task.GENERATE_USER_INTENT,
+        context={"user_input": "Tell me about AI", "examples": ""},
+        events=events,
+    )
+
+    assert isinstance(rendered_prompt, str)
+
+    assert "<think>I should greet the user back.</think>" not in rendered_prompt
+    assert (
+        "<think>I should explain I don't have real-time weather data.</think>"
+        not in rendered_prompt
+    )
+
+    assert (
+        "Hi there!" in rendered_prompt
+        or "I don't have access to real-time weather information." in rendered_prompt
+    )
