
Commit 457cd48

feat: prevent reasoning traces from contaminating LLM prompt history (#1169)
This change addresses an issue where reasoning traces (like <think>...</think>) from previous bot messages were included in the prompt history for subsequent LLM calls when rails.output.apply_to_reasoning_traces=true was enabled. The fix adds event preprocessing in LLMTaskManager that strips reasoning traces from both BotMessage.text and StartUtteranceBotAction.script fields before prompt rendering, preventing the LLM from seeing its own internal reasoning from previous turns.

Comprehensive tests were added to verify that:

1. Preprocessing doesn't modify the original events.
2. Reasoning traces are removed from the prompt history.
3. The cleaned bot messages are still included in the rendered prompt.
1 parent 8215095 commit 457cd48
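The diff below contains the actual implementation, which relies on the repo's configured reasoning tokens and helper functions. As a rough, self-contained sketch of the same idea (hypothetical helper name `strip_reasoning_traces`, hard-coded `<think>`/`</think>` tokens, and a regex in place of the repo's trace-extraction helpers), stripping traces from a copied event list might look like this:

```python
# Minimal sketch, NOT this repo's implementation: strip a reasoning trace
# from bot-message events before they are rendered into the prompt history.
import copy
import re
from typing import List, Optional


def strip_reasoning_traces(
    events: Optional[List[dict]],
    start_token: str = "<think>",   # assumed tokens; the real code reads them from config
    end_token: str = "</think>",
) -> Optional[List[dict]]:
    if not events:
        return None

    # Work on a deep copy so the caller's original events stay untouched.
    processed = copy.deepcopy(events)
    pattern = re.compile(
        re.escape(start_token) + r".*?" + re.escape(end_token), re.DOTALL
    )

    for event in processed:
        if event.get("type") == "BotMessage" and "text" in event:
            event["text"] = pattern.sub("", event["text"]).strip()
        elif event.get("type") == "StartUtteranceBotAction" and "script" in event:
            event["script"] = pattern.sub("", event["script"]).strip()
    return processed


events = [
    {
        "type": "StartUtteranceBotAction",
        "script": "<think>internal reasoning</think>Hi there!",
    }
]
print(strip_reasoning_traces(events)[0]["script"])  # -> "Hi there!"
```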

File tree: 2 files changed, +231 -1 lines changed

nemoguardrails/llm/taskmanager.py

Lines changed: 69 additions & 1 deletion
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import logging
 import re
 from ast import literal_eval
@@ -22,6 +23,7 @@
 from jinja2 import meta
 from jinja2.sandbox import SandboxedEnvironment
 
+from nemoguardrails.actions.llm.utils import get_and_clear_reasoning_trace_contextvar
 from nemoguardrails.llm.filters import (
     co_v2,
     colang,
@@ -154,6 +156,70 @@ def _get_general_instructions(self):
 
         return text
 
+    def _preprocess_events_for_prompt(
+        self, events: Optional[List[dict]]
+    ) -> Optional[List[dict]]:
+        """Remove reasoning traces from bot messages before rendering them in prompts.
+
+        This prevents reasoning traces from being included in LLM prompt history when
+        rails.output.apply_to_reasoning_traces=true is enabled.
+
+        Args:
+            events: The list of events to preprocess
+
+        Returns:
+            A new list of preprocessed events, or None if events was None
+        """
+        if not events:
+            return None
+
+        processed_events = copy.deepcopy(events)
+
+        for event in processed_events:
+            if (
+                isinstance(event, dict)
+                and event.get("type") == "BotMessage"
+                and "text" in event
+            ):
+                bot_utterance = event["text"]
+                for task in Task:
+                    start_token, end_token = get_reasoning_token_tags(self.config, task)
+                    if (
+                        start_token
+                        and end_token
+                        and output_has_reasoning_traces(
+                            bot_utterance, start_token, end_token
+                        )
+                    ):
+                        result = extract_and_strip_trace(
+                            bot_utterance, start_token, end_token
+                        )
+                        event["text"] = result.text
+                        break
+
+            elif (
+                isinstance(event, dict)
+                and event.get("type") == "StartUtteranceBotAction"
+                and "script" in event
+            ):
+                bot_utterance = event["script"]
+                for task in Task:
+                    start_token, end_token = get_reasoning_token_tags(self.config, task)
+                    if (
+                        start_token
+                        and end_token
+                        and output_has_reasoning_traces(
+                            bot_utterance, start_token, end_token
+                        )
+                    ):
+                        result = extract_and_strip_trace(
+                            bot_utterance, start_token, end_token
+                        )
+                        event["script"] = result.text
+                        break
+
+        return processed_events
+
     def _render_string(
         self,
         template_str: str,
@@ -168,6 +234,8 @@ def _render_string(
         :return: The rendered template.
         :rtype: str.
         """
+        # Preprocess events to remove reasoning traces from BotMessage events
+        processed_events = self._preprocess_events_for_prompt(events)
 
         template = self.env.from_string(template_str)
 
@@ -176,7 +244,7 @@ def _render_string(
 
         # This is the context that will be passed to the template when rendering.
         render_context = {
-            "history": events,
+            "history": processed_events,
             "general_instructions": self._get_general_instructions(),
             "sample_conversation": self.config.sample_conversation,
             "sample_conversation_two_turns": self.config.sample_conversation,

tests/test_llm_task_manager.py

Lines changed: 162 additions & 0 deletions
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import copy
 import textwrap
 
 import pytest
@@ -295,3 +296,164 @@ def test_stop_configuration_parameter():
     # Check if the stop tokens are correctly set in the rendered prompt
     for stop_token in expected_stop_tokens:
         assert stop_token in task_prompt.stop
+
+
+def test_preprocess_events_removes_reasoning_traces():
+    """Test that reasoning traces are removed from bot messages in rendered prompts."""
+    config = RailsConfig.from_content(
+        yaml_content=textwrap.dedent(
+            """
+            models:
+              - type: main
+                engine: openai
+                model: gpt-3.5-turbo-instruct
+                reasoning_config:
+                  start_token: "<think>"
+                  end_token: "</think>"
+            rails:
+              output:
+                apply_to_reasoning_traces: true
+            prompts:
+              - task: generate_user_intent
+                content: |-
+                  {% if examples %}{{ examples }}{% endif %}
+                  {{ history | colang }}
+                  user "{{ user_input }}"
+                  user intent:
+            """
+        )
+    )
+
+    llm_task_manager = LLMTaskManager(config)
+
+    events = [
+        {"type": "UtteranceUserActionFinished", "final_transcript": "Hello"},
+        {
+            "type": "StartUtteranceBotAction",
+            "script": "<think>Let me think how to respond some crazy COT</think>Hi there!",
+        },
+        {"type": "UtteranceUserActionFinished", "final_transcript": "How are you?"},
+    ]
+
+    rendered_prompt = llm_task_manager.render_task_prompt(
+        task=Task.GENERATE_USER_INTENT,
+        context={"user_input": "How are you?", "examples": ""},
+        events=events,
+    )
+
+    assert isinstance(rendered_prompt, str)
+
+    assert "<think>" not in rendered_prompt
+    assert "</think>" not in rendered_prompt
+    assert "Let me think how to respond..." not in rendered_prompt
+
+    assert "Hi there!" in rendered_prompt
+
+
+def test_preprocess_events_preserves_original_events():
+    """Test that _preprocess_events_for_prompt doesn't modify the original events."""
+    config = RailsConfig.from_content(
+        yaml_content=textwrap.dedent(
+            """
+            models:
+              - type: main
+                engine: openai
+                model: gpt-3.5-turbo-instruct
+                reasoning_config:
+                  start_token: "<think>"
+                  end_token: "</think>"
+            rails:
+              output:
+                apply_to_reasoning_traces: true
+            """
+        )
+    )
+
+    llm_task_manager = LLMTaskManager(config)
+
+    original_events = [
+        {"type": "UtteranceUserActionFinished", "final_transcript": "Hello"},
+        {
+            "type": "StartUtteranceBotAction",
+            "script": "<think>Let me think how to respond some crazy COT</think>Hi there!",
+        },
+        {"type": "UtteranceUserActionFinished", "final_transcript": "How are you?"},
+    ]
+
+    events_copy = copy.deepcopy(original_events)
+
+    processed_events = llm_task_manager._preprocess_events_for_prompt(events_copy)
+
+    assert events_copy == original_events
+
+    assert "<think>" not in processed_events[1]["script"]
+    assert "</think>" not in processed_events[1]["script"]
+    assert processed_events[1]["script"] == "Hi there!"
+
+
+def test_reasoning_traces_not_included_in_prompt_history():
+    """Test that reasoning traces don't get included in prompt history for subsequent LLM calls."""
+    config = RailsConfig.from_content(
+        yaml_content=textwrap.dedent(
+            """
+            models:
+              - type: main
+                engine: openai
+                model: gpt-3.5-turbo-instruct
+                reasoning_config:
+                  start_token: "<think>"
+                  end_token: "</think>"
+            rails:
+              output:
+                apply_to_reasoning_traces: true
+            prompts:
+              - task: generate_user_intent
+                content: |-
+                  {% if examples %}{{ examples }}{% endif %}
+                  Previous conversation:
+                  {{ history | colang }}
+
+                  Current user message:
+                  user "{{ user_input }}"
+                  user intent:
+            """
+        )
+    )
+
+    llm_task_manager = LLMTaskManager(config)
+
+    events = [
+        {"type": "UtteranceUserActionFinished", "final_transcript": "Hello"},
+        {
+            "type": "StartUtteranceBotAction",
+            "script": "<think>I should greet the user back.</think>Hi there!",
+        },
+        {
+            "type": "UtteranceUserActionFinished",
+            "final_transcript": "What's the weather like?",
+        },
+        {
+            "type": "StartUtteranceBotAction",
+            "script": "<think>I should explain I don't have real-time weather data.</think>I don't have access to real-time weather information.",
+        },
+        {"type": "UtteranceUserActionFinished", "final_transcript": "Tell me about AI"},
+    ]
+
+    rendered_prompt = llm_task_manager.render_task_prompt(
+        task=Task.GENERATE_USER_INTENT,
+        context={"user_input": "Tell me about AI", "examples": ""},
+        events=events,
+    )
+
+    assert isinstance(rendered_prompt, str)
+
+    assert "<think>I should greet the user back.</think>" not in rendered_prompt
+    assert (
+        "<think>I should explain I don't have real-time weather data.</think>"
+        not in rendered_prompt
+    )
+
+    assert (
+        "Hi there!" in rendered_prompt
+        or "I don't have access to real-time weather information." in rendered_prompt
+    )
