56 changes: 56 additions & 0 deletions nemoguardrails/actions/v2_x/generation.py
@@ -48,11 +48,19 @@
    get_element_from_head,
    get_event_from_element,
)
from nemoguardrails.context import (
    generation_options_var,
    llm_call_info_var,
    raw_llm_request,
    streaming_handler_var,
)
from nemoguardrails.embeddings.index import EmbeddingsIndex, IndexItem
from nemoguardrails.llm.filters import colang
from nemoguardrails.llm.params import llm_params
from nemoguardrails.llm.types import Task
from nemoguardrails.logging import verbose
from nemoguardrails.logging.explain import LLMCallInfo
from nemoguardrails.rails.llm.options import GenerationOptions
from nemoguardrails.utils import console, new_uuid

log = logging.getLogger(__name__)
@@ -390,6 +398,54 @@ async def generate_user_intent_and_bot_action(
"bot_action": bot_action,
}

@action(name="PassthroughLLMAction", is_system_action=True, execute_async=True)
async def passthrough_llm_action(
self,
user_message: str,
state: State,
events: List[dict],
llm: Optional[BaseLLM] = None,
):
event = get_last_user_utterance_event_v2_x(events)

# We check if we have a raw request. If the guardrails API is using
# the `generate_events` API, this will not be set.
raw_prompt = raw_llm_request.get()

if raw_prompt is None:
prompt = event["final_transcript"]
else:
if isinstance(raw_prompt, str):
# If we're in completion mode, we use directly the last $user_message
# as it may have been altered by the input rails.
prompt = event["final_transcript"]
elif isinstance(raw_prompt, list):
prompt = raw_prompt.copy()

# In this case, if the last message is from the user, we replace the text
# just in case the input rails may have altered it.
if prompt[-1]["role"] == "user":
raw_prompt[-1]["content"] = event["final_transcript"]
else:
raise ValueError(f"Unsupported type for raw prompt: {type(raw_prompt)}")

# Initialize the LLMCallInfo object
llm_call_info_var.set(LLMCallInfo(task=Task.GENERAL.value))

generation_options: GenerationOptions = generation_options_var.get()

with llm_params(
llm,
**((generation_options and generation_options.llm_params) or {}),
):
text = await llm_call(
llm,
user_message,
custom_callback_handlers=[streaming_handler_var.get()],
)

return text

@action(name="CheckValidFlowExistsAction", is_system_action=True)
async def check_if_flow_exists(self, state: "State", flow_id: str) -> bool:
"""Return True if a flow with the provided flow_id exists."""
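For quick reference, the branching in passthrough_llm_action reduces to a small prompt-selection rule. A minimal standalone sketch of that rule (plain Python; select_prompt is a hypothetical helper for illustration, not part of this PR):

from typing import List, Union


def select_prompt(
    raw_prompt: Union[str, List[dict], None], final_transcript: str
) -> Union[str, List[dict]]:
    # No raw request (e.g. the `generate_events` API): use the transcript.
    if raw_prompt is None:
        return final_transcript
    # Completion mode: prefer the transcript, which input rails may have altered.
    if isinstance(raw_prompt, str):
        return final_transcript
    # Chat mode: copy the messages and patch the last user turn with the
    # (possibly rail-altered) transcript.
    if isinstance(raw_prompt, list):
        prompt = [dict(m) for m in raw_prompt]  # copy message dicts, not just the list
        if prompt[-1]["role"] == "user":
            prompt[-1]["content"] = final_transcript
        return prompt
    raise ValueError(f"Unsupported type for raw prompt: {type(raw_prompt)}")


assert select_prompt(None, "hi") == "hi"
assert select_prompt([{"role": "user", "content": "hi!"}], "hi") == [
    {"role": "user", "content": "hi"}
]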
26 changes: 26 additions & 0 deletions nemoguardrails/colang/v2_x/library/passthrough.co
@@ -0,0 +1,26 @@

import llm

flow context free bot response generation on unhandled user intent
  """Just make a call to the LLM in passthrough mode."""
  activate polling llm request response
  await _user_said_something_unexpected as $user_said
  $event = $user_said.event

  # We need to wait for the automatic intent detection.
  await unhandled user intent as $flow
  log 'unexpected user utterance: "{$event.final_transcript}"'

  $user_message = $event.final_transcript

  log 'start generating bot response in passthrough mode...'
  $bot_message = await PassthroughLLMAction(user_message=$user_message)
  bot say $bot_message

@override
flow llm continuation
  activate automating intent detection
  activate generating user intent for unhandled user utterance
  activate context free bot response generation on unhandled user intent
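A minimal usage sketch for the new library (a sketch only: the model settings are borrowed from the test file below, and LLMRails / RailsConfig are the standard public entry points):

from nemoguardrails import LLMRails, RailsConfig

config = RailsConfig.from_content(
    colang_content='''
import core
import passthrough

flow main
  activate llm continuation
''',
    yaml_content='''
colang_version: "2.x"
models:
  - type: main
    engine: openai
    model: gpt-3.5-turbo-instruct
''',
)
rails = LLMRails(config)

# An utterance that no flow handles falls through to the passthrough LLM call.
response = rails.generate(messages=[{"role": "user", "content": "Tell me a joke."}])
print(response["content"])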
105 changes: 105 additions & 0 deletions tests/v2_x/test_passthrough_mode.py
@@ -0,0 +1,105 @@
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import unittest

from nemoguardrails import RailsConfig
from tests.utils import TestChat

colang_content = '''
import core
import passthrough

flow main
  activate llm continuation
  activate greeting
  activate other reactions

flow greeting
  user expressed greeting
  bot say "Hello world!"

flow other reactions
  user expressed to be bored
  bot say "No problem!"

flow user expressed greeting
  """User expressed greeting in any way or form."""
  user said "hi"

flow user expressed to be bored
  """User expressed to be bored."""
  user said "This is boring"
'''

yaml_content = """
colang_version: "2.x"
models:
- type: main
engine: openai
model: gpt-3.5-turbo-instruct

"""


config = RailsConfig.from_content(colang_content, yaml_content)


class TestPassthroughLLMActionLogging(unittest.IsolatedAsyncioTestCase):
    def test_passthrough_llm_action_not_invoked_via_logs(self):
        chat = TestChat(
            config,
            llm_completions=["user expressed greeting"],
        )
        rails = chat.app

        logger = logging.getLogger("nemoguardrails.colang.v2_x.runtime.statemachine")

        with self.assertLogs(logger, level="INFO") as log:
            messages = [{"role": "user", "content": "hi"}]
            response = rails.generate(messages=messages)
            # Check that 'PassthroughLLMActionFinished' is not in the logs.
            passthrough_invoked = any(
                "PassthroughLLMActionFinished" in message for message in log.output
            )
            self.assertFalse(
                passthrough_invoked, "PassthroughLLMAction was invoked unexpectedly."
            )

        self.assertIn("content", response)
        self.assertIsInstance(response["content"], str)

    def test_passthrough_llm_action_invoked_via_logs(self):
        chat = TestChat(
            config,
            llm_completions=["user asked about capabilities", "a random text from llm"],
        )
        rails = chat.app

        logger = logging.getLogger("nemoguardrails.colang.v2_x.runtime.statemachine")

        with self.assertLogs(logger, level="INFO") as log:
            messages = [{"role": "user", "content": "What can you do?"}]
            response = rails.generate(messages=messages)
            # Check that 'StartPassthroughLLMAction' is in the logs.
            passthrough_invoked = any(
                "StartPassthroughLLMAction" in message for message in log.output
            )
            self.assertTrue(passthrough_invoked, "PassthroughLLMAction was not invoked.")

        self.assertIn("content", response)
        self.assertIsInstance(response["content"], str)
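As the assertions above imply, both tests key off the Start<ActionName> / <ActionName>Finished markers that the runtime writes to the statemachine logger. A self-contained sketch of that capture pattern (plain unittest; the logger name and the emitted message are stand-ins, not guardrails code):

import logging
import unittest


class LogMarkerPatternExample(unittest.TestCase):
    def test_marker_is_captured(self):
        logger = logging.getLogger("example.statemachine")
        with self.assertLogs(logger, level="INFO") as log:
            # Stand-in for the runtime emitting an action event marker.
            logger.info("StartPassthroughLLMAction")
        self.assertTrue(any("StartPassthroughLLMAction" in m for m in log.output))


if __name__ == "__main__":
    unittest.main()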