diff --git a/.railsignore b/.railsignore new file mode 100644 index 000000000..e69de29bb diff --git a/nemoguardrails/actions/action_dispatcher.py b/nemoguardrails/actions/action_dispatcher.py deleted file mode 100644 index 67eef91cd..000000000 --- a/nemoguardrails/actions/action_dispatcher.py +++ /dev/null @@ -1,370 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Module for the calling proper action endpoints based on events received at action server endpoint""" - -import importlib.util -import inspect -import logging -import os -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union - -from langchain.chains.base import Chain -from langchain_core.runnables import Runnable - -from nemoguardrails import utils -from nemoguardrails.actions.llm.utils import LLMCallException -from nemoguardrails.logging.callbacks import logging_callbacks - -log = logging.getLogger(__name__) - - -class ActionDispatcher: - def __init__( - self, - load_all_actions: bool = True, - config_path: Optional[str] = None, - import_paths: Optional[List[str]] = None, - ): - """ - Initializes an actions dispatcher. - Args: - load_all_actions (bool, optional): When set to True, it loads all actions in the - 'actions' folder both in the current working directory and in the package. 
- config_path (str, optional): The path from which the configuration was loaded. - If there are actions at the specified path, it loads them as well. - import_paths (List[str], optional): Additional imported paths from which actions - should be loaded. - """ - log.info("Initializing action dispatcher") - - self._registered_actions = {} - - if load_all_actions: - # TODO: check for better way to find actions dir path or use constants.py - current_file_path = Path(__file__).resolve() - parent_directory_path = current_file_path.parents[1] - - # First, we load all actions from the actions folder - self.load_actions_from_path(parent_directory_path) - # self.load_actions_from_path(os.path.join(os.path.dirname(__file__), "..")) - - # Next, we load all actions from the library folder - library_path = parent_directory_path / "library" - - for root, dirs, files in os.walk(library_path): - # We only load the actions if there is an `actions` sub-folder or - # an `actions.py` file. - if "actions" in dirs or "actions.py" in files: - self.load_actions_from_path(Path(root)) - - # Next, we load all actions from the current working directory - # TODO: add support for an explicit ACTIONS_PATH - self.load_actions_from_path(Path.cwd()) - - # Last, but not least, if there was a config path, we try to load actions - # from there as well. - if config_path: - config_path = config_path.split(",") - for path in config_path: - self.load_actions_from_path(Path(path.strip())) - - # If there are any imported paths, we load the actions from there as well. - if import_paths: - for import_path in import_paths: - self.load_actions_from_path(Path(import_path.strip())) - - log.info(f"Registered Actions :: {sorted(self._registered_actions.keys())}") - log.info("Action dispatcher initialized") - - @property - def registered_actions(self): - """ - Gets the dictionary of registered actions. - Returns: - dict: A dictionary where keys are action names and values are callable action functions. 
- """ - return self._registered_actions - - def load_actions_from_path(self, path: Path): - """Loads all actions from the specified path. - - This method loads all actions from the `actions.py` file if it exists and - all actions inside the `actions` folder if it exists. - - Args: - path (str): A string representing the path from which to load actions. - - """ - actions_path = path / "actions" - if os.path.exists(actions_path): - self._registered_actions.update(self._find_actions(actions_path)) - - actions_py_path = os.path.join(path, "actions.py") - if os.path.exists(actions_py_path): - self._registered_actions.update( - self._load_actions_from_module(actions_py_path) - ) - - def register_action( - self, action: callable, name: Optional[str] = None, override: bool = True - ): - """Registers an action with the given name. - - Args: - action (callable): The action function. - name (Optional[str]): The name of the action. Defaults to None. - override (bool): If an action already exists, whether it should be overridden or not. - """ - if name is None: - action_meta = getattr(action, "action_meta", None) - name = action_meta["name"] if action_meta else action.__name__ - - # If we're not allowed to override, we stop. - if name in self._registered_actions and not override: - return - - self._registered_actions[name] = action - - def register_actions(self, actions_obj: any, override: bool = True): - """Registers all the actions from the given object. - - Args: - actions_obj (any): The object containing actions. - override (bool): If an action already exists, whether it should be overridden or not. 
- """ - - # Register the actions - for attr in dir(actions_obj): - val = getattr(actions_obj, attr) - - if hasattr(val, "action_meta"): - self.register_action(val, override=override) - - def _normalize_action_name(self, name: str) -> str: - """Normalize the action name to the required format.""" - if name not in self.registered_actions: - if name.endswith("Action"): - name = name.replace("Action", "") - name = utils.camelcase_to_snakecase(name) - return name - - def has_registered(self, name: str) -> bool: - """Check if an action is registered.""" - name = self._normalize_action_name(name) - return name in self.registered_actions - - def get_action(self, name: str) -> callable: - """Get the registered action by name. - - Args: - name (str): The name of the action. - - Returns: - callable: The registered action. - """ - name = self._normalize_action_name(name) - return self._registered_actions.get(name, None) - - async def execute_action( - self, action_name: str, params: Dict[str, Any] - ) -> Tuple[Union[str, Dict[str, Any]], str]: - """Execute a registered action. - - Args: - action_name (str): The name of the action to execute. - params (Dict[str, Any]): Parameters for the action. - - Returns: - Tuple[Union[str, Dict[str, Any]], str]: A tuple containing the result and status. - """ - - action_name = self._normalize_action_name(action_name) - - if action_name in self._registered_actions: - log.info(f"Executing registered action: {action_name}") - fn = self._registered_actions.get(action_name, None) - - # Actions that are registered as classes are initialized lazy, when - # they are first used. - if inspect.isclass(fn): - fn = fn() - self._registered_actions[action_name] = fn - - if fn is not None: - try: - # We support both functions and classes as actions - if inspect.isfunction(fn) or inspect.ismethod(fn): - # We support both sync and async actions. 
- result = fn(**params) - if inspect.iscoroutine(result): - result = await result - else: - log.warning( - f"Synchronous action `{action_name}` has been called." - ) - - elif isinstance(fn, Chain): - try: - chain = fn - - # For chains with only one output key, we use the `arun` function - # to return directly the result. - if len(chain.output_keys) == 1: - result = await chain.arun( - **params, callbacks=logging_callbacks - ) - else: - # Otherwise, we return the dict with the output keys. - result = await chain.acall( - inputs=params, - return_only_outputs=True, - callbacks=logging_callbacks, - ) - except NotImplementedError: - # Not ideal, but for now we fall back to sync execution - # if the async is not available - result = fn.run(**params) - elif isinstance(fn, Runnable): - # If it's a Runnable, we invoke it as well - runnable = fn - - result = await runnable.ainvoke(input=params) - else: - # TODO: there should be a common base class here - result = fn.run(**params) - return result, "success" - - # We forward LLM Call exceptions - except LLMCallException as e: - raise e - - except Exception as e: - filtered_params = { - k: v - for k, v in params.items() - if k not in ["state", "events", "llm"] - } - log.warning( - "Error while execution '%s' with parameters '%s': %s", - action_name, - filtered_params, - e, - ) - log.exception(e) - - return None, "failed" - - def get_registered_actions(self) -> List[str]: - """Get the list of available actions. - - Returns: - List[str]: List of available actions. - """ - return list(self._registered_actions.keys()) - - @staticmethod - def _load_actions_from_module(filepath: str): - """Loads the actions from the specified python module. - - Args: - filepath (str): The path of the Python module. - - Returns: - Dict: Dictionary of loaded actions. 
- """ - action_objects = {} - filename = os.path.basename(filepath) - - if not os.path.isfile(filepath): - log.error(f"{filepath} does not exist or is not a file.") - log.error(f"Failed to load actions from {filename}.") - return action_objects - - try: - log.debug(f"Analyzing file {filename}") - # Import the module from the file - - spec = importlib.util.spec_from_file_location(filename, filepath) - if spec is None: - log.error(f"Failed to create a module spec from {filepath}.") - return action_objects - - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - - # Loop through all members in the module and check for the `@action` decorator - # If class has action decorator is_action class member is true - for name, obj in inspect.getmembers(module): - if (inspect.isfunction(obj) or inspect.isclass(obj)) and hasattr( - obj, "action_meta" - ): - try: - action_objects[obj.action_meta["name"]] = obj - log.info(f"Added {obj.action_meta['name']} to actions") - except Exception as e: - log.error( - f"Failed to register {obj.action_meta['name']} in action dispatcher due to exception {e}" - ) - except Exception as e: - relative_filepath = Path(module.__file__).relative_to(Path.cwd()) - log.error( - f"Failed to register {filename} from {relative_filepath} in action dispatcher due to exception: {e}" - ) - - return action_objects - - def _find_actions(self, directory) -> Dict: - """Loop through all the subdirectories and check for the class with @action - decorator and add in action_classes dict. - - Args: - directory: The directory to search for actions. - - Returns: - Dict: Dictionary of found actions. 
- """ - action_objects = {} - - if not os.path.exists(directory): - log.debug(f"_find_actions: {directory} does not exist.") - return action_objects - - # Loop through all files in the directory and its subdirectories - for root, dirs, files in os.walk(directory): - for filename in files: - if filename.endswith(".py"): - filepath = os.path.join(root, filename) - if is_action_file(filepath): - action_objects.update( - ActionDispatcher._load_actions_from_module(filepath) - ) - if not action_objects: - log.debug(f"No actions found in {directory}") - log.exception(f"No actions found in the directory {directory}.") - - return action_objects - - -def is_action_file(filepath): - """Heuristics for determining if a Python file can have actions or not. - - Currently, it only excludes the `__init__.py files. - """ - if "__init__.py" in filepath: - return False - - return True diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index c6294cedc..3ff415973 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -14,7 +14,7 @@ # limitations under the License. """Module for the configuration of rails.""" - +import fnmatch import logging import os import warnings @@ -28,6 +28,7 @@ from nemoguardrails.colang.v2_x.lang.colang_ast import Flow from nemoguardrails.colang.v2_x.lang.utils import format_colang_parsing_error_message from nemoguardrails.colang.v2_x.runtime.errors import ColangParsingError +from nemoguardrails.utils import get_railsignore_patterns log = logging.getLogger(__name__) @@ -556,6 +557,12 @@ def _load_path( # Followlinks to traverse symlinks instead of ignoring them. for file in files: + # Verify railsignore to skip loading + ignored_by_railsignore = _is_file_ignored_by_railsignore(file) + + if ignored_by_railsignore: + continue + # This is the raw configuration that will be loaded from the file. 
_raw_config = {} @@ -1203,3 +1210,19 @@ def _generate_rails_flows(flows): flow_definitions.insert(1, _LIBRARY_IMPORT + _NEWLINE * 2) return flow_definitions + + +def _is_file_ignored_by_railsignore(filename: str) -> bool: + # Default no skip + should_skip_file = False + + # Load candidate patterns from railsignore + candidate_patterns = get_railsignore_patterns() + + # Ignore colang, kb, python modules if specified in valid railsignore glob format + if filename.endswith(".py") or filename.endswith(".co") or filename.endswith(".kb"): + for pattern in candidate_patterns: + if fnmatch.fnmatch(filename, pattern): + should_skip_file = True + + return should_skip_file diff --git a/nemoguardrails/utils.py b/nemoguardrails/utils.py index a79987432..c289bd9e0 100644 --- a/nemoguardrails/utils.py +++ b/nemoguardrails/utils.py @@ -23,6 +23,7 @@ from collections import namedtuple from datetime import datetime, timezone from enum import Enum +from pathlib import Path from typing import Any, Dict, Tuple import yaml @@ -302,3 +303,52 @@ def snake_to_camelcase(name: str) -> str: str: The converted CamelCase string. """ return "".join(n.capitalize() for n in name.split("_")) + + +def get_railsignore_path() -> Path: + """Helper to get railsignore path. + + Returns: + Path: The .railsignore file path. + """ + current_path = Path(__file__).resolve() + + # Navigate to the root directory by going up 2 levels + root_dir = current_path.parents[1] + + file_path = root_dir / ".railsignore" + + return file_path + + +def get_railsignore_patterns() -> set[str]: + """ + Helper to retrieve all specified patterns in railsignore.
+ Returns: + Set[str]: The set of filenames or glob patterns in railsignore + """ + ignored_patterns = set() + + railsignore_path = get_railsignore_path() + + # File doesn't exist or is empty + if not railsignore_path.exists() or not os.path.getsize(railsignore_path): + return ignored_patterns + + try: + with open(railsignore_path, "r") as f: + railsignore_entries = f.readlines() + + # Remove comments and empty lines, and strip out any extra spaces/newlines + railsignore_entries = [ + line.strip() + for line in railsignore_entries + if line.strip() and not line.startswith("#") + ] + + ignored_patterns.update(railsignore_entries) + return ignored_patterns + + except FileNotFoundError: + print(f"No {railsignore_path} found in the current directory.") + return ignored_patterns diff --git a/tests/test_configs/railsignore_config/config_to_load.co b/tests/test_configs/railsignore_config/config_to_load.co new file mode 100644 index 000000000..c7b27161e --- /dev/null +++ b/tests/test_configs/railsignore_config/config_to_load.co @@ -0,0 +1,6 @@ +define user express greeting + "hey" + "hei" + +define flow + user express greeting diff --git a/tests/test_configs/railsignore_config/ignored_config.co b/tests/test_configs/railsignore_config/ignored_config.co new file mode 100644 index 000000000..827aecb7a --- /dev/null +++ b/tests/test_configs/railsignore_config/ignored_config.co @@ -0,0 +1,7 @@ +define user express greeting + "hi" + "hello" + +define flow + user express greeting + bot express greeting diff --git a/tests/test_railsignore.py b/tests/test_railsignore.py new file mode 100644 index 000000000..5ef1c58ea --- /dev/null +++ b/tests/test_railsignore.py @@ -0,0 +1,135 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import shutil + +import pytest + +from nemoguardrails import RailsConfig +from nemoguardrails.utils import get_railsignore_path, get_railsignore_patterns + +CONFIGS_FOLDER = os.path.join(os.path.dirname(__file__), ".", "test_configs") + + +@pytest.fixture(scope="function") +def cleanup(): + # Copy current rails ignore and prepare for tests + railsignore_path = get_railsignore_path() + + temp_file_path = str(railsignore_path) + "-copy" + + # Copy the original .railsignore to a temporary file + shutil.copy(railsignore_path, temp_file_path) + print(f"Copied {railsignore_path} to {temp_file_path}") + + # Clean railsignore file before + cleanup_railsignore() + + # Yield control to test + yield + + # Clean railsignore file after + cleanup_railsignore() + + # Restore the original .railsignore from the temporary copy + shutil.copy(temp_file_path, railsignore_path) + print(f"Restored {railsignore_path} from {temp_file_path}") + + # Delete the temporary file + if os.path.exists(temp_file_path): + os.remove(temp_file_path) + print(f"Deleted temporary file {temp_file_path}") + + +def test_railsignore_config_loading(cleanup): + # Setup railsignore + append_railsignore("ignored_config.co") + + # Load config + config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "railsignore_config")) + + config_string = str(config) + # Assert .railsignore successfully ignores + assert "ignored_config.co" not in config_string + + # Other files should load successfully + assert "config_to_load.co" in config_string + + +def test_get_railsignore_files(cleanup): + #
Empty railsignore + ignored_files = get_railsignore_patterns() + + assert "ignored_module.py" not in ignored_files + assert "ignored_colang.co" not in ignored_files + + # Append files to railsignore + append_railsignore("ignored_module.py") + append_railsignore("ignored_colang.co") + + # Grab ignored files + ignored_files = get_railsignore_patterns() + + # Check files exist + assert "ignored_module.py" in ignored_files + assert "ignored_colang.co" in ignored_files + + # Append comment and whitespace + append_railsignore("# This_is_a_comment.py") + append_railsignore(" ") + append_railsignore("") + + # Grab ignored files + ignored_files = get_railsignore_patterns() + + # Comments and whitespace not retrieved + assert "# This_is_a_comment.py" not in ignored_files + assert " " not in ignored_files + assert "" not in ignored_files + + # Assert files still exist + assert "ignored_module.py" in ignored_files + assert "ignored_colang.co" in ignored_files + + +def cleanup_railsignore(): + """ + Helper for clearing a railsignore file. + """ + railsignore_path = get_railsignore_path() + + try: + with open(railsignore_path, "w") as f: + pass + except OSError as e: + print(f"Error: Unable to create {railsignore_path}. {e}") + else: + print(f"Successfully cleaned up .railsignore: {railsignore_path}") + + +def append_railsignore(file_name: str) -> None: + """ + Helper for appending to a railsignore file. + """ + railsignore_path = get_railsignore_path() + + try: + with open(railsignore_path, "a") as f: + f.write(file_name + "\n") + except FileNotFoundError: + print(f"No {railsignore_path} found in the current directory.") + except OSError as e: + print(f"Error: Failed to write to {railsignore_path}. {e}")