diff --git a/.railsignore b/.railsignore new file mode 100644 index 000000000..e69de29bb diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index c6294cedc..6fbdbfb08 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -24,6 +24,7 @@ from pydantic import BaseModel, ValidationError, root_validator from pydantic.fields import Field +from nemoguardrails import utils from nemoguardrails.colang import parse_colang_file, parse_flow_elements from nemoguardrails.colang.v2_x.lang.colang_ast import Flow from nemoguardrails.colang.v2_x.lang.utils import format_colang_parsing_error_message @@ -551,11 +552,23 @@ def _load_path( if not os.path.exists(config_path): raise ValueError(f"Could not find config path: {config_path}") + # the first .railsignore file found from cwd down to its subdirectories + railsignore_path = utils.get_railsignore_path(config_path) + ignore_patterns = utils.get_railsignore_patterns(railsignore_path) + if os.path.isdir(config_path): for root, _, files in os.walk(config_path, followlinks=True): # Followlinks to traverse symlinks instead of ignoring them. for file in files: + # Verify railsignore to skip loading + ignored_by_railsignore = utils.is_ignored_by_railsignore( + file, ignore_patterns + ) + + if ignored_by_railsignore: + continue + # This is the raw configuration that will be loaded from the file. _raw_config = {} diff --git a/nemoguardrails/utils.py b/nemoguardrails/utils.py index d1689b452..fda3d43bf 100644 --- a/nemoguardrails/utils.py +++ b/nemoguardrails/utils.py @@ -14,6 +14,7 @@ # limitations under the License. 
import fnmatch
import os
from pathlib import Path
from typing import Iterable, Optional, Set, Union


def get_railsignore_path(path: Optional[str] = None) -> Optional[Path]:
    """Locate the nearest ``.railsignore`` file at or above ``path``.

    The search starts in ``path`` (or in the current working directory when
    ``path`` is not given) and walks up through every parent directory until
    a regular file named ``.railsignore`` is found or the filesystem root is
    reached.

    Args:
        path (Optional[str]): The starting path to search for the
            .railsignore file. Relative paths are interpreted against the
            current working directory.

    Returns:
        Optional[Path]: The .railsignore file path, or None when no such
            file exists in the starting directory or any of its ancestors.
    """
    # Anchor the walk on an absolute, lexically normalised path. A relative
    # path would otherwise stop climbing at "." and never look above the
    # current working directory, unlike an absolute path.
    current_path = Path(os.path.abspath(path)) if path else Path.cwd()

    while True:
        railsignore_file = current_path / ".railsignore"
        # is_file() is already False for a missing entry or a directory.
        if railsignore_file.is_file():
            return railsignore_file
        # The root directory is its own parent: nothing left to inspect.
        if current_path == current_path.parent:
            return None
        current_path = current_path.parent


def get_railsignore_patterns(railsignore_path: Optional[Path]) -> Set[str]:
    """Retrieve all specified patterns in railsignore.

    Blank lines and comment lines (first non-blank character is ``#``) are
    skipped; every other line is stripped of surrounding whitespace and kept.

    Args:
        railsignore_path (Optional[Path]): Location of the .railsignore file.
            ``None`` or a path that does not exist yields an empty set.

    Returns:
        Set[str]: The set of filenames or glob patterns in railsignore.
    """
    ignored_patterns: Set[str] = set()

    if railsignore_path is None:
        return ignored_patterns

    try:
        with open(railsignore_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                entry = raw_line.strip()
                # Test the stripped text so an indented "# comment" is
                # recognised as a comment rather than stored as a pattern.
                if entry and not entry.startswith("#"):
                    ignored_patterns.add(entry)
    except (FileNotFoundError, NotADirectoryError):
        # A missing file simply means nothing is ignored. Opening directly
        # instead of checking exists() first avoids a check-then-open race.
        pass

    return ignored_patterns


def is_ignored_by_railsignore(
    filename: str, ignore_patterns: Union[str, Iterable[str]]
) -> bool:
    """Verify if a filename should be ignored by a railsignore pattern.

    Args:
        filename (str): The bare file name to test.
        ignore_patterns (Union[str, Iterable[str]]): Glob patterns as
            understood by ``fnmatch``. A single string is treated as one
            pattern.

    Returns:
        bool: True when ``filename`` matches at least one pattern.
    """
    if not ignore_patterns:
        return False

    # Iterating a bare string would test each character as its own pattern
    # (so "*.co" would match everything through "*"); wrap it instead.
    if isinstance(ignore_patterns, str):
        ignore_patterns = (ignore_patterns,)

    return any(fnmatch.fnmatch(filename, pattern) for pattern in ignore_patterns)
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import shutil
from pathlib import Path
from typing import Union
from unittest.mock import patch

import pytest

from nemoguardrails import RailsConfig
from nemoguardrails.utils import get_railsignore_patterns, is_ignored_by_railsignore

CONFIGS_FOLDER = os.path.join(os.path.dirname(__file__), ".", "test_configs")


@pytest.fixture(scope="function")
def cleanup(tmp_path):
    """Provide an empty, test-private .railsignore file.

    ``nemoguardrails.utils.get_railsignore_path`` is patched to return the
    temporary file for the duration of the test. The file lives in pytest's
    per-test ``tmp_path`` rather than a fixed ``/tmp`` location, so it works
    on every platform and cannot collide with other test runs or with a real
    .railsignore.

    Yields:
        Path: Path of the empty .railsignore file.
    """
    railsignore_path = tmp_path / ".railsignore"

    # Mock the path to the .railsignore file
    with patch(
        "nemoguardrails.utils.get_railsignore_path"
    ) as mock_get_railsignore_path:
        mock_get_railsignore_path.return_value = railsignore_path

        # Ensure the mock file exists and starts out empty
        railsignore_path.touch()
        cleanup_railsignore(railsignore_path)

        # Yield control to test
        yield railsignore_path

        # Remove the mock file; pytest disposes of tmp_path itself later
        if railsignore_path.exists():
            railsignore_path.unlink()


def test_railsignore_config_loading(cleanup):
    """A file listed in .railsignore is skipped when loading a config folder."""
    railsignore_path = cleanup
    # Setup railsignore
    append_railsignore(railsignore_path, "ignored_config.co")

    # Load config
    config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "railsignore_config"))

    config_string = str(config)
    # Assert .railsignore successfully ignores
    assert "ignored_config.co" not in config_string

    # Other files should load successfully
    assert "config_to_load.co" in config_string


def test_get_railsignore_patterns(cleanup):
    """Patterns are read back; comments and blank lines are dropped."""
    railsignore_path = cleanup
    # Empty railsignore
    ignored_files = get_railsignore_patterns(railsignore_path)

    assert "ignored_module.py" not in ignored_files
    assert "ignored_colang.co" not in ignored_files

    # Append files to railsignore
    append_railsignore(railsignore_path, "ignored_module.py")
    append_railsignore(railsignore_path, "ignored_colang.co")

    # Grab ignored files
    ignored_files = get_railsignore_patterns(railsignore_path)

    # Check files exist
    assert "ignored_module.py" in ignored_files
    assert "ignored_colang.co" in ignored_files

    # Append comment and whitespace
    append_railsignore(railsignore_path, "# This_is_a_comment.py")
    append_railsignore(railsignore_path, " ")
    append_railsignore(railsignore_path, "")

    # Grab ignored files
    ignored_files = get_railsignore_patterns(railsignore_path)

    # Comments and whitespace not retrieved
    assert "# This_is_a_comment.py" not in ignored_files
    assert " " not in ignored_files
    assert "" not in ignored_files

    # Assert files still exist
    assert "ignored_module.py" in ignored_files
    assert "ignored_colang.co" in ignored_files


def test_is_ignored_by_railsignore(cleanup):
    """Only file names listed in .railsignore are reported as ignored."""
    railsignore_path = cleanup
    # Append files to railsignore
    append_railsignore(railsignore_path, "ignored_module.py")
    append_railsignore(railsignore_path, "ignored_colang.co")

    # Grab ignored files
    ignored_files = get_railsignore_patterns(railsignore_path)

    # Check if files are ignored
    assert is_ignored_by_railsignore("ignored_module.py", ignored_files)
    assert is_ignored_by_railsignore("ignored_colang.co", ignored_files)
    assert not is_ignored_by_railsignore("not_ignored.py", ignored_files)


def cleanup_railsignore(railsignore_path: Union[str, Path]) -> None:
    """Helper for clearing a railsignore file.

    Opening in "w" mode truncates the file (creating it if needed). I/O
    errors propagate so a broken test setup fails loudly.
    """
    with open(railsignore_path, "w"):
        pass


def append_railsignore(railsignore_path: Union[str, Path], file_name: str) -> None:
    """Helper for appending one entry (plus newline) to a railsignore file.

    I/O errors propagate: an entry that silently failed to be written would
    only show up later as a misleading assertion failure.
    """
    with open(railsignore_path, "a") as f:
        f.write(file_name + "\n")