From e31c391b71aeb5acdac345c97a18fa4e0411bd2b Mon Sep 17 00:00:00 2001 From: Eren-Ajani Tshimanga Date: Tue, 1 Oct 2024 19:36:20 -0700 Subject: [PATCH 1/6] feat: railsignore added for config loading of LLMRails --- .railsignore | 0 nemoguardrails/rails/llm/config.py | 25 +++- nemoguardrails/utils.py | 50 +++++++ .../railsignore_config/config_to_load.co | 6 + .../railsignore_config/ignored_config.co | 7 + tests/test_railsignore.py | 135 ++++++++++++++++++ 6 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 .railsignore create mode 100644 tests/test_configs/railsignore_config/config_to_load.co create mode 100644 tests/test_configs/railsignore_config/ignored_config.co create mode 100644 tests/test_railsignore.py diff --git a/.railsignore b/.railsignore new file mode 100644 index 000000000..e69de29bb diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index c6294cedc..3ff415973 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -14,7 +14,7 @@ # limitations under the License. """Module for the configuration of rails.""" - +import fnmatch import logging import os import warnings @@ -28,6 +28,7 @@ from nemoguardrails.colang.v2_x.lang.colang_ast import Flow from nemoguardrails.colang.v2_x.lang.utils import format_colang_parsing_error_message from nemoguardrails.colang.v2_x.runtime.errors import ColangParsingError +from nemoguardrails.utils import get_railsignore_patterns log = logging.getLogger(__name__) @@ -556,6 +557,12 @@ def _load_path( # Followlinks to traverse symlinks instead of ignoring them. for file in files: + # Verify railsignore to skip loading + ignored_by_railsignore = _is_file_ignored_by_railsignore(file) + + if ignored_by_railsignore: + continue + # This is the raw configuration that will be loaded from the file. _raw_config = {} @@ -1203,3 +1210,19 @@ def _generate_rails_flows(flows): flow_definitions.insert(1, _LIBRARY_IMPORT + _NEWLINE * 2) return flow_definitions + + +def _is_file_ignored_by_railsignore(filename: str) -> bool: + # Default no skip + should_skip_file = False + + # Load candidate patterns from railsignore + candidate_patterns = get_railsignore_patterns() + + # Ignore colang, kb, python modules if specified in valid railsignore glob format + if filename.endswith(".py") or filename.endswith(".co") or filename.endswith(".kb"): + for pattern in candidate_patterns: + if fnmatch.fnmatch(filename, pattern): + should_skip_file = True + + return should_skip_file diff --git a/nemoguardrails/utils.py b/nemoguardrails/utils.py index d1689b452..ea373a94d 100644 --- a/nemoguardrails/utils.py +++ b/nemoguardrails/utils.py @@ -23,6 +23,7 @@ from collections import namedtuple from datetime import datetime, timezone from enum import Enum +from pathlib import Path from typing import Any, Dict, Tuple import yaml @@ -312,3 +313,52 @@ def snake_to_camelcase(name: str) -> str: str: The converted CamelCase string. """ return "".join(n.capitalize() for n in name.split("_")) + + +def get_railsignore_path() -> Path: + """Helper to get railsignore path. + + Returns: + Path: The.railsignore file path. + """ + current_path = Path(__file__).resolve() + + # Navigate to the root directory by going up 4 levels + root_dir = current_path.parents[1] + + file_path = root_dir / ".railsignore" + + return file_path + + +def get_railsignore_patterns() -> set[str]: + """ + Helper to retrieve all specified patterns in railsignore. + Returns: + Set[str]: The set of filenames or glob patterns in railsignore + """ + ignored_patterns = set() + + railsignore_path = get_railsignore_path() + + # File doesn't exist or is empty + if not railsignore_path.exists() or not os.path.getsize(railsignore_path): + return ignored_patterns + + try: + with open(railsignore_path, "r") as f: + railsignore_entries = f.readlines() + + # Remove comments and empty lines, and strip out any extra spaces/newlines + railsignore_entries = [ + line.strip() + for line in railsignore_entries + if line.strip() and not line.startswith("#") + ] + + ignored_patterns.update(railsignore_entries) + return ignored_patterns + + except FileNotFoundError: + print(f"No {railsignore_path} found in the current directory.") + return ignored_patterns diff --git a/tests/test_configs/railsignore_config/config_to_load.co b/tests/test_configs/railsignore_config/config_to_load.co new file mode 100644 index 000000000..c7b27161e --- /dev/null +++ b/tests/test_configs/railsignore_config/config_to_load.co @@ -0,0 +1,6 @@ +define user express greeting + "hey" + "hei" + +define flow + user express greeting diff --git a/tests/test_configs/railsignore_config/ignored_config.co b/tests/test_configs/railsignore_config/ignored_config.co new file mode 100644 index 000000000..827aecb7a --- /dev/null +++ b/tests/test_configs/railsignore_config/ignored_config.co @@ -0,0 +1,7 @@ +define user express greeting + "hi" + "hello" + +define flow + user express greeting + bot express greeting diff --git a/tests/test_railsignore.py b/tests/test_railsignore.py new file mode 100644 index 000000000..5ef1c58ea --- /dev/null +++ b/tests/test_railsignore.py @@ -0,0 +1,135 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import shutil + +import pytest + +from nemoguardrails import RailsConfig +from nemoguardrails.utils import get_railsignore_path, get_railsignore_patterns + +CONFIGS_FOLDER = os.path.join(os.path.dirname(__file__), ".", "test_configs") + + +@pytest.fixture(scope="function") +def cleanup(): + # Copy current rails ignore and prepare for tests + railsignore_path = get_railsignore_path() + + temp_file_path = str(railsignore_path) + "-copy" + + # Copy the original .railsignore to a temporary file + shutil.copy(railsignore_path, temp_file_path) + print(f"Copied {railsignore_path} to {temp_file_path}") + + # Clean railsignore file before + cleanup_railsignore() + + # Yield control to test + yield + + # Clean railsignore file before + cleanup_railsignore() + + # Restore the original .railsignore from the temporary copy + shutil.copy(temp_file_path, railsignore_path) + print(f"Restored {railsignore_path} from {temp_file_path}") + + # Delete the temporary file + if os.path.exists(temp_file_path): + os.remove(temp_file_path) + print(f"Deleted temporary file {temp_file_path}") + + +def test_railsignore_config_loading(cleanup): + # Setup railsignore + append_railsignore("ignored_config.co") + + # Load config + config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "railsignore_config")) + + config_string = str(config) + # Assert .railsignore successfully ignores + assert "ignored_config.co" not in config_string + + # Other files should load successfully + assert "config_to_load.co" in config_string + + +def test_get_railsignore_files(cleanup): + # Empty railsignore + ignored_files = get_railsignore_patterns() + + assert "ignored_module.py" not in ignored_files + assert "ignored_colang.co" not in ignored_files + + # Append files to railsignore + append_railsignore("ignored_module.py") + append_railsignore("ignored_colang.co") + + # Grab ignored files + ignored_files = get_railsignore_patterns() + + # Check files exist + assert "ignored_module.py" in ignored_files + assert "ignored_colang.co" in ignored_files + + # Append comment and whitespace + append_railsignore("# This_is_a_comment.py") + append_railsignore(" ") + append_railsignore("") + + # Grab ignored files + ignored_files = get_railsignore_patterns() + + # Comments and whitespace not retrieved + assert "# This_is_a_comment.py" not in ignored_files + assert " " not in ignored_files + assert "" not in ignored_files + + # Assert files still exist + assert "ignored_module.py" in ignored_files + assert "ignored_colang.co" in ignored_files + + +def cleanup_railsignore(): + """ + Helper for clearing a railsignore file. + """ + railsignore_path = get_railsignore_path() + + try: + with open(railsignore_path, "w") as f: + pass + except OSError as e: + print(f"Error: Unable to create {railsignore_path}. {e}") + else: + print(f"Successfully cleaned up .railsignore: {railsignore_path}") + + +def append_railsignore(file_name: str) -> None: + """ + Helper for appending to a railsignore file. + """ + railsignore_path = get_railsignore_path() + + try: + with open(railsignore_path, "a") as f: + f.write(file_name + "\n") + except FileNotFoundError: + print(f"No {railsignore_path} found in the current directory.") + except OSError as e: + print(f"Error: Failed to write to {railsignore_path}. {e}") From f6eb37c300c85d636c2910d5bcb468fdbb49275f Mon Sep 17 00:00:00 2001 From: Eren-Ajani Tshimanga Date: Wed, 2 Oct 2024 17:24:39 -0700 Subject: [PATCH 2/6] feat: railsignore - review changes --- nemoguardrails/rails/llm/config.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index 3ff415973..1f0135758 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -1213,16 +1213,13 @@ def _generate_rails_flows(flows): def _is_file_ignored_by_railsignore(filename: str) -> bool: - # Default no skip - should_skip_file = False + ignore = False # Load candidate patterns from railsignore candidate_patterns = get_railsignore_patterns() - # Ignore colang, kb, python modules if specified in valid railsignore glob format - if filename.endswith(".py") or filename.endswith(".co") or filename.endswith(".kb"): - for pattern in candidate_patterns: - if fnmatch.fnmatch(filename, pattern): - should_skip_file = True + for pattern in candidate_patterns: + if fnmatch.fnmatch(filename, pattern): + ignore = True - return should_skip_file + return ignore From 16d06c9e30f4d0dbc16397208c3f093a17dd7996 Mon Sep 17 00:00:00 2001 From: prezakhani <13303554+Pouyanpi@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:14:48 +0200 Subject: [PATCH 3/6] feat: integrate .railsignore handling in config loading --- nemoguardrails/rails/llm/config.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index 1f0135758..6fbdbfb08 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -14,7 +14,7 @@ # limitations under the License. """Module for the configuration of rails.""" -import fnmatch + import logging import os import warnings @@ -24,11 +24,11 @@ from pydantic import BaseModel, ValidationError, root_validator from pydantic.fields import Field +from nemoguardrails import utils from nemoguardrails.colang import parse_colang_file, parse_flow_elements from nemoguardrails.colang.v2_x.lang.colang_ast import Flow from nemoguardrails.colang.v2_x.lang.utils import format_colang_parsing_error_message from nemoguardrails.colang.v2_x.runtime.errors import ColangParsingError -from nemoguardrails.utils import get_railsignore_patterns log = logging.getLogger(__name__) @@ -552,13 +552,19 @@ def _load_path( if not os.path.exists(config_path): raise ValueError(f"Could not find config path: {config_path}") + # the first .railsignore file found from cwd down to its subdirectories + railsignore_path = utils.get_railsignore_path(config_path) + ignore_patterns = utils.get_railsignore_patterns(railsignore_path) + if os.path.isdir(config_path): for root, _, files in os.walk(config_path, followlinks=True): # Followlinks to traverse symlinks instead of ignoring them. for file in files: # Verify railsignore to skip loading - ignored_by_railsignore = _is_file_ignored_by_railsignore(file) + ignored_by_railsignore = utils.is_ignored_by_railsignore( + file, ignore_patterns + ) if ignored_by_railsignore: continue @@ -1210,16 +1216,3 @@ def _generate_rails_flows(flows): flow_definitions.insert(1, _LIBRARY_IMPORT + _NEWLINE * 2) return flow_definitions - - -def _is_file_ignored_by_railsignore(filename: str) -> bool: - ignore = False - - # Load candidate patterns from railsignore - candidate_patterns = get_railsignore_patterns() - - for pattern in candidate_patterns: - if fnmatch.fnmatch(filename, pattern): - ignore = True - - return ignore From 9934325da08b4083436f4aa81311fde8e08caa90 Mon Sep 17 00:00:00 2001 From: prezakhani <13303554+Pouyanpi@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:15:37 +0200 Subject: [PATCH 4/6] refactor: improve performance of .railsignore handling fix --- nemoguardrails/utils.py | 47 +++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/nemoguardrails/utils.py b/nemoguardrails/utils.py index ea373a94d..1afef6e42 100644 --- a/nemoguardrails/utils.py +++ b/nemoguardrails/utils.py @@ -14,6 +14,7 @@ # limitations under the License. import asyncio import dataclasses +import fnmatch import importlib.resources as pkg_resources import json import os @@ -24,7 +25,7 @@ from datetime import datetime, timezone from enum import Enum from pathlib import Path -from typing import Any, Dict, Tuple +from typing import Any, Dict, Optional, Set, Tuple import yaml from rich.console import Console @@ -315,23 +316,32 @@ def snake_to_camelcase(name: str) -> str: return "".join(n.capitalize() for n in name.split("_")) -def get_railsignore_path() -> Path: +def get_railsignore_path(path: Optional[str] = None) -> Optional[Path]: """Helper to get railsignore path. + Args: + path (Optional[str]): The starting path to search for the .railsignore file. + Returns: - Path: The.railsignore file path. - """ - current_path = Path(__file__).resolve() + Path: The .railsignore file path, if found. - # Navigate to the root directory by going up 4 levels - root_dir = current_path.parents[1] + Raises: + FileNotFoundError: If the .railsignore file is not found. + """ + current_path = Path(path) if path else Path.cwd() - file_path = root_dir / ".railsignore" + while True: + railsignore_file = current_path / ".railsignore" + if railsignore_file.exists() and railsignore_file.is_file(): + return railsignore_file + if current_path == current_path.parent: + break + current_path = current_path.parent - return file_path + return None -def get_railsignore_patterns() -> set[str]: +def get_railsignore_patterns(railsignore_path) -> Set[str]: """ Helper to retrieve all specified patterns in railsignore. Returns: @@ -339,7 +349,10 @@ def get_railsignore_patterns() -> set[str]: """ ignored_patterns = set() - railsignore_path = get_railsignore_path() + if railsignore_path is None: + return ignored_patterns + + # railsignore_path = get_railsignore_path() # File doesn't exist or is empty if not railsignore_path.exists() or not os.path.getsize(railsignore_path): @@ -362,3 +375,15 @@ def get_railsignore_patterns() -> set[str]: except FileNotFoundError: print(f"No {railsignore_path} found in the current directory.") return ignored_patterns + + +def is_ignored_by_railsignore(filename, ignore_patterns) -> bool: + ignore = False + + # Load candidate patterns from railsignore + + for pattern in ignore_patterns: + if fnmatch.fnmatch(filename, pattern): + ignore = True + + return ignore From a556b0ef40353a7a11a76c090742e71b2ad33887 Mon Sep 17 00:00:00 2001 From: prezakhani <13303554+Pouyanpi@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:16:46 +0200 Subject: [PATCH 5/6] refactor: mock .railsignore path in tests and minor refactoring --- tests/test_railsignore.py | 93 +++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/tests/test_railsignore.py b/tests/test_railsignore.py index 5ef1c58ea..951a30cee 100644 --- a/tests/test_railsignore.py +++ b/tests/test_railsignore.py @@ -15,48 +15,47 @@ import os import shutil +from pathlib import Path +from unittest.mock import patch import pytest from nemoguardrails import RailsConfig -from nemoguardrails.utils import get_railsignore_path, get_railsignore_patterns +from nemoguardrails.utils import get_railsignore_patterns, is_ignored_by_railsignore CONFIGS_FOLDER = os.path.join(os.path.dirname(__file__), ".", "test_configs") @pytest.fixture(scope="function") def cleanup(): - # Copy current rails ignore and prepare for tests - railsignore_path = get_railsignore_path() + # Mock the path to the .railsignore file + with patch( + "nemoguardrails.utils.get_railsignore_path" + ) as mock_get_railsignore_path: + railsignore_path = Path("/tmp/.railsignore") + mock_get_railsignore_path.return_value = railsignore_path - temp_file_path = str(railsignore_path) + "-copy" + # Ensure the mock file exists + railsignore_path.touch() - # Copy the original .railsignore to a temporary file - shutil.copy(railsignore_path, temp_file_path) - print(f"Copied {railsignore_path} to {temp_file_path}") + # Clean railsignore file before + cleanup_railsignore(railsignore_path) - # Clean railsignore file before - cleanup_railsignore() + # Yield control to test + yield railsignore_path - # Yield control to test - yield + # Clean railsignore file after + cleanup_railsignore(railsignore_path) - # Clean railsignore file before - cleanup_railsignore() - - # Restore the original .railsignore from the temporary copy - shutil.copy(temp_file_path, railsignore_path) - print(f"Restored {railsignore_path} from {temp_file_path}") - - # Delete the temporary file - if os.path.exists(temp_file_path): - os.remove(temp_file_path) - print(f"Deleted temporary file {temp_file_path}") + # Remove the mock file + if railsignore_path.exists(): + railsignore_path.unlink() def test_railsignore_config_loading(cleanup): + railsignore_path = cleanup # Setup railsignore - append_railsignore("ignored_config.co") + append_railsignore(railsignore_path, "ignored_config.co") # Load config config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "railsignore_config")) @@ -69,31 +68,32 @@ def test_railsignore_config_loading(cleanup): assert "config_to_load.co" in config_string -def test_get_railsignore_files(cleanup): +def test_get_railsignore_patterns(cleanup): + railsignore_path = cleanup # Empty railsignore - ignored_files = get_railsignore_patterns() + ignored_files = get_railsignore_patterns(railsignore_path) assert "ignored_module.py" not in ignored_files assert "ignored_colang.co" not in ignored_files # Append files to railsignore - append_railsignore("ignored_module.py") - append_railsignore("ignored_colang.co") + append_railsignore(railsignore_path, "ignored_module.py") + append_railsignore(railsignore_path, "ignored_colang.co") # Grab ignored files - ignored_files = get_railsignore_patterns() + ignored_files = get_railsignore_patterns(railsignore_path) # Check files exist assert "ignored_module.py" in ignored_files assert "ignored_colang.co" in ignored_files # Append comment and whitespace - append_railsignore("# This_is_a_comment.py") - append_railsignore(" ") - append_railsignore("") + append_railsignore(railsignore_path, "# This_is_a_comment.py") + append_railsignore(railsignore_path, " ") + append_railsignore(railsignore_path, "") # Grab ignored files - ignored_files = get_railsignore_patterns() + ignored_files = get_railsignore_patterns(railsignore_path) # Comments and whitespace not retrieved assert "# This_is_a_comment.py" not in ignored_files @@ -105,12 +105,23 @@ def test_get_railsignore_files(cleanup): assert "ignored_colang.co" in ignored_files -def cleanup_railsignore(): - """ - Helper for clearing a railsignore file. - """ - railsignore_path = get_railsignore_path() +def test_is_ignored_by_railsignore(cleanup): + railsignore_path = cleanup + # Append files to railsignore + append_railsignore(railsignore_path, "ignored_module.py") + append_railsignore(railsignore_path, "ignored_colang.co") + + # Grab ignored files + ignored_files = get_railsignore_patterns(railsignore_path) + + # Check if files are ignored + assert is_ignored_by_railsignore("ignored_module.py", ignored_files) + assert is_ignored_by_railsignore("ignored_colang.co", ignored_files) + assert not is_ignored_by_railsignore("not_ignored.py", ignored_files) + +def cleanup_railsignore(railsignore_path): + """Helper for clearing a railsignore file.""" try: with open(railsignore_path, "w") as f: pass @@ -120,12 +131,8 @@ def cleanup_railsignore(): print(f"Successfully cleaned up .railsignore: {railsignore_path}") -def append_railsignore(file_name: str) -> None: - """ - Helper for appending to a railsignore file. - """ - railsignore_path = get_railsignore_path() - +def append_railsignore(railsignore_path: str, file_name: str) -> None: + """Helper for appending to a railsignore file.""" try: with open(railsignore_path, "a") as f: f.write(file_name + "\n") From 23a57a7ecf1692637832e533b90e92667922fcd3 Mon Sep 17 00:00:00 2001 From: Eren-Ajani Tshimanga Date: Thu, 17 Oct 2024 17:57:47 -0700 Subject: [PATCH 6/6] feat: railsignore - finalizing review changes --- nemoguardrails/utils.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/nemoguardrails/utils.py b/nemoguardrails/utils.py index 1afef6e42..fda3d43bf 100644 --- a/nemoguardrails/utils.py +++ b/nemoguardrails/utils.py @@ -317,7 +317,7 @@ def snake_to_camelcase(name: str) -> str: def get_railsignore_path(path: Optional[str] = None) -> Optional[Path]: - """Helper to get railsignore path. + """Get railsignore path. Args: path (Optional[str]): The starting path to search for the .railsignore file. @@ -341,9 +341,9 @@ def get_railsignore_path(path: Optional[str] = None) -> Optional[Path]: return None -def get_railsignore_patterns(railsignore_path) -> Set[str]: - """ - Helper to retrieve all specified patterns in railsignore. +def get_railsignore_patterns(railsignore_path: Path) -> Set[str]: + """Retrieve all specified patterns in railsignore. + Returns: Set[str]: The set of filenames or glob patterns in railsignore """ @@ -352,8 +352,6 @@ def get_railsignore_patterns(railsignore_path) -> Set[str]: if railsignore_path is None: return ignored_patterns - # railsignore_path = get_railsignore_path() - # File doesn't exist or is empty if not railsignore_path.exists() or not os.path.getsize(railsignore_path): return ignored_patterns @@ -377,13 +375,14 @@ def get_railsignore_patterns(railsignore_path) -> Set[str]: return ignored_patterns -def is_ignored_by_railsignore(filename, ignore_patterns) -> bool: - ignore = False +def is_ignored_by_railsignore(filename: str, ignore_patterns: str) -> bool: + """Verify if a filename should be ignored by a railsignore pattern""" - # Load candidate patterns from railsignore + ignore = False for pattern in ignore_patterns: if fnmatch.fnmatch(filename, pattern): ignore = True + break return ignore