From 34c54511dbe0b598a73300fcc67b44b73aa55707 Mon Sep 17 00:00:00 2001 From: Dishant Miyani Date: Fri, 19 Sep 2025 01:06:29 +0530 Subject: [PATCH 1/7] Sync www-repositories (#2164) * spelling fixes and tests * sonar and code rabbit suggestions implemented * json chunking and suggestions implemented * code rabbit and sonar qube suggestions * code rabbit suggestions * suggestions implemented * github advance security addressed * tests fixed * fixed tests * Clean up backend/test_commands.py --------- Co-authored-by: Arkadii Yakovets <2201626+arkid15r@users.noreply.github.com> Co-authored-by: Arkadii Yakovets --- backend/apps/ai/Makefile | 8 + backend/apps/ai/common/base/chunk_command.py | 13 +- backend/apps/ai/common/constants.py | 1 + .../apps/ai/common/extractors/repository.py | 176 ++++++ .../commands/ai_update_repository_chunks.py | 41 ++ .../commands/ai_update_repository_context.py | 41 ++ backend/apps/ai/models/chunk.py | 4 +- backend/apps/common/utils.py | 18 + .../ai/common/extractors/repository_test.py | 593 ++++++++++++++++++ backend/tests/apps/ai/common/utils_test.py | 5 +- .../ai_update_repository_chunks_test.py | 123 ++++ .../ai_update_repository_context_test.py | 168 +++++ backend/tests/apps/ai/models/chunk_test.py | 26 +- 13 files changed, 1202 insertions(+), 15 deletions(-) create mode 100644 backend/apps/ai/common/extractors/repository.py create mode 100644 backend/apps/ai/management/commands/ai_update_repository_chunks.py create mode 100644 backend/apps/ai/management/commands/ai_update_repository_context.py create mode 100644 backend/tests/apps/ai/common/extractors/repository_test.py create mode 100644 backend/tests/apps/ai/management/commands/ai_update_repository_chunks_test.py create mode 100644 backend/tests/apps/ai/management/commands/ai_update_repository_context_test.py diff --git a/backend/apps/ai/Makefile b/backend/apps/ai/Makefile index 3243269378..4e793a5393 100644 --- a/backend/apps/ai/Makefile +++ b/backend/apps/ai/Makefile @@ -34,6
+34,14 @@ ai-update-project-context: @echo "Updating project context" @CMD="python manage.py ai_update_project_context" $(MAKE) exec-backend-command +ai-update-repository-chunks: + @echo "Updating repository chunks" + @CMD="python manage.py ai_update_repository_chunks" $(MAKE) exec-backend-command + +ai-update-repository-context: + @echo "Updating repository context" + @CMD="python manage.py ai_update_repository_context" $(MAKE) exec-backend-command + ai-update-slack-message-chunks: @echo "Updating Slack message chunks" @CMD="python manage.py ai_update_slack_message_chunks" $(MAKE) exec-backend-command diff --git a/backend/apps/ai/common/base/chunk_command.py b/backend/apps/ai/common/base/chunk_command.py index 6a29005d12..07098acc0e 100644 --- a/backend/apps/ai/common/base/chunk_command.py +++ b/backend/apps/ai/common/base/chunk_command.py @@ -7,6 +7,7 @@ from apps.ai.common.utils import create_chunks_and_embeddings from apps.ai.models.chunk import Chunk from apps.ai.models.context import Context +from apps.common.utils import is_valid_json class BaseChunkCommand(BaseAICommand): @@ -43,10 +44,14 @@ def process_chunks_batch(self, entities: list[Model]) -> int: count, _ = context.chunks.all().delete() self.stdout.write(f"Deleted {count} stale chunks for {entity_key}") - prose_content, metadata_content = self.extract_content(entity) - full_content = ( - f"{metadata_content}\n\n{prose_content}" if metadata_content else prose_content - ) + content, metadata_content = self.extract_content(entity) + + if is_valid_json(content): + full_content = content + else: + full_content = ( + f"{metadata_content}\n\n{content}" if metadata_content else content + ) if not full_content.strip(): self.stdout.write(f"No content to chunk for {self.entity_name} {entity_key}") diff --git a/backend/apps/ai/common/constants.py b/backend/apps/ai/common/constants.py index cce67fc739..2517a4c85c 100644 --- a/backend/apps/ai/common/constants.py +++ b/backend/apps/ai/common/constants.py @@ -4,4 
+4,5 @@ DEFAULT_CHUNKS_RETRIEVAL_LIMIT = 5 DEFAULT_SIMILARITY_THRESHOLD = 0.4 DELIMITER = "\n\n" +GITHUB_REQUEST_INTERVAL_SECONDS = 0.5 MIN_REQUEST_INTERVAL_SECONDS = 1.2 diff --git a/backend/apps/ai/common/extractors/repository.py b/backend/apps/ai/common/extractors/repository.py new file mode 100644 index 0000000000..b257495782 --- /dev/null +++ b/backend/apps/ai/common/extractors/repository.py @@ -0,0 +1,176 @@ +"""Content extractor for Repository.""" + +import json +import logging +import time + +from apps.ai.common.constants import DELIMITER, GITHUB_REQUEST_INTERVAL_SECONDS +from apps.common.utils import is_valid_json +from apps.github.utils import get_repository_file_content + +logger = logging.getLogger(__name__) + + +def extract_repository_content(repository) -> tuple[str, str]: + """Extract structured content from repository data. + + Args: + repository: Repository instance + + Returns: + tuple[str, str]: (json_content, metadata_content) + + """ + repository_data = {} + + if repository.name: + repository_data["name"] = repository.name + if repository.key: + repository_data["key"] = repository.key + if repository.description: + repository_data["description"] = repository.description + if repository.homepage: + repository_data["homepage"] = repository.homepage + if repository.license: + repository_data["license"] = repository.license + if repository.topics: + repository_data["topics"] = repository.topics + + status = {} + if repository.is_archived: + status["archived"] = True + if repository.is_empty: + status["empty"] = True + if repository.is_owasp_repository: + status["owasp_repository"] = True + if repository.is_owasp_site_repository: + status["owasp_site_repository"] = True + if status: + repository_data["status"] = status + + funding = {} + if repository.is_funding_policy_compliant: + funding["policy_compliant"] = True + if repository.has_funding_yml: + funding["has_funding_yml"] = True + if funding: + repository_data["funding"] = funding + + if 
repository.pages_status: + repository_data["pages_status"] = repository.pages_status + + features = [] + if repository.has_downloads: + features.append("downloads") + if repository.has_issues: + features.append("issues") + if repository.has_pages: + features.append("pages") + if repository.has_projects: + features.append("projects") + if repository.has_wiki: + features.append("wiki") + if features: + repository_data["features"] = features + + stats = {} + if repository.commits_count: + stats["commits"] = repository.commits_count + if repository.contributors_count: + stats["contributors"] = repository.contributors_count + if repository.forks_count: + stats["forks"] = repository.forks_count + if repository.open_issues_count: + stats["open_issues"] = repository.open_issues_count + if repository.stars_count: + stats["stars"] = repository.stars_count + if repository.subscribers_count: + stats["subscribers"] = repository.subscribers_count + if repository.watchers_count: + stats["watchers"] = repository.watchers_count + if stats: + repository_data["statistics"] = stats + + dates = {} + if repository.created_at: + dates["created"] = repository.created_at.strftime("%Y-%m-%d") + if repository.updated_at: + dates["last_updated"] = repository.updated_at.strftime("%Y-%m-%d") + if repository.pushed_at: + dates["last_pushed"] = repository.pushed_at.strftime("%Y-%m-%d") + if dates: + repository_data["dates"] = dates + + ownership = {} + if repository.organization: + ownership["organization"] = repository.organization.login + if repository.owner: + ownership["owner"] = repository.owner.login + if ownership: + repository_data["ownership"] = ownership + + markdown_files = [ + "README.md", + "index.md", + "info.md", + "leaders.md", + ] + + if repository.organization: + owner = repository.organization.login + else: + owner = repository.owner.login if repository.owner else "" + branch = repository.default_branch or "main" + + tab_files = [] + if owner and repository.key: + contents_url 
= ( + f"https://api.github.com/repos/{owner}/{repository.key}/contents/?ref={branch}" + ) + response = get_repository_file_content(contents_url) + if response and is_valid_json(response): + items = json.loads(response) + for item in items: + name = item.get("name", "") + if name.startswith("tab_") and name.endswith(".md"): + tab_files.append(name) + + all_markdown_files = markdown_files + tab_files + + markdown_content = {} + for file_path in all_markdown_files: + try: + if owner and repository.key: + raw_url = ( + f"https://raw.githubusercontent.com/{owner}/{repository.key}/" + f"{branch}/{file_path}" + ) + content = get_repository_file_content(raw_url) + + if content and content.strip(): + markdown_content[file_path] = content + time.sleep(GITHUB_REQUEST_INTERVAL_SECONDS) + + except (ValueError, TypeError, OSError): + logger.debug("Failed to fetch markdown file") + continue + + if markdown_content: + repository_data["markdown_content"] = markdown_content + + json_content = json.dumps(repository_data, indent=2) + + metadata_parts = [] + if repository.name: + metadata_parts.append(f"Repository Name: {repository.name}") + if repository.key: + metadata_parts.append(f"Repository Key: {repository.key}") + if repository.organization: + metadata_parts.append(f"Organization: {repository.organization.login}") + if repository.owner: + metadata_parts.append(f"Owner: {repository.owner.login}") + + return ( + json_content, + DELIMITER.join(filter(None, metadata_parts)), + ) diff --git a/backend/apps/ai/management/commands/ai_update_repository_chunks.py b/backend/apps/ai/management/commands/ai_update_repository_chunks.py new file mode 100644 index 0000000000..ef14c7d311 --- /dev/null +++ b/backend/apps/ai/management/commands/ai_update_repository_chunks.py @@ -0,0 +1,41 @@ +"""A command to create chunks of OWASP repository data for RAG.""" + +from django.db.models import QuerySet + +from apps.ai.common.base.chunk_command import BaseChunkCommand +from 
apps.ai.common.extractors.repository import extract_repository_content +from apps.github.models.repository import Repository + + +class Command(BaseChunkCommand): + key_field_name = "key" + model_class = Repository + + def __init__(self, *args, **kwargs): + """Initialize command for repository.""" + super().__init__(*args, **kwargs) + self.entity_name_plural = "repositories" + + def extract_content(self, entity: Repository) -> tuple[str, str]: + """Extract content from the repository.""" + return extract_repository_content(entity) + + def get_base_queryset(self) -> QuerySet: + """Return the base queryset with filtering for OWASP site repositories.""" + return ( + super() + .get_base_queryset() + .filter( + is_owasp_site_repository=True, + is_archived=False, + is_empty=False, + ) + ) + + def get_default_queryset(self) -> QuerySet: + """Override to avoid is_active filter since Repository doesn't have that field.""" + return self.get_base_queryset() + + def source_name(self) -> str: + """Return the source name for context creation.""" + return "owasp_repository" diff --git a/backend/apps/ai/management/commands/ai_update_repository_context.py b/backend/apps/ai/management/commands/ai_update_repository_context.py new file mode 100644 index 0000000000..0fc4bbab34 --- /dev/null +++ b/backend/apps/ai/management/commands/ai_update_repository_context.py @@ -0,0 +1,41 @@ +"""A command to update context for OWASP repository data.""" + +from django.db.models import QuerySet + +from apps.ai.common.base.context_command import BaseContextCommand +from apps.ai.common.extractors.repository import extract_repository_content +from apps.github.models.repository import Repository + + +class Command(BaseContextCommand): + key_field_name = "key" + model_class = Repository + + def __init__(self, *args, **kwargs): + """Initialize command for repository.""" + super().__init__(*args, **kwargs) + self.entity_name_plural = "repositories" + + def extract_content(self, entity: Repository) -> 
tuple[str, str]: + """Extract content from the repository.""" + return extract_repository_content(entity) + + def get_base_queryset(self) -> QuerySet: + """Return the base queryset with filtering for OWASP site repositories.""" + return ( + super() + .get_base_queryset() + .filter( + is_owasp_site_repository=True, + is_archived=False, + is_empty=False, + ) + ) + + def get_default_queryset(self) -> QuerySet: + """Override to avoid is_active filter since Repository doesn't have that field.""" + return self.get_base_queryset() + + def source_name(self) -> str: + """Return the source name for context creation.""" + return "owasp_repository" diff --git a/backend/apps/ai/models/chunk.py b/backend/apps/ai/models/chunk.py index 8dfcaf0022..210e63ef66 100644 --- a/backend/apps/ai/models/chunk.py +++ b/backend/apps/ai/models/chunk.py @@ -35,8 +35,8 @@ def bulk_save(chunks, fields=None): def split_text(text: str) -> list[str]: """Split text into chunks.""" return RecursiveCharacterTextSplitter( - chunk_size=300, - chunk_overlap=40, + chunk_size=500, + chunk_overlap=80, length_function=len, separators=["\n\n", "\n", " ", ""], ).split_text(text) diff --git a/backend/apps/common/utils.py b/backend/apps/common/utils.py index d5f6a9e49c..4b561670fc 100644 --- a/backend/apps/common/utils.py +++ b/backend/apps/common/utils.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import re from datetime import UTC, datetime from urllib.parse import urlparse @@ -102,6 +103,23 @@ def get_user_ip_address(request) -> str: return x_forwarded_for.split(",")[0] if x_forwarded_for else request.META.get("REMOTE_ADDR") +def is_valid_json(content: str) -> bool: + """Check if content is JSON format. 
+ + Args: + content: The content to check + + Returns: + bool: True if content is valid JSON, False otherwise + + """ + try: + json.loads(content) + except (TypeError, ValueError): + return False + return True + + def join_values(fields: list, delimiter: str = " ") -> str: """Join non-empty field values using a specified delimiter. diff --git a/backend/tests/apps/ai/common/extractors/repository_test.py b/backend/tests/apps/ai/common/extractors/repository_test.py new file mode 100644 index 0000000000..62afe0f3f8 --- /dev/null +++ b/backend/tests/apps/ai/common/extractors/repository_test.py @@ -0,0 +1,593 @@ +"""Tests for repository content extractor.""" + +import json +from datetime import UTC, datetime +from unittest.mock import MagicMock, patch + +from apps.ai.common.constants import DELIMITER +from apps.ai.common.extractors.repository import ( + extract_repository_content, +) + + +def create_mock_repository(**kwargs): + """Create a properly configured mock repository.""" + repository = MagicMock() + repository.name = kwargs.get("name") + repository.key = kwargs.get("key") + repository.description = kwargs.get("description") + repository.homepage = kwargs.get("homepage") + repository.license = kwargs.get("license") + repository.topics = kwargs.get("topics", []) + repository.is_archived = kwargs.get("is_archived", False) + repository.is_empty = kwargs.get("is_empty", False) + repository.is_owasp_repository = kwargs.get("is_owasp_repository", False) + repository.is_owasp_site_repository = kwargs.get("is_owasp_site_repository", False) + repository.is_funding_policy_compliant = kwargs.get("is_funding_policy_compliant", False) + repository.has_funding_yml = kwargs.get("has_funding_yml", False) + repository.funding_yml = kwargs.get("funding_yml", {}) + repository.pages_status = kwargs.get("pages_status") + repository.has_downloads = kwargs.get("has_downloads", False) + repository.has_issues = kwargs.get("has_issues", False) + repository.has_pages = 
kwargs.get("has_pages", False) + repository.has_projects = kwargs.get("has_projects", False) + repository.has_wiki = kwargs.get("has_wiki", False) + repository.commits_count = kwargs.get("commits_count") + repository.contributors_count = kwargs.get("contributors_count") + repository.forks_count = kwargs.get("forks_count") + repository.open_issues_count = kwargs.get("open_issues_count") + repository.stars_count = kwargs.get("stars_count") + repository.subscribers_count = kwargs.get("subscribers_count") + repository.watchers_count = kwargs.get("watchers_count") + repository.created_at = kwargs.get("created_at") + repository.updated_at = kwargs.get("updated_at") + repository.pushed_at = kwargs.get("pushed_at") + repository.track_issues = kwargs.get("track_issues", False) + repository.default_branch = kwargs.get("default_branch") + repository.organization = kwargs.get("organization") + repository.owner = kwargs.get("owner") + return repository + + +class TestRepositoryContentExtractor: + """Test cases for repository content extraction.""" + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_content_full_data(self, mock_get_content): + """Test extraction with complete repository data.""" + mock_get_content.return_value = "[]" + + organization = MagicMock() + organization.login = "test-org" + + owner = MagicMock() + owner.login = "test-user" + + repository = create_mock_repository( + name="test-repo", + key="test-repo-key", + description="A test repository for testing purposes", + homepage="https://test-repo.example.com", + license="MIT", + topics=["security", "testing", "python"], + is_owasp_repository=True, + is_owasp_site_repository=True, + is_funding_policy_compliant=True, + has_funding_yml=True, + funding_yml={"github": "owasp"}, + pages_status="enabled", + has_downloads=True, + has_issues=True, + has_pages=True, + has_projects=True, + has_wiki=True, + commits_count=1500, + contributors_count=25, + 
forks_count=100, + open_issues_count=15, + stars_count=500, + subscribers_count=50, + watchers_count=75, + created_at=datetime(2020, 1, 15, tzinfo=UTC), + updated_at=datetime(2024, 6, 10, tzinfo=UTC), + pushed_at=datetime(2024, 6, 9, tzinfo=UTC), + track_issues=True, + default_branch="main", + organization=organization, + owner=owner, + ) + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["name"] == "test-repo" + assert data["key"] == "test-repo-key" + assert data["description"] == "A test repository for testing purposes" + assert data["homepage"] == "https://test-repo.example.com" + assert data["license"] == "MIT" + assert data["topics"] == ["security", "testing", "python"] + assert data["status"]["owasp_repository"] + assert data["status"]["owasp_site_repository"] + assert data["funding"]["policy_compliant"] + assert data["funding"]["has_funding_yml"] + assert data["pages_status"] == "enabled" + assert data["features"] == ["downloads", "issues", "pages", "projects", "wiki"] + assert data["statistics"]["commits"] == 1500 + assert data["statistics"]["contributors"] == 25 + assert data["statistics"]["forks"] == 100 + assert data["statistics"]["open_issues"] == 15 + assert data["statistics"]["stars"] == 500 + assert data["statistics"]["subscribers"] == 50 + assert data["statistics"]["watchers"] == 75 + assert data["dates"]["created"] == "2020-01-15" + assert data["dates"]["last_updated"] == "2024-06-10" + assert data["dates"]["last_pushed"] == "2024-06-09" + assert data["ownership"]["organization"] == "test-org" + assert data["ownership"]["owner"] == "test-user" + + assert "Repository Name: test-repo" in metadata + assert "Repository Key: test-repo-key" in metadata + assert "Organization: test-org" in metadata + assert "Owner: test-user" in metadata + + def test_extract_repository_content_minimal_data(self): + """Test extraction with minimal repository data.""" + repository = 
create_mock_repository(name="minimal-repo", key="minimal-repo-key") + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["name"] == "minimal-repo" + assert data["key"] == "minimal-repo-key" + + assert "Repository Name: minimal-repo" in metadata + assert "Repository Key: minimal-repo-key" in metadata + + def test_extract_repository_content_archived_repository(self): + """Test extraction with archived repository.""" + repository = create_mock_repository( + name="archived-repo", + key="archived-repo-key", + description="This repository is archived", + is_archived=True, + is_owasp_repository=True, + ) + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["name"] == "archived-repo" + assert data["key"] == "archived-repo-key" + assert data["description"] == "This repository is archived" + assert data["status"]["archived"] + assert data["status"]["owasp_repository"] + + assert "Repository Name: archived-repo" in metadata + assert "Repository Key: archived-repo-key" in metadata + + def test_extract_repository_content_empty_repository(self): + """Test extraction with empty repository.""" + repository = create_mock_repository( + name="empty-repo", + key="empty-repo-key", + description="This repository is empty", + is_empty=True, + ) + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["name"] == "empty-repo" + assert data["key"] == "empty-repo-key" + assert data["description"] == "This repository is empty" + assert data["status"]["empty"] + + assert "Repository Name: empty-repo" in metadata + assert "Repository Key: empty-repo-key" in metadata + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_content_with_organization_only(self, mock_get_content): + """Test extraction when repository has organization but no owner.""" + 
mock_get_content.return_value = "[]" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="org-repo", key="org-repo-key", organization=organization + ) + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["name"] == "org-repo" + assert data["key"] == "org-repo-key" + assert data["ownership"]["organization"] == "test-org" + assert "owner" not in data["ownership"] + + assert "Repository Name: org-repo" in metadata + assert "Repository Key: org-repo-key" in metadata + assert "Organization: test-org" in metadata + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_content_with_owner_only(self, mock_get_content): + """Test extraction when repository has owner but no organization.""" + mock_get_content.return_value = "[]" + owner = MagicMock() + owner.login = "test-user" + + repository = create_mock_repository(name="user-repo", key="user-repo-key", owner=owner) + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["name"] == "user-repo" + assert data["key"] == "user-repo-key" + assert data["ownership"]["owner"] == "test-user" + assert "organization" not in data["ownership"] + + assert "Repository Name: user-repo" in metadata + assert "Repository Key: user-repo-key" in metadata + assert "Owner: test-user" in metadata + + def test_extract_repository_content_delimiter_usage(self): + """Test that DELIMITER is used correctly between content parts.""" + repository = create_mock_repository( + name="delimiter-test", + key="delimiter-test-key", + description="First description", + homepage="https://example.com", + license="MIT", + ) + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["name"] == "delimiter-test" + assert data["key"] == "delimiter-test-key" + assert 
data["description"] == "First description" + assert data["homepage"] == "https://example.com" + assert data["license"] == "MIT" + + expected_metadata = ( + f"Repository Name: delimiter-test{DELIMITER}Repository Key: delimiter-test-key" + ) + assert metadata == expected_metadata + + +class TestRepositoryMarkdownContentExtractor: + """Test cases for repository markdown content extraction.""" + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_with_description(self, mock_get_content): + """Test extraction with repository description.""" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository with markdown content", + default_branch="main", + organization=organization, + ) + + mock_get_content.return_value = "" + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository with markdown content" + assert data["ownership"]["organization"] == "test-org" + + assert "Repository Name: test-repo" in metadata + assert "Repository Key: test-repo" in metadata + assert "Organization: test-org" in metadata + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_with_readme(self, mock_get_content): + """Test extraction with README.md file.""" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository", + default_branch="main", + organization=organization, + ) + + mock_get_content.return_value = "# Test Repository\n\nThis is a test repository." 
+ + json_content, _ = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["ownership"]["organization"] == "test-org" + assert "markdown_content" in data + assert "README.md" in data["markdown_content"] + assert ( + data["markdown_content"]["README.md"] + == "# Test Repository\n\nThis is a test repository." + ) + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_with_owner_fallback(self, mock_get_content): + """Test extraction when organization is None, falls back to owner.""" + owner = MagicMock() + owner.login = "test-user" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository", + default_branch="main", + owner=owner, + ) + + mock_get_content.return_value = "# Test Repository\n\nThis is a test repository." + + json_content, _ = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["ownership"]["owner"] == "test-user" + assert "markdown_content" in data + assert "README.md" in data["markdown_content"] + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_with_default_branch_fallback( + self, mock_get_content + ): + """Test extraction when default_branch is None, falls back to 'main'.""" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository", + organization=organization, + ) + + mock_get_content.return_value = "# Test Repository\n\nThis is a test repository." 
+ + json_content, _ = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["ownership"]["organization"] == "test-org" + assert "markdown_content" in data + assert "README.md" in data["markdown_content"] + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_multiple_files(self, mock_get_content): + """Test extraction with multiple markdown files.""" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository", + default_branch="main", + organization=organization, + ) + + def mock_content_side_effect(url): + if "README.md" in url: + return "# README Content" + if "index.md" in url: + return "# Index Content" + if "info.md" in url: + return "# Info Content" + if "leaders.md" in url: + return "# Leaders Content" + return "" + + mock_get_content.side_effect = mock_content_side_effect + + json_content, _ = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["ownership"]["organization"] == "test-org" + assert "markdown_content" in data + assert "README.md" in data["markdown_content"] + assert data["markdown_content"]["README.md"] == "# README Content" + assert "index.md" in data["markdown_content"] + assert data["markdown_content"]["index.md"] == "# Index Content" + assert "info.md" in data["markdown_content"] + assert data["markdown_content"]["info.md"] == "# Info Content" + assert "leaders.md" in data["markdown_content"] + assert data["markdown_content"]["leaders.md"] == "# Leaders Content" + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_file_fetch_exception(self, mock_get_content): + """Test extraction when file fetching raises an 
exception.""" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository", + default_branch="main", + organization=organization, + ) + + problematic_url = "https://raw.githubusercontent.com/test-org/test-repo/main/README.md" + network_error_message = "Network error" + + def side_effect(url): + if url == problematic_url: + raise ValueError(network_error_message) + return "" + + mock_get_content.side_effect = side_effect + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["ownership"]["organization"] == "test-org" + assert "markdown_content" not in data + + assert "Repository Name: test-repo" in metadata + assert "Repository Key: test-repo" in metadata + assert "Organization: test-org" in metadata + mock_get_content.assert_any_call(problematic_url) + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_empty_file_content(self, mock_get_content): + """Test extraction when file content is empty or whitespace.""" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository", + default_branch="main", + organization=organization, + ) + + mock_get_content.return_value = " \n\n " + + json_content, _ = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["ownership"]["organization"] == "test-org" + assert "markdown_content" not in data + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_no_description(self, mock_get_content): + """Test extraction when repository has no description.""" + organization = 
MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description=None, + default_branch="main", + organization=organization, + ) + + mock_get_content.return_value = "# Test Repository\n\nThis is a test repository." + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["name"] == "test-repo" + assert data["key"] == "test-repo" + assert data["ownership"]["organization"] == "test-org" + assert "markdown_content" in data + assert "README.md" in data["markdown_content"] + + assert "Repository Name: test-repo" in metadata + assert "Repository Key: test-repo" in metadata + assert "Organization: test-org" in metadata + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_url_construction(self, mock_get_content): + """Test that URLs are constructed correctly for file fetching.""" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository", + default_branch="develop", + organization=organization, + ) + + mock_get_content.return_value = "# Test Content" + + extract_repository_content(repository) + + mock_get_content.assert_called() + assert any( + "https://raw.githubusercontent.com/test-org/test-repo/develop/" in str(call) + for call in mock_get_content.call_args_list + ) + + def test_extract_repository_markdown_content_no_owner_or_org(self): + """Test extraction when repository has neither organization nor owner.""" + repository = create_mock_repository( + name="test-repo", key="test-repo", description="Test repository", default_branch="main" + ) + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["name"] == "test-repo" + assert data["key"] 
== "test-repo" + + assert "Repository Name: test-repo" in metadata + assert "Repository Key: test-repo" in metadata + + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_no_key(self, mock_get_content): + """Test extraction when repository has no key.""" + mock_get_content.return_value = "[]" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key=None, + description="Test repository", + default_branch="main", + organization=organization, + ) + + json_content, metadata = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["name"] == "test-repo" + assert data["ownership"]["organization"] == "test-org" + + assert "Repository Name: test-repo" in metadata + assert "Organization: test-org" in metadata + + @patch("apps.ai.common.extractors.repository.logger") + @patch("apps.ai.common.extractors.repository.get_repository_file_content") + def test_extract_repository_markdown_content_logs_debug_on_exception( + self, mock_get_content, mock_logger + ): + """Test that debug logging occurs when file fetching fails.""" + organization = MagicMock() + organization.login = "test-org" + + repository = create_mock_repository( + name="test-repo", + key="test-repo", + description="Test repository", + default_branch="main", + organization=organization, + ) + + error_trigger_url = "https://raw.githubusercontent.com/test-org/test-repo/main/README.md" + error_message = "Test exception" + + def side_effect(url): + if url == error_trigger_url: + raise ValueError(error_message) + return "[]" + + mock_get_content.side_effect = side_effect + + json_content, _ = extract_repository_content(repository) + + data = json.loads(json_content) + assert data["description"] == "Test repository" + assert data["ownership"]["organization"] == "test-org" + + 
mock_logger.debug.assert_called_once() + debug_call_args = mock_logger.debug.call_args[0][0] + assert "Failed to fetch markdown file" in debug_call_args diff --git a/backend/tests/apps/ai/common/utils_test.py b/backend/tests/apps/ai/common/utils_test.py index 2d18684f44..64b7eef7d7 100644 --- a/backend/tests/apps/ai/common/utils_test.py +++ b/backend/tests/apps/ai/common/utils_test.py @@ -94,7 +94,8 @@ def test_create_chunks_and_embeddings_api_error(self, mock_logger): assert result == [] @patch("apps.ai.common.utils.logger") - def test_create_chunks_and_embeddings_none_context(self, mock_logger): + @patch("apps.ai.common.utils.Chunk.update_data") + def test_create_chunks_and_embeddings_none_context(self, mock_update_data, mock_logger): """Tests the failure path when context is None.""" mock_openai_client = MagicMock() @@ -102,6 +103,8 @@ def test_create_chunks_and_embeddings_none_context(self, mock_logger): mock_response.data = [MagicMock(embedding=[0.1, 0.2, 0.3])] mock_openai_client.embeddings.create.return_value = mock_response + mock_update_data.side_effect = AttributeError("Context is required") + result = create_chunks_and_embeddings( chunk_texts=["some text"], context=None, diff --git a/backend/tests/apps/ai/management/commands/ai_update_repository_chunks_test.py b/backend/tests/apps/ai/management/commands/ai_update_repository_chunks_test.py new file mode 100644 index 0000000000..477af48ab5 --- /dev/null +++ b/backend/tests/apps/ai/management/commands/ai_update_repository_chunks_test.py @@ -0,0 +1,123 @@ +from unittest.mock import Mock, patch + +import pytest + +from apps.ai.common.base.chunk_command import BaseChunkCommand +from apps.ai.management.commands.ai_update_repository_chunks import Command +from apps.github.models.repository import Repository + + +@pytest.fixture +def command(): + return Command() + + +@pytest.fixture +def mock_repository(): + repository = Mock() + repository.id = 1 + repository.key = "test-repo" + 
repository.is_owasp_site_repository = True + repository.is_archived = False + repository.is_empty = False + return repository + + +class TestAiUpdateRepositoryChunksCommand: + def test_command_inheritance(self, command): + """Test that the command inherits from BaseChunkCommand.""" + assert isinstance(command, BaseChunkCommand) + + def test_command_help_text(self, command): + """Test that the command has the correct help text.""" + assert command.help() == "Create or update chunks for OWASP repository data" + + def test_model_class_property(self, command): + """Test the model_class property returns Repository.""" + assert command.model_class == Repository + + def test_entity_name_property(self, command): + """Test the entity_name property.""" + assert command.entity_name == "repository" + + def test_entity_name_plural_property(self, command): + """Test the entity_name_plural property.""" + assert command.entity_name_plural == "repositories" + + def test_key_field_name_property(self, command): + """Test the key_field_name property.""" + assert command.key_field_name == "key" + + def test_source_name(self, command): + """Test the source_name method returns the correct value.""" + assert command.source_name() == "owasp_repository" + + def test_extract_content(self, command, mock_repository): + """Test the extract_content method.""" + with patch( + "apps.ai.management.commands.ai_update_repository_chunks.extract_repository_content" + ) as mock_extract: + mock_extract.return_value = ("json content", "metadata content") + content = command.extract_content(mock_repository) + assert content == ("json content", "metadata content") + mock_extract.assert_called_once_with(mock_repository) + + def test_get_base_queryset(self, command): + """Test the get_base_queryset method applies correct filters.""" + with patch.object(command.__class__.__bases__[0], "get_base_queryset") as mock_super: + mock_queryset = Mock() + mock_super.return_value = mock_queryset + 
mock_queryset.filter.return_value = "filtered_queryset" + + result = command.get_base_queryset() + + mock_super.assert_called_once() + mock_queryset.filter.assert_called_once_with( + is_owasp_site_repository=True, + is_archived=False, + is_empty=False, + ) + assert result == "filtered_queryset" + + def test_get_default_queryset(self, command): + """Test the get_default_queryset method returns base queryset.""" + with patch.object(command, "get_base_queryset") as mock_base: + mock_base.return_value = "base_queryset" + + result = command.get_default_queryset() + + mock_base.assert_called_once() + assert result == "base_queryset" + + def test_get_default_queryset_avoids_is_active_filter(self, command): + """Test that get_default_queryset doesn't apply is_active filter like the base class.""" + with patch.object(command, "get_base_queryset") as mock_base: + mock_base.return_value = "base_queryset" + + result = command.get_default_queryset() + + assert result == "base_queryset" + mock_base.assert_called_once() + + def test_queryset_filtering_logic(self, command): + """Test that the queryset filtering logic works correctly.""" + with patch.object(command.__class__.__bases__[0], "get_base_queryset") as mock_super: + mock_queryset = Mock() + mock_super.return_value = mock_queryset + mock_queryset.filter.return_value = "filtered_queryset" + + result = command.get_base_queryset() + + mock_queryset.filter.assert_called_once_with( + is_owasp_site_repository=True, + is_archived=False, + is_empty=False, + ) + assert result == "filtered_queryset" + + def test_command_initialization(self, command): + """Test that the command initializes correctly.""" + assert command.model_class is not None + assert command.key_field_name == "key" + assert command.entity_name == "repository" + assert command.entity_name_plural == "repositories" diff --git a/backend/tests/apps/ai/management/commands/ai_update_repository_context_test.py 
b/backend/tests/apps/ai/management/commands/ai_update_repository_context_test.py new file mode 100644 index 0000000000..c8474a46bb --- /dev/null +++ b/backend/tests/apps/ai/management/commands/ai_update_repository_context_test.py @@ -0,0 +1,168 @@ +from unittest.mock import Mock, patch + +import pytest + +from apps.ai.common.base.context_command import BaseContextCommand +from apps.ai.management.commands.ai_update_repository_context import Command +from apps.github.models.repository import Repository + + +@pytest.fixture +def command(): + return Command() + + +@pytest.fixture +def mock_repository(): + repository = Mock() + repository.id = 1 + repository.key = "test-repo" + repository.is_owasp_site_repository = True + repository.is_archived = False + repository.is_empty = False + return repository + + +class TestAiUpdateRepositoryContextCommand: + def test_command_inheritance(self, command): + """Test that the command inherits from BaseContextCommand.""" + assert isinstance(command, BaseContextCommand) + + def test_command_help_text(self, command): + """Test that the command has the correct help text.""" + assert command.help() == "Update context for OWASP repository data" + + def test_model_class_property(self, command): + """Test the model_class property returns Repository.""" + assert command.model_class == Repository + + def test_entity_name_property(self, command): + """Test the entity_name property.""" + assert command.entity_name == "repository" + + def test_entity_name_plural_property(self, command): + """Test the entity_name_plural property.""" + assert command.entity_name_plural == "repositories" + + def test_key_field_name_property(self, command): + """Test the key_field_name property.""" + assert command.key_field_name == "key" + + def test_source_name(self, command): + """Test the source_name method returns the correct value.""" + assert command.source_name() == "owasp_repository" + + def test_extract_content(self, command, mock_repository): + """Test 
the extract_content method.""" + with patch( + "apps.ai.management.commands.ai_update_repository_context.extract_repository_content" + ) as mock_extract: + mock_extract.return_value = ("json content", "metadata content") + content = command.extract_content(mock_repository) + assert content == ("json content", "metadata content") + mock_extract.assert_called_once_with(mock_repository) + + def test_get_base_queryset(self, command): + """Test that the get_base_queryset method applies correct filters.""" + with patch.object(command.__class__.__bases__[0], "get_base_queryset") as mock_super: + mock_queryset = Mock() + mock_super.return_value = mock_queryset + mock_queryset.filter.return_value = "filtered_queryset" + + result = command.get_base_queryset() + + mock_super.assert_called_once() + mock_queryset.filter.assert_called_once_with( + is_owasp_site_repository=True, + is_archived=False, + is_empty=False, + ) + assert result == "filtered_queryset" + + def test_get_default_queryset(self, command): + """Test the get_default_queryset method returns base queryset.""" + with patch.object(command, "get_base_queryset") as mock_base: + mock_base.return_value = "base_queryset" + + result = command.get_default_queryset() + + mock_base.assert_called_once() + assert result == "base_queryset" + + def test_get_default_queryset_avoids_is_active_filter(self, command): + """Test that get_default_queryset doesn't apply is_active filter like the base class.""" + with patch.object(command, "get_base_queryset") as mock_base: + mock_base.return_value = "base_queryset" + + result = command.get_default_queryset() + + assert result == "base_queryset" + mock_base.assert_called_once() + + def test_queryset_filtering_logic(self, command): + """Test that the queryset filtering logic works correctly.""" + with patch.object(command.__class__.__bases__[0], "get_base_queryset") as mock_super: + mock_queryset = Mock() + mock_super.return_value = mock_queryset + mock_queryset.filter.return_value = 
"filtered_queryset" + + result = command.get_base_queryset() + + mock_queryset.filter.assert_called_once_with( + is_owasp_site_repository=True, + is_archived=False, + is_empty=False, + ) + assert result == "filtered_queryset" + + def test_command_initialization(self, command): + """Test that the command initializes correctly.""" + assert command.model_class is not None + assert command.key_field_name == "key" + assert command.entity_name == "repository" + assert command.entity_name_plural == "repositories" + + def test_queryset_filters_owasp_site_repositories(self, command): + """Test that the queryset only includes OWASP site repositories.""" + with patch.object(command.__class__.__bases__[0], "get_base_queryset") as mock_super: + mock_queryset = Mock() + mock_super.return_value = mock_queryset + mock_queryset.filter.return_value = "filtered_queryset" + + command.get_base_queryset() + + mock_queryset.filter.assert_called_once_with( + is_owasp_site_repository=True, + is_archived=False, + is_empty=False, + ) + + def test_queryset_excludes_archived_repositories(self, command): + """Test that the queryset excludes archived repositories.""" + with patch.object(command.__class__.__bases__[0], "get_base_queryset") as mock_super: + mock_queryset = Mock() + mock_super.return_value = mock_queryset + mock_queryset.filter.return_value = "filtered_queryset" + + command.get_base_queryset() + + mock_queryset.filter.assert_called_once_with( + is_owasp_site_repository=True, + is_archived=False, + is_empty=False, + ) + + def test_queryset_excludes_empty_repositories(self, command): + """Test that the queryset excludes empty repositories.""" + with patch.object(command.__class__.__bases__[0], "get_base_queryset") as mock_super: + mock_queryset = Mock() + mock_super.return_value = mock_queryset + mock_queryset.filter.return_value = "filtered_queryset" + + command.get_base_queryset() + + mock_queryset.filter.assert_called_once_with( + is_owasp_site_repository=True, + 
is_archived=False, + is_empty=False, + ) diff --git a/backend/tests/apps/ai/models/chunk_test.py b/backend/tests/apps/ai/models/chunk_test.py index d3c1fc61c9..efff29ad21 100644 --- a/backend/tests/apps/ai/models/chunk_test.py +++ b/backend/tests/apps/ai/models/chunk_test.py @@ -29,19 +29,29 @@ def test_str_method(self): assert "This is a test chunk with some content that" in result def test_bulk_save_with_chunks(self): - mock_chunks = [Mock(), Mock(), Mock()] + mock_chunks = [Mock(spec=Chunk), Mock(spec=Chunk), Mock(spec=Chunk)] + for chunk in mock_chunks: + chunk.context = Mock() + chunk.text = "test text" - with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: - Chunk.bulk_save(mock_chunks) - mock_bulk_save.assert_called_once_with(Chunk, mock_chunks, fields=None) + with patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_filter: + mock_filter.return_value.exists.return_value = False + with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: + Chunk.bulk_save(mock_chunks) + mock_bulk_save.assert_called_once_with(Chunk, mock_chunks, fields=None) def test_bulk_save_with_fields_parameter(self): - mock_chunks = [Mock(), Mock()] + mock_chunks = [Mock(spec=Chunk), Mock(spec=Chunk)] + for chunk in mock_chunks: + chunk.context = Mock() + chunk.text = "test text" fields = ["text", "embedding"] - with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: - Chunk.bulk_save(mock_chunks, fields=fields) - mock_bulk_save.assert_called_once_with(Chunk, mock_chunks, fields=fields) + with patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_filter: + mock_filter.return_value.exists.return_value = False + with patch("apps.common.models.BulkSaveModel.bulk_save") as mock_bulk_save: + Chunk.bulk_save(mock_chunks, fields=fields) + mock_bulk_save.assert_called_once_with(Chunk, mock_chunks, fields=fields) def test_split_text(self): text = "This is a long text that should be split into multiple chunks. 
" * 10 From db6a6f2aee5f364ab91cc9681b8beb0bde6f96cd Mon Sep 17 00:00:00 2001 From: Arkadii Yakovets Date: Fri, 19 Sep 2025 17:43:10 -0700 Subject: [PATCH 2/7] Consolidate code commits --- backend/apps/ai/common/constants.py | 1 + backend/apps/slack/MANIFEST.yaml | 9 + backend/apps/slack/admin/conversation.py | 1 + backend/apps/slack/apps.py | 7 + backend/apps/slack/commands/__init__.py | 1 + backend/apps/slack/commands/ai.py | 23 ++ backend/apps/slack/common/handlers/ai.py | 61 ++++ .../apps/slack/common/question_detector.py | 141 ++++++++ backend/apps/slack/constants.py | 58 ++++ backend/apps/slack/events/__init__.py | 25 +- backend/apps/slack/events/app_mention.py | 43 +++ backend/apps/slack/events/message_posted.py | 73 +++++ ...versation_is_nest_bot_assistant_enabled.py | 17 + backend/apps/slack/models/conversation.py | 3 + backend/apps/slack/services/__init__.py | 1 + .../apps/slack/services/message_auto_reply.py | 53 +++ .../apps/slack/templates/commands/ai.jinja | 12 + backend/poetry.lock | 126 ++++++- backend/pyproject.toml | 1 + backend/settings/base.py | 11 + backend/settings/urls.py | 1 + backend/tests/apps/slack/commands/ai_test.py | 186 +++++++++++ .../apps/slack/common/handlers/ai_test.py | 142 ++++++++ .../slack/common/question_detector_test.py | 215 ++++++++++++ .../apps/slack/events/message_posted_test.py | 309 ++++++++++++++++++ backend/tests/apps/slack/services/__init__.py | 0 .../slack/services/message_auto_reply_test.py | 205 ++++++++++++ cspell/custom-dict.txt | 2 + docker-compose/local.yaml | 29 ++ 29 files changed, 1748 insertions(+), 8 deletions(-) create mode 100644 backend/apps/slack/commands/ai.py create mode 100644 backend/apps/slack/common/handlers/ai.py create mode 100644 backend/apps/slack/common/question_detector.py create mode 100644 backend/apps/slack/events/app_mention.py create mode 100644 backend/apps/slack/events/message_posted.py create mode 100644 
backend/apps/slack/migrations/0019_conversation_is_nest_bot_assistant_enabled.py create mode 100644 backend/apps/slack/services/__init__.py create mode 100644 backend/apps/slack/services/message_auto_reply.py create mode 100644 backend/apps/slack/templates/commands/ai.jinja create mode 100644 backend/tests/apps/slack/commands/ai_test.py create mode 100644 backend/tests/apps/slack/common/handlers/ai_test.py create mode 100644 backend/tests/apps/slack/common/question_detector_test.py create mode 100644 backend/tests/apps/slack/events/message_posted_test.py create mode 100644 backend/tests/apps/slack/services/__init__.py create mode 100644 backend/tests/apps/slack/services/message_auto_reply_test.py diff --git a/backend/apps/ai/common/constants.py b/backend/apps/ai/common/constants.py index 2517a4c85c..27ddd0082c 100644 --- a/backend/apps/ai/common/constants.py +++ b/backend/apps/ai/common/constants.py @@ -6,3 +6,4 @@ DELIMITER = "\n\n" GITHUB_REQUEST_INTERVAL_SECONDS = 0.5 MIN_REQUEST_INTERVAL_SECONDS = 1.2 +QUEUE_RESPONSE_TIME_MINUTES = 1 diff --git a/backend/apps/slack/MANIFEST.yaml b/backend/apps/slack/MANIFEST.yaml index 05a75c86eb..d6617e94b3 100644 --- a/backend/apps/slack/MANIFEST.yaml +++ b/backend/apps/slack/MANIFEST.yaml @@ -95,6 +95,11 @@ features: description: OWASP users list usage_hint: should_escape: false + - command: /ai + url: https://nest.owasp.org/integrations/slack/commands/ + description: AI-powered OWASP Nest assistant + usage_hint: + should_escape: false oauth_config: scopes: user: @@ -103,6 +108,7 @@ oauth_config: - mpim:read - users:read bot: + - app_mentions:read - channels:read - chat:write - commands @@ -115,6 +121,7 @@ oauth_config: - users:read - groups:write - channels:manage + - channels:history settings: event_subscriptions: request_url: https://nest.owasp.org/integrations/slack/events/ @@ -123,7 +130,9 @@ settings: - team_join bot_events: - app_home_opened + - app_mention - member_joined_channel + - message.channels - team_join 
interactivity: is_enabled: true diff --git a/backend/apps/slack/admin/conversation.py b/backend/apps/slack/admin/conversation.py index 2e0d946147..935cb05b9d 100644 --- a/backend/apps/slack/admin/conversation.py +++ b/backend/apps/slack/admin/conversation.py @@ -27,6 +27,7 @@ class ConversationAdmin(admin.ModelAdmin): "is_private", "is_archived", "is_general", + "is_nest_bot_assistant_enabled", ) }, ), diff --git a/backend/apps/slack/apps.py b/backend/apps/slack/apps.py index be6e6e5ba2..422b72ba25 100644 --- a/backend/apps/slack/apps.py +++ b/backend/apps/slack/apps.py @@ -25,6 +25,13 @@ class SlackConfig(AppConfig): else None ) + def ready(self): + """Configure Slack events when the app is ready.""" + super().ready() + from apps.slack.events import configure_slack_events + + configure_slack_events() + if SlackConfig.app: diff --git a/backend/apps/slack/commands/__init__.py b/backend/apps/slack/commands/__init__.py index 46592a7c4b..e2c5154a6a 100644 --- a/backend/apps/slack/commands/__init__.py +++ b/backend/apps/slack/commands/__init__.py @@ -2,6 +2,7 @@ from apps.slack.commands.command import CommandBase from . import ( + ai, board, chapters, committees, diff --git a/backend/apps/slack/commands/ai.py b/backend/apps/slack/commands/ai.py new file mode 100644 index 0000000000..255b84f498 --- /dev/null +++ b/backend/apps/slack/commands/ai.py @@ -0,0 +1,23 @@ +"""Slack bot AI command.""" + +from apps.slack.commands.command import CommandBase + + +class Ai(CommandBase): + """Slack bot /ai command.""" + + def render_blocks(self, command: dict): + """Get the rendered blocks. + + Args: + command (dict): The Slack command payload. + + Returns: + list: A list of Slack blocks representing the AI response. 
+ + """ + from apps.slack.common.handlers.ai import get_blocks + + return get_blocks( + query=command["text"].strip(), + ) diff --git a/backend/apps/slack/common/handlers/ai.py b/backend/apps/slack/common/handlers/ai.py new file mode 100644 index 0000000000..ef0452e7b8 --- /dev/null +++ b/backend/apps/slack/common/handlers/ai.py @@ -0,0 +1,61 @@ +"""Handler for AI-powered Slack functionality.""" + +from __future__ import annotations + +import logging + +from apps.ai.agent.tools.rag.rag_tool import RagTool +from apps.slack.blocks import markdown + +logger = logging.getLogger(__name__) + + +def get_blocks(query: str) -> list[dict]: + """Get AI response blocks. + + Args: + query (str): The user's question. + presentation (EntityPresentation | None): Configuration for entity presentation. + + Returns: + list: A list of Slack blocks representing the AI response. + + """ + ai_response = process_ai_query(query.strip()) + + if ai_response: + return [markdown(ai_response)] + return get_error_blocks() + + +def process_ai_query(query: str) -> str | None: + """Process the AI query using the RAG tool. + + Args: + query (str): The user's question. + + Returns: + str | None: The AI response or None if error occurred. + + """ + rag_tool = RagTool( + chat_model="gpt-4o", + embedding_model="text-embedding-3-small", + ) + + return rag_tool.query(question=query) + + +def get_error_blocks() -> list[dict]: + """Get error response blocks. + + Returns: + list: A list of Slack blocks with error message. + + """ + return [ + markdown( + "⚠️ Unfortunately, I'm unable to answer your question at this time.\n" + "Please try again later or contact support if the issue persists." 
+ ) + ] diff --git a/backend/apps/slack/common/question_detector.py b/backend/apps/slack/common/question_detector.py new file mode 100644 index 0000000000..17d13c787d --- /dev/null +++ b/backend/apps/slack/common/question_detector.py @@ -0,0 +1,141 @@ +"""Question detection utilities for Slack OWASP bot.""" + +from __future__ import annotations + +import logging +import os +import re + +import openai + +from apps.slack.constants import OWASP_KEYWORDS + +logger = logging.getLogger(__name__) + + +class QuestionDetector: + """Utility class for detecting OWASP-related questions.""" + + MAX_TOKENS = 50 + TEMPERATURE = 0.1 + CHAT_MODEL = "gpt-4o" + + SYSTEM_PROMPT = """ + You are an expert in cybersecurity and OWASP (Open Web Application Security Project). + Your task is to determine if a given question is related to OWASP, cybersecurity, + web application security, or similar topics. + + Key OWASP-related terms: {keywords} + + Respond with only "YES" if the question is related to OWASP/cybersecurity, + or "NO" if it's not. + Do not provide any explanation or additional text. + """ + + def __init__(self): + """Initialize the question detector. + + Raises: + ValueError: If the OpenAI API key is not set. + + """ + if not (openai_api_key := os.getenv("DJANGO_OPEN_AI_SECRET_KEY")): + error_msg = "DJANGO_OPEN_AI_SECRET_KEY environment variable not set" + raise ValueError(error_msg) + + self.owasp_keywords = OWASP_KEYWORDS + self.openai_client = openai.OpenAI(api_key=openai_api_key) + + question_patterns = [ + r"\?", + r"^(what|how|why|when|where|which|who|can|could|would|should|is|are|does|do|did)", + r"(help|explain|tell me|show me|guide|tutorial|example)", + r"(recommend|suggest|advice|opinion)", + ] + + self.compiled_patterns = [ + re.compile(pattern, re.IGNORECASE) for pattern in question_patterns + ] + + def is_owasp_question(self, text: str) -> bool: + """Check if the input text is an OWASP-related question. 
+ + This is the main public method that orchestrates the detection logic. + """ + if not text or not text.strip(): + return False + + if not self.is_question(text): + return False + + openai_result = self.is_owasp_question_with_openai(text) + + if openai_result is None: + logger.warning( + "OpenAI detection failed. Falling back to keyword matching", + ) + return self.contains_owasp_keywords(text) + + if openai_result: + return True + if self.contains_owasp_keywords(text): + logger.info( + "OpenAI classified as non-OWASP, but keywords were detected. Overriding to TRUE." + ) + return True + return False + + def is_question(self, text: str) -> bool: + """Check if text appears to be a question.""" + return any(pattern.search(text) for pattern in self.compiled_patterns) + + def is_owasp_question_with_openai(self, text: str) -> bool | None: + """Determine if the text is an OWASP-related question. + + Returns: + - True: If the model responds with "YES". + - False: If the model responds with "NO". + - None: If the API call fails or the response is unexpected. 
+ + """ + system_prompt = self.SYSTEM_PROMPT.format(keywords=", ".join(self.owasp_keywords)) + user_prompt = f'Question: "{text}"' + + try: + response = self.openai_client.chat.completions.create( + model=self.CHAT_MODEL, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=self.TEMPERATURE, + max_tokens=self.MAX_TOKENS, + ) + except openai.OpenAIError: + logger.exception("OpenAI API error during question detection") + return None + else: + answer = response.choices[0].message.content + if not answer: + logger.error("OpenAI returned an empty response") + return None + + clean_answer = answer.strip().upper() + + if "YES" in clean_answer: + return True + if "NO" in clean_answer: + return False + logger.warning("Unexpected OpenAI response") + return None + + def contains_owasp_keywords(self, text: str) -> bool: + """Check if text contains OWASP-related keywords.""" + words = re.findall(r"\b\w+\b", text) + text_words = set(words) + + intersection = self.owasp_keywords.intersection(text_words) + if intersection: + return True + + return any(" " in keyword and keyword in text for keyword in self.owasp_keywords) diff --git a/backend/apps/slack/constants.py b/backend/apps/slack/constants.py index d4a53f22f4..ef9bb7b9bb 100644 --- a/backend/apps/slack/constants.py +++ b/backend/apps/slack/constants.py @@ -22,6 +22,64 @@ OWASP_SPONSORSHIP_CHANNEL_ID = "#C08EGFDD9L2" OWASP_THREAT_MODELING_CHANNEL_ID = "#C1CS3C6AF" +OWASP_KEYWORDS = { + "api security", + "appsec", + "application security", + "assessment", + "authentication", + "authorization", + "cheat sheet series", + "chapter", + "code review", + "committee", + "cryptography", + "csrf", + "defectdojo", + "dependency", + "devops", + "devsecops", + "dynamic analysis", + "encryption", + "event", + "firewall", + "injection", + "juice shop", + "mobile security", + "nest", + "nettacker", + "owasp", + "penetration", + "project", + "rasp", + "red team", + "risk", 
+ "sbom", + "secure", + "secure coding", + "security", + "security best practice", + "security bug", + "security fix", + "security framework", + "security guideline", + "security patch", + "security policy", + "security standard", + "security testing", + "security tools", + "static analysis", + "threat", + "threat modeling", + "top 10", + "top10", + "vulnerabilities", + "vulnerability", + "web security", + "webgoat", + "xss", +} + OWASP_WORKSPACE_ID = "T04T40NHX" VIEW_PROJECTS_ACTION = "view_projects_action" diff --git a/backend/apps/slack/events/__init__.py b/backend/apps/slack/events/__init__.py index faf30995a1..f5fb7e015f 100644 --- a/backend/apps/slack/events/__init__.py +++ b/backend/apps/slack/events/__init__.py @@ -1,7 +1,20 @@ -from apps.slack.apps import SlackConfig -from apps.slack.events import app_home_opened, team_join, url_verification -from apps.slack.events.event import EventBase -from apps.slack.events.member_joined_channel import catch_all, contribute, gsoc, project_nest +def configure_slack_events(): + """Configure Slack events after Django apps are ready.""" + from apps.slack.apps import SlackConfig + from apps.slack.events import ( + app_home_opened, + app_mention, + message_posted, + team_join, + url_verification, + ) + from apps.slack.events.event import EventBase + from apps.slack.events.member_joined_channel import ( + catch_all, + contribute, + gsoc, + project_nest, + ) -if SlackConfig.app: - EventBase.configure_events() + if SlackConfig.app: + EventBase.configure_events() diff --git a/backend/apps/slack/events/app_mention.py b/backend/apps/slack/events/app_mention.py new file mode 100644 index 0000000000..aeb33243e9 --- /dev/null +++ b/backend/apps/slack/events/app_mention.py @@ -0,0 +1,43 @@ +"""Slack app mention event handler.""" + +import logging + +from apps.slack.common.handlers.ai import get_blocks +from apps.slack.events.event import EventBase + +logger = logging.getLogger(__name__) + + +class AppMention(EventBase): + """Handles 
app mention events when the bot is mentioned in a channel.""" + + event_type = "app_mention" + + def handle_event(self, event, client): + """Handle an incoming app mention event.""" + channel_id = event.get("channel") + text = event.get("text", "") + + query = text + for mention in event.get("blocks", []): + if mention.get("type") == "rich_text": + for element in mention.get("elements", []): + if element.get("type") == "rich_text_section": + for text_element in element.get("elements", []): + if text_element.get("type") == "text": + query = text_element.get("text", "").strip() + break + + if not query: + logger.warning("No query found in app mention") + return + + logger.info("Handling app mention") + + reply_blocks = get_blocks(query=query) + client.chat_postMessage( + channel=channel_id, + blocks=reply_blocks, + text=query, + thread_ts=event.get("thread_ts") or event.get("ts"), + ) diff --git a/backend/apps/slack/events/message_posted.py b/backend/apps/slack/events/message_posted.py new file mode 100644 index 0000000000..5b38c3077b --- /dev/null +++ b/backend/apps/slack/events/message_posted.py @@ -0,0 +1,73 @@ +"""Slack message event template.""" + +import logging +from datetime import timedelta + +import django_rq + +from apps.ai.common.constants import QUEUE_RESPONSE_TIME_MINUTES +from apps.slack.common.question_detector import QuestionDetector +from apps.slack.events.event import EventBase +from apps.slack.models import Conversation, Member, Message +from apps.slack.services.message_auto_reply import generate_ai_reply_if_unanswered + +logger = logging.getLogger(__name__) + + +class MessagePosted(EventBase): + """Handles new messages posted in channels.""" + + event_type = "message" + + def __init__(self): + """Initialize MessagePosted event handler.""" + self.question_detector = QuestionDetector() + + def handle_event(self, event, client): + """Handle an incoming message event.""" + if event.get("subtype") or event.get("bot_id"): + logger.info("Ignored message 
due to subtype, bot_id, or thread_ts.") + return + + if event.get("thread_ts"): + try: + Message.objects.filter( + slack_message_id=event.get("thread_ts"), + conversation__slack_channel_id=event.get("channel"), + ).update(has_replies=True) + except Message.DoesNotExist: + logger.warning("Thread message not found.") + return + + channel_id = event.get("channel") + user_id = event.get("user") + text = event.get("text", "") + + try: + conversation = Conversation.objects.get( + slack_channel_id=channel_id, + is_nest_bot_assistant_enabled=True, + ) + except Conversation.DoesNotExist: + logger.warning("Conversation not found or assistant not enabled.") + return + + if not self.question_detector.is_owasp_question(text): + return + + try: + author = Member.objects.get(slack_user_id=user_id, workspace=conversation.workspace) + except Member.DoesNotExist: + user_info = client.users_info(user=user_id) + author = Member.update_data(user_info["user"], conversation.workspace, save=True) + logger.info("Created new member") + + message = Message.update_data( + data=event, conversation=conversation, author=author, save=True + ) + + django_rq.get_queue("ai").enqueue_in( + timedelta(minutes=QUEUE_RESPONSE_TIME_MINUTES), + generate_ai_reply_if_unanswered, + message.id, + ) diff --git a/backend/apps/slack/migrations/0019_conversation_is_nest_bot_assistant_enabled.py b/backend/apps/slack/migrations/0019_conversation_is_nest_bot_assistant_enabled.py new file mode 100644 index 0000000000..597856c6ea --- /dev/null +++ b/backend/apps/slack/migrations/0019_conversation_is_nest_bot_assistant_enabled.py @@ -0,0 +1,17 @@ +# Generated by Django 5.2.5 on 2025-08-19 10:31 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("slack", "0018_conversation_sync_messages"), + ] + + operations = [ + migrations.AddField( + model_name="conversation", + name="is_nest_bot_assistant_enabled", + field=models.BooleanField(default=False, verbose_name="Is 
Nest Bot Assistant Enabled"), + ), + ] diff --git a/backend/apps/slack/models/conversation.py b/backend/apps/slack/models/conversation.py index e58c6b2fba..9735786c24 100644 --- a/backend/apps/slack/models/conversation.py +++ b/backend/apps/slack/models/conversation.py @@ -27,6 +27,9 @@ class Meta: is_group = models.BooleanField(verbose_name="Is group", default=False) is_im = models.BooleanField(verbose_name="Is IM", default=False) is_mpim = models.BooleanField(verbose_name="Is MPIM", default=False) + is_nest_bot_assistant_enabled = models.BooleanField( + verbose_name="Is Nest Bot Assistant Enabled", default=False + ) is_private = models.BooleanField(verbose_name="Is private", default=False) is_shared = models.BooleanField(verbose_name="Is shared", default=False) name = models.CharField(verbose_name="Name", max_length=100, default="") diff --git a/backend/apps/slack/services/__init__.py b/backend/apps/slack/services/__init__.py new file mode 100644 index 0000000000..4920d87173 --- /dev/null +++ b/backend/apps/slack/services/__init__.py @@ -0,0 +1 @@ +"""Slack services package.""" diff --git a/backend/apps/slack/services/message_auto_reply.py b/backend/apps/slack/services/message_auto_reply.py new file mode 100644 index 0000000000..f390969ca9 --- /dev/null +++ b/backend/apps/slack/services/message_auto_reply.py @@ -0,0 +1,53 @@ +"""Slack service tasks for background processing.""" + +import logging + +from django_rq import job +from slack_sdk.errors import SlackApiError + +from apps.slack.apps import SlackConfig +from apps.slack.common.handlers.ai import get_blocks, process_ai_query +from apps.slack.models import Message + +logger = logging.getLogger(__name__) + + +@job("ai") +def generate_ai_reply_if_unanswered(message_id: int): + """Check if a message is still unanswered and generate AI reply.""" + try: + message = Message.objects.get(pk=message_id) + except Message.DoesNotExist: + return + + if not message.conversation.is_nest_bot_assistant_enabled: + return + + 
if not SlackConfig.app: + logger.warning("Slack app is not configured") + return + + client = SlackConfig.app.client + + try: + result = client.conversations_replies( + channel=message.conversation.slack_channel_id, + ts=message.slack_message_id, + limit=1, + ) + if result.get("messages") and result["messages"][0].get("reply_count", 0) > 0: + return + + except SlackApiError: + logger.exception("Error checking for replies for message") + + ai_response_text = process_ai_query(query=message.text) + if not ai_response_text: + return + + client.chat_postMessage( + channel=message.conversation.slack_channel_id, + blocks=get_blocks(ai_response_text), + text=ai_response_text, + thread_ts=message.slack_message_id, + ) diff --git a/backend/apps/slack/templates/commands/ai.jinja b/backend/apps/slack/templates/commands/ai.jinja new file mode 100644 index 0000000000..e71e62a33f --- /dev/null +++ b/backend/apps/slack/templates/commands/ai.jinja @@ -0,0 +1,12 @@ +*Ask OWASP AI Assistant* + +Use this command to ask questions about OWASP projects, OWASP chapters, and community information using the AI-powered OWASP knowledge base.
+ +*Examples:* +• `{{ COMMAND }} What are the OWASP Top 10 vulnerabilities?` +• `{{ COMMAND }} How do I contribute to an OWASP project?` +• `{{ COMMAND }} When is the next OWASP appsec days event?` + +{{ DIVIDER }} + +{{ FEEDBACK_SHARING_INVITE }} diff --git a/backend/poetry.lock b/backend/poetry.lock index 9f5317225e..17f7daeafd 100644 --- a/backend/poetry.lock +++ b/backend/poetry.lock @@ -505,7 +505,7 @@ version = "8.3.0" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.10" -groups = ["dev"] +groups = ["main", "dev"] files = [ {file = "click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc"}, {file = "click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4"}, @@ -628,6 +628,22 @@ files = [ [package.extras] toml = ["tomli ; python_full_version <= \"3.11.0a6\""] +[[package]] +name = "croniter" +version = "6.0.0" +description = "croniter provides iteration for datetime object with cron like format" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.6" +groups = ["main"] +files = [ + {file = "croniter-6.0.0-py2.py3-none-any.whl", hash = "sha256:2f878c3856f17896979b2a4379ba1f09c83e374931ea15cc835c5dd2eee9b368"}, + {file = "croniter-6.0.0.tar.gz", hash = "sha256:37c504b313956114a983ece2c2b07790b1f1094fe9d81cc94739214748255577"}, +] + +[package.dependencies] +python-dateutil = "*" +pytz = ">2021.1" + [[package]] name = "cryptography" version = "46.0.1" @@ -861,6 +877,27 @@ redis = ">=4.0.2" [package.extras] hiredis = ["redis[hiredis] (>=4.0.2)"] +[[package]] +name = "django-rq" +version = "3.1" +description = "An app that provides django integration for RQ (Redis Queue)" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "django_rq-3.1-py3-none-any.whl", hash = "sha256:9c8a725aa3f43251a5571ec51d7b65a01613358574d01a5101861480963e59b7"}, + {file = 
"django_rq-3.1.tar.gz", hash = "sha256:8d7b9137b85b8df18b1cdf06244eb71b39f43ad020c0a0c7d49723f8940074ae"}, +] + +[package.dependencies] +django = ">=3.2" +redis = ">=3.5" +rq = ">=2" + +[package.extras] +prometheus = ["prometheus-client (>=0.4.0)"] +sentry = ["sentry-sdk (>=1.0.0)"] + [[package]] name = "django-storages" version = "1.14.6" @@ -3151,6 +3188,18 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "pytz" +version = "2025.2" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -3702,6 +3751,23 @@ files = [ {file = "rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8"}, ] +[[package]] +name = "rq" +version = "2.6.0" +description = "RQ is a simple, lightweight, library for creating background jobs, and processing them." 
+optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "rq-2.6.0-py3-none-any.whl", hash = "sha256:be5ccc0f0fc5f32da0999648340e31476368f08067f0c3fce6768d00064edbb5"}, + {file = "rq-2.6.0.tar.gz", hash = "sha256:92ad55676cda14512c4eea5782f398a102dc3af108bea197c868c4c50c5d3e81"}, +] + +[package.dependencies] +click = ">=5" +croniter = "*" +redis = ">=3.5,<6 || >6" + [[package]] name = "ruff" version = "0.13.1" @@ -3870,6 +3936,62 @@ optional = false python-versions = ">=3.7" groups = ["main"] files = [ + {file = "SQLAlchemy-2.0.43-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:21ba7a08a4253c5825d1db389d4299f64a100ef9800e4624c8bf70d8f136e6ed"}, + {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11b9503fa6f8721bef9b8567730f664c5a5153d25e247aadc69247c4bc605227"}, + {file = "SQLAlchemy-2.0.43-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07097c0a1886c150ef2adba2ff7437e84d40c0f7dcb44a2c2b9c905ccfc6361c"}, + {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:cdeff998cb294896a34e5b2f00e383e7c5c4ef3b4bfa375d9104723f15186443"}, + {file = "SQLAlchemy-2.0.43-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:bcf0724a62a5670e5718957e05c56ec2d6850267ea859f8ad2481838f889b42c"}, + {file = "SQLAlchemy-2.0.43-cp37-cp37m-win32.whl", hash = "sha256:c697575d0e2b0a5f0433f679bda22f63873821d991e95a90e9e52aae517b2e32"}, + {file = "SQLAlchemy-2.0.43-cp37-cp37m-win_amd64.whl", hash = "sha256:d34c0f6dbefd2e816e8f341d0df7d4763d382e3f452423e752ffd1e213da2512"}, + {file = "sqlalchemy-2.0.43-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:70322986c0c699dca241418fcf18e637a4369e0ec50540a2b907b184c8bca069"}, + {file = "sqlalchemy-2.0.43-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:87accdbba88f33efa7b592dc2e8b2a9c2cdbca73db2f9d5c510790428c09c154"}, + {file = "sqlalchemy-2.0.43-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash 
= "sha256:c00e7845d2f692ebfc7d5e4ec1a3fd87698e4337d09e58d6749a16aedfdf8612"}, + {file = "sqlalchemy-2.0.43-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:022e436a1cb39b13756cf93b48ecce7aa95382b9cfacceb80a7d263129dfd019"}, + {file = "sqlalchemy-2.0.43-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c5e73ba0d76eefc82ec0219d2301cb33bfe5205ed7a2602523111e2e56ccbd20"}, + {file = "sqlalchemy-2.0.43-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9c2e02f06c68092b875d5cbe4824238ab93a7fa35d9c38052c033f7ca45daa18"}, + {file = "sqlalchemy-2.0.43-cp310-cp310-win32.whl", hash = "sha256:e7a903b5b45b0d9fa03ac6a331e1c1d6b7e0ab41c63b6217b3d10357b83c8b00"}, + {file = "sqlalchemy-2.0.43-cp310-cp310-win_amd64.whl", hash = "sha256:4bf0edb24c128b7be0c61cd17eef432e4bef507013292415f3fb7023f02b7d4b"}, + {file = "sqlalchemy-2.0.43-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:52d9b73b8fb3e9da34c2b31e6d99d60f5f99fd8c1225c9dad24aeb74a91e1d29"}, + {file = "sqlalchemy-2.0.43-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f42f23e152e4545157fa367b2435a1ace7571cab016ca26038867eb7df2c3631"}, + {file = "sqlalchemy-2.0.43-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fb1a8c5438e0c5ea51afe9c6564f951525795cf432bed0c028c1cb081276685"}, + {file = "sqlalchemy-2.0.43-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db691fa174e8f7036afefe3061bc40ac2b770718be2862bfb03aabae09051aca"}, + {file = "sqlalchemy-2.0.43-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fe2b3b4927d0bc03d02ad883f402d5de201dbc8894ac87d2e981e7d87430e60d"}, + {file = "sqlalchemy-2.0.43-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4d3d9b904ad4a6b175a2de0738248822f5ac410f52c2fd389ada0b5262d6a1e3"}, + {file = "sqlalchemy-2.0.43-cp311-cp311-win32.whl", hash = "sha256:5cda6b51faff2639296e276591808c1726c4a77929cfaa0f514f30a5f6156921"}, + {file = "sqlalchemy-2.0.43-cp311-cp311-win_amd64.whl", hash = 
"sha256:c5d1730b25d9a07727d20ad74bc1039bbbb0a6ca24e6769861c1aa5bf2c4c4a8"}, + {file = "sqlalchemy-2.0.43-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:20d81fc2736509d7a2bd33292e489b056cbae543661bb7de7ce9f1c0cd6e7f24"}, + {file = "sqlalchemy-2.0.43-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b9fc27650ff5a2c9d490c13c14906b918b0de1f8fcbb4c992712d8caf40e83"}, + {file = "sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6772e3ca8a43a65a37c88e2f3e2adfd511b0b1da37ef11ed78dea16aeae85bd9"}, + {file = "sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a113da919c25f7f641ffbd07fbc9077abd4b3b75097c888ab818f962707eb48"}, + {file = "sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4286a1139f14b7d70141c67a8ae1582fc2b69105f1b09d9573494eb4bb4b2687"}, + {file = "sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:529064085be2f4d8a6e5fab12d36ad44f1909a18848fcfbdb59cc6d4bbe48efe"}, + {file = "sqlalchemy-2.0.43-cp312-cp312-win32.whl", hash = "sha256:b535d35dea8bbb8195e7e2b40059e2253acb2b7579b73c1b432a35363694641d"}, + {file = "sqlalchemy-2.0.43-cp312-cp312-win_amd64.whl", hash = "sha256:1c6d85327ca688dbae7e2b06d7d84cfe4f3fffa5b5f9e21bb6ce9d0e1a0e0e0a"}, + {file = "sqlalchemy-2.0.43-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e7c08f57f75a2bb62d7ee80a89686a5e5669f199235c6d1dac75cd59374091c3"}, + {file = "sqlalchemy-2.0.43-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:14111d22c29efad445cd5021a70a8b42f7d9152d8ba7f73304c4d82460946aaa"}, + {file = "sqlalchemy-2.0.43-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21b27b56eb2f82653168cefe6cb8e970cdaf4f3a6cb2c5e3c3c1cf3158968ff9"}, + {file = "sqlalchemy-2.0.43-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c5a9da957c56e43d72126a3f5845603da00e0293720b03bde0aacffcf2dc04f"}, + {file = 
"sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d79f9fdc9584ec83d1b3c75e9f4595c49017f5594fee1a2217117647225d738"}, + {file = "sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9df7126fd9db49e3a5a3999442cc67e9ee8971f3cb9644250107d7296cb2a164"}, + {file = "sqlalchemy-2.0.43-cp313-cp313-win32.whl", hash = "sha256:7f1ac7828857fcedb0361b48b9ac4821469f7694089d15550bbcf9ab22564a1d"}, + {file = "sqlalchemy-2.0.43-cp313-cp313-win_amd64.whl", hash = "sha256:971ba928fcde01869361f504fcff3b7143b47d30de188b11c6357c0505824197"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4e6aeb2e0932f32950cf56a8b4813cb15ff792fc0c9b3752eaf067cfe298496a"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:61f964a05356f4bca4112e6334ed7c208174511bd56e6b8fc86dad4d024d4185"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46293c39252f93ea0910aababa8752ad628bcce3a10d3f260648dd472256983f"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:136063a68644eca9339d02e6693932116f6a8591ac013b0014479a1de664e40a"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:6e2bf13d9256398d037fef09fd8bf9b0bf77876e22647d10761d35593b9ac547"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:44337823462291f17f994d64282a71c51d738fc9ef561bf265f1d0fd9116a782"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-win32.whl", hash = "sha256:13194276e69bb2af56198fef7909d48fd34820de01d9c92711a5fa45497cc7ed"}, + {file = "sqlalchemy-2.0.43-cp38-cp38-win_amd64.whl", hash = "sha256:334f41fa28de9f9be4b78445e68530da3c5fa054c907176460c81494f4ae1f5e"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ceb5c832cc30663aeaf5e39657712f4c4241ad1f638d487ef7216258f6d41fe7"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:11f43c39b4b2ec755573952bbcc58d976779d482f6f832d7f33a8d869ae891bf"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:413391b2239db55be14fa4223034d7e13325a1812c8396ecd4f2c08696d5ccad"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c379e37b08c6c527181a397212346be39319fb64323741d23e46abd97a400d34"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:03d73ab2a37d9e40dec4984d1813d7878e01dbdc742448d44a7341b7a9f408c7"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8cee08f15d9e238ede42e9bbc1d6e7158d0ca4f176e4eab21f88ac819ae3bd7b"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-win32.whl", hash = "sha256:b3edaec7e8b6dc5cd94523c6df4f294014df67097c8217a89929c99975811414"}, + {file = "sqlalchemy-2.0.43-cp39-cp39-win_amd64.whl", hash = "sha256:227119ce0a89e762ecd882dc661e0aa677a690c914e358f0dd8932a2e8b2765b"}, + {file = "sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc"}, {file = "sqlalchemy-2.0.43.tar.gz", hash = "sha256:788bfcef6787a7764169cfe9859fe425bf44559619e1d9f56f5bddf2ebf6f417"}, ] @@ -4375,4 +4497,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt [metadata] lock-version = "2.1" python-versions = "^3.13" -content-hash = "a4b850be6ab60ebb96142cc7c7b824b79d032d09c1742732ecae3927f2ab4cf0" +content-hash = "33d6f057dd78aad1af4527c94708698fe670d4dfdbd956940db2d8dacb876ccd" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 9b86730684..8fe58c32a7 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -19,6 +19,7 @@ django-configurations = "^2.5.1" django-cors-headers = "^4.7.0" django-ninja = "^1.4.3" django-redis = "^6.0.0" +django-rq = "^3.1" django-storages = { extras = [ "s3" ], version = "^1.14.4" } emoji = "^2.14.1" geopy = "^2.4.1" diff --git 
a/backend/settings/base.py b/backend/settings/base.py index 22b76ea900..5ec949ef70 100644 --- a/backend/settings/base.py +++ b/backend/settings/base.py @@ -46,6 +46,7 @@ class Base(Configuration): THIRD_PARTY_APPS = ( "algoliasearch_django", "corsheaders", + "django_rq", "ninja", "storages", ) @@ -141,6 +142,16 @@ class Base(Configuration): } } + RQ_QUEUES = { + "ai": { + "HOST": REDIS_HOST, + "PORT": 6379, + "PASSWORD": REDIS_PASSWORD, + "DB": 1, + "DEFAULT_TIMEOUT": 360, + } + } + # Database # https://docs.djangoproject.com/en/5.1/ref/settings/#databases DATABASES = { diff --git a/backend/settings/urls.py b/backend/settings/urls.py index 9f2da5090e..4c7b11352f 100644 --- a/backend/settings/urls.py +++ b/backend/settings/urls.py @@ -28,6 +28,7 @@ path("owasp/", include(owasp_urls)), path("status/", get_status), path("", include("apps.sitemap.urls")), + path("django-rq/", include("django_rq.urls")), ] if SlackConfig.app: diff --git a/backend/tests/apps/slack/commands/ai_test.py b/backend/tests/apps/slack/commands/ai_test.py new file mode 100644 index 0000000000..e7e5af1b0f --- /dev/null +++ b/backend/tests/apps/slack/commands/ai_test.py @@ -0,0 +1,186 @@ +"""Tests for AI command functionality.""" + +from unittest.mock import patch + +import pytest + +from apps.slack.commands.ai import Ai + + +class TestAiCommand: + """Test cases for AI command functionality.""" + + @pytest.fixture(autouse=True) + def setup_method(self): + """Set up test data before each test method.""" + self.ai_command = Ai() + + @patch("apps.slack.common.handlers.ai.get_blocks") + def test_render_blocks_success(self, mock_get_blocks): + """Test successful rendering of AI response blocks.""" + command = { + "text": "What is OWASP?", + "user_id": "U123456", + "channel_id": "C123456", + } + expected_blocks = [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "OWASP is a security organization...", + }, + } + ] + mock_get_blocks.return_value = expected_blocks + + ai_command = Ai() + 
result = ai_command.render_blocks(command) + + mock_get_blocks.assert_called_once_with(query="What is OWASP?") + assert result == expected_blocks + + @patch("apps.slack.common.handlers.ai.get_blocks") + def test_render_blocks_with_whitespace(self, mock_get_blocks): + """Test rendering blocks with text that has whitespace.""" + command = { + "text": " What is OWASP security? ", + "user_id": "U123456", + "channel_id": "C123456", + } + expected_blocks = [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "OWASP is a security organization...", + }, + } + ] + mock_get_blocks.return_value = expected_blocks + + ai_command = Ai() + result = ai_command.render_blocks(command) + + mock_get_blocks.assert_called_once_with(query="What is OWASP security?") + assert result == expected_blocks + + @patch("apps.slack.common.handlers.ai.get_blocks") + def test_render_blocks_empty_text(self, mock_get_blocks): + """Test rendering blocks with empty text.""" + command = {"text": "", "user_id": "U123456", "channel_id": "C123456"} + expected_blocks = [ + {"type": "section", "text": {"type": "mrkdwn", "text": "Error message"}} + ] + mock_get_blocks.return_value = expected_blocks + + ai_command = Ai() + result = ai_command.render_blocks(command) + + mock_get_blocks.assert_called_once_with(query="") + assert result == expected_blocks + + @patch("apps.slack.common.handlers.ai.get_blocks") + def test_render_blocks_only_whitespace(self, mock_get_blocks): + """Test rendering blocks with only whitespace in text.""" + command = {"text": " ", "user_id": "U123456", "channel_id": "C123456"} + expected_blocks = [ + {"type": "section", "text": {"type": "mrkdwn", "text": "Error message"}} + ] + mock_get_blocks.return_value = expected_blocks + + ai_command = Ai() + result = ai_command.render_blocks(command) + + mock_get_blocks.assert_called_once_with(query="") + assert result == expected_blocks + + @patch("apps.slack.common.handlers.ai.get_blocks") + def 
test_render_blocks_complex_query(self, mock_get_blocks): + """Test rendering blocks with complex query.""" + command = { + "text": "What are the OWASP Top 10 vulnerabilities and how can I prevent them?", + "user_id": "U123456", + "channel_id": "C123456", + } + expected_blocks = [ + { + "type": "section", + "text": {"type": "mrkdwn", "text": "The OWASP Top 10 is a list..."}, + }, + {"type": "divider"}, + { + "type": "section", + "text": {"type": "mrkdwn", "text": "Prevention techniques..."}, + }, + ] + mock_get_blocks.return_value = expected_blocks + + ai_command = Ai() + result = ai_command.render_blocks(command) + + mock_get_blocks.assert_called_once_with( + query="What are the OWASP Top 10 vulnerabilities and how can I prevent them?" + ) + assert result == expected_blocks + + @patch("apps.slack.common.handlers.ai.get_blocks") + def test_render_blocks_handles_exception(self, mock_get_blocks): + """Test that render_blocks handles exceptions gracefully.""" + command = { + "text": "What is OWASP?", + "user_id": "U123456", + "channel_id": "C123456", + } + mock_get_blocks.side_effect = Exception("AI service error") + + ai_command = Ai() + with pytest.raises(Exception, match="AI service error"): + ai_command.render_blocks(command) + + @patch("apps.slack.common.handlers.ai.get_blocks") + def test_render_blocks_returns_none(self, mock_get_blocks): + """Test handling when get_blocks returns None.""" + command = { + "text": "What is OWASP?", + "user_id": "U123456", + "channel_id": "C123456", + } + mock_get_blocks.return_value = None + + ai_command = Ai() + result = ai_command.render_blocks(command) + + mock_get_blocks.assert_called_once_with(query="What is OWASP?") + assert result is None + + def test_ai_command_inheritance(self): + """Test that Ai command inherits from CommandBase.""" + from apps.slack.commands.command import CommandBase + + ai_command = Ai() + assert isinstance(ai_command, CommandBase) + + @patch("apps.slack.common.handlers.ai.get_blocks") + def 
test_render_blocks_special_characters(self, mock_get_blocks): + """Test rendering blocks with special characters in query.""" + command = { + "text": "What is XSS & SQL injection? How to prevent