Skip to content

Commit f529179

Browse files
committed
reusable code
1 parent f67a88b commit f529179

File tree

4 files changed

+82
-133
lines changed

4 files changed

+82
-133
lines changed
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Shared helper for creating AI chunks with OpenAI embeddings."""

import logging
import time
from datetime import UTC, datetime, timedelta

from apps.ai.common.constants import (
    DEFAULT_LAST_REQUEST_OFFSET_SECONDS,
    MIN_REQUEST_INTERVAL_SECONDS,
)
from apps.ai.models.chunk import Chunk

logger = logging.getLogger(__name__)

# Timestamp of the most recent embeddings request, shared across calls.
# The pre-refactor commands kept this on the command instance
# (self.last_request_time); recomputing it locally on every call made
# time_since_last_request a constant equal to the offset, so the throttle
# never reflected the actual previous request.
_last_request_time = datetime.now(UTC) - timedelta(
    seconds=DEFAULT_LAST_REQUEST_OFFSET_SECONDS
)


def create_chunks_and_embeddings(
    all_chunk_texts: list[str],
    content_object,
    openai_client,
) -> list[Chunk]:
    """Create chunks and embeddings from given texts using OpenAI embeddings.

    Args:
        all_chunk_texts (list[str]): List of text chunks to embed.
        content_object: The object to associate the chunks with.
        openai_client: Initialized OpenAI client instance.

    Returns:
        list[Chunk]: List of Chunk instances (not saved). Empty on API error.
    """
    global _last_request_time

    try:
        # Throttle: keep at least MIN_REQUEST_INTERVAL_SECONDS between
        # consecutive embeddings requests, across every caller of this helper.
        time_since_last_request = datetime.now(UTC) - _last_request_time
        if time_since_last_request < timedelta(seconds=MIN_REQUEST_INTERVAL_SECONDS):
            time.sleep(
                MIN_REQUEST_INTERVAL_SECONDS - time_since_last_request.total_seconds()
            )

        response = openai_client.embeddings.create(
            input=all_chunk_texts,
            model="text-embedding-3-small",
        )
        # Record the request time so the next call can rate-limit correctly;
        # the refactor had dropped this update (self.last_request_time = ...).
        _last_request_time = datetime.now(UTC)

        # Pair each input text with its embedding (strict=True guards against
        # a length mismatch) and build unsaved Chunk instances, skipping any
        # that Chunk.update_data declines to create.
        return [
            chunk
            for text, embedding in zip(
                all_chunk_texts,
                [d.embedding for d in response.data],
                strict=True,
            )
            if (
                chunk := Chunk.update_data(
                    text=text,
                    content_object=content_object,
                    embedding=embedding,
                    save=False,
                )
            )
        ]
    except Exception:
        # NOTE(review): the per-command versions caught openai.OpenAIError
        # specifically; openai is not imported in this module, so we catch
        # broadly but log with a full traceback instead of print().
        logger.exception("OpenAI API error while creating embeddings")
        return []

backend/apps/ai/management/commands/ai_create_chapter_chunks.py

Lines changed: 9 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,12 @@
11
"""A command to create chunks of OWASP chapter data for RAG."""
22

33
import os
4-
import time
5-
from datetime import UTC, datetime, timedelta
64

75
import openai
86
from django.core.management.base import BaseCommand
97

10-
from apps.ai.common.constants import (
11-
DEFAULT_LAST_REQUEST_OFFSET_SECONDS,
12-
DELIMITER,
13-
MIN_REQUEST_INTERVAL_SECONDS,
14-
)
8+
from apps.ai.common.constants import DELIMITER
9+
from apps.ai.common.create_chunks_and_embeddings import create_chunks_and_embeddings
1510
from apps.ai.models.chunk import Chunk
1611
from apps.owasp.models.chapter import Chapter
1712

@@ -65,15 +60,15 @@ def handle(self, *args, **options):
6560

6661
batch_chunks = []
6762
for chapter in batch_chapters:
68-
batch_chunks.extend(self.create_chunks(chapter))
63+
batch_chunks.extend(self.handle_chunks(chapter))
6964

7065
if batch_chunks:
7166
Chunk.bulk_save(batch_chunks)
7267
self.stdout.write(f"Saved {len(batch_chunks)} chunks")
7368

7469
self.stdout.write(f"Completed processing all {total_chapters} chapters")
7570

76-
def create_chunks(self, chapter: Chapter) -> list[Chunk]:
71+
def handle_chunks(self, chapter: Chapter) -> list[Chunk]:
7772
"""Create chunks from a chapter's data."""
7873
prose_content, metadata_content = self.extract_chapter_content(chapter)
7974

@@ -89,41 +84,11 @@ def create_chunks(self, chapter: Chapter) -> list[Chunk]:
8984
self.stdout.write(f"No content to chunk for chapter {chapter.key}")
9085
return []
9186

92-
try:
93-
time_since_last_request = datetime.now(UTC) - getattr(
94-
self,
95-
"last_request_time",
96-
datetime.now(UTC) - timedelta(seconds=DEFAULT_LAST_REQUEST_OFFSET_SECONDS),
97-
)
98-
99-
if time_since_last_request < timedelta(seconds=MIN_REQUEST_INTERVAL_SECONDS):
100-
time.sleep(MIN_REQUEST_INTERVAL_SECONDS - time_since_last_request.total_seconds())
101-
102-
response = self.openai_client.embeddings.create(
103-
input=all_chunk_texts,
104-
model="text-embedding-3-small",
105-
)
106-
self.last_request_time = datetime.now(UTC)
107-
108-
return [
109-
chunk
110-
for text, embedding in zip(
111-
all_chunk_texts,
112-
[d.embedding for d in response.data],
113-
strict=True,
114-
)
115-
if (
116-
chunk := Chunk.update_data(
117-
text=text,
118-
content_object=chapter,
119-
embedding=embedding,
120-
save=False,
121-
)
122-
)
123-
]
124-
except openai.OpenAIError as e:
125-
self.stdout.write(self.style.ERROR(f"OpenAI API error for chapter {chapter.key}: {e}"))
126-
return []
87+
return create_chunks_and_embeddings(
88+
all_chunk_texts=all_chunk_texts,
89+
content_object=chapter,
90+
openai_client=self.openai_client,
91+
)
12792

12893
def extract_chapter_content(self, chapter: Chapter) -> tuple[str, str]:
12994
"""Extract and separate prose content from metadata for a chapter.

backend/apps/ai/management/commands/ai_create_event_chunks.py

Lines changed: 9 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,12 @@
11
"""A command to create chunks of OWASP event data for RAG."""
22

33
import os
4-
import time
5-
from datetime import UTC, datetime, timedelta
64

75
import openai
86
from django.core.management.base import BaseCommand
97

10-
from apps.ai.common.constants import (
11-
DEFAULT_LAST_REQUEST_OFFSET_SECONDS,
12-
DELIMITER,
13-
MIN_REQUEST_INTERVAL_SECONDS,
14-
)
8+
from apps.ai.common.constants import DELIMITER
9+
from apps.ai.common.create_chunks_and_embeddings import create_chunks_and_embeddings
1510
from apps.ai.models.chunk import Chunk
1611
from apps.owasp.models.event import Event
1712

@@ -65,15 +60,15 @@ def handle(self, *args, **options):
6560

6661
batch_chunks = []
6762
for event in batch_events:
68-
batch_chunks.extend(self.create_chunks(event))
63+
batch_chunks.extend(self.handle_chunks(event))
6964

7065
if batch_chunks:
7166
Chunk.bulk_save(batch_chunks)
7267
self.stdout.write(f"Saved {len(batch_chunks)} chunks")
7368

7469
self.stdout.write(f"Completed processing all {total_events} events")
7570

76-
def create_chunks(self, event: Event) -> list[Chunk]:
71+
def handle_chunks(self, event: Event) -> list[Chunk]:
7772
"""Create chunks from an event's data."""
7873
prose_content, metadata_content = self.extract_event_content(event)
7974

@@ -89,41 +84,11 @@ def create_chunks(self, event: Event) -> list[Chunk]:
8984
self.stdout.write(f"No content to chunk for event {event.key}")
9085
return []
9186

92-
try:
93-
time_since_last_request = datetime.now(UTC) - getattr(
94-
self,
95-
"last_request_time",
96-
datetime.now(UTC) - timedelta(seconds=DEFAULT_LAST_REQUEST_OFFSET_SECONDS),
97-
)
98-
99-
if time_since_last_request < timedelta(seconds=MIN_REQUEST_INTERVAL_SECONDS):
100-
time.sleep(MIN_REQUEST_INTERVAL_SECONDS - time_since_last_request.total_seconds())
101-
102-
response = self.openai_client.embeddings.create(
103-
input=all_chunk_texts,
104-
model="text-embedding-3-small",
105-
)
106-
self.last_request_time = datetime.now(UTC)
107-
108-
return [
109-
chunk
110-
for text, embedding in zip(
111-
all_chunk_texts,
112-
[d.embedding for d in response.data],
113-
strict=True,
114-
)
115-
if (
116-
chunk := Chunk.update_data(
117-
text=text,
118-
content_object=event,
119-
embedding=embedding,
120-
save=False,
121-
)
122-
)
123-
]
124-
except openai.OpenAIError as e:
125-
self.stdout.write(self.style.ERROR(f"OpenAI API error for event {event.key}: {e}"))
126-
return []
87+
return create_chunks_and_embeddings(
88+
all_chunk_texts,
89+
content_object=event,
90+
openai_client=self.openai_client,
91+
)
12792

12893
def extract_event_content(self, event: Event) -> tuple[str, str]:
12994
"""Extract and separate prose content from metadata for an event.
Lines changed: 8 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,11 @@
11
"""A command to create chunks of Slack messages."""
22

33
import os
4-
import time
5-
from datetime import UTC, datetime, timedelta
64

75
import openai
86
from django.core.management.base import BaseCommand
97

10-
from apps.ai.common.constants import (
11-
DEFAULT_LAST_REQUEST_OFFSET_SECONDS,
12-
MIN_REQUEST_INTERVAL_SECONDS,
13-
)
8+
from apps.ai.common.create_chunks_and_embeddings import create_chunks_and_embeddings
149
from apps.ai.models.chunk import Chunk
1510
from apps.slack.models.message import Message
1611

@@ -36,13 +31,13 @@ def handle(self, *args, **options):
3631
[
3732
chunk
3833
for message in Message.objects.all()[offset : offset + batch_size]
39-
for chunk in self.create_chunks(message)
34+
for chunk in self.handle_chunks(message)
4035
]
4136
)
4237

4338
self.stdout.write(f"Completed processing all {total_messages} messages")
4439

45-
def create_chunks(self, message: Message) -> list[Chunk]:
40+
def handle_chunks(self, message: Message) -> list[Chunk]:
4641
"""Create chunks from a message."""
4742
if message.subtype in {"channel_join", "channel_leave"}:
4843
return []
@@ -54,40 +49,8 @@ def create_chunks(self, message: Message) -> list[Chunk]:
5449
)
5550
return []
5651

57-
try:
58-
time_since_last_request = datetime.now(UTC) - getattr(
59-
self,
60-
"last_request_time",
61-
datetime.now(UTC) - timedelta(seconds=DEFAULT_LAST_REQUEST_OFFSET_SECONDS),
62-
)
63-
64-
if time_since_last_request < timedelta(seconds=MIN_REQUEST_INTERVAL_SECONDS):
65-
time.sleep(MIN_REQUEST_INTERVAL_SECONDS - time_since_last_request.total_seconds())
66-
67-
response = self.openai_client.embeddings.create(
68-
input=chunk_text,
69-
model="text-embedding-3-small",
70-
)
71-
self.last_request_time = datetime.now(UTC)
72-
73-
return [
74-
chunk
75-
for text, embedding in zip(
76-
chunk_text,
77-
[d.embedding for d in response.data], # Embedding data from OpenAI response.
78-
strict=True,
79-
)
80-
if (
81-
chunk := Chunk.update_data(
82-
text=text,
83-
content_object=message,
84-
embedding=embedding,
85-
save=False,
86-
)
87-
)
88-
]
89-
except openai.OpenAIError as e:
90-
self.stdout.write(
91-
self.style.ERROR(f"OpenAI API error for message {message.slack_message_id}: {e}")
92-
)
93-
return []
52+
return create_chunks_and_embeddings(
53+
all_chunk_texts=chunk_text,
54+
content_object=message,
55+
openai_client=self.openai_client,
56+
)

0 commit comments

Comments
 (0)