Skip to content

Commit 466bca3

Browse files
committed
suggestions and decoupling with tests
1 parent 011e843 commit 466bca3

20 files changed

+383
-80
lines changed

backend/apps/ai/Makefile

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,43 +1,43 @@
1-
ai-create-chapter-chunks:
2-
@echo "Creating chapter chunks"
3-
@CMD="python manage.py ai_create_chapter_chunks" $(MAKE) exec-backend-command
4-
5-
ai-create-committee-chunks:
6-
@echo "Creating committee chunks"
7-
@CMD="python manage.py ai_create_committee_chunks" $(MAKE) exec-backend-command
8-
9-
ai-create-event-chunks:
10-
@echo "Creating event chunks"
11-
@CMD="python manage.py ai_create_event_chunks" $(MAKE) exec-backend-command
12-
13-
ai-create-project-chunks:
14-
@echo "Creating project chunks"
15-
@CMD="python manage.py ai_create_project_chunks" $(MAKE) exec-backend-command
16-
17-
ai-create-slack-message-chunks:
18-
@echo "Creating Slack message chunks"
19-
@CMD="python manage.py ai_create_slack_message_chunks" $(MAKE) exec-backend-command
20-
211
ai-run-rag-tool:
222
@echo "Running RAG tool"
233
@CMD="python manage.py ai_run_rag_tool" $(MAKE) exec-backend-command
244

5+
ai-update-chapter-chunks:
6+
@echo "Updating chapter chunks"
7+
@CMD="python manage.py ai_update_chapter_chunks" $(MAKE) exec-backend-command
8+
259
ai-update-chapter-context:
2610
@echo "Updating chapter context"
2711
@CMD="python manage.py ai_update_chapter_context" $(MAKE) exec-backend-command
2812

13+
ai-update-committee-chunks:
14+
@echo "Updating committee chunks"
15+
@CMD="python manage.py ai_update_committee_chunks" $(MAKE) exec-backend-command
16+
2917
ai-update-committee-context:
3018
@echo "Updating committee context"
3119
@CMD="python manage.py ai_update_committee_context" $(MAKE) exec-backend-command
3220

21+
ai-update-event-chunks:
22+
@echo "Updating event chunks"
23+
@CMD="python manage.py ai_update_event_chunks" $(MAKE) exec-backend-command
24+
3325
ai-update-event-context:
3426
@echo "Updating event context"
3527
@CMD="python manage.py ai_update_event_context" $(MAKE) exec-backend-command
3628

29+
ai-update-project-chunks:
30+
@echo "Updating project chunks"
31+
@CMD="python manage.py ai_update_project_chunks" $(MAKE) exec-backend-command
32+
3733
ai-update-project-context:
3834
@echo "Updating project context"
3935
@CMD="python manage.py ai_update_project_context" $(MAKE) exec-backend-command
4036

37+
ai-update-slack-message-chunks:
38+
@echo "Updating Slack message chunks"
39+
@CMD="python manage.py ai_update_slack_message_chunks" $(MAKE) exec-backend-command
40+
4141
ai-update-slack-message-context:
4242
@echo "Updating Slack message context"
4343
@CMD="python manage.py ai_update_slack_message_context" $(MAKE) exec-backend-command

backend/apps/ai/common/base/chunk_command.py

Lines changed: 43 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
"""Base chunk command class for creating chunks."""
22

33
from django.contrib.contenttypes.models import ContentType
4-
from django.db.models import Model
4+
from django.db.models import Max, Model
55

66
from apps.ai.common.base.ai_command import BaseAICommand
77
from apps.ai.common.utils import create_chunks_and_embeddings
@@ -14,51 +14,65 @@ class BaseChunkCommand(BaseAICommand):
1414

1515
def help(self) -> str:
1616
"""Return help text for the chunk creation command."""
17-
return f"Create chunks for OWASP {self.entity_name} data"
17+
return f"Create or update chunks for OWASP {self.entity_name} data"
1818

1919
def process_chunks_batch(self, entities: list[Model]) -> int:
20-
"""Process a batch of entities to create chunks."""
20+
"""Process a batch of entities to create or update chunks."""
2121
processed = 0
22-
batch_chunks = []
22+
batch_chunks_to_create = []
2323
content_type = ContentType.objects.get_for_model(self.model_class)
2424

2525
for entity in entities:
26-
context = Context.objects.filter(entity_type=content_type, entity_id=entity.id).first()
27-
2826
entity_key = self.get_entity_key(entity)
27+
context = Context.objects.filter(entity_type=content_type, entity_id=entity.id).first()
2928

3029
if not context:
3130
self.stdout.write(
3231
self.style.WARNING(f"No context found for {self.entity_name} {entity_key}")
3332
)
3433
continue
3534

36-
prose_content, metadata_content = self.extract_content(entity)
37-
full_content = (
38-
f"{metadata_content}\n\n{prose_content}" if metadata_content else prose_content
39-
)
35+
latest_chunk_timestamp = context.chunks.aggregate(
36+
latest_created=Max("nest_created_at")
37+
)["latest_created"]
4038

41-
if not full_content.strip():
42-
self.stdout.write(f"No content to chunk for {self.entity_name} {entity_key}")
43-
continue
39+
if not latest_chunk_timestamp or context.nest_updated_at > latest_chunk_timestamp:
40+
self.stdout.write(f"Context for {entity_key} requires chunk creation/update")
4441

45-
chunk_texts = Chunk.split_text(full_content)
46-
if not chunk_texts:
47-
self.stdout.write(f"No chunks created for {self.entity_name} {entity_key}")
48-
continue
42+
if latest_chunk_timestamp:
43+
count, _ = context.chunks.all().delete()
44+
self.stdout.write(f"Deleted {count} stale chunks for {entity_key}")
45+
46+
prose_content, metadata_content = self.extract_content(entity)
47+
full_content = (
48+
f"{metadata_content}\n\n{prose_content}" if metadata_content else prose_content
49+
)
4950

50-
if chunks := create_chunks_and_embeddings(
51-
chunk_texts=chunk_texts,
52-
context=context,
53-
openai_client=self.openai_client,
54-
save=False,
55-
):
56-
batch_chunks.extend(chunks)
57-
processed += 1
58-
self.stdout.write(f"Created {len(chunks)} chunks for {entity_key}")
59-
60-
if batch_chunks:
61-
Chunk.bulk_save(batch_chunks)
51+
if not full_content.strip():
52+
self.stdout.write(f"No content to chunk for {self.entity_name} {entity_key}")
53+
continue
54+
55+
chunk_texts = Chunk.split_text(full_content)
56+
if not chunk_texts:
57+
self.stdout.write(f"No chunks created for {self.entity_name} {entity_key}")
58+
continue
59+
60+
if chunks := create_chunks_and_embeddings(
61+
chunk_texts=chunk_texts,
62+
context=context,
63+
openai_client=self.openai_client,
64+
save=False,
65+
):
66+
batch_chunks_to_create.extend(chunks)
67+
processed += 1
68+
self.stdout.write(
69+
self.style.SUCCESS(f"Created {len(chunks)} new chunks for {entity_key}")
70+
)
71+
else:
72+
self.stdout.write(f"Chunks for {entity_key} are already up to date.")
73+
74+
if batch_chunks_to_create:
75+
Chunk.bulk_save(batch_chunks_to_create)
6276

6377
return processed
6478

backend/apps/ai/models/context.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,6 @@ def update_data(
6060
context.content = content
6161
context.source = source
6262
if save:
63-
context.save(update_fields=["content", "source"])
63+
context.save()
6464

6565
return context

backend/tests/apps/ai/common/base/ai_command_test.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,12 +12,16 @@ class MockTestModel:
1212

1313
objects = Mock()
1414
pk = 1
15+
__name__ = "TestEntity"
1516

1617

1718
@pytest.fixture
1819
def command():
1920
"""Fixture for ConcreteAICommand instance."""
20-
return ConcreteAICommand()
21+
cmd = ConcreteAICommand()
22+
cmd.entity_name = "test_entity"
23+
cmd.entity_name_plural = "test_entities"
24+
return cmd
2125

2226

2327
@pytest.fixture

backend/tests/apps/ai/common/base/chunk_command_test.py

Lines changed: 32 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
"""Tests for the BaseChunkCommand class."""
22

33
from typing import Any
4-
from unittest.mock import Mock, patch
4+
from unittest.mock import Mock, call, patch
55

66
import pytest
77
from django.contrib.contenttypes.models import ContentType
@@ -30,8 +30,10 @@ def command():
3030
"""Return a concrete chunk command instance for testing."""
3131
cmd = ConcreteChunkCommand()
3232
mock_model = Mock()
33-
mock_model.__name__ = "MockChunkTestModel"
33+
mock_model.__name__ = "TestEntity"
3434
cmd.model_class = mock_model
35+
cmd.entity_name = "test_entity"
36+
cmd.entity_name_plural = "test_entities"
3537
return cmd
3638

3739

@@ -52,6 +54,9 @@ def mock_context():
5254
context.id = 1
5355
context.content_type_id = 1
5456
context.object_id = 1
57+
context.chunks.aggregate.return_value = {"latest_created": None}
58+
context.chunks.all.return_value.delete.return_value = (0, {})
59+
context.nest_updated_at = Mock()
5560
return context
5661

5762

@@ -85,12 +90,12 @@ def test_command_inheritance(self, command):
8590

8691
def test_help_method(self, command):
8792
"""Test the help method returns appropriate help text."""
88-
expected_help = "Create chunks for OWASP test_entity data"
93+
expected_help = "Create or update chunks for OWASP test_entity data"
8994
assert command.help() == expected_help
9095

9196
def test_abstract_methods_implemented(self, command):
9297
"""Test that all abstract methods are properly implemented."""
93-
assert command.model_class.__name__ == "MockChunkTestModel"
98+
assert command.model_class.__name__ == "TestEntity"
9499
assert command.entity_name == "test_entity"
95100
assert command.entity_name_plural == "test_entities"
96101
assert command.key_field_name == "test_key"
@@ -143,7 +148,12 @@ def test_process_chunks_batch_empty_content(
143148
result = command.process_chunks_batch([mock_entity])
144149

145150
assert result == 0
146-
mock_write.assert_called_once_with("No content to chunk for test_entity test-key-123")
151+
# Check that it wrote the initial message and the empty content message
152+
expected_calls = [
153+
call("Context for test-key-123 requires chunk creation/update"),
154+
call("No content to chunk for test_entity test-key-123"),
155+
]
156+
mock_write.assert_has_calls(expected_calls)
147157

148158
@patch("apps.ai.common.base.chunk_command.ContentType.objects.get_for_model")
149159
@patch("apps.ai.common.base.chunk_command.Context.objects.filter")
@@ -167,7 +177,12 @@ def test_process_chunks_batch_no_chunks_created(
167177
result = command.process_chunks_batch([mock_entity])
168178

169179
assert result == 0
170-
mock_write.assert_called_once()
180+
# Check that both messages were written
181+
expected_calls = [
182+
call("Context for test-key-123 requires chunk creation/update"),
183+
call("No chunks created for test_entity test-key-123"),
184+
]
185+
mock_write.assert_has_calls(expected_calls)
171186
call_args = mock_write.call_args[0][0]
172187
assert "No chunks created for test_entity test-key-123" in call_args
173188

@@ -207,7 +222,12 @@ def test_process_chunks_batch_success(
207222
save=False,
208223
)
209224
mock_bulk_save.assert_called_once_with(mock_chunks)
210-
mock_write.assert_called_once_with("Created 3 chunks for test-key-123")
225+
mock_write.assert_has_calls(
226+
[
227+
call("Context for test-key-123 requires chunk creation/update"),
228+
call(command.style.SUCCESS("Created 3 new chunks for test-key-123")),
229+
]
230+
)
211231

212232
@patch("apps.ai.common.base.chunk_command.ContentType.objects.get_for_model")
213233
@patch("apps.ai.common.base.chunk_command.Context.objects.filter")
@@ -409,6 +429,8 @@ def test_process_chunks_batch_whitespace_only_content(
409429
result = command.process_chunks_batch([mock_entity])
410430

411431
assert result == 0
412-
mock_write.assert_called_once_with(
413-
"No content to chunk for test_entity test-key-123"
414-
)
432+
expected_calls = [
433+
call("Context for test-key-123 requires chunk creation/update"),
434+
call("No content to chunk for test_entity test-key-123"),
435+
]
436+
mock_write.assert_has_calls(expected_calls)

0 commit comments

Comments
 (0)