Skip to content

Commit 385ca51

Browse files
Fix slack and duplication errors (#2352)
* fix slack and duplication errors * code rabbit suggestions * integrity error solved * using set * Update code --------- Co-authored-by: Arkadii Yakovets <arkadii.yakovets@owasp.org>
1 parent 0f839b9 commit 385ca51

File tree

7 files changed

+89
-61
lines changed

7 files changed

+89
-61
lines changed

backend/apps/ai/common/base/chunk_command.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,12 @@ def process_chunks_batch(self, entities: list[Model]) -> int:
5757
self.stdout.write(f"No content to chunk for {self.entity_name} {entity_key}")
5858
continue
5959

60-
chunk_texts = Chunk.split_text(full_content)
61-
if not chunk_texts:
60+
if not (unique_chunk_texts := set(Chunk.split_text(full_content))):
6261
self.stdout.write(f"No chunks created for {self.entity_name} {entity_key}")
6362
continue
6463

6564
if chunks := create_chunks_and_embeddings(
66-
chunk_texts=chunk_texts,
65+
chunk_texts=list(unique_chunk_texts),
6766
context=context,
6867
openai_client=self.openai_client,
6968
save=False,

backend/apps/ai/common/base/context_command.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@ def process_context_batch(self, entities: list[Model]) -> int:
3535
):
3636
processed += 1
3737
entity_key = self.get_entity_key(entity)
38-
self.stdout.write(f"Created context for {entity_key}")
38+
self.stdout.write(f"Created/updated context for {entity_key}")
3939
else:
4040
entity_key = self.get_entity_key(entity)
41-
self.stdout.write(self.style.ERROR(f"Failed to create context for {entity_key}"))
41+
self.stdout.write(
42+
self.style.ERROR(f"Failed to create/update context for {entity_key}")
43+
)
4244

4345
return processed
4446

backend/apps/ai/management/commands/ai_update_slack_message_context.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,26 +10,6 @@ class Command(BaseContextCommand):
1010
key_field_name = "slack_message_id"
1111
model_class = Message
1212

13-
def add_arguments(self, parser):
14-
"""Override to use different default batch size for messages."""
15-
super().add_arguments(parser)
16-
parser.add_argument(
17-
"--message-key",
18-
type=str,
19-
help="Process only the message with this key",
20-
)
21-
parser.add_argument(
22-
"--all",
23-
action="store_true",
24-
help="Process all the messages",
25-
)
26-
parser.add_argument(
27-
"--batch-size",
28-
type=int,
29-
default=100,
30-
help="Number of messages to process in each batch",
31-
)
32-
3313
def extract_content(self, entity: Message) -> tuple[str, str]:
3414
"""Extract content from the message."""
3515
return entity.cleaned_text or "", ""

backend/tests/apps/ai/common/base/chunk_command_test.py

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -211,16 +211,21 @@ def test_process_chunks_batch_success(
211211
mock_create_chunks.return_value = mock_chunks
212212
command.openai_client = Mock()
213213

214-
with patch.object(command.stdout, "write") as mock_write:
214+
with (
215+
patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter,
216+
patch.object(command.stdout, "write") as mock_write,
217+
):
218+
mock_qs = Mock()
219+
mock_qs.values_list.return_value = []
220+
mock_chunk_filter.return_value = mock_qs
215221
result = command.process_chunks_batch([mock_entity])
216222

217223
assert result == 1
218-
mock_create_chunks.assert_called_once_with(
219-
chunk_texts=["chunk1", "chunk2", "chunk3"],
220-
context=mock_context,
221-
openai_client=command.openai_client,
222-
save=False,
223-
)
224+
_, kwargs = mock_create_chunks.call_args
225+
assert set(kwargs["chunk_texts"]) == {"chunk1", "chunk2", "chunk3"}
226+
assert kwargs["context"] == mock_context
227+
assert kwargs["openai_client"] == command.openai_client
228+
assert kwargs["save"] is False
224229
mock_bulk_save.assert_called_once_with(mock_chunks)
225230
mock_write.assert_has_calls(
226231
[
@@ -261,7 +266,13 @@ def test_process_chunks_batch_multiple_entities(
261266
mock_create_chunks.return_value = mock_chunks[:2]
262267
command.openai_client = Mock()
263268

264-
with patch.object(command.stdout, "write"):
269+
with (
270+
patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter,
271+
patch.object(command.stdout, "write"),
272+
):
273+
mock_qs = Mock()
274+
mock_qs.values_list.return_value = []
275+
mock_chunk_filter.return_value = mock_qs
265276
result = command.process_chunks_batch(entities)
266277

267278
assert result == 3
@@ -325,14 +336,22 @@ def test_process_chunks_batch_content_combination(
325336
"extract_content",
326337
return_value=("prose", "metadata"),
327338
):
328-
command.process_chunks_batch([mock_entity])
339+
with patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter:
340+
mock_qs = Mock()
341+
mock_qs.values_list.return_value = []
342+
mock_chunk_filter.return_value = mock_qs
343+
command.process_chunks_batch([mock_entity])
329344

330345
expected_content = "metadata\n\nprose"
331346
mock_split_text.assert_called_once_with(expected_content)
332347

333348
mock_split_text.reset_mock()
334349
with patch.object(command, "extract_content", return_value=("prose", "")):
335-
command.process_chunks_batch([mock_entity])
350+
with patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter:
351+
mock_qs = Mock()
352+
mock_qs.values_list.return_value = []
353+
mock_chunk_filter.return_value = mock_qs
354+
command.process_chunks_batch([mock_entity])
336355

337356
mock_split_text.assert_called_with("prose")
338357

@@ -402,11 +421,52 @@ def test_process_chunks_batch_metadata_only_content(
402421
"extract_content",
403422
return_value=("", "metadata"),
404423
):
405-
command.process_chunks_batch([mock_entity])
424+
with patch("apps.ai.models.chunk.Chunk.objects.filter") as mock_chunk_filter:
425+
mock_qs = Mock()
426+
mock_qs.values_list.return_value = []
427+
mock_chunk_filter.return_value = mock_qs
428+
command.process_chunks_batch([mock_entity])
406429

407430
mock_split_text.assert_called_once_with("metadata\n\n")
408431
mock_bulk_save.assert_called_once()
409432

433+
@patch("apps.ai.common.base.chunk_command.ContentType.objects.get_for_model")
434+
@patch("apps.ai.common.base.chunk_command.Context.objects.filter")
435+
@patch("apps.ai.models.chunk.Chunk.split_text")
436+
@patch("apps.ai.common.base.chunk_command.create_chunks_and_embeddings")
437+
@patch("apps.ai.models.chunk.Chunk.bulk_save")
438+
def test_process_chunks_batch_with_duplicates(
439+
self,
440+
mock_bulk_save,
441+
mock_create_chunks,
442+
mock_split_text,
443+
mock_context_filter,
444+
mock_get_content_type,
445+
command,
446+
mock_entity,
447+
mock_context,
448+
mock_content_type,
449+
mock_chunks,
450+
):
451+
"""Test that duplicate chunk texts are filtered out before processing."""
452+
mock_get_content_type.return_value = mock_content_type
453+
mock_context_filter.return_value.first.return_value = mock_context
454+
mock_split_text.return_value = ["chunk1", "chunk2", "chunk1", "chunk3", "chunk2"]
455+
mock_create_chunks.return_value = mock_chunks
456+
command.openai_client = Mock()
457+
458+
with patch.object(command.stdout, "write"):
459+
result = command.process_chunks_batch([mock_entity])
460+
461+
assert result == 1
462+
mock_split_text.assert_called_once()
463+
_, kwargs = mock_create_chunks.call_args
464+
assert set(kwargs["chunk_texts"]) == {"chunk1", "chunk2", "chunk3"}
465+
assert kwargs["context"] == mock_context
466+
assert kwargs["openai_client"] == command.openai_client
467+
assert kwargs["save"] is False
468+
mock_bulk_save.assert_called_once_with(mock_chunks)
469+
410470
def test_process_chunks_batch_whitespace_only_content(
411471
self, command, mock_entity, mock_context, mock_content_type
412472
):

backend/tests/apps/ai/common/base/context_command_test.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def test_process_context_batch_success(
116116
entity=mock_entity,
117117
source="owasp_test_entity",
118118
)
119-
mock_write.assert_called_once_with("Created context for test-key-123")
119+
mock_write.assert_called_once_with("Created/updated context for test-key-123")
120120

121121
@patch("apps.ai.common.base.context_command.Context")
122122
def test_process_context_batch_creation_fails(self, mock_context_class, command, mock_entity):
@@ -130,7 +130,7 @@ def test_process_context_batch_creation_fails(self, mock_context_class, command,
130130
mock_context_class.update_data.assert_called_once()
131131
mock_write.assert_called_once()
132132
call_args = mock_write.call_args[0][0]
133-
assert "Failed to create context for test-key-123" in str(call_args)
133+
assert "Failed to create/update context for test-key-123" in str(call_args)
134134

135135
@patch("apps.ai.common.base.context_command.Context")
136136
def test_process_context_batch_multiple_entities(
@@ -184,9 +184,9 @@ def test_process_context_batch_mixed_success_failure(
184184
assert mock_write.call_count == 3
185185

186186
write_calls = mock_write.call_args_list
187-
assert "Created context for test-key-1" in str(write_calls[0])
188-
assert "Failed to create context for test-key-2" in str(write_calls[1])
189-
assert "Created context for test-key-3" in str(write_calls[2])
187+
assert "Created/updated context for test-key-1" in str(write_calls[0])
188+
assert "Failed to create/update context for test-key-2" in str(write_calls[1])
189+
assert "Created/updated context for test-key-3" in str(write_calls[2])
190190

191191
def test_process_context_batch_content_combination(self, command, mock_entity, mock_context):
192192
"""Test that metadata and prose content are properly combined."""
@@ -261,7 +261,7 @@ def test_get_entity_key_usage(self, command, mock_context):
261261
with patch.object(command.stdout, "write") as mock_write:
262262
command.process_context_batch([entity])
263263

264-
mock_write.assert_called_once_with("Created context for custom-entity-key")
264+
mock_write.assert_called_once_with("Created/updated context for custom-entity-key")
265265

266266
def test_process_context_batch_empty_list(self, command):
267267
"""Test process_context_batch with empty entity list."""

backend/tests/apps/ai/management/commands/ai_update_committee_context_test.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,9 @@ def test_process_context_batch_success(self, command, mock_committee):
169169
entity=mock_committee,
170170
source="owasp_committee",
171171
)
172-
mock_write.assert_called_once_with("Created context for test-committee")
172+
mock_write.assert_called_once_with(
173+
"Created/updated context for test-committee"
174+
)
173175

174176
def test_process_context_batch_empty_content(self, command, mock_committee):
175177
"""Test context batch processing with empty content."""
@@ -206,7 +208,7 @@ def test_process_context_batch_create_failure(self, command, mock_committee):
206208

207209
assert result == 0
208210
mock_error.assert_called_once_with(
209-
"Failed to create context for test-committee"
211+
"Failed to create/update context for test-committee"
210212
)
211213
mock_write.assert_called_once_with("ERROR: Failed")
212214

backend/tests/apps/ai/management/commands/ai_update_slack_message_context_test.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,9 @@ def test_add_arguments(self, command):
7272
parser = Mock()
7373
command.add_arguments(parser)
7474

75-
assert parser.add_argument.call_count == 6
75+
assert parser.add_argument.call_count == 3
7676
calls = parser.add_argument.call_args_list
7777

78-
# First 3 calls are from parent class (BaseAICommand)
7978
assert calls[0][0] == ("--message-key",)
8079
assert calls[0][1]["type"] is str
8180
assert "Process only the message with this key" in calls[0][1]["help"]
@@ -86,19 +85,5 @@ def test_add_arguments(self, command):
8685

8786
assert calls[2][0] == ("--batch-size",)
8887
assert calls[2][1]["type"] is int
89-
assert calls[2][1]["default"] == 50 # Default from parent class
88+
assert calls[2][1]["default"] == 50
9089
assert "Number of messages to process in each batch" in calls[2][1]["help"]
91-
92-
# Next 3 calls are from the command itself (duplicates with different defaults)
93-
assert calls[3][0] == ("--message-key",)
94-
assert calls[3][1]["type"] is str
95-
assert "Process only the message with this key" in calls[3][1]["help"]
96-
97-
assert calls[4][0] == ("--all",)
98-
assert calls[4][1]["action"] == "store_true"
99-
assert "Process all the messages" in calls[4][1]["help"]
100-
101-
assert calls[5][0] == ("--batch-size",)
102-
assert calls[5][1]["type"] is int
103-
assert calls[5][1]["default"] == 100 # Overridden default from command
104-
assert "Number of messages to process in each batch" in calls[5][1]["help"]

0 commit comments

Comments
 (0)