Skip to content

Commit

Permalink
feat: improve delete knowledge performance (#1733)
Browse files Browse the repository at this point in the history
Issue: #1724
  • Loading branch information
mamadoudicko authored Nov 27, 2023
1 parent f1ddaca commit 10e94e5
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 27 deletions.
49 changes: 26 additions & 23 deletions backend/models/databases/supabase/brains.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,36 +278,39 @@ def get_brain_vector_ids(self, brain_id):

def delete_file_from_brain(self, brain_id, file_name: str):
    """Detach a file's vectors from a brain and delete the orphaned ones.

    Looks up every row of ``vectors`` whose ``metadata->>file_name`` equals
    ``file_name``, removes the matching ``brains_vectors`` links for
    ``brain_id``, and then deletes from ``vectors`` those ids that no other
    brain still references. All work is done with batched ``in`` filters
    rather than one round-trip per vector.

    Args:
        brain_id: Identifier of the brain the file is removed from.
        file_name: Value matched against ``metadata->>file_name``.

    Returns:
        A dict with a single ``"message"`` key confirming the deletion.
    """

    def as_in_list(ids):
        # The criteria is sent verbatim after the operator, and the `in`
        # operator expects a parenthesised, comma-separated list such as
        # "(1,2,3)" -- a raw Python list would be rendered as "['1', '2']".
        return f"({','.join(map(str, ids))})"

    # First, get the vector ids associated with the file_name.
    file_vectors = (
        self.db.table("vectors")
        .select("id")
        .filter("metadata->>file_name", "eq", file_name)
        .execute()
    )
    file_vectors_ids = [item["id"] for item in file_vectors.data]

    # Nothing to unlink or delete: skip the remaining round-trips instead of
    # sending an empty `in.()` filter.
    if file_vectors_ids:
        file_vectors_filter = as_in_list(file_vectors_ids)

        # Remove the current file's vectors from this brain's vectors.
        self.db.table("brains_vectors").delete().filter(
            "vector_id", "in", file_vectors_filter
        ).filter("brain_id", "eq", brain_id).execute()

        # Find which of those vectors are still linked to another brain.
        vectors_used_by_another_brain = (
            self.db.table("brains_vectors")
            .select("vector_id")
            .filter("vector_id", "in", file_vectors_filter)
            .filter("brain_id", "neq", brain_id)
            .execute()
        )
        vectors_used_by_another_brain_ids = {
            item["vector_id"] for item in vectors_used_by_another_brain.data
        }

        vectors_no_longer_used_ids = [
            vector_id
            for vector_id in file_vectors_ids
            if vector_id not in vectors_used_by_another_brain_ids
        ]

        # Only vectors that no brain references any more are deleted.
        if vectors_no_longer_used_ids:
            self.db.table("vectors").delete().filter(
                "id", "in", as_in_list(vectors_no_longer_used_ids)
            ).execute()

    return {"message": f"File {file_name} in brain {brain_id} has been deleted."}

Expand Down
10 changes: 6 additions & 4 deletions backend/routes/knowledge_routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from uuid import UUID

from fastapi import APIRouter, Depends, Query
from fastapi import APIRouter, Depends, HTTPException, Query
from logger import get_logger
from middlewares.auth import AuthBearer, get_current_user
from models import Brain
Expand All @@ -10,6 +10,7 @@
from repository.knowledge.get_all_knowledge import get_all_knowledge
from repository.knowledge.get_knowledge import get_knowledge
from repository.knowledge.remove_knowledge import remove_knowledge

from routes.authorizations.brain_authorization import (
RoleEnum,
has_brain_authorization,
Expand Down Expand Up @@ -56,8 +57,6 @@ async def delete_endpoint(
Delete a specific knowledge from a brain.
"""

validate_brain_authorization(brain_id=brain_id, user_id=current_user.id)

brain = Brain(id=brain_id)

knowledge = get_knowledge(knowledge_id)
Expand Down Expand Up @@ -93,7 +92,10 @@ async def generate_signed_url_endpoint(
validate_brain_authorization(brain_id=knowledge.brain_id, user_id=current_user.id)

if knowledge.file_name == None:
raise Exception(f"Knowledge {knowledge_id} has no file_name associated with it")
raise HTTPException(
status_code=404,
detail=f"Knowledge with id {knowledge_id} is not a file.",
)

file_path_in_storage = f"{knowledge.brain_id}/{knowledge.file_name}"

Expand Down

0 comments on commit 10e94e5

Please sign in to comment.