⚡️ Speed up method BaseArangoService.delete_record by 6%
#643
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 6% (0.06x) speedup for
BaseArangoService.delete_recordinbackend/python/app/connectors/services/base_arango_service.py⏱️ Runtime :
1.27 milliseconds→1.19 milliseconds(best of63runs)📝 Explanation and details
The optimization delivers a 6% runtime improvement through two key database query optimizations:
What was optimized:
get_document(): Replacedlist(cursor)+ indexing withnext(cursor, None)for immediate single-result retrievalLIMIT 1to the AQL query to stop processing after finding the first matchelifchain to sequentialifstatements indelete_record()Why this improves performance:
next()avoids materializing an entire result list in memory, only fetching the needed single documentLIMIT 1allows ArangoDB's query optimizer to exit early once a match is found, reducing unnecessary document scanningPerformance impact analysis:
The line profiler shows the most significant improvement in
get_document()where cursor processing time dropped from 168,227ns to 116,306ns (31% faster). Sinceget_document()is called frequently (423 hits in profiling), this optimization has a compounding effect on overall performance.Test case benefits:
The optimization particularly benefits scenarios with:
The optimizations maintain identical behavior and error handling while providing measurable performance gains for database-intensive workloads.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
import asyncio
Import the function/class under test
from types import SimpleNamespace
import pytest
from app.connectors.services.base_arango_service import BaseArangoService
Mocks for dependencies
class DummyLogger:
def init(self):
self.logs = []
def info(self, msg, *args, **kwargs):
self.logs.append(("info", msg))
def error(self, msg, *args, **kwargs):
self.logs.append(("error", msg))
def warning(self, msg, *args, **kwargs):
self.logs.append(("warning", msg))
class DummyTransaction:
def init(self):
self.committed = False
self.aborted = False
def commit_transaction(self):
self.committed = True
def abort_transaction(self):
self.aborted = True
class aql:
@staticmethod
def execute(query, bind_vars=None):
return iter([])
class DummyAQL:
def init(self, responses):
self.responses = responses
def execute(self, query, bind_vars=None):
# Use the query string to select response
key = (query.strip(), tuple(sorted((bind_vars or {}).items())))
# Fallback to first response if not found
return iter(self.responses.get(key, self.responses.get(query.strip(), [])))
class DummyDB:
def init(self, responses):
self.aql = DummyAQL(responses)
def begin_transaction(self, write):
return DummyTransaction()
Minimal constants and enums
class DummyEnum:
def init(self, value): self.value = value
class CollectionNames:
RECORDS = DummyEnum("records")
FILES = DummyEnum("files")
MAILS = DummyEnum("mails")
USERS = DummyEnum("users")
RECORD_GROUPS = DummyEnum("recordGroups")
BELONGS_TO = DummyEnum("belongsTo")
PERMISSIONS = DummyEnum("permissions")
ANYONE = DummyEnum("anyone")
IS_OF_TYPE = DummyEnum("isOfType")
USER_DRIVE_RELATION = DummyEnum("userDriveRelation")
RECORD_RELATIONS = DummyEnum("recordRelations")
PERMISSIONS_TO_KB = DummyEnum("permissionsToKb")
class Connectors:
GOOGLE_DRIVE = DummyEnum("GOOGLE_DRIVE")
GOOGLE_MAIL = DummyEnum("GOOGLE_MAIL")
OUTLOOK = DummyEnum("OUTLOOK")
KNOWLEDGE_BASE = DummyEnum("KNOWLEDGE_BASE")
class OriginTypes:
UPLOAD = DummyEnum("UPLOAD")
@pytest.fixture
def service_factory():
# Factory to create a service with dummy dependencies
def factory(db_responses=None, user=None, kb_context=None, permission=None, record=None):
logger = DummyLogger()
arango_client = None
config_service = None
kafka_service = None
service = BaseArangoService(logger, arango_client, config_service, kafka_service)
# Patch db
service.db = DummyDB(db_responses or {})
# Patch async helpers
async def get_document(record_id, collection):
if record and collection == CollectionNames.RECORDS.value and record_id == record.get("_key"):
return record
if collection == CollectionNames.RECORDS.value and record_id == "notfound":
return None
if collection == CollectionNames.FILES.value and record_id == record.get("_key"):
return {"_key": record_id, "file": True}
if collection == CollectionNames.MAILS.value and record_id == record.get("_key"):
return {"_key": record_id, "mail": True}
return None
service.get_document = get_document
async def get_user_by_user_id(user_id):
return user if user_id == (user or {}).get("userId") else None
service.get_user_by_user_id = get_user_by_user_id
async def _get_kb_context_for_record(record_id):
return kb_context if record_id == (record or {}).get("_key") else None
service._get_kb_context_for_record = _get_kb_context_for_record
async def get_user_kb_permission(kb_id, user_id, transaction=None):
return permission
service.get_user_kb_permission = get_user_kb_permission
async def _check_drive_permissions(record_id, user_key): return permission
service._check_drive_permissions = _check_drive_permissions
async def _check_gmail_permissions(record_id, user_key): return permission
service._check_gmail_permissions = _check_gmail_permissions
async def _check_record_permission(record_id, user_key): return permission
service._check_record_permission = _check_record_permission
# Patch deletion helpers to return success immediately
async def _execute_kb_record_deletion(record_id, record, kb_context): return {"success": True, "record_id": record_id, "connector": Connectors.KNOWLEDGE_BASE.value.value, "kb_context": kb_context}
async def _execute_drive_record_deletion(record_id, record, user_role): return {"success": True, "record_id": record_id, "connector": Connectors.GOOGLE_DRIVE.value.value, "user_role": user_role}
async def _execute_gmail_record_deletion(record_id, record, user_role): return {"success": True, "record_id": record_id, "connector": Connectors.GOOGLE_MAIL.value.value, "user_role": user_role}
async def _execute_outlook_record_deletion(record_id, record): return {"success": True, "record_id": record_id, "attachments_deleted": 0}
service._execute_kb_record_deletion = _execute_kb_record_deletion
service._execute_drive_record_deletion = _execute_drive_record_deletion
service._execute_gmail_record_deletion = _execute_gmail_record_deletion
service._execute_outlook_record_deletion = _execute_outlook_record_deletion
return service
return factory
1. Basic Test Cases
@pytest.mark.asyncio
async def test_delete_record_not_found(service_factory):
"""Test record not found returns 404"""
service = service_factory()
result = await service.delete_record("notfound", "user1")
@pytest.mark.asyncio
async def test_delete_record_unsupported_connector(service_factory):
"""Test unsupported connector returns error"""
record = {"_key": "bad1", "connectorName": "UNSUPPORTED", "origin": ""}
user = {"userId": "user5", "_key": "user5"}
service = service_factory(user=user, record=record)
result = await service.delete_record("bad1", "user5")
2. Edge Test Cases
@pytest.mark.asyncio
#------------------------------------------------
import asyncio
import pytest
from app.connectors.services.base_arango_service import BaseArangoService
Mocks for dependencies
class DummyLogger:
def init(self):
self.infos = []
self.errors = []
self.warnings = []
def info(self, msg, *args):
self.infos.append(msg)
def error(self, msg, *args):
self.errors.append(msg)
def warning(self, msg, *args):
self.warnings.append(msg)
class DummyTransaction:
def init(self):
self.committed = False
self.aborted = False
def commit_transaction(self):
self.committed = True
def abort_transaction(self):
self.aborted = True
@Property
def aql(self):
return self
def execute(self, query, bind_vars=None):
# For attachment deletion in Outlook
if "ATTACHMENT" in query:
return iter(["attachment1", "attachment2"])
return iter([])
class DummyDB:
def init(self, documents, users, permissions, kb_contexts):
self.documents = documents # {collection: {key: doc}}
self.users = users # {user_id: user_dict}
self.permissions = permissions # {(kb_id, user_id): role}
self.kb_contexts = kb_contexts # {record_id: kb_context}
self.transactions = []
@Property
def aql(self):
return self
def execute(self, query, bind_vars=None):
# Simulate document lookups
if "FOR doc IN @@collection" in query:
collection = bind_vars["@collection"]
document_key = bind_vars["document_key"]
doc = self.documents.get(collection, {}).get(document_key)
return iter([doc] if doc else [])
if "FOR user IN users" in query:
user_id = bind_vars["user_id"]
user = self.users.get(user_id)
return iter([user] if user else [])
if "FOR perm IN @@permissions_collection" in query:
kb_id = bind_vars.get("kb_id")
user_id = bind_vars.get("user_id")
role = self.permissions.get((kb_id, user_id))
if role:
return iter([{"role": role}])
else:
return iter([])
if "LET kb_edge =" in query:
record_id = bind_vars.get("record_id")
context = self.kb_contexts.get(record_id)
return iter([context] if context else [])
return iter([])
def begin_transaction(self, write=None):
tx = DummyTransaction()
self.transactions.append(tx)
return tx
class Connectors:
GOOGLE_DRIVE = type("Enum", (), {"value": "GOOGLE_DRIVE"})
GOOGLE_MAIL = type("Enum", (), {"value": "GOOGLE_MAIL"})
OUTLOOK = type("Enum", (), {"value": "OUTLOOK"})
KNOWLEDGE_BASE = type("Enum", (), {"value": "KNOWLEDGE_BASE"})
class OriginTypes:
UPLOAD = type("Enum", (), {"value": "UPLOAD"})
The function to test (from the provided code)
... [PASTE THE ENTIRE BaseArangoService CLASS HERE] ...
For brevity, we assume the class is pasted above as per instructions.
For testing, we patch the connector-specific methods to avoid database complexity
@pytest.fixture
def service_factory():
# Helper to create a service with custom documents/users/permissions/kb_contexts
def make_service(documents, users, permissions, kb_contexts):
logger = DummyLogger()
db = DummyDB(documents, users, permissions, kb_contexts)
config_service = None
service = BaseArangoService(
logger=logger,
arango_client=None,
config_service=config_service,
kafka_service=None
)
service.db = db
# Patch connector-specific methods to return canned responses
async def delete_knowledge_base_record(record_id, user_id, record):
return {"success": True, "record_id": record_id, "connector": Connectors.KNOWLEDGE_BASE.value}
async def delete_google_drive_record(record_id, user_id, record):
return {"success": True, "record_id": record_id, "connector": Connectors.GOOGLE_DRIVE.value}
async def delete_gmail_record(record_id, user_id, record):
return {"success": True, "record_id": record_id, "connector": Connectors.GOOGLE_MAIL.value}
async def delete_outlook_record(record_id, user_id, record):
return {"success": True, "record_id": record_id, "connector": Connectors.OUTLOOK.value}
service.delete_knowledge_base_record = delete_knowledge_base_record
service.delete_google_drive_record = delete_google_drive_record
service.delete_gmail_record = delete_gmail_record
service.delete_outlook_record = delete_outlook_record
return service
return make_service
------------------- BASIC TEST CASES -------------------
@pytest.mark.asyncio
async def test_delete_record_success_knowledge_base(service_factory):
"""Test successful deletion for a knowledge base record (origin: UPLOAD)"""
documents = {
"records": {"rec1": {"_key": "rec1", "origin": OriginTypes.UPLOAD.value, "connectorName": Connectors.KNOWLEDGE_BASE.value}},
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
result = await service.delete_record("rec1", "user1")
@pytest.mark.asyncio
async def test_delete_record_success_google_drive(service_factory):
"""Test successful deletion for a Google Drive record"""
documents = {
"records": {"rec2": {"_key": "rec2", "connectorName": Connectors.GOOGLE_DRIVE.value}},
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
result = await service.delete_record("rec2", "user2")
@pytest.mark.asyncio
async def test_delete_record_success_gmail(service_factory):
"""Test successful deletion for a Gmail record"""
documents = {
"records": {"rec3": {"_key": "rec3", "connectorName": Connectors.GOOGLE_MAIL.value}},
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
result = await service.delete_record("rec3", "user3")
@pytest.mark.asyncio
async def test_delete_record_success_outlook(service_factory):
"""Test successful deletion for an Outlook record"""
documents = {
"records": {"rec4": {"_key": "rec4", "connectorName": Connectors.OUTLOOK.value}},
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
result = await service.delete_record("rec4", "user4")
@pytest.mark.asyncio
async def test_delete_record_not_found(service_factory):
"""Test deletion when record does not exist"""
documents = {
"records": {}, # No record
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
result = await service.delete_record("missing", "user1")
@pytest.mark.asyncio
async def test_delete_record_unsupported_connector(service_factory):
"""Test deletion when connector is unsupported"""
documents = {
"records": {"rec5": {"_key": "rec5", "connectorName": "UNSUPPORTED"}},
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
result = await service.delete_record("rec5", "user5")
------------------- EDGE TEST CASES -------------------
@pytest.mark.asyncio
async def test_delete_record_exception_in_get_document(service_factory):
"""Test exception handling if get_document raises Exception"""
documents = {
"records": {"rec6": {"_key": "rec6", "connectorName": Connectors.GOOGLE_DRIVE.value}},
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
# Patch get_document to raise Exception
async def bad_get_document(document_key, collection):
raise RuntimeError("DB error")
service.get_document = bad_get_document
result = await service.delete_record("rec6", "user6")
@pytest.mark.asyncio
async def test_delete_record_concurrent_requests(service_factory):
"""Test concurrent deletion requests for different records"""
documents = {
"records": {
"rec7": {"_key": "rec7", "connectorName": Connectors.GOOGLE_DRIVE.value},
"rec8": {"_key": "rec8", "connectorName": Connectors.GOOGLE_MAIL.value},
"rec9": {"_key": "rec9", "origin": OriginTypes.UPLOAD.value, "connectorName": Connectors.KNOWLEDGE_BASE.value},
}
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
results = await asyncio.gather(
service.delete_record("rec7", "user7"),
service.delete_record("rec8", "user8"),
service.delete_record("rec9", "user9"),
)
@pytest.mark.asyncio
async def test_delete_record_concurrent_same_record(service_factory):
"""Test concurrent deletion requests for the same record"""
documents = {
"records": {"rec10": {"_key": "rec10", "connectorName": Connectors.OUTLOOK.value}},
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
# Simulate concurrent deletion of the same record
results = await asyncio.gather(
service.delete_record("rec10", "user10"),
service.delete_record("rec10", "user10"),
service.delete_record("rec10", "user10"),
)
for result in results:
pass
@pytest.mark.asyncio
async def test_delete_record_edge_case_empty_connector_name(service_factory):
"""Test deletion when connectorName is missing/empty"""
documents = {
"records": {"rec11": {"_key": "rec11"}},
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
result = await service.delete_record("rec11", "user11")
------------------- LARGE SCALE TEST CASES -------------------
@pytest.mark.asyncio
async def test_delete_record_large_scale_mixed_connectors(service_factory):
"""Test large scale concurrent deletions with mixed connectors"""
N = 50
documents = {
"records": {
f"rec{i}": {
"_key": f"rec{i}",
"connectorName": [Connectors.GOOGLE_DRIVE.value, Connectors.GOOGLE_MAIL.value, Connectors.OUTLOOK.value, Connectors.KNOWLEDGE_BASE.value][i % 4],
"origin": OriginTypes.UPLOAD.value if i % 4 == 3 else None
}
for i in range(N)
}
}
users = {}
permissions = {}
kb_contexts = {}
service = service_factory(documents, users, permissions, kb_contexts)
tasks = [service.delete_record(f"rec{i}", f"user{i}") for i in range(N)]
results = await asyncio.gather(*tasks)
for i, result in enumerate(results):
pass
------------------- THROUGHPUT TEST CASES -------------------
@pytest.mark.asyncio
To edit these changes
git checkout codeflash/optimize-BaseArangoService.delete_record-mhxk3bfdand push.