diff --git a/.jules/bolt.md b/.jules/bolt.md index d81dff90..f1595cb4 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -1,3 +1,6 @@ ## 2025-05-15 - [Sequential session destruction in SDKs] **Learning:** All Copilot SDKs (Node.js, Python, Go, .NET) were initially implementing session destruction sequentially during client shutdown. This leads to a linear increase in shutdown time as the number of active sessions grows, especially when individual destructions involve retries and backoff. **Action:** Parallelize session cleanup using language-specific concurrency primitives (e.g., `Promise.all` in Node.js, `asyncio.gather` in Python, `Task.WhenAll` in .NET, or WaitGroups/Channels in Go) to ensure shutdown time remains constant and minimal. +## 2026-02-07 - [Python SDK] Parallelize Session Destruction +**Learning:** Sequential cleanup of network-bound resources (like JSON-RPC sessions) leads to $O(N)$ shutdown time. Parallelizing with `asyncio.gather` reduces it to $O(1)$ relative to session count. +**Action:** Always check cleanup/stop methods for sequential IO and parallelize where safe. Implement retry logic for cleanup to match robust SDK patterns. 
diff --git a/python/copilot/client.py b/python/copilot/client.py index 85b72897..e384ef91 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -313,13 +313,29 @@ async def stop(self) -> list["StopError"]: sessions_to_destroy = list(self._sessions.values()) self._sessions.clear() - for session in sessions_to_destroy: - try: - await session.destroy() - except Exception as e: - errors.append( - StopError(message=f"Failed to destroy session {session.session_id}: {e}") + async def destroy_with_retry(session: CopilotSession) -> Optional[StopError]: + last_error: Optional[Exception] = None + # Try up to 3 times with exponential backoff (match Node.js SDK) + for attempt in range(1, 4): + try: + await session.destroy() + return None + except Exception as e: + last_error = e + if attempt < 3: + # Exponential backoff: 100ms, 200ms + await asyncio.sleep(0.1 * (2 ** (attempt - 1))) + + return StopError( + message=( + f"Failed to destroy session {session.session_id} after 3 attempts: {last_error}" ) + ) + + # Destroy all active sessions in parallel to ensure shutdown time is + # independent of the number of active sessions. + results = await asyncio.gather(*(destroy_with_retry(s) for s in sessions_to_destroy)) + errors.extend([r for r in results if r is not None]) # Close client if self._client: