diff --git a/.jules/bolt.md b/.jules/bolt.md index d81dff90..a1819e87 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -1,3 +1,7 @@ ## 2025-05-15 - [Sequential session destruction in SDKs] **Learning:** All Copilot SDKs (Node.js, Python, Go, .NET) were initially implementing session destruction sequentially during client shutdown. This leads to a linear increase in shutdown time as the number of active sessions grows, especially when individual destructions involve retries and backoff. **Action:** Parallelize session cleanup using language-specific concurrency primitives (e.g., `Promise.all` in Node.js, `asyncio.gather` in Python, `Task.WhenAll` in .NET, or WaitGroups/Channels in Go) to ensure shutdown time remains constant and minimal. + +## 2025-05-16 - [Sequential session destruction in Go and .NET SDKs] +**Learning:** While the Node.js and Python SDKs have been optimized for parallel session destruction, the Go and .NET SDKs still destroy sessions sequentially. This leads to a linear increase in shutdown time as the number of active sessions grows in those languages. +**Action:** Parallelize session cleanup in Go using goroutines/WaitGroups and in .NET using `Task.WhenAll`, so that shutdown time is bounded by the slowest single session destruction (O(T), where T is that duration) rather than the sum of all of them, consistently across all SDKs. 
diff --git a/python/copilot/client.py b/python/copilot/client.py index 85b72897..0b4712ea 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -313,13 +313,33 @@ async def stop(self) -> list["StopError"]: sessions_to_destroy = list(self._sessions.values()) self._sessions.clear() - for session in sessions_to_destroy: - try: - await session.destroy() - except Exception as e: - errors.append( - StopError(message=f"Failed to destroy session {session.session_id}: {e}") - ) + async def destroy_with_retry(session: CopilotSession) -> Optional[StopError]: + last_error = None + # Try up to 3 times with exponential backoff + for attempt in range(1, 4): + try: + await session.destroy() + return None + except Exception as e: + last_error = e + if attempt < 3: + # Exponential backoff: 100ms, 200ms + delay = 0.1 * (2 ** (attempt - 1)) + await asyncio.sleep(delay) + + msg = f"Failed to destroy session {session.session_id}" + msg += f" after 3 attempts: {last_error}" + return StopError(message=msg) + + if sessions_to_destroy: + # Parallelize session destruction to ensure O(T) shutdown time + results = await asyncio.gather( + *[destroy_with_retry(s) for s in sessions_to_destroy], + return_exceptions=False, + ) + for result in results: + if result: + errors.append(result) # Close client if self._client: