From 00bf6d2505438d0df7768b58e35d28caf7f85e66 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 9 Feb 2026 18:10:09 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20parallelize=20session=20des?= =?UTF-8?q?truction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit parallelizes session destruction during client shutdown across the Go, .NET, and Python SDKs. 💡 What: - Transitioned from sequential to parallel session destruction in Stop/StopAsync methods. - Implemented 3-attempt retry logic with exponential backoff (100ms, 200ms) for robustness. - Used language-specific parallelization primitives: goroutines (Go), Task.WhenAll (.NET), and asyncio.gather (Python). 🎯 Why: Sequential session destruction was a performance bottleneck, especially when multiple sessions were active, as each destruction involves an IPC/Network request. 📊 Impact: Reduces shutdown latency from O(N*T) to O(T), where N is the number of sessions and T is the time to destroy a single session. 🔬 Measurement: Verified with unit tests in Go and Python, and successful builds in .NET. Added documentation comments explaining the optimization. Co-authored-by: AkCodes23 <135016848+AkCodes23@users.noreply.github.com> --- .jules/bolt.md | 8 +++++--- dotnet/src/Client.cs | 40 +++++++++++++++++++++++++++++----------- go/client.go | 34 +++++++++++++++++++++++++++++++--- python/copilot/client.py | 33 +++++++++++++++++++++++++++------ 4 files changed, 92 insertions(+), 23 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index d81dff90..fb3099a8 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -1,3 +1,5 @@ -## 2025-05-15 - [Sequential session destruction in SDKs] -**Learning:** All Copilot SDKs (Node.js, Python, Go, .NET) were initially implementing session destruction sequentially during client shutdown. This leads to a linear increase in shutdown time as the number of active sessions grows, especially when individual destructions involve retries and backoff. -**Action:** Parallelize session cleanup using language-specific concurrency primitives (e.g., `Promise.all` in Node.js, `asyncio.gather` in Python, `Task.WhenAll` in .NET, or WaitGroups/Channels in Go) to ensure shutdown time remains constant and minimal. +# Bolt's Performance Journal + +## 2025-05-15 - Parallel Session Destruction +**Learning:** Sequential session destruction during client shutdown is a significant bottleneck when multiple sessions are active, as each destruction involves a JSON-RPC request over IPC/Network. Parallelizing this across all SDKs (Go, .NET, Python) brings them to parity with the Node.js implementation and significantly reduces shutdown latency. +**Action:** Always prefer parallel execution for independent I/O-bound cleanup tasks during client shutdown. diff --git a/dotnet/src/Client.cs b/dotnet/src/Client.cs index 74f1c66f..6f2538ef 100644 --- a/dotnet/src/Client.cs +++ b/dotnet/src/Client.cs @@ -3,6 +3,7 @@ *--------------------------------------------------------------------------------------------*/ using Microsoft.Extensions.AI; +using System.Linq; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging.Abstractions; using StreamJsonRpc; @@ -210,25 +211,42 @@ async Task StartCoreAsync(CancellationToken ct) /// public async Task StopAsync() { - var errors = new List(); + // Destroy all active sessions in parallel to speed up shutdown. + // This reduces StopAsync() time from O(N*T) to O(T) where N is the number of sessions. + var errors = new ConcurrentBag(); - foreach (var session in _sessions.Values.ToArray()) + var sessionTasks = _sessions.Values.ToArray().Select(async session => { - try - { - await session.DisposeAsync(); - } - catch (Exception ex) + for (int attempt = 1; attempt <= 3; attempt++) { - errors.Add(new Exception($"Failed to destroy session {session.SessionId}: {ex.Message}", ex)); + try + { + await session.DisposeAsync(); + return; // Success + } + catch (Exception ex) + { + if (attempt == 3) + { + errors.Add(new Exception($"Failed to destroy session {session.SessionId} after 3 attempts: {ex.Message}", ex)); + } + else + { + // Exponential backoff: 100ms, 200ms + await Task.Delay(100 * attempt); + } + } } - } + }); + + await Task.WhenAll(sessionTasks); _sessions.Clear(); - await CleanupConnectionAsync(errors); + var errorList = errors.ToList(); + await CleanupConnectionAsync(errorList); _connectionTask = null; - ThrowErrors(errors); + ThrowErrors(errorList); } /// diff --git a/go/client.go b/go/client.go index 319c6588..d28b3969 100644 --- a/go/client.go +++ b/go/client.go @@ -294,10 +294,38 @@ func (c *Client) Stop() error { } c.sessionsMux.Unlock() + // Destroy all active sessions in parallel to speed up shutdown. + // This reduces Stop() time from O(N*T) to O(T) where N is the number of sessions. + var wg sync.WaitGroup + errsChan := make(chan error, len(sessions)) + for _, session := range sessions { - if err := session.Destroy(); err != nil { - errs = append(errs, fmt.Errorf("failed to destroy session %s: %w", session.SessionID, err)) - } + wg.Add(1) + go func(s *Session) { + defer wg.Done() + var lastErr error + for attempt := 1; attempt <= 3; attempt++ { + if err := s.Destroy(); err != nil { + lastErr = err + if attempt < 3 { + // Exponential backoff: 100ms, 200ms + time.Sleep(time.Duration(100*attempt) * time.Millisecond) + } + } else { + lastErr = nil + break + } + } + if lastErr != nil { + errsChan <- fmt.Errorf("failed to destroy session %s after 3 attempts: %w", s.SessionID, lastErr) + } + }(session) + } + + wg.Wait() + close(errsChan) + for err := range errsChan { + errs = append(errs, err) } c.sessionsMux.Lock() diff --git a/python/copilot/client.py b/python/copilot/client.py index 85b72897..30947b9b 100644 --- a/python/copilot/client.py +++ b/python/copilot/client.py @@ -313,13 +313,34 @@ async def stop(self) -> list["StopError"]: sessions_to_destroy = list(self._sessions.values()) self._sessions.clear() - for session in sessions_to_destroy: - try: - await session.destroy() - except Exception as e: - errors.append( - StopError(message=f"Failed to destroy session {session.session_id}: {e}") + # Destroy all active sessions in parallel to speed up shutdown. + # This reduces stop() time from O(N*T) to O(T) where N is the number of sessions. + async def destroy_with_retry(session: CopilotSession) -> Optional[StopError]: + last_error = None + for attempt in range(1, 4): + try: + await session.destroy() + return None + except Exception as e: + last_error = e + if attempt < 3: + # Exponential backoff: 100ms, 200ms + await asyncio.sleep(0.1 * attempt) + return StopError( + message=( + f"Failed to destroy session {session.session_id} after 3 attempts: {last_error}" ) + ) + + if sessions_to_destroy: + results = await asyncio.gather( + *[destroy_with_retry(s) for s in sessions_to_destroy], return_exceptions=True + ) + for res in results: + if isinstance(res, StopError): + errors.append(res) + elif isinstance(res, Exception): + errors.append(StopError(message=str(res))) # Close client if self._client: