Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .jules/bolt.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
## 2025-05-15 - [Sequential session destruction in SDKs]
**Learning:** All Copilot SDKs (Node.js, Python, Go, .NET) were initially implementing session destruction sequentially during client shutdown. This leads to a linear increase in shutdown time as the number of active sessions grows, especially when individual destructions involve retries and backoff.
**Action:** Parallelize session cleanup using language-specific concurrency primitives (e.g., `Promise.all` in Node.js, `asyncio.gather` in Python, `Task.WhenAll` in .NET, or WaitGroups/Channels in Go) to ensure shutdown time remains constant and minimal.
# Bolt's Performance Journal

## 2025-05-15 - Parallel Session Destruction
**Learning:** Sequential session destruction during client shutdown is a significant bottleneck when multiple sessions are active, as each destruction involves a JSON-RPC request over IPC/Network. Parallelizing this across all SDKs (Go, .NET, Python) brings them to parity with the Node.js implementation and significantly reduces shutdown latency.
**Action:** Always prefer parallel execution for independent I/O-bound cleanup tasks during client shutdown.
40 changes: 29 additions & 11 deletions dotnet/src/Client.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*--------------------------------------------------------------------------------------------*/

using Microsoft.Extensions.AI;
using System.Linq;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging.Abstractions;
using StreamJsonRpc;
Expand Down Expand Up @@ -210,25 +211,42 @@ async Task<Connection> StartCoreAsync(CancellationToken ct)
/// </example>
public async Task StopAsync()
{
var errors = new List<Exception>();
// Destroy all active sessions in parallel to speed up shutdown.
// This reduces StopAsync() time from O(N*T) to O(T) where N is the number of sessions.
var errors = new ConcurrentBag<Exception>();

foreach (var session in _sessions.Values.ToArray())
var sessionTasks = _sessions.Values.ToArray().Select(async session =>
{
try
{
await session.DisposeAsync();
}
catch (Exception ex)
for (int attempt = 1; attempt <= 3; attempt++)
{
errors.Add(new Exception($"Failed to destroy session {session.SessionId}: {ex.Message}", ex));
try
{
await session.DisposeAsync();
return; // Success
}
catch (Exception ex)
{
if (attempt == 3)
{
errors.Add(new Exception($"Failed to destroy session {session.SessionId} after 3 attempts: {ex.Message}", ex));
}
else
{
// Exponential backoff: 100ms, 200ms
await Task.Delay(100 * attempt);
}
}
}
}
});

await Task.WhenAll(sessionTasks);

_sessions.Clear();
await CleanupConnectionAsync(errors);
var errorList = errors.ToList();
await CleanupConnectionAsync(errorList);
_connectionTask = null;

ThrowErrors(errors);
ThrowErrors(errorList);
}

/// <summary>
Expand Down
34 changes: 31 additions & 3 deletions go/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,38 @@ func (c *Client) Stop() error {
}
c.sessionsMux.Unlock()

// Destroy all active sessions in parallel to speed up shutdown.
// This reduces Stop() time from O(N*T) to O(T) where N is the number of sessions.
var wg sync.WaitGroup
errsChan := make(chan error, len(sessions))

for _, session := range sessions {
if err := session.Destroy(); err != nil {
errs = append(errs, fmt.Errorf("failed to destroy session %s: %w", session.SessionID, err))
}
wg.Add(1)
go func(s *Session) {
defer wg.Done()
var lastErr error
for attempt := 1; attempt <= 3; attempt++ {
if err := s.Destroy(); err != nil {
lastErr = err
if attempt < 3 {
// Exponential backoff: 100ms, 200ms
time.Sleep(time.Duration(100*attempt) * time.Millisecond)
}
} else {
lastErr = nil
break
}
}
if lastErr != nil {
errsChan <- fmt.Errorf("failed to destroy session %s after 3 attempts: %w", s.SessionID, lastErr)
}
}(session)
}

wg.Wait()
close(errsChan)
for err := range errsChan {
errs = append(errs, err)
}

c.sessionsMux.Lock()
Expand Down
33 changes: 27 additions & 6 deletions python/copilot/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,13 +313,34 @@ async def stop(self) -> list["StopError"]:
sessions_to_destroy = list(self._sessions.values())
self._sessions.clear()

for session in sessions_to_destroy:
try:
await session.destroy()
except Exception as e:
errors.append(
StopError(message=f"Failed to destroy session {session.session_id}: {e}")
# Destroy all active sessions in parallel to speed up shutdown.
# This reduces stop() time from O(N*T) to O(T) where N is the number of sessions.
async def destroy_with_retry(session: CopilotSession) -> Optional[StopError]:
last_error = None
for attempt in range(1, 4):
try:
await session.destroy()
return None
except Exception as e:
last_error = e
if attempt < 3:
# Exponential backoff: 100ms, 200ms
await asyncio.sleep(0.1 * attempt)
return StopError(
message=(
f"Failed to destroy session {session.session_id} after 3 attempts: {last_error}"
)
)

if sessions_to_destroy:
results = await asyncio.gather(
*[destroy_with_retry(s) for s in sessions_to_destroy], return_exceptions=True
)
for res in results:
if isinstance(res, StopError):
errors.append(res)
elif isinstance(res, Exception):
errors.append(StopError(message=str(res)))

# Close client
if self._client:
Expand Down
Loading