Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions dotnet/src/Client.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.Data;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Net.Sockets;
using System.Text.Json;
using System.Text.Json.Serialization;
Expand Down Expand Up @@ -210,21 +211,34 @@ async Task<Connection> StartCoreAsync(CancellationToken ct)
/// </example>
public async Task StopAsync()
{
var errors = new List<Exception>();
var sessionsToDestroy = _sessions.Values.ToArray();
_sessions.Clear();

foreach (var session in _sessions.Values.ToArray())
var sessionTasks = sessionsToDestroy.Select(async session =>
{
try
{
await session.DisposeAsync();
}
catch (Exception ex)
Exception? lastEx = null;
for (int attempt = 1; attempt <= 3; attempt++)
{
errors.Add(new Exception($"Failed to destroy session {session.SessionId}: {ex.Message}", ex));
try
{
await session.DisposeAsync();
return null;
}
catch (Exception ex)
{
lastEx = ex;
if (attempt < 3)
{
// Exponential backoff: 100ms, 200ms
await Task.Delay(100 * (1 << (attempt - 1)));
}
}
}
}
return new Exception($"Failed to destroy session {session.SessionId} after 3 attempts: {lastEx?.Message}", lastEx);
});

_sessions.Clear();
var results = await Task.WhenAll(sessionTasks);
var errors = results.Where(e => e != null).Cast<Exception>().ToList();
await CleanupConnectionAsync(errors);
_connectionTask = null;

Expand Down
30 changes: 27 additions & 3 deletions go/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,34 @@ func (c *Client) Stop() error {
}
c.sessionsMux.Unlock()

var wg sync.WaitGroup
errChan := make(chan error, len(sessions))

for _, session := range sessions {
if err := session.Destroy(); err != nil {
errs = append(errs, fmt.Errorf("failed to destroy session %s: %w", session.SessionID, err))
}
wg.Add(1)
go func(s *Session) {
defer wg.Done()
var lastErr error
for attempt := 1; attempt <= 3; attempt++ {
if err := s.Destroy(); err != nil {
lastErr = err
if attempt < 3 {
// Exponential backoff: 100ms, 200ms
time.Sleep(time.Duration(100*(1<<(attempt-1))) * time.Millisecond)
}
} else {
return
}
}
errChan <- fmt.Errorf("failed to destroy session %s after 3 attempts: %w", s.SessionID, lastErr)
}(session)
}

wg.Wait()
close(errChan)

for err := range errChan {
errs = append(errs, err)
}

c.sessionsMux.Lock()
Expand Down
29 changes: 23 additions & 6 deletions python/copilot/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,13 +313,30 @@ async def stop(self) -> list["StopError"]:
sessions_to_destroy = list(self._sessions.values())
self._sessions.clear()

for session in sessions_to_destroy:
try:
await session.destroy()
except Exception as e:
errors.append(
StopError(message=f"Failed to destroy session {session.session_id}: {e}")
async def destroy_with_retry(session: CopilotSession) -> Optional[StopError]:
"""Destroy a session with up to 3 attempts and exponential backoff."""
last_err: Optional[Exception] = None
for attempt in range(1, 4):
try:
await session.destroy()
return None
except Exception as e:
last_err = e
if attempt < 3:
# Exponential backoff: 100ms, 200ms
delay = 0.1 * (2 ** (attempt - 1))
await asyncio.sleep(delay)

Comment on lines +319 to +329
Copy link

Copilot AI Feb 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

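destroy_with_retry catches Exception. On Python 3.7 and earlier this includes asyncio.CancelledError; since Python 3.8, CancelledError derives from BaseException, so on 3.9/3.10 a plain `except Exception` does not catch it. If older runtimes are supported (or the handler is ever widened to BaseException), this can swallow task cancellation during shutdown (and even retry after cancellation), preventing stop() from being promptly cancellable. To make the intent explicit, handle asyncio.CancelledError by re-raising it before the broad except Exception (and avoid retrying on cancellation).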

Copilot uses AI. Check for mistakes.
return StopError(
message=(
f"Failed to destroy session {session.session_id} after 3 attempts: {last_err}"
)
)

# Destroy all active sessions in parallel with retry logic
if sessions_to_destroy:
results = await asyncio.gather(*(destroy_with_retry(s) for s in sessions_to_destroy))
errors.extend([r for r in results if r is not None])
Comment on lines +316 to +339
Copy link

Copilot AI Feb 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new parallel destruction + retry behavior in stop() (including backoff timing and multi-attempt success/failure handling) isn’t covered by existing tests. Add a unit or e2e test that simulates a transient failure in session.destroy() (fail once, then succeed) to assert stop() returns no StopError, and a test that verifies persistent failure reports exactly one StopError per session.

Copilot uses AI. Check for mistakes.

# Close client
if self._client:
Expand Down
6 changes: 4 additions & 2 deletions python/e2e/test_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ async def test_should_list_sessions(self, ctx: E2ETestContext):
await session2.send_and_wait({"prompt": "Say goodbye"})

# Small delay to ensure session files are written to disk
await asyncio.sleep(0.2)
# Increased to 0.5s to avoid flakiness on slower environments (e.g. Windows CI)
await asyncio.sleep(0.5)

# List sessions and verify they're included
sessions = await ctx.client.list_sessions()
Expand Down Expand Up @@ -229,7 +230,8 @@ async def test_should_delete_session(self, ctx: E2ETestContext):
session_id = session.session_id

# Small delay to ensure session file is written to disk
await asyncio.sleep(0.2)
# Increased to 0.5s to avoid flakiness on slower environments (e.g. Windows CI)
await asyncio.sleep(0.5)

# Verify session exists in the list
sessions = await ctx.client.list_sessions()
Expand Down
Loading