Skip to content

Commit

Permalink
clientv3: cancel watches proactively on client context cancellation
Browse files Browse the repository at this point in the history
Currently, watch cancel requests are only sent to the server after a
message comes through on a watch where the client has cancelled. This
means that cancelled watches that don't receive any new messages are
never cancelled; they persist for the lifetime of the client stream.
This has negative consequences for locking applications where a watch
may observe a key which might never change again after cancellation,
leading to many accumulating watches on the server.

By cancelling proactively, in most cases we simply move the cancel
request to happen earlier, and additionally we solve the case where the
cancel request would never be sent.

Fixes etcd-io#9416
Heavy inspiration drawn from the solutions proposed there.
  • Loading branch information
jackkleeman authored and suhailpatel committed Feb 3, 2022
1 parent 5cf5d88 commit e4c4409
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
12 changes: 12 additions & 0 deletions clientv3/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -637,6 +637,18 @@ func (w *watchGrpcStream) run() {
return

case ws := <-w.closingc:
if ws.id != -1 {
// client is closing an established watch; close it on the server proactively instead of waiting
// to close when the next message arrives
cancelSet[ws.id] = struct{}{}
cr := &pb.WatchRequest_CancelRequest{
CancelRequest: &pb.WatchCancelRequest{
WatchId: ws.id,
},
}
req := &pb.WatchRequest{RequestUnion: cr}
wc.Send(req)
}
w.closeSubstream(ws)
delete(closing, ws)
// no more watchers on this stream, shutdown
Expand Down
32 changes: 32 additions & 0 deletions integration/v3_watch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1201,3 +1201,35 @@ func TestV3WatchWithPrevKV(t *testing.T) {
}
}
}

// TestV3WatchCancellation ensures that watch cancellation frees up server resources.
//
// It opens one long-lived watch, then creates and immediately cancels 1000
// further watches on the same key, and finally checks that the member's
// etcd_debugging_mvcc_watcher_total metric reports "1".
func TestV3WatchCancellation(t *testing.T) {
	clus := NewClusterV3(t, &ClusterConfig{Size: 1})
	defer clus.Terminate(t)

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	cli := clus.RandClient()

	// increment watcher total count and keep a stream open
	cli.Watch(ctx, "/foo")

	for i := 0; i < 1000; i++ {
		// Use distinct names so the outer ctx/cancel are not shadowed; only
		// the per-iteration watch is cancelled here.
		watchCtx, cancelWatch := context.WithCancel(ctx)
		cli.Watch(watchCtx, "/foo")
		cancelWatch()
	}

	// Cancellations need a little time to take hold, so poll the metric
	// rather than sleeping for a fixed interval: the test finishes as soon as
	// the count settles and still fails if it has not settled within the
	// original 3 second budget.
	const (
		settleTimeout = 3 * time.Second
		pollInterval  = 50 * time.Millisecond
	)
	deadline := time.Now().Add(settleTimeout)
	for {
		watchers, err := clus.Members[0].Metric("etcd_debugging_mvcc_watcher_total")
		if err != nil {
			t.Fatal(err)
		}
		if watchers == "1" {
			return
		}
		if !time.Now().Before(deadline) {
			t.Fatalf("expected one watch, got %s", watchers)
		}
		time.Sleep(pollInterval)
	}
}

0 comments on commit e4c4409

Please sign in to comment.