@@ -81,12 +81,23 @@ type Etcd struct {
81
81
82
82
Server * etcdserver.EtcdServer
83
83
84
- cfg Config
85
- stopc chan struct {}
86
- errc chan error
84
+ cfg Config
87
85
86
+ // closeOnce is to ensure `stopc` is closed only once, no matter
87
+ // how many times the Close() method is called.
88
88
closeOnce sync.Once
89
- wg sync.WaitGroup
89
+ // stopc is used to notify the sub goroutines not to send
90
+ // any errors to `errc`.
91
+ stopc chan struct {}
92
+ // errc is used to receive errors from sub goroutines (including
93
+ // client handler, peer handler and metrics handler). It's closed
94
+ // after all these sub goroutines exit (checked via `wg`). Writers
95
+ // should avoid writing after `stopc` is closed by selecting on
96
+ // reading from `stopc`.
97
+ errc chan error
98
+
99
+ // wg is used to track the lifecycle of all sub goroutines created by `StartEtcd`.
100
+ wg sync.WaitGroup
90
101
}
91
102
92
103
type peerListener struct {
@@ -368,6 +379,24 @@ func (e *Etcd) Config() Config {
368
379
// Close gracefully shuts down all servers/listeners.
369
380
// Client requests will be terminated with request timeout.
370
381
// After the timeout, any remaining requests are forcibly closed immediately.
382
+ //
383
+ // The rough workflow to shut down etcd:
384
+ // 1. close the `stopc` channel, so that all error handlers (child
385
+ // goroutines) won't send back any errors anymore;
386
+ // 2. stop the http and grpc servers gracefully, within request timeout;
387
+ // 3. close all client and metrics listeners, so that etcd server
388
+ // stops receiving any new connection;
389
+ // 4. call the cancel function to close the gateway context, so that
390
+ // all gateway connections are closed;
391
+ // 5. stop etcd server gracefully, and ensure the main raft loop
392
+ // goroutine is stopped;
393
+ // 6. stop all peer listeners, so that etcd stops receiving peer connections
394
+ // and messages (waiting up to 1 second);
395
+ // 7. wait for all child goroutines (i.e. client handlers, peer handlers
396
+ // and metrics handlers) to exit;
397
+ // 8. close the `errc` channel to release the resource. Note that it's only
398
+ // safe to close the `errc` after step 7 above is done, otherwise the
399
+ // child goroutines may send errors back to the already closed `errc` channel.
371
400
func (e * Etcd ) Close () {
372
401
fields := []zap.Field {
373
402
zap .String ("name" , e .cfg .Name ),
@@ -597,7 +626,9 @@ func (e *Etcd) servePeers() (err error) {
597
626
598
627
// start peer servers in a goroutine
599
628
for _ , pl := range e .Peers {
629
+ e .wg .Add (1 )
600
630
go func (l * peerListener ) {
631
+ defer e .wg .Done ()
601
632
u := l .Addr ().String ()
602
633
e .cfg .logger .Info (
603
634
"serving peer traffic" ,
@@ -781,7 +812,9 @@ func (e *Etcd) serveClients() (err error) {
781
812
782
813
// start client servers in each goroutine
783
814
for _ , sctx := range e .sctxs {
815
+ e .wg .Add (1 )
784
816
go func (s * serveCtx ) {
817
+ defer e .wg .Done ()
785
818
e .errHandler (s .serve (e .Server , & e .cfg .ClientTLSInfo , h , e .errHandler , e .grpcGatewayDial (splitHttp ), splitHttp , gopts ... ))
786
819
}(sctx )
787
820
}
@@ -869,7 +902,9 @@ func (e *Etcd) serveMetrics() (err error) {
869
902
return err
870
903
}
871
904
e .metricsListeners = append (e .metricsListeners , ml )
905
+ e .wg .Add (1 )
872
906
go func (u url.URL , ln net.Listener ) {
907
+ defer e .wg .Done ()
873
908
e .cfg .logger .Info (
874
909
"serving metrics" ,
875
910
zap .String ("address" , u .String ()),
@@ -882,9 +917,6 @@ func (e *Etcd) serveMetrics() (err error) {
882
917
}
883
918
884
919
func (e * Etcd ) errHandler (err error ) {
885
- e .wg .Add (1 )
886
- defer e .wg .Done ()
887
-
888
920
select {
889
921
case <- e .stopc :
890
922
return
0 commit comments