diff --git a/server/etcdserver/server.go b/server/etcdserver/server.go
index 4b40e32bada2..3ccb09b47146 100644
--- a/server/etcdserver/server.go
+++ b/server/etcdserver/server.go
@@ -858,6 +858,9 @@ func (s *EtcdServer) run() {
 }
 
 func (s *EtcdServer) revokeExpiredLeases(leases []*lease.Lease) {
+	if !s.ensureLeadership() {
+		return
+	}
 	s.GoAttach(func() {
 		lg := s.Logger()
 		// Increases throughput of expired leases deletion process through parallelization
@@ -892,6 +895,38 @@ func (s *EtcdServer) revokeExpiredLeases(leases []*lease.Lease) {
 	})
 }
 
+// ensureLeadership reports whether the current member is still the leader
+// before the expired leases are revoked. It returns false, after logging a
+// warning, if the linearizable read notification fails or if the raft status
+// names a different member as the leader.
+//
+// It can mitigate the issue
+// https://github.com/etcd-io/etcd/issues/15247, but it can't completely
+// resolve it because the leases may be wrongly revoked by the new leader.
+// Refer to the document in
+// https://github.com/etcd-io/etcd/issues/15247#issuecomment-1777862093.
+func (s *EtcdServer) ensureLeadership() bool {
+	lg := s.Logger()
+	// NOTE(review): this runs synchronously in revokeExpiredLeases (before
+	// GoAttach) and passes s.ctx as-is, with no per-call timeout added here;
+	// confirm linearizableReadNotify cannot block the caller for long.
+	if err := s.linearizableReadNotify(s.ctx); err != nil {
+		lg.Warn("Ignore leases revoking request due to failing to ensure current member's leadership",
+			zap.Error(err))
+		return false
+	}
+
+	newLeaderID := s.raftStatus().Lead
+	if newLeaderID != uint64(s.MemberId()) {
+		lg.Warn("Ignore old leader's leases revoking requests",
+			zap.Uint64("local-member-id", uint64(s.MemberId())),
+			zap.Uint64("new-lead", newLeaderID))
+		return false
+	}
+
+	return true
+}
+
 // Cleanup removes allocated objects by EtcdServer.NewServer in
 // situation that EtcdServer::Start was not called (that takes care of cleanup).
 func (s *EtcdServer) Cleanup() {