Skip to content
This repository was archived by the owner on Aug 23, 2023. It is now read-only.

cluster: drop node updates that are old or about thisNode #948

Merged
merged 4 commits into from
Oct 29, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions cluster/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ type ClusterManager interface {

type MemberlistManager struct {
sync.RWMutex
members map[string]HTTPNode // all members in the cluster, including this node.
members map[string]HTTPNode // all members in the cluster, guaranteed to always have this node
nodeName string
list *memberlist.Memberlist
cfg *memberlist.Config
Expand Down Expand Up @@ -227,15 +227,28 @@ func (c *MemberlistManager) NotifyJoin(node *memberlist.Node) {
unmarshalErrJoin.Inc()
return
}

member.RemoteAddr = node.Addr.String()
if member.Name == c.nodeName {
member.local = true
member.local = (member.Name == c.nodeName)

// we never want anyone else in the cluster to tell us anything about ourselves
// cause we know ourself best.
if member.local {
return
}

existing, ok := c.members[node.Name]
if ok && !member.Updated.After(existing.Updated) {
return
}
c.members[node.Name] = member
c.clusterStats()
}

func (c *MemberlistManager) NotifyLeave(node *memberlist.Node) {
if node.Name == c.nodeName {
return
}
eventsLeave.Inc()
c.Lock()
defer c.Unlock()
Expand All @@ -256,8 +269,9 @@ func (c *MemberlistManager) NotifyUpdate(node *memberlist.Node) {
if err != nil {
log.Errorf("CLU manager: Failed to decode node meta from %s: %s", node.Name, err.Error())
unmarshalErrUpdate.Inc()
// if the node is known, lets mark it as notReady until it starts sending valid data again.
if p, ok := c.members[node.Name]; ok {
// if the node is known and it is not thisNode,
// lets mark it as notReady until it starts sending valid data again.
if p, ok := c.members[node.Name]; ok && node.Name != c.nodeName {
p.State = NodeNotReady
p.StateChange = time.Now()
// we dont set Updated as we dont want the NotReady state to propagate incase we are the only node
Expand All @@ -266,9 +280,19 @@ func (c *MemberlistManager) NotifyUpdate(node *memberlist.Node) {
}
return
}

member.RemoteAddr = node.Addr.String()
if member.Name == c.nodeName {
member.local = true
member.local = (member.Name == c.nodeName)

// we never want anyone else in the cluster to tell us anything about ourselves
// cause we know ourself best.
if member.local {
return
}

existing, ok := c.members[node.Name]
if ok && !member.Updated.After(existing.Updated) {
return
}
c.members[node.Name] = member
log.Infof("CLU manager: HTTPNode %s at %s has been updated - %s", node.Name, node.Addr.String(), node.Meta)
Expand Down Expand Up @@ -353,6 +377,7 @@ func (c *MemberlistManager) SetState(state NodeState) {
node := c.members[c.nodeName]
node.State = state
node.Updated = time.Now()
node.StateChange = time.Now()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add comments to the HTTPNode definition about what Updated and StateChange represent / how they are used? it's not clear how they differ

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

really? one is set when the httpNode is updated one is set when the httpNodes's state changes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, really.
It's more about why things are implemented this way. i.e. the overarching design of the code.
there is a lot of information in your head that is not there for others reading your code.

for example:

  • why is StateChange never read from in the code, is it only for inspection by admin?
  • why does changing the State and StateChange fields not trigger a gossip broadcast whereas changing Update would? (error clause in MemberlistManager.NotifyUpdate)
  • who is responsible for updating the fields: the local nodes emitting their state, or the peers receiving it? (seems to be both?)
  • does "update" include an update to the state field? seems to be yes except in the above mentioned exception case?

documentation helps in answering these sort of questions.

BTW should SingleNodeManager.SetState not update StateChange ?

c.members[c.nodeName] = node
c.Unlock()
nodeReady.Set(state == NodeReady)
Expand Down