Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gossip tuneables #4444

Merged
merged 4 commits into from
Jul 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -782,10 +782,12 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
base.SerfLANConfig.MemberlistConfig.AdvertisePort = a.config.SerfAdvertiseAddrLAN.Port
base.SerfLANConfig.MemberlistConfig.GossipVerifyIncoming = a.config.EncryptVerifyIncoming
base.SerfLANConfig.MemberlistConfig.GossipVerifyOutgoing = a.config.EncryptVerifyOutgoing
base.SerfLANConfig.MemberlistConfig.GossipInterval = a.config.ConsulSerfLANGossipInterval
base.SerfLANConfig.MemberlistConfig.ProbeInterval = a.config.ConsulSerfLANProbeInterval
base.SerfLANConfig.MemberlistConfig.ProbeTimeout = a.config.ConsulSerfLANProbeTimeout
base.SerfLANConfig.MemberlistConfig.SuspicionMult = a.config.ConsulSerfLANSuspicionMult
base.SerfLANConfig.MemberlistConfig.GossipInterval = a.config.GossipLANGossipInterval
base.SerfLANConfig.MemberlistConfig.GossipNodes = a.config.GossipLANGossipNodes
base.SerfLANConfig.MemberlistConfig.ProbeInterval = a.config.GossipLANProbeInterval
base.SerfLANConfig.MemberlistConfig.ProbeTimeout = a.config.GossipLANProbeTimeout
base.SerfLANConfig.MemberlistConfig.SuspicionMult = a.config.GossipLANSuspicionMult
base.SerfLANConfig.MemberlistConfig.RetransmitMult = a.config.GossipLANRetransmitMult

if a.config.SerfBindAddrWAN != nil {
base.SerfWANConfig.MemberlistConfig.BindAddr = a.config.SerfBindAddrWAN.IP.String()
Expand All @@ -794,10 +796,12 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
base.SerfWANConfig.MemberlistConfig.AdvertisePort = a.config.SerfAdvertiseAddrWAN.Port
base.SerfWANConfig.MemberlistConfig.GossipVerifyIncoming = a.config.EncryptVerifyIncoming
base.SerfWANConfig.MemberlistConfig.GossipVerifyOutgoing = a.config.EncryptVerifyOutgoing
base.SerfWANConfig.MemberlistConfig.GossipInterval = a.config.ConsulSerfWANGossipInterval
base.SerfWANConfig.MemberlistConfig.ProbeInterval = a.config.ConsulSerfWANProbeInterval
base.SerfWANConfig.MemberlistConfig.ProbeTimeout = a.config.ConsulSerfWANProbeTimeout
base.SerfWANConfig.MemberlistConfig.SuspicionMult = a.config.ConsulSerfWANSuspicionMult
base.SerfWANConfig.MemberlistConfig.GossipInterval = a.config.GossipWANGossipInterval
base.SerfWANConfig.MemberlistConfig.GossipNodes = a.config.GossipWANGossipNodes
base.SerfWANConfig.MemberlistConfig.ProbeInterval = a.config.GossipWANProbeInterval
base.SerfWANConfig.MemberlistConfig.ProbeTimeout = a.config.GossipWANProbeTimeout
base.SerfWANConfig.MemberlistConfig.SuspicionMult = a.config.GossipWANSuspicionMult
base.SerfWANConfig.MemberlistConfig.RetransmitMult = a.config.GossipWANRetransmitMult
} else {
// Disable serf WAN federation
base.SerfWANConfig = nil
Expand Down
22 changes: 14 additions & 8 deletions agent/config/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -579,16 +579,22 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
ConsulRaftElectionTimeout: consulRaftElectionTimeout,
ConsulRaftHeartbeatTimeout: consulRaftHeartbeatTimeout,
ConsulRaftLeaderLeaseTimeout: consulRaftLeaderLeaseTimeout,
ConsulSerfLANGossipInterval: b.durationVal("consul.serf_lan.gossip_interval", c.Consul.SerfLAN.Memberlist.GossipInterval),
ConsulSerfLANProbeInterval: b.durationVal("consul.serf_lan.probe_interval", c.Consul.SerfLAN.Memberlist.ProbeInterval),
ConsulSerfLANProbeTimeout: b.durationVal("consul.serf_lan.probe_timeout", c.Consul.SerfLAN.Memberlist.ProbeTimeout),
ConsulSerfLANSuspicionMult: b.intVal(c.Consul.SerfLAN.Memberlist.SuspicionMult),
ConsulSerfWANGossipInterval: b.durationVal("consul.serf_wan.gossip_interval", c.Consul.SerfWAN.Memberlist.GossipInterval),
ConsulSerfWANProbeInterval: b.durationVal("consul.serf_wan.probe_interval", c.Consul.SerfWAN.Memberlist.ProbeInterval),
ConsulSerfWANProbeTimeout: b.durationVal("consul.serf_wan.probe_timeout", c.Consul.SerfWAN.Memberlist.ProbeTimeout),
ConsulSerfWANSuspicionMult: b.intVal(c.Consul.SerfWAN.Memberlist.SuspicionMult),
ConsulServerHealthInterval: b.durationVal("consul.server.health_interval", c.Consul.Server.HealthInterval),

// gossip configuration
GossipLANGossipInterval: b.durationVal("gossip_lan..gossip_interval", c.GossipLAN.GossipInterval),
GossipLANGossipNodes: b.intVal(c.GossipLAN.GossipNodes),
GossipLANProbeInterval: b.durationVal("gossip_lan..probe_interval", c.GossipLAN.ProbeInterval),
GossipLANProbeTimeout: b.durationVal("gossip_lan..probe_timeout", c.GossipLAN.ProbeTimeout),
GossipLANSuspicionMult: b.intVal(c.GossipLAN.SuspicionMult),
GossipLANRetransmitMult: b.intVal(c.GossipLAN.RetransmitMult),
GossipWANGossipInterval: b.durationVal("gossip_wan..gossip_interval", c.GossipWAN.GossipInterval),
GossipWANGossipNodes: b.intVal(c.GossipWAN.GossipNodes),
GossipWANProbeInterval: b.durationVal("gossip_wan..probe_interval", c.GossipWAN.ProbeInterval),
GossipWANProbeTimeout: b.durationVal("gossip_wan..probe_timeout", c.GossipWAN.ProbeTimeout),
GossipWANSuspicionMult: b.intVal(c.GossipWAN.SuspicionMult),
GossipWANRetransmitMult: b.intVal(c.GossipWAN.RetransmitMult),

// ACL
ACLAgentMasterToken: b.stringVal(c.ACLAgentMasterToken),
ACLAgentToken: b.stringVal(c.ACLAgentToken),
Expand Down
38 changes: 20 additions & 18 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@ type Config struct {
EncryptKey *string `json:"encrypt,omitempty" hcl:"encrypt" mapstructure:"encrypt"`
EncryptVerifyIncoming *bool `json:"encrypt_verify_incoming,omitempty" hcl:"encrypt_verify_incoming" mapstructure:"encrypt_verify_incoming"`
EncryptVerifyOutgoing *bool `json:"encrypt_verify_outgoing,omitempty" hcl:"encrypt_verify_outgoing" mapstructure:"encrypt_verify_outgoing"`
GossipLAN GossipLANConfig `json:"gossip_lan,omitempty" hcl:"gossip_lan" mapstructure:"gossip_lan"`
GossipWAN GossipWANConfig `json:"gossip_wan,omitempty" hcl:"gossip_wan" mapstructure:"gossip_wan"`
HTTPConfig HTTPConfig `json:"http_config,omitempty" hcl:"http_config" mapstructure:"http_config"`
KeyFile *string `json:"key_file,omitempty" hcl:"key_file" mapstructure:"key_file"`
LeaveOnTerm *bool `json:"leave_on_terminate,omitempty" hcl:"leave_on_terminate" mapstructure:"leave_on_terminate"`
Expand Down Expand Up @@ -259,6 +261,24 @@ type Config struct {
VersionPrerelease *string `json:"version_prerelease,omitempty" hcl:"version_prerelease" mapstructure:"version_prerelease"`
}

type GossipLANConfig struct {
GossipNodes *int `json:"gossip_nodes,omitempty" hcl:"gossip_nodes" mapstructure:"gossip_nodes"`
GossipInterval *string `json:"gossip_interval,omitempty" hcl:"gossip_interval" mapstructure:"gossip_interval"`
ProbeInterval *string `json:"probe_interval,omitempty" hcl:"probe_interval" mapstructure:"probe_interval"`
ProbeTimeout *string `json:"probe_timeout,omitempty" hcl:"probe_timeout" mapstructure:"probe_timeout"`
SuspicionMult *int `json:"suspicion_mult,omitempty" hcl:"suspicion_mult" mapstructure:"suspicion_mult"`
RetransmitMult *int `json:"retransmit_mult,omitempty" hcl:"retransmit_mult" mapstructure:"retransmit_mult"`
}

type GossipWANConfig struct {
GossipNodes *int `json:"gossip_nodes,omitempty" hcl:"gossip_nodes" mapstructure:"gossip_nodes"`
GossipInterval *string `json:"gossip_interval,omitempty" hcl:"gossip_interval" mapstructure:"gossip_interval"`
ProbeInterval *string `json:"probe_interval,omitempty" hcl:"probe_interval" mapstructure:"probe_interval"`
ProbeTimeout *string `json:"probe_timeout,omitempty" hcl:"probe_timeout" mapstructure:"probe_timeout"`
SuspicionMult *int `json:"suspicion_mult,omitempty" hcl:"suspicion_mult" mapstructure:"suspicion_mult"`
RetransmitMult *int `json:"retransmit_mult,omitempty" hcl:"retransmit_mult" mapstructure:"retransmit_mult"`
}

type Consul struct {
Coordinate struct {
UpdateBatchSize *int `json:"update_batch_size,omitempty" hcl:"update_batch_size" mapstructure:"update_batch_size"`
Expand All @@ -272,24 +292,6 @@ type Consul struct {
LeaderLeaseTimeout *string `json:"leader_lease_timeout,omitempty" hcl:"leader_lease_timeout" mapstructure:"leader_lease_timeout"`
} `json:"raft,omitempty" hcl:"raft" mapstructure:"raft"`

SerfLAN struct {
Memberlist struct {
GossipInterval *string `json:"gossip_interval,omitempty" hcl:"gossip_interval" mapstructure:"gossip_interval"`
ProbeInterval *string `json:"probe_interval,omitempty" hcl:"probe_interval" mapstructure:"probe_interval"`
ProbeTimeout *string `json:"probe_timeout,omitempty" hcl:"probe_timeout" mapstructure:"probe_timeout"`
SuspicionMult *int `json:"suspicion_mult,omitempty" hcl:"suspicion_mult" mapstructure:"suspicion_mult"`
} `json:"memberlist,omitempty" hcl:"memberlist" mapstructure:"memberlist"`
} `json:"serf_lan,omitempty" hcl:"serf_lan" mapstructure:"serf_lan"`

SerfWAN struct {
Memberlist struct {
GossipInterval *string `json:"gossip_interval,omitempty" hcl:"gossip_interval" mapstructure:"gossip_interval"`
ProbeInterval *string `json:"probe_interval,omitempty" hcl:"probe_interval" mapstructure:"probe_interval"`
ProbeTimeout *string `json:"probe_timeout,omitempty" hcl:"probe_timeout" mapstructure:"probe_timeout"`
SuspicionMult *int `json:"suspicion_mult,omitempty" hcl:"suspicion_mult" mapstructure:"suspicion_mult"`
} `json:"memberlist,omitempty" hcl:"memberlist" mapstructure:"memberlist"`
} `json:"serf_wan,omitempty" hcl:"serf_wan" mapstructure:"serf_wan"`

Server struct {
HealthInterval *string `json:"health_interval,omitempty" hcl:"health_interval" mapstructure:"health_interval"`
} `json:"server,omitempty" hcl:"server" mapstructure:"server"`
Expand Down
67 changes: 33 additions & 34 deletions agent/config/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ func DefaultRPCProtocol() (int, error) {
// todo(fs): IMO, this should be the definitive default for all configurable values
// todo(fs): and whatever is in here should clobber every default value. Hence, no sourcing.
func DefaultSource() Source {
cfg := consul.DefaultConfig()
serfLAN := cfg.SerfLANConfig.MemberlistConfig
serfWAN := cfg.SerfWANConfig.MemberlistConfig
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the significance of these moving from DefaultConsulSource to here? This seems like a better place but I don't recall offhand the details of how these two methods interact...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In depth explanation in my other comment.

DefaultConsulSource = non user modifiable
DefaultSource = user modifiable


return Source{
Name: "default",
Format: "hcl",
Expand Down Expand Up @@ -62,6 +66,22 @@ func DefaultSource() Source {
max_trailing_logs = 250
server_stabilization_time = "10s"
}
gossip_lan = {
gossip_interval = "` + serfLAN.GossipInterval.String() + `"
gossip_nodes = ` + strconv.Itoa(serfLAN.GossipNodes) + `
retransmit_mult = ` + strconv.Itoa(serfLAN.RetransmitMult) + `
probe_interval = "` + serfLAN.ProbeInterval.String() + `"
probe_timeout = "` + serfLAN.ProbeTimeout.String() + `"
suspicion_mult = ` + strconv.Itoa(serfLAN.SuspicionMult) + `
}
gossip_wan = {
gossip_interval = "` + serfWAN.GossipInterval.String() + `"
gossip_nodes = ` + strconv.Itoa(serfLAN.GossipNodes) + `
retransmit_mult = ` + strconv.Itoa(serfLAN.RetransmitMult) + `
probe_interval = "` + serfWAN.ProbeInterval.String() + `"
probe_timeout = "` + serfWAN.ProbeTimeout.String() + `"
suspicion_mult = ` + strconv.Itoa(serfWAN.SuspicionMult) + `
}
dns_config = {
allow_stale = true
a_record_limit = 0
Expand Down Expand Up @@ -92,6 +112,7 @@ func DefaultSource() Source {
metrics_prefix = "consul"
filter_default = true
}

`,
}
}
Expand All @@ -111,6 +132,18 @@ func DevSource() Source {
log_level = "DEBUG"
server = true

gossip_lan = {
gossip_interval = "100ms"
probe_interval = "100ms"
probe_timeout = "100ms"
suspicion_mult = 3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are retransmit_mult and gossip_nodes not set here for a good reason? I suspect because we already use the default value for them in memberlist here but is it worth hard coding it for clarity?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See my other comment. We don't want to do that here. Only items specifically overridden for dev mode go here.

}
gossip_wan = {
gossip_interval = "100ms"
probe_interval = "100ms"
probe_timeout = "100ms"
suspicion_mult = 3
}
connect = {
enabled = true
}
Expand Down Expand Up @@ -166,8 +199,6 @@ func DefaultVersionSource() Source {
func DefaultConsulSource() Source {
cfg := consul.DefaultConfig()
raft := cfg.RaftConfig
serfLAN := cfg.SerfLANConfig.MemberlistConfig
serfWAN := cfg.SerfWANConfig.MemberlistConfig
return Source{
Name: "consul",
Format: "hcl",
Expand All @@ -183,22 +214,6 @@ func DefaultConsulSource() Source {
heartbeat_timeout = "` + raft.HeartbeatTimeout.String() + `"
leader_lease_timeout = "` + raft.LeaderLeaseTimeout.String() + `"
}
serf_lan = {
memberlist = {
gossip_interval = "` + serfLAN.GossipInterval.String() + `"
probe_interval = "` + serfLAN.ProbeInterval.String() + `"
probe_timeout = "` + serfLAN.ProbeTimeout.String() + `"
suspicion_mult = ` + strconv.Itoa(serfLAN.SuspicionMult) + `
}
}
serf_wan = {
memberlist = {
gossip_interval = "` + serfWAN.GossipInterval.String() + `"
probe_interval = "` + serfWAN.ProbeInterval.String() + `"
probe_timeout = "` + serfWAN.ProbeTimeout.String() + `"
suspicion_mult = ` + strconv.Itoa(serfWAN.SuspicionMult) + `
}
}
server = {
health_interval = "` + cfg.ServerHealthInterval.String() + `"
}
Expand All @@ -223,22 +238,6 @@ func DevConsulSource() Source {
heartbeat_timeout = "35ms"
leader_lease_timeout = "20ms"
}
serf_lan = {
memberlist = {
gossip_interval = "100ms"
probe_interval = "100ms"
probe_timeout = "100ms"
suspicion_mult = 3
}
}
serf_wan = {
memberlist = {
gossip_interval = "100ms"
probe_interval = "100ms"
probe_timeout = "100ms"
suspicion_mult = 3
}
}
server = {
health_interval = "10ms"
}
Expand Down
Loading