Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

[tablet] Split health_check_interval into two flags #248

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go/cmd/vttablet/vttablet.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func main() {
VREngine: vreplication.NewEngine(config, ts, tabletAlias.Cell, mysqld, qsc.LagThrottler()),
MetadataManager: &mysqlctl.MetadataManager{},
}
if err := tm.Start(tablet, config.Healthcheck.IntervalSeconds.Get()); err != nil {
if err := tm.Start(tablet, config.Healthcheck.ReplicationIntervalSeconds.Get()); err != nil {
log.Exitf("failed to parse -tablet-path or initialize DB credentials: %v", err)
}
servenv.OnClose(func() {
Expand Down
57 changes: 40 additions & 17 deletions go/vt/vttablet/tabletserver/tabletenv/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ const (
NotOnMaster = "notOnMaster"
Polling = "polling"
Heartbeat = "heartbeat"

ReplicationHealthCheckIntervalSecondsFlagName = "replication_health_check_interval"
)

var (
Expand Down Expand Up @@ -69,17 +71,18 @@ var (
deprecatedFoundRowsPoolSize int

// The following vars are used for custom initialization of Tabletconfig.
enableHotRowProtection bool
enableHotRowProtectionDryRun bool
enableConsolidator bool
enableConsolidatorReplicas bool
enableHeartbeat bool
heartbeatInterval time.Duration
healthCheckInterval time.Duration
degradedThreshold time.Duration
unhealthyThreshold time.Duration
transitionGracePeriod time.Duration
enableReplicationReporter bool
enableHotRowProtection bool
enableHotRowProtectionDryRun bool
enableConsolidator bool
enableConsolidatorReplicas bool
enableHeartbeat bool
heartbeatInterval time.Duration
healthCheckInterval time.Duration
replicationHealthCheckInterval time.Duration
degradedThreshold time.Duration
unhealthyThreshold time.Duration
transitionGracePeriod time.Duration
enableReplicationReporter bool
)

func init() {
Expand Down Expand Up @@ -156,6 +159,7 @@ func init() {
flagutil.DualFormatBoolVar(&currentConfig.CacheResultFields, "enable_query_plan_field_caching", defaultConfig.CacheResultFields, "This option fetches & caches fields (columns) when storing query plans")

flag.DurationVar(&healthCheckInterval, "health_check_interval", 20*time.Second, "Interval between health checks")
flag.DurationVar(&replicationHealthCheckInterval, ReplicationHealthCheckIntervalSecondsFlagName, 20*time.Second, "Interval at which ReplicationManager checks replication health")
flag.DurationVar(&degradedThreshold, "degraded_threshold", 30*time.Second, "replication lag after which a replica is considered degraded")
flag.DurationVar(&unhealthyThreshold, "unhealthy_threshold", 2*time.Hour, "replication lag after which a replica is considered unhealthy")
flag.DurationVar(&transitionGracePeriod, "serving_state_grace_period", 0, "how long to pause after broadcasting health to vtgate, before enforcing a new serving state")
Expand Down Expand Up @@ -207,6 +211,13 @@ func Init() {
}

currentConfig.Healthcheck.IntervalSeconds.Set(healthCheckInterval)

var actualReplHealthCheckInterval = healthCheckInterval
if flagWasPassed(ReplicationHealthCheckIntervalSecondsFlagName) {
actualReplHealthCheckInterval = replicationHealthCheckInterval
}
currentConfig.Healthcheck.ReplicationIntervalSeconds.Set(actualReplHealthCheckInterval)

currentConfig.Healthcheck.DegradedThresholdSeconds.Set(degradedThreshold)
currentConfig.Healthcheck.UnhealthyThresholdSeconds.Set(unhealthyThreshold)
currentConfig.GracePeriods.TransitionSeconds.Set(transitionGracePeriod)
Expand Down Expand Up @@ -309,9 +320,10 @@ type HotRowProtectionConfig struct {

// HealthcheckConfig contains the config for healthcheck.
// Init() populates these fields from the corresponding duration flags by
// calling Set on each Seconds value; defaultConfig supplies 20, 20, 30 and
// 7200 for the interval, replication interval, degraded threshold and
// unhealthy threshold respectively.
type HealthcheckConfig struct {
IntervalSeconds Seconds `json:"intervalSeconds,omitempty"`
DegradedThresholdSeconds Seconds `json:"degradedThresholdSeconds,omitempty"`
UnhealthyThresholdSeconds Seconds `json:"unhealthyThresholdSeconds,omitempty"`
// IntervalSeconds is set from the health_check_interval flag.
IntervalSeconds Seconds `json:"intervalSeconds,omitempty"`
// ReplicationIntervalSeconds is set from the replication_health_check_interval
// flag when that flag was explicitly passed; otherwise Init() falls back to
// the health_check_interval value. vttablet's main passes this value to
// tm.Start.
ReplicationIntervalSeconds Seconds `json:"replicationIntervalSeconds,omitempty"`
// DegradedThresholdSeconds is set from the degraded_threshold flag.
DegradedThresholdSeconds Seconds `json:"degradedThresholdSeconds,omitempty"`
// UnhealthyThresholdSeconds is set from the unhealthy_threshold flag.
UnhealthyThresholdSeconds Seconds `json:"unhealthyThresholdSeconds,omitempty"`
}

// GracePeriodsConfig contains various grace periods.
Expand Down Expand Up @@ -433,9 +445,10 @@ var defaultConfig = TabletConfig{
MaxRows: 10000,
},
Healthcheck: HealthcheckConfig{
IntervalSeconds: 20,
DegradedThresholdSeconds: 30,
UnhealthyThresholdSeconds: 7200,
IntervalSeconds: 20,
ReplicationIntervalSeconds: 20,
DegradedThresholdSeconds: 30,
UnhealthyThresholdSeconds: 7200,
},
ReplicationTracker: ReplicationTrackerConfig{
Mode: Disable,
Expand Down Expand Up @@ -508,3 +521,13 @@ func defaultTransactionLimitConfig() TransactionLimitConfig {
TransactionLimitBySubcomponent: false,
}
}

// flagWasPassed reports whether the flag called name was explicitly set on
// the default command-line flag set. It relies on flag.Visit, which only
// walks flags that have been set, so a flag left at its default value
// yields false.
func flagWasPassed(name string) bool {
	var passed bool
	flag.Visit(func(fl *flag.Flag) {
		// Visit offers no early exit, so keep a sticky result once matched.
		passed = passed || fl.Name == name
	})
	return passed
}
5 changes: 5 additions & 0 deletions go/vt/vttablet/tabletserver/tabletenv/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ gracePeriods: {}
healthcheck:
degradedThresholdSeconds: 30
intervalSeconds: 20
replicationIntervalSeconds: 20
unhealthyThresholdSeconds: 7200
hotRowProtection:
maxConcurrency: 5
Expand Down Expand Up @@ -226,6 +227,7 @@ func TestFlags(t *testing.T) {
want.HotRowProtection.Mode = Disable
want.Consolidator = Enable
want.Healthcheck.IntervalSeconds = 20
want.Healthcheck.ReplicationIntervalSeconds = 20
want.Healthcheck.DegradedThresholdSeconds = 30
want.Healthcheck.UnhealthyThresholdSeconds = 7200
want.ReplicationTracker.HeartbeatIntervalSeconds = 1
Expand Down Expand Up @@ -309,9 +311,12 @@ func TestFlags(t *testing.T) {
assert.Equal(t, want, currentConfig)

healthCheckInterval = 1 * time.Second
replicationHealthCheckInterval = 2 * time.Second
currentConfig.Healthcheck.IntervalSeconds = 0
currentConfig.Healthcheck.ReplicationIntervalSeconds = 0
Init()
want.Healthcheck.IntervalSeconds = 1
want.Healthcheck.ReplicationIntervalSeconds = 1
assert.Equal(t, want, currentConfig)

degradedThreshold = 2 * time.Second
Expand Down