Skip to content

Commit

Permalink
configure the rest of discovery replicationlag settings
Browse files Browse the repository at this point in the history
Signed-off-by: Andrew Mason <andrew@planetscale.com>
  • Loading branch information
Andrew Mason committed May 23, 2023
1 parent cbf55ea commit 8ff1866
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 19 deletions.
56 changes: 42 additions & 14 deletions go/vt/discovery/replicationlag.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,49 +28,77 @@ import (
)

var (
configKey = viperutil.KeyPrefixFunc("discovery")
// lowReplicationLag defines the duration that replication lag is low enough that the VTTablet is considered healthy.
lowReplicationLag time.Duration
highReplicationLagMinServing time.Duration
minNumTablets = viperutil.Configure(
lowReplicationLag = viperutil.Configure(
configKey("low_replication_lag"),
viperutil.Options[time.Duration]{
FlagName: "discovery_low_replication_lag",
Default: 30 * time.Second,
Dynamic: true,
},
)
highReplicationLagMinServing = viperutil.Configure(
configKey("high_replication_lag"),
viperutil.Options[time.Duration]{
FlagName: "discovery_high_replication_lag_minimum_serving",
Default: 2 * time.Hour,
Dynamic: true,
},
)
minNumTablets = viperutil.Configure(
"discovery.min_number_serving_vttablets",
viperutil.Options[int]{
FlagName: "min_number_serving_vttablets",
Default: 2,
Dynamic: true,
},
)
legacyReplicationLagAlgorithm bool
legacyReplicationLagAlgorithm = viperutil.Configure(
configKey("legacy_replication_lag_algorithm"),
viperutil.Options[bool]{
FlagName: "legacy_replication_lag_algorithm",
Default: true,
},
)
)

func init() {
servenv.OnParseFor("vtgate", registerReplicationFlags)
}

func registerReplicationFlags(fs *pflag.FlagSet) {
fs.DurationVar(&lowReplicationLag, "discovery_low_replication_lag", 30*time.Second, "Threshold below which replication lag is considered low enough to be healthy.")
fs.DurationVar(&highReplicationLagMinServing, "discovery_high_replication_lag_minimum_serving", 2*time.Hour, "Threshold above which replication lag is considered too high when applying the min_number_serving_vttablets flag.")
fs.Duration("discovery_low_replication_lag", lowReplicationLag.Default(), "Threshold below which replication lag is considered low enough to be healthy.")
fs.Duration("discovery_high_replication_lag_minimum_serving", highReplicationLagMinServing.Default(), "Threshold above which replication lag is considered too high when applying the min_number_serving_vttablets flag.")
fs.Int("min_number_serving_vttablets", minNumTablets.Default(), "The minimum number of vttablets for each replicating tablet_type (e.g. replica, rdonly) that will be continue to be used even with replication lag above discovery_low_replication_lag, but still below discovery_high_replication_lag_minimum_serving.")
fs.BoolVar(&legacyReplicationLagAlgorithm, "legacy_replication_lag_algorithm", true, "Use the legacy algorithm when selecting vttablets for serving.")
fs.Bool("legacy_replication_lag_algorithm", legacyReplicationLagAlgorithm.Default(), "Use the legacy algorithm when selecting vttablets for serving.")

viperutil.BindFlags(fs,
lowReplicationLag,
highReplicationLagMinServing,
minNumTablets,
legacyReplicationLagAlgorithm,
)
}

// GetLowReplicationLag getter for use by debugenv
func GetLowReplicationLag() time.Duration {
return lowReplicationLag
return lowReplicationLag.Get()
}

// SetLowReplicationLag setter for use by debugenv
func SetLowReplicationLag(lag time.Duration) {
lowReplicationLag = lag
lowReplicationLag.Set(lag)
}

// GetHighReplicationLagMinServing getter for use by debugenv
func GetHighReplicationLagMinServing() time.Duration {
return highReplicationLagMinServing
return highReplicationLagMinServing.Get()
}

// SetHighReplicationLagMinServing setter for use by debugenv
func SetHighReplicationLagMinServing(lag time.Duration) {
highReplicationLagMinServing = lag
highReplicationLagMinServing.Set(lag)
}

// GetMinNumTablets getter for use by debugenv
Expand All @@ -86,13 +114,13 @@ func SetMinNumTablets(numTablets int) {
// IsReplicationLagHigh verifies that the given LegacytabletHealth refers to a tablet with high
// replication lag, i.e. higher than the configured discovery_low_replication_lag flag.
func IsReplicationLagHigh(tabletHealth *TabletHealth) bool {
return float64(tabletHealth.Stats.ReplicationLagSeconds) > lowReplicationLag.Seconds()
return float64(tabletHealth.Stats.ReplicationLagSeconds) > lowReplicationLag.Get().Seconds()
}

// IsReplicationLagVeryHigh verifies that the given LegacytabletHealth refers to a tablet with very high
// replication lag, i.e. higher than the configured discovery_high_replication_lag_minimum_serving flag.
func IsReplicationLagVeryHigh(tabletHealth *TabletHealth) bool {
return float64(tabletHealth.Stats.ReplicationLagSeconds) > highReplicationLagMinServing.Seconds()
return float64(tabletHealth.Stats.ReplicationLagSeconds) > highReplicationLagMinServing.Get().Seconds()
}

// FilterStatsByReplicationLag filters the list of TabletHealth by TabletHealth.Stats.ReplicationLagSeconds.
Expand Down Expand Up @@ -121,7 +149,7 @@ func IsReplicationLagVeryHigh(tabletHealth *TabletHealth) bool {
// - degraded_threshold: this is only used by vttablet for display. It should match
// discovery_low_replication_lag here, so the vttablet status display matches what vtgate will do of it.
func FilterStatsByReplicationLag(tabletHealthList []*TabletHealth) []*TabletHealth {
if !legacyReplicationLagAlgorithm {
if !legacyReplicationLagAlgorithm.Get() {
return filterStatsByLag(tabletHealthList)
}
res := filterStatsByLagWithLegacyAlgorithm(tabletHealthList)
Expand Down
8 changes: 4 additions & 4 deletions go/vt/discovery/replicationlag_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ import (
)

func init() {
lowReplicationLag = 30 * time.Second
highReplicationLagMinServing = 2 * time.Hour
lowReplicationLag.Set(30 * time.Second)
highReplicationLagMinServing.Set(2 * time.Hour)
minNumTablets.Set(2)
legacyReplicationLagAlgorithm = true
legacyReplicationLagAlgorithm.Set(true)
}

// testSetLegacyReplicationLagAlgorithm is a test helper function, if this is used by a production code path, something is wrong.
func testSetLegacyReplicationLagAlgorithm(newLegacy bool) {
legacyReplicationLagAlgorithm = newLegacy
legacyReplicationLagAlgorithm.Set(newLegacy)
}

// testSetMinNumTablets is a test helper function, if this is used by a production code path, something is wrong.
Expand Down
2 changes: 1 addition & 1 deletion go/vt/discovery/tablet_health_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ func (thc *tabletHealthCheck) isTrivialReplagChange(newStats *query.RealtimeStat
}
// Skip replag filter when replag remains in the low rep lag range,
// which should be the case majority of the time.
lowRepLag := lowReplicationLag.Seconds()
lowRepLag := lowReplicationLag.Get().Seconds()
oldRepLag := float64(thc.Stats.ReplicationLagSeconds)
newRepLag := float64(newStats.ReplicationLagSeconds)
if oldRepLag <= lowRepLag && newRepLag <= lowRepLag {
Expand Down

0 comments on commit 8ff1866

Please sign in to comment.