Skip to content

Commit

Permalink
Merge pull request #9360 from planetscale/fane-9359-buffer-default
Browse files Browse the repository at this point in the history
Change default buffer implementation #9359
  • Loading branch information
vmg authored Dec 15, 2021
2 parents 9c454d0 + 34adeb2 commit 4a2e63a
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 12 deletions.
3 changes: 3 additions & 0 deletions doc/releasenotes/13_0_0_summary.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ The default value used to be false. What this means is that during a failover, w
In addition, all Vitess-managed databases will be started with `super-read-only` in the cnf file.
It is expected that this change is safe and backwards-compatible. Anyone who is relying on the current behavior should pass `-use_super_read_only=false` on the vttablet command line, and make sure they are using a custom my.cnf instead of the one provided as the default by Vitess.

### vtgate -buffer_implementation now defaults to keyspace_events
The default value used to be `healthcheck`. The new `keyspace_events` implementation has been tested in production with good results and shows more consistent buffering behavior during PlannedReparentShard operations. The `keyspace_events` implementation utilizes heuristics to detect additional cluster states where buffering is safe to perform, including cases where the primary may be down. If there is a need to revert to the previous buffer implementation, ensure buffering is enabled in vtgate and pass the flag `-buffer_implementation=healthcheck`.

### ddl_strategy: -postpone-completion flag

`ddl_strategy` (either `@@ddl_strategy` in VtGate or `-ddl_strategy` in `vtctlclient ApplySchema`) supports the flag `-postpone-completion`
Expand Down
16 changes: 8 additions & 8 deletions go/vt/vtgate/buffer/buffer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,15 @@ func testPassthrough1(t *testing.T, fail failover) {
}
}

// TestLastReparentTooRecent_BufferingSkipped tests that buffering is skipped if
// TestLastReparentTooRecentBufferingSkipped tests that buffering is skipped if
// we see the reparent (end) *before* any request failures due to it.
// We must not start buffering because we already observed the trigger for
// stopping buffering (the reparent) and may not see it again.
func TestLastReparentTooRecent_BufferingSkipped(t *testing.T) {
testAllImplementations(t, testLastReparentTooRecent_BufferingSkipped1)
func TestLastReparentTooRecentBufferingSkipped(t *testing.T) {
testAllImplementations(t, testLastReparentTooRecentBufferingSkipped1)
}

func testLastReparentTooRecent_BufferingSkipped1(t *testing.T, fail failover) {
func testLastReparentTooRecentBufferingSkipped1(t *testing.T, fail failover) {
resetVariables()

now := time.Now()
Expand Down Expand Up @@ -312,14 +312,14 @@ func testLastReparentTooRecent_BufferingSkipped1(t *testing.T, fail failover) {
}
}

// TestLastReparentTooRecent_Buffering explicitly tests that the "too recent"
// TestLastReparentTooRecentBuffering explicitly tests that the "too recent"
// skipping of the buffering does NOT get triggered because enough time has
// elapsed since the last seen reparent.
func TestLastReparentTooRecent_Buffering(t *testing.T) {
testAllImplementations(t, testLastReparentTooRecent_Buffering1)
func TestLastReparentTooRecentBuffering(t *testing.T) {
testAllImplementations(t, testLastReparentTooRecentBuffering1)
}

func testLastReparentTooRecent_Buffering1(t *testing.T, fail failover) {
func testLastReparentTooRecentBuffering1(t *testing.T, fail failover) {
resetVariables()

now := time.Now()
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/buffer/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ var (
bufferEnabledDryRun = flag.Bool("enable_buffer_dry_run", false, "Detect and log failover events, but do not actually buffer requests.")

bufferWindow = flag.Duration("buffer_window", 10*time.Second, "Duration for how long a request should be buffered at most.")
bufferSize = flag.Int("buffer_size", 10, "Maximum number of buffered requests in flight (across all ongoing failovers).")
bufferSize = flag.Int("buffer_size", 1000, "Maximum number of buffered requests in flight (across all ongoing failovers).")
bufferMaxFailoverDuration = flag.Duration("buffer_max_failover_duration", 20*time.Second, "Stop buffering completely if a failover takes longer than this duration.")
bufferMinTimeBetweenFailovers = flag.Duration("buffer_min_time_between_failovers", 1*time.Minute, "Minimum time between the end of a failover and the start of the next one (tracked per shard). Faster consecutive failovers will not trigger buffering.")

Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/buffer/flags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func TestVerifyFlags(t *testing.T) {
// Set all flags to their default value.
flag.Set("enable_buffer", "false")
flag.Set("enable_buffer_dry_run", "false")
flag.Set("buffer_size", "10")
flag.Set("buffer_size", "1000")
flag.Set("buffer_window", "10s")
flag.Set("buffer_keyspace_shards", "")
flag.Set("buffer_max_failover_duration", "20s")
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (
var (
// GatewayImplementation allows you to choose which gateway to use for vtgate routing. Defaults to tabletgateway; the other option is discoverygateway.
GatewayImplementation = flag.String("gateway_implementation", "tabletgateway", "Allowed values: discoverygateway (deprecated), tabletgateway (default)")
bufferImplementation = flag.String("buffer_implementation", "healthcheck", "Allowed values: healthcheck (default), keyspace_events (for testing)")
bufferImplementation = flag.String("buffer_implementation", "keyspace_events", "Allowed values: healthcheck (legacy implementation), keyspace_events (default)")
initialTabletTimeout = flag.Duration("gateway_initial_tablet_timeout", 30*time.Second, "At startup, the gateway will wait up to that duration to get one tablet per keyspace/shard/tablettype")
// RetryCount is the number of times a query will be retried on error
// Make this unexported after DiscoveryGateway is deprecated
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/sandbox_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ func (sct *sandboxTopo) GetSrvKeyspace(ctx context.Context, cell, keyspace strin
}

func (sct *sandboxTopo) WatchSrvKeyspace(ctx context.Context, cell, keyspace string, callback func(*topodatapb.SrvKeyspace, error) bool) {
panic("not supported: WatchSrvKeyspace")
// panic("not supported: WatchSrvKeyspace")
}

// WatchSrvVSchema is part of the srvtopo.Server interface.
Expand Down

0 comments on commit 4a2e63a

Please sign in to comment.