Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tablet throttler: support for custom query & threshold #7541

Merged
40 changes: 40 additions & 0 deletions .github/workflows/cluster_endtoend_tabletmanager_throttler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# DO NOT MODIFY: THIS FILE IS GENERATED USING "make generate_ci_workflows"

name: Cluster (tabletmanager_throttler)
on: [push, pull_request]
jobs:

build:
name: Run endtoend tests on Cluster (tabletmanager_throttler)
runs-on: ubuntu-latest

steps:
- name: Set up Go
uses: actions/setup-go@v1
with:
go-version: 1.15

- name: Check out code
uses: actions/checkout@v2

- name: Get dependencies
run: |
sudo apt-get update
sudo apt-get install -y mysql-server mysql-client make unzip g++ etcd curl git wget eatmydata
sudo service mysql stop
sudo service etcd stop
sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/
sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld
go mod download

wget https://repo.percona.com/apt/percona-release_latest.$(lsb_release -sc)_all.deb
sudo apt-get install -y gnupg2
sudo dpkg -i percona-release_latest.$(lsb_release -sc)_all.deb
sudo apt-get update
sudo apt-get install percona-xtrabackup-24

- name: Run cluster endtoend test
timeout-minutes: 30
run: |
source build.env
eatmydata -- go run test.go -docker=false -print-log -follow -shard tabletmanager_throttler
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# DO NOT MODIFY: THIS FILE IS GENERATED USING "make generate_ci_workflows"

name: Cluster (tabletmanager_throttler_custom_config)
on: [push, pull_request]
jobs:

build:
name: Run endtoend tests on Cluster (tabletmanager_throttler_custom_config)
runs-on: ubuntu-latest

steps:
- name: Set up Go
uses: actions/setup-go@v1
with:
go-version: 1.15

- name: Check out code
uses: actions/checkout@v2

- name: Get dependencies
run: |
sudo apt-get update
sudo apt-get install -y mysql-server mysql-client make unzip g++ etcd curl git wget eatmydata
sudo service mysql stop
sudo service etcd stop
sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/
sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld
go mod download

wget https://repo.percona.com/apt/percona-release_latest.$(lsb_release -sc)_all.deb
sudo apt-get install -y gnupg2
sudo dpkg -i percona-release_latest.$(lsb_release -sc)_all.deb
sudo apt-get update
sudo apt-get install percona-xtrabackup-24

- name: Run cluster endtoend test
timeout-minutes: 30
run: |
source build.env
eatmydata -- go run test.go -docker=false -print-log -follow -shard tabletmanager_throttler_custom_config
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
/*
Copyright 2020 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package master

import (
"context"
"flag"
"fmt"
"net/http"
"os"
"testing"
"time"

"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base"

"vitess.io/vitess/go/test/endtoend/cluster"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

var (
clusterInstance *cluster.LocalProcessCluster
primaryTablet *cluster.Vttablet
replicaTablet *cluster.Vttablet
hostname = "localhost"
keyspaceName = "ks"
cell = "zone1"
sqlSchema = `
create table t1(
id bigint,
value varchar(16),
primary key(id)
) Engine=InnoDB;
`

vSchema = `
{
"sharded": true,
"vindexes": {
"hash": {
"type": "hash"
}
},
"tables": {
"t1": {
"column_vindexes": [
{
"column": "id",
"name": "hash"
}
]
}
}
}`

httpClient = base.SetupHTTPClient(time.Second)
checkAPIPath = "throttler/check"
checkSelfAPIPath = "throttler/check-self"
vtParams mysql.ConnParams
)

const (
testThreshold = 5
throttlerInitWait = 10 * time.Second
)

func TestMain(m *testing.M) {
defer cluster.PanicHandler(nil)
flag.Parse()

exitCode := func() int {
clusterInstance = cluster.NewCluster(cell, hostname)
defer clusterInstance.Teardown()

// Start topo server
err := clusterInstance.StartTopo()
if err != nil {
return 1
}

// Set extra tablet args for lock timeout
clusterInstance.VtTabletExtraArgs = []string{
"-lock_tables_timeout", "5s",
"-watch_replication_stream",
"-enable_replication_reporter",
"-enable-lag-throttler",
"-throttle_metrics_query", "show global status like 'threads_running'",
"-throttle_metrics_threshold", fmt.Sprintf("%d", testThreshold),
"-throttle_check_as_check_self",
"-heartbeat_enable",
"-heartbeat_interval", "250ms",
}
// We do not need semiSync for this test case.
clusterInstance.EnableSemiSync = false

// Start keyspace
keyspace := &cluster.Keyspace{
Name: keyspaceName,
SchemaSQL: sqlSchema,
VSchema: vSchema,
}

if err = clusterInstance.StartUnshardedKeyspace(*keyspace, 0, false); err != nil {
return 1
}

// Collect table paths and ports
tablets := clusterInstance.Keyspaces[0].Shards[0].Vttablets
for _, tablet := range tablets {
if tablet.Type == "master" {
primaryTablet = tablet
} else if tablet.Type != "rdonly" {
replicaTablet = tablet
}
}

vtgateInstance := clusterInstance.NewVtgateInstance()
// set the gateway we want to use
vtgateInstance.GatewayImplementation = "tabletgateway"
// Start vtgate
if err := vtgateInstance.Setup(); err != nil {
return 1
}
// ensure it is torn down during cluster TearDown
clusterInstance.VtgateProcess = *vtgateInstance
vtParams = mysql.ConnParams{
Host: clusterInstance.Hostname,
Port: clusterInstance.VtgateMySQLPort,
}

return m.Run()
}()
os.Exit(exitCode)
}

func throttleCheck(tablet *cluster.Vttablet) (*http.Response, error) {
return httpClient.Head(fmt.Sprintf("http://localhost:%d/%s", tablet.HTTPPort, checkAPIPath))
}

func throttleCheckSelf(tablet *cluster.Vttablet) (*http.Response, error) {
return httpClient.Head(fmt.Sprintf("http://localhost:%d/%s", tablet.HTTPPort, checkSelfAPIPath))
}

func TestThrottlerThresholdOK(t *testing.T) {
defer cluster.PanicHandler(t)

{
resp, err := throttleCheck(primaryTablet)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
}
}

func TestThrottlerAfterMetricsCollected(t *testing.T) {
defer cluster.PanicHandler(t)

time.Sleep(throttlerInitWait)
// By this time metrics will have been collected. We expect no lag, and something like:
// {"StatusCode":200,"Value":0.282278,"Threshold":1,"Message":""}
{
resp, err := throttleCheck(primaryTablet)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
}
{
resp, err := throttleCheckSelf(primaryTablet)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
}
}

func TestThreadsRunning(t *testing.T) {
defer cluster.PanicHandler(t)

sleepSeconds := 6
for i := 0; i < testThreshold; i++ {
go vtgateExec(t, fmt.Sprintf("select sleep(%d)", sleepSeconds), "")
}
t.Run("exceeds threshold", func(t *testing.T) {
time.Sleep(3 * time.Second)
// by this time we will have +1 threads_running, and we should hit the threshold
// {"StatusCode":429,"Value":2,"Threshold":2,"Message":"Threshold exceeded"}
{
resp, err := throttleCheck(primaryTablet)
assert.NoError(t, err)
assert.Equal(t, http.StatusTooManyRequests, resp.StatusCode)
}
{
resp, err := throttleCheckSelf(primaryTablet)
assert.NoError(t, err)
assert.Equal(t, http.StatusTooManyRequests, resp.StatusCode)
}
})
t.Run("restored below threshold", func(t *testing.T) {
time.Sleep(time.Duration(sleepSeconds) * time.Second)
// Restore
{
resp, err := throttleCheck(primaryTablet)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
}
{
resp, err := throttleCheckSelf(primaryTablet)
assert.NoError(t, err)
assert.Equal(t, http.StatusOK, resp.StatusCode)
}
})
}

func vtgateExec(t *testing.T, query string, expectError string) *sqltypes.Result {
t.Helper()

ctx := context.Background()
conn, err := mysql.Connect(ctx, &vtParams)
require.Nil(t, err)
defer conn.Close()

qr, err := conn.ExecuteFetch(query, 1000, true)
if expectError == "" {
require.NoError(t, err)
} else {
require.Error(t, err, "error should not be nil")
assert.Contains(t, err.Error(), expectError, "Unexpected error")
}
return qr
}
3 changes: 3 additions & 0 deletions go/vt/vttablet/tabletserver/throttle/base/throttle_metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ var errNoResultYet = errors.New("Metric not collected yet")
// ErrNoSuchMetric is for when a user requests a metric by an unknown metric name
var ErrNoSuchMetric = errors.New("No such metric")

// ErrInvalidCheckType is an internal error indicating an unknown check type
var ErrInvalidCheckType = errors.New("Unknown throttler check type")

// IsDialTCPError sees if th egiven error indicates a TCP issue
func IsDialTCPError(e error) bool {
if e == nil {
Expand Down
2 changes: 2 additions & 0 deletions go/vt/vttablet/tabletserver/throttle/check_result.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,5 @@ func NewErrorCheckResult(statusCode int, err error) *CheckResult {
var NoSuchMetricCheckResult = NewErrorCheckResult(http.StatusNotFound, base.ErrNoSuchMetric)

var okMetricCheckResult = NewCheckResult(http.StatusOK, 0, 0, nil)

var invalidCheckTypeCheckResult = NewErrorCheckResult(http.StatusInternalServerError, base.ErrInvalidCheckType)
Loading