Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

calibrate: refactor metrics error #44451

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions executor/calibrate_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,11 @@ func (e *calibrateResourceExec) Next(ctx context.Context, req *chunk.Chunk) erro
return e.staticCalibrate(ctx, req, exec)
}

var (
errTooFewMetricsPoints = errors.Normalize("There are too few metrics points available in selected time window, %v")
Copy link
Contributor

@glorv glorv Jun 6, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall we also return the error message "The workload in selected time window is too low ..." here? The user may not know what "too few metrics points" means.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it.

errNoCPUQuotaMetrics = errors.Normalize("There is no CPU quota metrics, %v")
)

func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk.Chunk, exec sqlexec.RestrictedSQLExecutor) error {
startTs, endTs, err := e.parseCalibrateDuration(ctx)
if err != nil {
Expand All @@ -206,23 +211,23 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk

totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
rus, err := getRUPerSec(ctx, e.ctx, exec, startTime, endTime)
if err != nil {
return err
return errTooFewMetricsPoints.FastGenByArgs(err.Error())
}
tikvCPUs, err := getComponentCPUUsagePerSec(ctx, e.ctx, exec, "tikv", startTime, endTime)
if err != nil {
return err
return errTooFewMetricsPoints.FastGenByArgs(err.Error())
}
tidbCPUs, err := getComponentCPUUsagePerSec(ctx, e.ctx, exec, "tidb", startTime, endTime)
if err != nil {
return err
return errTooFewMetricsPoints.FastGenByArgs(err.Error())
}
quotas := make([]float64, 0)
lowCount := 0
Expand Down Expand Up @@ -256,7 +261,7 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk
tikvCPUs.next()
}
if len(quotas) < 5 {
return errors.Errorf("There are too few metrics points available in selected time window")
return errTooFewMetricsPoints.FastGenByArgs("low usage")
}
if float64(len(quotas))/float64(len(quotas)+lowCount) > percentOfPass {
sort.Slice(quotas, func(i, j int) bool {
Expand Down Expand Up @@ -287,11 +292,11 @@ func (e *calibrateResourceExec) staticCalibrate(ctx context.Context, req *chunk.

totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}

// The default workload to calculate the RU capacity.
Expand Down
47 changes: 30 additions & 17 deletions executor/calibrate_resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,24 +95,30 @@ func TestCalibrateResource(t *testing.T) {
return time
}

mockData := map[string][][]types.Datum{
"tikv_cpu_quota": {
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
},
"tidb_server_maxprocs": {
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
},
}
mockData := make(map[string][][]types.Datum)
ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData)
ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool {
return fpName == fpname
})
rs, err = tk.Exec("CALIBRATE RESOURCE")
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
// When the mock metrics are empty, the underlying error is always `pd unavailable`, so don't check further detail.
require.ErrorContains(t, err, "There is no CPU quota metrics, query metric error: pd unavailable")

mockData["tikv_cpu_quota"] = [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
}
mockData["tidb_server_maxprocs"] = [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
}
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("69768"))
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD TPCC").Check(testkit.Rows("69768"))
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD OLTP_READ_WRITE").Check(testkit.Rows("55823"))
Expand Down Expand Up @@ -402,7 +408,7 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "There are too few metrics points available in selected time window, low usage")

ru3 := [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0),
Expand Down Expand Up @@ -442,7 +448,7 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "There are too few metrics points available in selected time window, low usage")

// flash back to init data.
mockData["resource_manager_resource_unit"] = ru1
Expand Down Expand Up @@ -553,7 +559,14 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "There are too few metrics points available in selected time window, low usage")

delete(mockData, "process_cpu_usage")
rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'")
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window, query metric error: pd unavailable")
}

type mockResourceGroupProvider struct {
Expand Down