From 8184fb2280067c4e862b0bf62931bc84dfdc75ba Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Fri, 8 Sep 2023 17:05:56 +0800 Subject: [PATCH 01/10] statistics: add histogram bench Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 96 ++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 statistics/histogram_bench_test.go diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go new file mode 100644 index 0000000000000..1353a44231bcf --- /dev/null +++ b/statistics/histogram_bench_test.go @@ -0,0 +1,96 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package statistics + +import ( + "math/rand" + "testing" + + "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tidb/util/codec" + "github.com/pingcap/tidb/util/mock" + "github.com/stretchr/testify/require" +) + +const histogramLen = 100 +const popedTopNLen = 100 + +var magicLower = [histogramLen]int64{ + 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, + 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, + 2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, + 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, + 4000, 4100, 4200, 4300, 4400, 4500, 4600, 4700, 4800, 4900, + 5000, 5100, 5200, 5300, 5400, 5500, 5600, 5700, 5800, 5900, + 6000, 6100, 6200, 6300, 6400, 6500, 6600, 6700, 6800, 6900, + 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7700, 7800, 7900, + 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8800, 8900, + 9000, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, +} + +func genBucket4TestData(len int) []*bucket4Test { + result := make([]*bucket4Test, 0, len) + var lower, upper int64 + for n := 0; n < len; n++ { + if n == 0 { + lower = 0 + } else { + lower = upper + 1 + } + upper = lower + (rand.Int63n(magicLower[n+1] - lower)) + result = append(result, &bucket4Test{ + lower: lower, + upper: upper, + count: rand.Int63n(10000), + repeat: rand.Int63n(100), + ndv: rand.Int63n(100), + }) + } + return result +} + +func genHist4Bench(t *testing.T, buckets []*bucket4Test, totColSize int64) *Histogram { + h := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeBlob), len(buckets), totColSize) + for _, bucket := range buckets { + lower, err := codec.EncodeKey(nil, nil, types.NewIntDatum(bucket.lower)) + require.NoError(t, err) + upper, err := codec.EncodeKey(nil, nil, types.NewIntDatum(bucket.upper)) + require.NoError(t, err) + di, du := types.NewBytesDatum(lower), types.NewBytesDatum(upper) + h.AppendBucketWithNDV(&di, &du, bucket.count, bucket.repeat, bucket.ndv) + } + return h +} + 
+func BenchABC(b *testing.B) { + ctx := mock.NewContext() + sc := ctx.GetSessionVars().StmtCtx + hists := make([]*Histogram, 0, histogramLen) + const expBucketNumber = 100 + poped := make([]TopNMeta, 0, popedTopNLen) + for _, top := range tt.popedTopN { + b, err := codec.EncodeKey(sc, nil, types.NewIntDatum(top.data)) + require.NoError(t, err) + tmp := TopNMeta{ + Encoded: b, + Count: uint64(top.count), + } + poped = append(poped, tmp) + } + for i := 0; i < b.N; i++ { + globalHist, err := MergePartitionHist2GlobalHist(sc, hists, poped, expBucketNumber, true) + } +} From 167c2d02b8b23838d9d9c5324912328a46fdf738 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Fri, 8 Sep 2023 21:43:51 +0800 Subject: [PATCH 02/10] update Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index 1353a44231bcf..d24ad440063cf 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -62,7 +62,7 @@ func genBucket4TestData(len int) []*bucket4Test { return result } -func genHist4Bench(t *testing.T, buckets []*bucket4Test, totColSize int64) *Histogram { +func genHist4Bench(t *testing.B, buckets []*bucket4Test, totColSize int64) *Histogram { h := NewHistogram(0, 0, 0, 0, types.NewFieldType(mysql.TypeBlob), len(buckets), totColSize) for _, bucket := range buckets { lower, err := codec.EncodeKey(nil, nil, types.NewIntDatum(bucket.lower)) @@ -79,6 +79,8 @@ func BenchABC(b *testing.B) { ctx := mock.NewContext() sc := ctx.GetSessionVars().StmtCtx hists := make([]*Histogram, 0, histogramLen) + buckets := genBucket4TestData(histogramLen) + hists := genHist4Bench(b, buckets, histogramLen) const expBucketNumber = 100 poped := make([]TopNMeta, 0, popedTopNLen) for _, top := range tt.popedTopN { @@ -90,6 +92,15 @@ func BenchABC(b *testing.B) { } poped = append(poped, tmp) } + for _, top := range 
tt.popedTopN { + b, err := codec.EncodeKey(sc, nil, types.NewIntDatum(top.data)) + require.NoError(t, err) + tmp := TopNMeta{ + Encoded: b, + Count: uint64(top.count), + } + poped = append(poped, tmp) + } for i := 0; i < b.N; i++ { globalHist, err := MergePartitionHist2GlobalHist(sc, hists, poped, expBucketNumber, true) } From d29209290b1a6a8541e4386ff9094f33cc71a3b6 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Fri, 8 Sep 2023 22:29:06 +0800 Subject: [PATCH 03/10] update Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 53 +++++++++++++++--------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index d24ad440063cf..b5ce1c42be6d3 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -50,7 +50,11 @@ func genBucket4TestData(len int) []*bucket4Test { } else { lower = upper + 1 } - upper = lower + (rand.Int63n(magicLower[n+1] - lower)) + if n == len-1 { + upper = 10000 + } else { + upper = lower + (rand.Int63n(magicLower[n+1] - lower)) + } result = append(result, &bucket4Test{ lower: lower, upper: upper, @@ -75,33 +79,28 @@ func genHist4Bench(t *testing.B, buckets []*bucket4Test, totColSize int64) *Hist return h } -func BenchABC(b *testing.B) { - ctx := mock.NewContext() - sc := ctx.GetSessionVars().StmtCtx - hists := make([]*Histogram, 0, histogramLen) - buckets := genBucket4TestData(histogramLen) - hists := genHist4Bench(b, buckets, histogramLen) - const expBucketNumber = 100 - poped := make([]TopNMeta, 0, popedTopNLen) - for _, top := range tt.popedTopN { - b, err := codec.EncodeKey(sc, nil, types.NewIntDatum(top.data)) - require.NoError(t, err) - tmp := TopNMeta{ - Encoded: b, - Count: uint64(top.count), +func BenchmarkMergePartitionHist2GlobalHist(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + ctx := mock.NewContext() + sc := ctx.GetSessionVars().StmtCtx + hists := make([]*Histogram, 0, 
histogramLen) + for i := 0; i < histogramLen; i++ { + buckets := genBucket4TestData(histogramLen) + hist := genHist4Bench(b, buckets, histogramLen) + hists = append(hists, hist) } - poped = append(poped, tmp) - } - for _, top := range tt.popedTopN { - b, err := codec.EncodeKey(sc, nil, types.NewIntDatum(top.data)) - require.NoError(t, err) - tmp := TopNMeta{ - Encoded: b, - Count: uint64(top.count), + const expBucketNumber = 100 + poped := make([]TopNMeta, 0, popedTopNLen) + for n := 0; n < popedTopNLen; n++ { + b, _ := codec.EncodeKey(sc, nil, types.NewIntDatum(rand.Int63n(10000))) + tmp := TopNMeta{ + Encoded: b, + Count: uint64(rand.Int63n(10000)), + } + poped = append(poped, tmp) } - poped = append(poped, tmp) - } - for i := 0; i < b.N; i++ { - globalHist, err := MergePartitionHist2GlobalHist(sc, hists, poped, expBucketNumber, true) + b.StartTimer() + MergePartitionHist2GlobalHist(sc, hists, poped, expBucketNumber, true) } } From ffa39f9823929497e066c79bbdc72a034ce7623c Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Fri, 8 Sep 2023 22:48:46 +0800 Subject: [PATCH 04/10] update Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index b5ce1c42be6d3..639ca0feb5660 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -25,6 +25,7 @@ import ( "github.com/stretchr/testify/require" ) +const partition = 1000 const histogramLen = 100 const popedTopNLen = 100 @@ -84,8 +85,8 @@ func BenchmarkMergePartitionHist2GlobalHist(b *testing.B) { b.StopTimer() ctx := mock.NewContext() sc := ctx.GetSessionVars().StmtCtx - hists := make([]*Histogram, 0, histogramLen) - for i := 0; i < histogramLen; i++ { + hists := make([]*Histogram, 0, partition) + for i := 0; i < partition; i++ { buckets := genBucket4TestData(histogramLen) hist := genHist4Bench(b, buckets, histogramLen) hists = 
append(hists, hist) From 64669ce1135202885ece136a29cfd87a0bc40f74 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Fri, 8 Sep 2023 23:28:06 +0800 Subject: [PATCH 05/10] update Signed-off-by: Weizhen Wang --- statistics/BUILD.bazel | 1 + statistics/histogram_bench_test.go | 1 + 2 files changed, 2 insertions(+) diff --git a/statistics/BUILD.bazel b/statistics/BUILD.bazel index 2a26bfb76f34d..f18d7dffef3bb 100644 --- a/statistics/BUILD.bazel +++ b/statistics/BUILD.bazel @@ -70,6 +70,7 @@ go_test( "cmsketch_test.go", "feedback_test.go", "fmsketch_test.go", + "histogram_bench_test.go", "histogram_test.go", "integration_test.go", "main_test.go", diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index 639ca0feb5660..38fd158174513 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -80,6 +80,7 @@ func genHist4Bench(t *testing.B, buckets []*bucket4Test, totColSize int64) *Hist return h } +// cmd: go test -run=^$ -bench=BenchmarkMergePartitionHist2GlobalHist -benchmem github.com/pingcap/tidb/statistics func BenchmarkMergePartitionHist2GlobalHist(b *testing.B) { for i := 0; i < b.N; i++ { b.StopTimer() From 632377be37f815ca1287d9d773f34370c05eff6e Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Sat, 9 Sep 2023 01:48:25 +0800 Subject: [PATCH 06/10] update Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index 38fd158174513..41a7a14cafdd4 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -42,16 +42,16 @@ var magicLower = [histogramLen]int64{ 9000, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, } -func genBucket4TestData(len int) []*bucket4Test { - result := make([]*bucket4Test, 0, len) +func genBucket4TestData(length int) []*bucket4Test { + result := make([]*bucket4Test, 0, length) var lower, 
upper int64 - for n := 0; n < len; n++ { + for n := 0; n < length; n++ { if n == 0 { lower = 0 } else { lower = upper + 1 } - if n == len-1 { + if n == length-1 { upper = 10000 } else { upper = lower + (rand.Int63n(magicLower[n+1] - lower)) From f6273303b3ecec0bb4ad16d318695577dfb0c68e Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Sat, 9 Sep 2023 10:11:44 +0800 Subject: [PATCH 07/10] update Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 55 ++++++++++++++++++------------ 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index 41a7a14cafdd4..8431ef5a4f47d 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -15,6 +15,7 @@ package statistics import ( + "fmt" "math/rand" "testing" @@ -25,7 +26,6 @@ import ( "github.com/stretchr/testify/require" ) -const partition = 1000 const histogramLen = 100 const popedTopNLen = 100 @@ -80,29 +80,40 @@ func genHist4Bench(t *testing.B, buckets []*bucket4Test, totColSize int64) *Hist return h } +func benchmarkMergePartitionHist2GlobalHist(b *testing.B, partition int) { + b.StopTimer() + ctx := mock.NewContext() + sc := ctx.GetSessionVars().StmtCtx + hists := make([]*Histogram, 0, partition) + for i := 0; i < partition; i++ { + buckets := genBucket4TestData(histogramLen) + hist := genHist4Bench(b, buckets, histogramLen) + hists = append(hists, hist) + } + const expBucketNumber = 100 + poped := make([]TopNMeta, 0, popedTopNLen) + for n := 0; n < popedTopNLen; n++ { + b, _ := codec.EncodeKey(sc, nil, types.NewIntDatum(rand.Int63n(10000))) + tmp := TopNMeta{ + Encoded: b, + Count: uint64(rand.Int63n(10000)), + } + poped = append(poped, tmp) + } + b.StartTimer() + MergePartitionHist2GlobalHist(sc, hists, poped, expBucketNumber, true) +} + +var benchmarkPartitionSize = []int{1000, 10000, 100000} + // cmd: go test -run=^$ -bench=BenchmarkMergePartitionHist2GlobalHist -benchmem 
github.com/pingcap/tidb/statistics func BenchmarkMergePartitionHist2GlobalHist(b *testing.B) { - for i := 0; i < b.N; i++ { - b.StopTimer() - ctx := mock.NewContext() - sc := ctx.GetSessionVars().StmtCtx - hists := make([]*Histogram, 0, partition) - for i := 0; i < partition; i++ { - buckets := genBucket4TestData(histogramLen) - hist := genHist4Bench(b, buckets, histogramLen) - hists = append(hists, hist) - } - const expBucketNumber = 100 - poped := make([]TopNMeta, 0, popedTopNLen) - for n := 0; n < popedTopNLen; n++ { - b, _ := codec.EncodeKey(sc, nil, types.NewIntDatum(rand.Int63n(10000))) - tmp := TopNMeta{ - Encoded: b, - Count: uint64(rand.Int63n(10000)), + for _, size := range benchmarkPartitionSize { + b.Run(fmt.Sprintf("Size%d", size), func(b *testing.B) { + for i := 0; i < b.N; i++ { + benchmarkMergePartitionHist2GlobalHist(b, size) } - poped = append(poped, tmp) - } - b.StartTimer() - MergePartitionHist2GlobalHist(sc, hists, poped, expBucketNumber, true) + }) } + } From dbbfa3f406916b33b25564e3e163b8f72f3d2d43 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Sat, 9 Sep 2023 11:32:08 +0800 Subject: [PATCH 08/10] update Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index 8431ef5a4f47d..a8c8225977e60 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -81,7 +81,6 @@ func genHist4Bench(t *testing.B, buckets []*bucket4Test, totColSize int64) *Hist } func benchmarkMergePartitionHist2GlobalHist(b *testing.B, partition int) { - b.StopTimer() ctx := mock.NewContext() sc := ctx.GetSessionVars().StmtCtx hists := make([]*Histogram, 0, partition) @@ -102,6 +101,7 @@ func benchmarkMergePartitionHist2GlobalHist(b *testing.B, partition int) { } b.StartTimer() MergePartitionHist2GlobalHist(sc, hists, poped, expBucketNumber, true) + b.StopTimer() } var benchmarkPartitionSize 
= []int{1000, 10000, 100000} From 9c7a79c9b5f4fef8e1d7dc6c4d94f1de62625d28 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Sun, 10 Sep 2023 11:00:30 +0800 Subject: [PATCH 09/10] update Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index a8c8225977e60..f2575b99e902e 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -28,19 +28,7 @@ import ( const histogramLen = 100 const popedTopNLen = 100 - -var magicLower = [histogramLen]int64{ - 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, - 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, - 2000, 2100, 2200, 2300, 2400, 2500, 2600, 2700, 2800, 2900, - 3000, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3800, 3900, - 4000, 4100, 4200, 4300, 4400, 4500, 4600, 4700, 4800, 4900, - 5000, 5100, 5200, 5300, 5400, 5500, 5600, 5700, 5800, 5900, - 6000, 6100, 6200, 6300, 6400, 6500, 6600, 6700, 6800, 6900, - 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7700, 7800, 7900, - 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8800, 8900, - 9000, 9100, 9200, 9300, 9400, 9500, 9600, 9700, 9800, 9900, -} +const expBucketNumber = 100 func genBucket4TestData(length int) []*bucket4Test { result := make([]*bucket4Test, 0, length) @@ -54,7 +42,7 @@ func genBucket4TestData(length int) []*bucket4Test { if n == length-1 { upper = 10000 } else { - upper = lower + (rand.Int63n(magicLower[n+1] - lower)) + upper = lower + (rand.Int63n(int64(100*(n+1)) - lower)) } result = append(result, &bucket4Test{ lower: lower, @@ -89,7 +77,6 @@ func benchmarkMergePartitionHist2GlobalHist(b *testing.B, partition int) { hist := genHist4Bench(b, buckets, histogramLen) hists = append(hists, hist) } - const expBucketNumber = 100 poped := make([]TopNMeta, 0, popedTopNLen) for n := 0; n < popedTopNLen; n++ { b, _ := codec.EncodeKey(sc, nil, 
types.NewIntDatum(rand.Int63n(10000))) @@ -115,5 +102,4 @@ func BenchmarkMergePartitionHist2GlobalHist(b *testing.B) { } }) } - } From d8b10789c6963f8c2cce1bf0d1d9eef02ece7204 Mon Sep 17 00:00:00 2001 From: Weizhen Wang Date: Sun, 10 Sep 2023 11:02:56 +0800 Subject: [PATCH 10/10] update Signed-off-by: Weizhen Wang --- statistics/histogram_bench_test.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/statistics/histogram_bench_test.go b/statistics/histogram_bench_test.go index f2575b99e902e..1a4b8e3db9746 100644 --- a/statistics/histogram_bench_test.go +++ b/statistics/histogram_bench_test.go @@ -26,9 +26,11 @@ import ( "github.com/stretchr/testify/require" ) -const histogramLen = 100 -const popedTopNLen = 100 -const expBucketNumber = 100 +const ( + histogramLen = 100 + popedTopNLen = 100 + expBucketNumber = 100 +) func genBucket4TestData(length int) []*bucket4Test { result := make([]*bucket4Test, 0, length)