From 2df3c1c5accc5e0e1b89d8c8c9ca0beeb07031c6 Mon Sep 17 00:00:00 2001 From: Guido Iaquinti Date: Mon, 17 May 2021 12:10:47 +0200 Subject: [PATCH 1/3] vtctl.generateShardRanges -> key.GenerateShardRanges Signed-off-by: Guido Iaquinti --- go/vt/key/key.go | 72 +++++++++++++++++++++++++++++++++++ go/vt/key/key_test.go | 72 +++++++++++++++++++++++++++++++++++ go/vt/vtctl/vtctl.go | 72 +---------------------------------- go/vt/vtctl/vtctl_test.go | 79 --------------------------------------- 4 files changed, 145 insertions(+), 150 deletions(-) diff --git a/go/vt/key/key.go b/go/vt/key/key.go index 9cedad6f409..be54b522b84 100644 --- a/go/vt/key/key.go +++ b/go/vt/key/key.go @@ -20,6 +20,7 @@ import ( "bytes" "encoding/binary" "encoding/hex" + "errors" "fmt" "math" "regexp" @@ -346,3 +347,74 @@ var krRegexp = regexp.MustCompile(`^[0-9a-fA-F]*-[0-9a-fA-F]*$`) func IsKeyRange(kr string) bool { return krRegexp.MatchString(kr) } + +// GenerateShardRanges returns shard ranges assuming a keyspace with N shards. +func GenerateShardRanges(shards int) ([]string, error) { + var format string + var maxShards int + + switch { + case shards <= 0: + return nil, errors.New("shards must be greater than zero") + case shards <= 256: + format = "%02x" + maxShards = 256 + case shards <= 65536: + format = "%04x" + maxShards = 65536 + default: + return nil, errors.New("this function does not support more than 65336 shards in a single keyspace") + } + + rangeFormatter := func(start, end int) string { + var ( + startKid string + endKid string + ) + + if start != 0 { + startKid = fmt.Sprintf(format, start) + } + + if end != maxShards { + endKid = fmt.Sprintf(format, end) + } + + return fmt.Sprintf("%s-%s", startKid, endKid) + } + + start := 0 + end := 0 + + // If shards does not divide evenly into maxShards, then there is some lossiness, + // where each shard is smaller than it should technically be (if, for example, size == 25.6). + // If we choose to keep everything in ints, then we have two choices: + // - Have every shard in #numshards be a uniform size, tack on an additional shard + // at the end of the range to account for the loss. This is bad because if you ask for + // 7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards. + // It's also bad because one shard will have much different data distribution than the rest. + // - Expand the final shard to include whatever is left in the keyrange. This will give the + // correct number of shards, which is good, but depending on how lossy each individual shard is, + // you could end with that final shard being significantly larger than the rest of the shards, + // so this doesn't solve the data distribution problem. + // + // By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense), + // we can re-truncate the integer end on each iteration, which spreads the lossiness more + // evenly across the shards. + // + // This implementation has no impact on shard numbers that are powers of 2, even at large numbers, + // which you can see in the tests. + size := float64(maxShards) / float64(shards) + realEnd := float64(0) + shardRanges := make([]string, 0, shards) + + for i := 1; i <= shards; i++ { + realEnd = float64(i) * size + + end = int(realEnd) + shardRanges = append(shardRanges, rangeFormatter(start, end)) + start = end + } + + return shardRanges, nil +} diff --git a/go/vt/key/key_test.go b/go/vt/key/key_test.go index 8643d0bcc73..935e51082b7 100644 --- a/go/vt/key/key_test.go +++ b/go/vt/key/key_test.go @@ -579,3 +579,75 @@ func TestIsKeyRange(t *testing.T) { assert.Equal(t, IsKeyRange(tcase.in), tcase.out, tcase.in) } } + +func TestGenerateShardRanges(t *testing.T) { + type args struct { + shards int + } + + tests := []struct { + name string + args args + want []string + wantErr bool + }{ + { + "errors for shards less than 0", + args{0}, + nil, + true, + }, + { + "errors for shards more than 65536", + args{65537}, + nil, + true, + }, + { + "works for a single shard", + args{1}, + []string{"-"}, + false, + }, + { + "works for more than one shard", + args{2}, + []string{"-80", "80-"}, + false, + }, + { + "works for an odd number of shards", + args{7}, + []string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"}, + false, + }, + { + "works for large number of shards", + args{256}, + []string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", "dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"}, + false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := GenerateShardRanges(tt.args.shards) + if tt.wantErr { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + + assert.Equal(t, got, tt.want) + }) + } +} + +func TestShardCalculatorForShardsGreaterThan512(t *testing.T) { + got, err := GenerateShardRanges(512) + assert.NoError(t, err) + + want := "ff80-" + + assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511]) +} diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 30ae2a4cc75..db146f444af 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -3532,7 +3532,7 @@ func commandGenerateShardRanges(ctx context.Context, wr *wrangler.Wrangler, subF return err } - shardRanges, err := generateShardRanges(*numShards) + shardRanges, err := key.GenerateShardRanges(*numShards) if err != nil { return err } @@ -3540,76 +3540,6 @@ func commandGenerateShardRanges(ctx context.Context, wr *wrangler.Wrangler, subF return printJSON(wr.Logger(), shardRanges) } -func generateShardRanges(shards int) ([]string, error) { - var format string - var maxShards int - - switch { - case shards <= 0: - return nil, errors.New("shards must be greater than zero") - case shards <= 256: - format = "%02x" - maxShards = 256 - case shards <= 65536: - format = "%04x" - maxShards = 65536 - default: - return nil, errors.New("this tool does not support more than 65336 shards in a single keyspace") - } - - rangeFormatter := func(start, end int) string { - var ( - startKid string - endKid string - ) - - if start != 0 { - startKid = fmt.Sprintf(format, start) - } - - if end != maxShards { - endKid = fmt.Sprintf(format, end) - } - - return fmt.Sprintf("%s-%s", startKid, endKid) - } - - start := 0 - end := 0 - - // If shards does not divide evenly into maxShards, then there is some lossiness, - // where each shard is smaller than it should technically be (if, for example, size == 25.6). - // If we choose to keep everything in ints, then we have two choices: - // - Have every shard in #numshards be a uniform size, tack on an additional shard - // at the end of the range to account for the loss. This is bad because if you ask for - // 7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards. - // It's also bad because one shard will have much different data distribution than the rest. - // - Expand the final shard to include whatever is left in the keyrange. This will give the - // correct number of shards, which is good, but depending on how lossy each individual shard is, - // you could end with that final shard being significantly larger than the rest of the shards, - // so this doesn't solve the data distribution problem. - // - // By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense), - // we can re-truncate the integer end on each iteration, which spreads the lossiness more - // evenly across the shards. - // - // This implementation has no impact on shard numbers that are powers of 2, even at large numbers, - // which you can see in the tests. - size := float64(maxShards) / float64(shards) - realEnd := float64(0) - shardRanges := make([]string, 0, shards) - - for i := 1; i <= shards; i++ { - realEnd = float64(i) * size - - end = int(realEnd) - shardRanges = append(shardRanges, rangeFormatter(start, end)) - start = end - } - - return shardRanges, nil -} - func commandPanic(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error { panic(fmt.Errorf("this command panics on purpose")) } diff --git a/go/vt/vtctl/vtctl_test.go b/go/vt/vtctl/vtctl_test.go index 40ebf065bf1..8179a2383da 100644 --- a/go/vt/vtctl/vtctl_test.go +++ b/go/vt/vtctl/vtctl_test.go @@ -1,80 +1 @@ package vtctl - -import ( - "testing" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestGenerateShardRanges(t *testing.T) { - type args struct { - shards int - } - - tests := []struct { - name string - args args - want []string - wantErr bool - }{ - { - "errors for shards less than 0", - args{0}, - nil, - true, - }, - { - "errors for shards more than 65536", - args{65537}, - nil, - true, - }, - { - "works for a single shard", - args{1}, - []string{"-"}, - false, - }, - { - "works for more than one shard", - args{2}, - []string{"-80", "80-"}, - false, - }, - { - "works for an odd number of shards", - args{7}, - []string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"}, - false, - }, - { - "works for large number of shards", - args{256}, - []string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", "dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"}, - false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := generateShardRanges(tt.args.shards) - if tt.wantErr { - require.Error(t, err) - } else { - require.NoError(t, err) - } - - assert.Equal(t, got, tt.want) - }) - } -} - -func TestShardCalculatorForShardsGreaterThan512(t *testing.T) { - got, err := generateShardRanges(512) - require.NoError(t, err) - - want := "ff80-" - - assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511]) -} From d83ebb63c0382a5fa8ab5cad0dbcfe056c9ed144 Mon Sep 17 00:00:00 2001 From: Guido Iaquinti Date: Mon, 17 May 2021 14:48:25 +0200 Subject: [PATCH 2/3] Use require instead of assert Signed-off-by: Guido Iaquinti --- go/vt/key/key_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/go/vt/key/key_test.go b/go/vt/key/key_test.go index 935e51082b7..ff363b7f673 100644 --- a/go/vt/key/key_test.go +++ b/go/vt/key/key_test.go @@ -23,6 +23,7 @@ import ( "github.com/golang/protobuf/proto" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" topodatapb "vitess.io/vitess/go/vt/proto/topodata" ) @@ -633,9 +634,9 @@ func TestGenerateShardRanges(t *testing.T) { t.Run(tt.name, func(t *testing.T) { got, err := GenerateShardRanges(tt.args.shards) if tt.wantErr { - assert.Error(t, err) + require.Error(t, err) } else { - assert.NoError(t, err) + require.NoError(t, err) } assert.Equal(t, got, tt.want) From 8faf89628a36c4a05d145a66a3194926abbe5f09 Mon Sep 17 00:00:00 2001 From: Guido Iaquinti Date: Mon, 17 May 2021 17:24:47 +0200 Subject: [PATCH 3/3] Remove useless else in test Signed-off-by: Guido Iaquinti --- go/vt/key/key_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go/vt/key/key_test.go b/go/vt/key/key_test.go index ff363b7f673..7191edf939c 100644 --- a/go/vt/key/key_test.go +++ b/go/vt/key/key_test.go @@ -634,11 +634,11 @@ func TestGenerateShardRanges(t *testing.T) { t.Run(tt.name, func(t *testing.T) { got, err := GenerateShardRanges(tt.args.shards) if tt.wantErr { - require.Error(t, err) - } else { - require.NoError(t, err) + assert.Error(t, err) + return } + require.NoError(t, err) assert.Equal(t, got, tt.want) }) }