Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vtctl.generateShardRanges -> key.GenerateShardRanges #8134

Merged
merged 3 commits into from
May 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions go/vt/key/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bytes"
"encoding/binary"
"encoding/hex"
"errors"
"fmt"
"math"
"regexp"
Expand Down Expand Up @@ -346,3 +347,74 @@ var krRegexp = regexp.MustCompile(`^[0-9a-fA-F]*-[0-9a-fA-F]*$`)
func IsKeyRange(kr string) bool {
return krRegexp.MatchString(kr)
}

// GenerateShardRanges returns shard ranges assuming a keyspace with N shards.
func GenerateShardRanges(shards int) ([]string, error) {
var format string
var maxShards int

switch {
case shards <= 0:
return nil, errors.New("shards must be greater than zero")
case shards <= 256:
format = "%02x"
maxShards = 256
case shards <= 65536:
format = "%04x"
maxShards = 65536
default:
return nil, errors.New("this function does not support more than 65336 shards in a single keyspace")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error has been renamed

}

rangeFormatter := func(start, end int) string {
var (
startKid string
endKid string
)

if start != 0 {
startKid = fmt.Sprintf(format, start)
}

if end != maxShards {
endKid = fmt.Sprintf(format, end)
}

return fmt.Sprintf("%s-%s", startKid, endKid)
}

start := 0
end := 0

// If shards does not divide evenly into maxShards, then there is some lossiness,
// where each shard is smaller than it should technically be (if, for example, size == 25.6).
// If we choose to keep everything in ints, then we have two choices:
// - Have every shard in #numshards be a uniform size, tack on an additional shard
// at the end of the range to account for the loss. This is bad because if you ask for
// 7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards.
// It's also bad because one shard will have much different data distribution than the rest.
// - Expand the final shard to include whatever is left in the keyrange. This will give the
// correct number of shards, which is good, but depending on how lossy each individual shard is,
// you could end with that final shard being significantly larger than the rest of the shards,
// so this doesn't solve the data distribution problem.
//
// By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense),
// we can re-truncate the integer end on each iteration, which spreads the lossiness more
// evenly across the shards.
//
// This implementation has no impact on shard numbers that are powers of 2, even at large numbers,
// which you can see in the tests.
size := float64(maxShards) / float64(shards)
realEnd := float64(0)
shardRanges := make([]string, 0, shards)

for i := 1; i <= shards; i++ {
realEnd = float64(i) * size

end = int(realEnd)
shardRanges = append(shardRanges, rangeFormatter(start, end))
start = end
}

return shardRanges, nil
}
73 changes: 73 additions & 0 deletions go/vt/key/key_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/golang/protobuf/proto"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

topodatapb "vitess.io/vitess/go/vt/proto/topodata"
)
Expand Down Expand Up @@ -579,3 +580,75 @@ func TestIsKeyRange(t *testing.T) {
assert.Equal(t, IsKeyRange(tcase.in), tcase.out, tcase.in)
}
}

func TestGenerateShardRanges(t *testing.T) {
type args struct {
shards int
}

tests := []struct {
name string
args args
want []string
wantErr bool
}{
{
"errors for shards less than 0",
args{0},
nil,
true,
},
{
"errors for shards more than 65536",
args{65537},
nil,
true,
},
{
"works for a single shard",
args{1},
[]string{"-"},
false,
},
{
"works for more than one shard",
args{2},
[]string{"-80", "80-"},
false,
},
{
"works for an odd number of shards",
args{7},
[]string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"},
false,
},
{
"works for large number of shards",
args{256},
[]string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", "dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"},
false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := GenerateShardRanges(tt.args.shards)
if tt.wantErr {
assert.Error(t, err)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before it was require.Error(t, err) but I don't think this was really needed (and we can avoid importing require)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We actually want the behavior of require in the cases where tt.wantErr=false in that it will stop execution of the test if the assertion fails (this is the main difference between the assert and require packages. It makes the intent of the test assertion much clearer in my view

Copy link
Member Author

@guidoiaquinti guidoiaquinti May 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Package require implements the same assertions as the assert package but stops test execution when a test fails.

I don't see a big difference between the two but if you think it's cleaner I revert my change (see next commit).

return
}

require.NoError(t, err)
assert.Equal(t, got, tt.want)
})
}
}

func TestShardCalculatorForShardsGreaterThan512(t *testing.T) {
got, err := GenerateShardRanges(512)
assert.NoError(t, err)

want := "ff80-"

assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511])
}
72 changes: 1 addition & 71 deletions go/vt/vtctl/vtctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -3532,84 +3532,14 @@ func commandGenerateShardRanges(ctx context.Context, wr *wrangler.Wrangler, subF
return err
}

shardRanges, err := generateShardRanges(*numShards)
shardRanges, err := key.GenerateShardRanges(*numShards)
if err != nil {
return err
}

return printJSON(wr.Logger(), shardRanges)
}

func generateShardRanges(shards int) ([]string, error) {
var format string
var maxShards int

switch {
case shards <= 0:
return nil, errors.New("shards must be greater than zero")
case shards <= 256:
format = "%02x"
maxShards = 256
case shards <= 65536:
format = "%04x"
maxShards = 65536
default:
return nil, errors.New("this tool does not support more than 65336 shards in a single keyspace")
}

rangeFormatter := func(start, end int) string {
var (
startKid string
endKid string
)

if start != 0 {
startKid = fmt.Sprintf(format, start)
}

if end != maxShards {
endKid = fmt.Sprintf(format, end)
}

return fmt.Sprintf("%s-%s", startKid, endKid)
}

start := 0
end := 0

// If shards does not divide evenly into maxShards, then there is some lossiness,
// where each shard is smaller than it should technically be (if, for example, size == 25.6).
// If we choose to keep everything in ints, then we have two choices:
// - Have every shard in #numshards be a uniform size, tack on an additional shard
// at the end of the range to account for the loss. This is bad because if you ask for
// 7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards.
// It's also bad because one shard will have much different data distribution than the rest.
// - Expand the final shard to include whatever is left in the keyrange. This will give the
// correct number of shards, which is good, but depending on how lossy each individual shard is,
// you could end with that final shard being significantly larger than the rest of the shards,
// so this doesn't solve the data distribution problem.
//
// By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense),
// we can re-truncate the integer end on each iteration, which spreads the lossiness more
// evenly across the shards.
//
// This implementation has no impact on shard numbers that are powers of 2, even at large numbers,
// which you can see in the tests.
size := float64(maxShards) / float64(shards)
realEnd := float64(0)
shardRanges := make([]string, 0, shards)

for i := 1; i <= shards; i++ {
realEnd = float64(i) * size

end = int(realEnd)
shardRanges = append(shardRanges, rangeFormatter(start, end))
start = end
}

return shardRanges, nil
}

func commandPanic(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
panic(fmt.Errorf("this command panics on purpose"))
}
Expand Down
79 changes: 0 additions & 79 deletions go/vt/vtctl/vtctl_test.go
Original file line number Diff line number Diff line change
@@ -1,80 +1 @@
package vtctl

import (
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestGenerateShardRanges(t *testing.T) {
type args struct {
shards int
}

tests := []struct {
name string
args args
want []string
wantErr bool
}{
{
"errors for shards less than 0",
args{0},
nil,
true,
},
{
"errors for shards more than 65536",
args{65537},
nil,
true,
},
{
"works for a single shard",
args{1},
[]string{"-"},
false,
},
{
"works for more than one shard",
args{2},
[]string{"-80", "80-"},
false,
},
{
"works for an odd number of shards",
args{7},
[]string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"},
false,
},
{
"works for large number of shards",
args{256},
[]string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", "dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"},
false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := generateShardRanges(tt.args.shards)
if tt.wantErr {
require.Error(t, err)
} else {
require.NoError(t, err)
}

assert.Equal(t, got, tt.want)
})
}
}

func TestShardCalculatorForShardsGreaterThan512(t *testing.T) {
got, err := generateShardRanges(512)
require.NoError(t, err)

want := "ff80-"

assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511])
}