vitessio · ajm188 · May 18, 2021 · May 17, 2021 · May 17, 2021 · May 17, 2021
diff --git a/go/vt/key/key.go b/go/vt/key/key.go
@@ -20,6 +20,7 @@ import (
 	"bytes"
 	"encoding/binary"
 	"encoding/hex"
+	"errors"
 	"fmt"
 	"math"
 	"regexp"
@@ -346,3 +347,74 @@ var krRegexp = regexp.MustCompile(`^[0-9a-fA-F]*-[0-9a-fA-F]*$`)
 func IsKeyRange(kr string) bool {
 	return krRegexp.MatchString(kr)
 }
+
+// GenerateShardRanges returns shard ranges assuming a keyspace with N shards.
+func GenerateShardRanges(shards int) ([]string, error) {
+	var format string
+	var maxShards int
+
+	switch {
+	case shards <= 0:
+		return nil, errors.New("shards must be greater than zero")
+	case shards <= 256:
+		format = "%02x"
+		maxShards = 256
+	case shards <= 65536:
+		format = "%04x"
+		maxShards = 65536
+	default:
+		return nil, errors.New("this function does not support more than 65336 shards in a single keyspace")
+	}
+
+	rangeFormatter := func(start, end int) string {
+		var (
+			startKid string
+			endKid   string
+		)
+
+		if start != 0 {
+			startKid = fmt.Sprintf(format, start)
+		}
+
+		if end != maxShards {
+			endKid = fmt.Sprintf(format, end)
+		}
+
+		return fmt.Sprintf("%s-%s", startKid, endKid)
+	}
+
+	start := 0
+	end := 0
+
+	// If shards does not divide evenly into maxShards, then there is some lossiness,
+	// where each shard is smaller than it should technically be (if, for example, size == 25.6).
+	// If we choose to keep everything in ints, then we have two choices:
+	// 	- Have every shard in #numshards be a uniform size, tack on an additional shard
+	//	  at the end of the range to account for the loss. This is bad because if you ask for
+	//	  7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards.
+	//	  It's also bad because one shard will have much different data distribution than the rest.
+	//	- Expand the final shard to include whatever is left in the keyrange. This will give the
+	//	  correct number of shards, which is good, but depending on how lossy each individual shard is,
+	//	  you could end with that final shard being significantly larger than the rest of the shards,
+	//	  so this doesn't solve the data distribution problem.
+	//
+	// By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense),
+	// we can re-truncate the integer end on each iteration, which spreads the lossiness more
+	// evenly across the shards.
+	//
+	// This implementation has no impact on shard numbers that are powers of 2, even at large numbers,
+	// which you can see in the tests.
+	size := float64(maxShards) / float64(shards)
+	realEnd := float64(0)
+	shardRanges := make([]string, 0, shards)
+
+	for i := 1; i <= shards; i++ {
+		realEnd = float64(i) * size
+
+		end = int(realEnd)
+		shardRanges = append(shardRanges, rangeFormatter(start, end))
+		start = end
+	}
+
+	return shardRanges, nil
+}
diff --git a/go/vt/key/key_test.go b/go/vt/key/key_test.go
@@ -23,6 +23,7 @@ import (
 
 	"github.com/golang/protobuf/proto"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 
 	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
 )
@@ -579,3 +580,75 @@ func TestIsKeyRange(t *testing.T) {
 		assert.Equal(t, IsKeyRange(tcase.in), tcase.out, tcase.in)
 	}
 }
+
+func TestGenerateShardRanges(t *testing.T) {
+	type args struct {
+		shards int
+	}
+
+	tests := []struct {
+		name    string
+		args    args
+		want    []string
+		wantErr bool
+	}{
+		{
+			"errors for shards less than 0",
+			args{0},
+			nil,
+			true,
+		},
+		{
+			"errors for shards more than 65536",
+			args{65537},
+			nil,
+			true,
+		},
+		{
+			"works for a single shard",
+			args{1},
+			[]string{"-"},
+			false,
+		},
+		{
+			"works for more than one shard",
+			args{2},
+			[]string{"-80", "80-"},
+			false,
+		},
+		{
+			"works for an odd number of shards",
+			args{7},
+			[]string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"},
+			false,
+		},
+		{
+			"works for large number of shards",
+			args{256},
+			[]string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", "dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"},
+			false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := GenerateShardRanges(tt.args.shards)
+			if tt.wantErr {
+				assert.Error(t, err)
+				return
+			}
+
+			require.NoError(t, err)
+			assert.Equal(t, got, tt.want)
+		})
+	}
+}
+
+func TestShardCalculatorForShardsGreaterThan512(t *testing.T) {
+	got, err := GenerateShardRanges(512)
+	assert.NoError(t, err)
+
+	want := "ff80-"
+
+	assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511])
+}
diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go
@@ -3532,84 +3532,14 @@ func commandGenerateShardRanges(ctx context.Context, wr *wrangler.Wrangler, subF
 		return err
 	}
 
-	shardRanges, err := generateShardRanges(*numShards)
+	shardRanges, err := key.GenerateShardRanges(*numShards)
 	if err != nil {
 		return err
 	}
 
 	return printJSON(wr.Logger(), shardRanges)
 }
 
-func generateShardRanges(shards int) ([]string, error) {
-	var format string
-	var maxShards int
-
-	switch {
-	case shards <= 0:
-		return nil, errors.New("shards must be greater than zero")
-	case shards <= 256:
-		format = "%02x"
-		maxShards = 256
-	case shards <= 65536:
-		format = "%04x"
-		maxShards = 65536
-	default:
-		return nil, errors.New("this tool does not support more than 65336 shards in a single keyspace")
-	}
-
-	rangeFormatter := func(start, end int) string {
-		var (
-			startKid string
-			endKid   string
-		)
-
-		if start != 0 {
-			startKid = fmt.Sprintf(format, start)
-		}
-
-		if end != maxShards {
-			endKid = fmt.Sprintf(format, end)
-		}
-
-		return fmt.Sprintf("%s-%s", startKid, endKid)
-	}
-
-	start := 0
-	end := 0
-
-	// If shards does not divide evenly into maxShards, then there is some lossiness,
-	// where each shard is smaller than it should technically be (if, for example, size == 25.6).
-	// If we choose to keep everything in ints, then we have two choices:
-	// 	- Have every shard in #numshards be a uniform size, tack on an additional shard
-	//	  at the end of the range to account for the loss. This is bad because if you ask for
-	//	  7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards.
-	//	  It's also bad because one shard will have much different data distribution than the rest.
-	//	- Expand the final shard to include whatever is left in the keyrange. This will give the
-	//	  correct number of shards, which is good, but depending on how lossy each individual shard is,
-	//	  you could end with that final shard being significantly larger than the rest of the shards,
-	//	  so this doesn't solve the data distribution problem.
-	//
-	// By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense),
-	// we can re-truncate the integer end on each iteration, which spreads the lossiness more
-	// evenly across the shards.
-	//
-	// This implementation has no impact on shard numbers that are powers of 2, even at large numbers,
-	// which you can see in the tests.
-	size := float64(maxShards) / float64(shards)
-	realEnd := float64(0)
-	shardRanges := make([]string, 0, shards)
-
-	for i := 1; i <= shards; i++ {
-		realEnd = float64(i) * size
-
-		end = int(realEnd)
-		shardRanges = append(shardRanges, rangeFormatter(start, end))
-		start = end
-	}
-
-	return shardRanges, nil
-}
-
 func commandPanic(ctx context.Context, wr *wrangler.Wrangler, subFlags *flag.FlagSet, args []string) error {
 	panic(fmt.Errorf("this command panics on purpose"))
 }

diff --git a/go/vt/vtctl/vtctl_test.go b/go/vt/vtctl/vtctl_test.go
@@ -1,80 +1 @@
 package vtctl
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-)
-
-func TestGenerateShardRanges(t *testing.T) {
-	type args struct {
-		shards int
-	}
-
-	tests := []struct {
-		name    string
-		args    args
-		want    []string
-		wantErr bool
-	}{
-		{
-			"errors for shards less than 0",
-			args{0},
-			nil,
-			true,
-		},
-		{
-			"errors for shards more than 65536",
-			args{65537},
-			nil,
-			true,
-		},
-		{
-			"works for a single shard",
-			args{1},
-			[]string{"-"},
-			false,
-		},
-		{
-			"works for more than one shard",
-			args{2},
-			[]string{"-80", "80-"},
-			false,
-		},
-		{
-			"works for an odd number of shards",
-			args{7},
-			[]string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"},
-			false,
-		},
-		{
-			"works for large number of shards",
-			args{256},
-			[]string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", "dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"},
-			false,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			got, err := generateShardRanges(tt.args.shards)
-			if tt.wantErr {
-				require.Error(t, err)
-			} else {
-				require.NoError(t, err)
-			}
-
-			assert.Equal(t, got, tt.want)
-		})
-	}
-}
-
-func TestShardCalculatorForShardsGreaterThan512(t *testing.T) {
-	got, err := generateShardRanges(512)
-	require.NoError(t, err)
-
-	want := "ff80-"
-
-	assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511])
-}