Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

slack-vitess vtctld 10.0.2.r2 #233

Merged
merged 3 commits into from
Oct 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 110 additions & 4 deletions go/vt/key/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bytes"
"encoding/binary"
"encoding/hex"
"errors"
"fmt"
"math"
"regexp"
Expand Down Expand Up @@ -191,8 +192,30 @@ func KeyRangeEqual(left, right *topodatapb.KeyRange) bool {
if right == nil {
return len(left.Start) == 0 && len(left.End) == 0
}
return bytes.Equal(left.Start, right.Start) &&
bytes.Equal(left.End, right.End)
return bytes.Equal(addPadding(left.Start), addPadding(right.Start)) &&
bytes.Equal(addPadding(left.End), addPadding(right.End))
}

// addPadding returns a copy of kr right-padded with zero bytes so that it is
// at least 8 bytes long, i.e. so that the keyrange bound represents an 8 byte
// integer.
// From Vitess docs:
// A hash vindex produces an 8-byte number.
// This means that all numbers less than 0x8000000000000000 will fall in shard -80.
// Any number with the highest bit set will be >= 0x8000000000000000, and will therefore
// belong to shard 80-.
// This means that from a keyrange perspective -80 == 00-80 == 0000-8000 == 000000-800000
// If we don't add this padding, we could run into issues when transitioning from keyranges
// that use 2 bytes to 4 bytes.
//
// Inputs that are already 8 bytes or longer are returned as an unmodified copy;
// nothing is ever truncated. The input slice is never modified or aliased.
func addPadding(kr []byte) []byte {
	const width = 8

	size := len(kr)
	if size < width {
		size = width
	}

	// make zero-fills the buffer, so copying kr to the front leaves the
	// remaining bytes as the trailing zero padding.
	paddedKr := make([]byte, size)
	copy(paddedKr, kr)
	return paddedKr
}

// KeyRangeStartSmaller returns true if right's keyrange start is _after_ left's start
Expand All @@ -214,7 +237,19 @@ func KeyRangeStartEqual(left, right *topodatapb.KeyRange) bool {
if right == nil {
return len(left.Start) == 0
}
return bytes.Equal(left.Start, right.Start)
return bytes.Equal(addPadding(left.Start), addPadding(right.Start))
}

// KeyRangeContiguous reports whether left and right are contiguous, i.e.
// whether the end of left is the same key as the start of right once both
// have been padded to a common width.
// When either argument is nil, the result is true only if the other argument
// is also nil or has both an empty Start and an empty End.
func KeyRangeContiguous(left, right *topodatapb.KeyRange) bool {
	if left != nil && right != nil {
		leftEnd, rightStart := addPadding(left.End), addPadding(right.Start)
		return bytes.Equal(leftEnd, rightStart)
	}

	// At least one side is nil: inspect whichever side may still be non-nil.
	other := left
	if left == nil {
		other = right
	}
	return other == nil || (len(other.Start) == 0 && len(other.End) == 0)
}

// KeyRangeEndEqual returns true if both key ranges have the same end
Expand All @@ -225,7 +260,7 @@ func KeyRangeEndEqual(left, right *topodatapb.KeyRange) bool {
if right == nil {
return len(left.End) == 0
}
return bytes.Equal(left.End, right.End)
return bytes.Equal(addPadding(left.End), addPadding(right.End))
}

// For more info on the following functions, see:
Expand Down Expand Up @@ -346,3 +381,74 @@ var krRegexp = regexp.MustCompile(`^[0-9a-fA-F]*-[0-9a-fA-F]*$`)
func IsKeyRange(kr string) bool {
	// krRegexp accepts optional hex digits on both sides of a single '-',
	// so "-", "-80", "80-" and "40-80" are all considered key ranges.
	isRange := krRegexp.MatchString(kr)
	return isRange
}

// GenerateShardRanges returns shard ranges assuming a keyspace with N shards.
//
// Up to 256 shards are expressed with one-byte (two hex digit) bounds, and up
// to 65536 shards with two-byte (four hex digit) bounds. The first range has
// an empty start and the last range has an empty end, so the returned ranges
// always cover the whole keyspace. An error is returned when shards is not
// positive or is greater than 65536.
func GenerateShardRanges(shards int) ([]string, error) {
	var format string
	var maxShards int

	switch {
	case shards <= 0:
		return nil, errors.New("shards must be greater than zero")
	case shards <= 256:
		format = "%02x"
		maxShards = 256
	case shards <= 65536:
		format = "%04x"
		maxShards = 65536
	default:
		return nil, errors.New("this function does not support more than 65536 shards in a single keyspace")
	}

	// rangeFormatter renders one "start-end" range, leaving the bound empty
	// at either edge of the keyspace.
	rangeFormatter := func(start, end int) string {
		var (
			startKid string
			endKid   string
		)

		if start != 0 {
			startKid = fmt.Sprintf(format, start)
		}

		if end != maxShards {
			endKid = fmt.Sprintf(format, end)
		}

		return fmt.Sprintf("%s-%s", startKid, endKid)
	}

	// If shards does not divide evenly into maxShards, then there is some lossiness,
	// where each shard is smaller than it should technically be (if, for example, size == 25.6).
	// If we choose to keep everything in ints, then we have two choices:
	// 	- Have every shard in #numshards be a uniform size, tack on an additional shard
	//	  at the end of the range to account for the loss. This is bad because if you ask for
	//	  7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards.
	//	  It's also bad because one shard will have much different data distribution than the rest.
	//	- Expand the final shard to include whatever is left in the keyrange. This will give the
	//	  correct number of shards, which is good, but depending on how lossy each individual shard is,
	//	  you could end with that final shard being significantly larger than the rest of the shards,
	//	  so this doesn't solve the data distribution problem.
	//
	// Instead, the end of shard i is computed directly as floor(i * maxShards / shards),
	// which spreads the lossiness evenly across the shards.
	//
	// The computation is done with exact integer arithmetic rather than floats:
	// float64(i) * (maxShards / shards) can round to just below an integer (for
	// example 49 * (256.0 / 49) == 255.99999999999997), which would truncate to the
	// wrong bound and leave the final shard without its open end. int64 is used so
	// that i * maxShards (up to 2^32) cannot overflow on platforms with a 32-bit int.
	//
	// This implementation has no impact on shard numbers that are powers of 2, even at large numbers.
	shardRanges := make([]string, 0, shards)

	start := 0
	for i := 1; i <= shards; i++ {
		end := int(int64(i) * int64(maxShards) / int64(shards))
		shardRanges = append(shardRanges, rangeFormatter(start, end))
		start = end
	}

	return shardRanges, nil
}
Loading