Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unicode_loose_xxhash Vindex type #6549

Merged
merged 4 commits into from
Aug 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions go/test/endtoend/vtgate/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,28 @@ create table t6_id2_idx(
keyspace_id varbinary(50),
primary key(id1),
key(id2)
) Engine=InnoDB;

create table t7_xxhash(
uid varchar(50),
phone bigint,
msg varchar(100),
primary key(uid)
) Engine=InnoDB;

create table t7_xxhash_idx(
phone bigint,
keyspace_id varbinary(50),
primary key(phone, keyspace_id)
) Engine=InnoDB;`

VSchema = `
{
"sharded": true,
"vindexes": {
"unicode_loose_xxhash" : {
"type": "unicode_loose_xxhash"
},
"unicode_loose_md5" : {
"type": "unicode_loose_md5"
},
Expand Down Expand Up @@ -175,6 +191,16 @@ create table t6_id2_idx(
"ignore_nulls": "true"
},
"owner": "t6"
},
"t7_xxhash_vdx": {
"type": "consistent_lookup",
"params": {
"table": "t7_xxhash_idx",
"from": "phone",
"to": "keyspace_id",
"ignore_nulls": "true"
},
"owner": "t7_xxhash"
}
},
"tables": {
Expand Down Expand Up @@ -307,6 +333,26 @@ create table t6_id2_idx(
"type": "VARCHAR"
}
]
},
"t7_xxhash": {
"column_vindexes": [
{
"column": "uid",
"name": "unicode_loose_xxhash"
},
{
"column": "phone",
"name": "t7_xxhash_vdx"
}
]
},
"t7_xxhash_idx": {
"column_vindexes": [
{
"column": "phone",
"name": "unicode_loose_xxhash"
}
]
}
}
}`
Expand Down
19 changes: 19 additions & 0 deletions go/test/endtoend/vtgate/misc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,25 @@ func TestExplainPassthrough(t *testing.T) {
// but we are trying to make the test less fragile
}

// TestXXHash walks one row of t7_xxhash through insert, update and delete and
// checks after each step what is visible through the phone lookup and what is
// stored in the t7_xxhash_idx table.
func TestXXHash(t *testing.T) {
	defer cluster.PanicHandler(t)

	ctx := context.Background()
	conn, err := mysql.Connect(ctx, &vtParams)
	require.Nil(t, err)
	defer conn.Close()

	// Queries that are issued more than once during the scenario.
	const (
		selectByPhone1 = "select uid, phone, msg from t7_xxhash where phone = 1"
		selectIdxRows  = "select phone, keyspace_id from t7_xxhash_idx"
	)

	// Insert: the row is reachable by phone and the index table holds one entry.
	exec(t, conn, "insert into t7_xxhash(uid, phone, msg) values('u-1', 1, 'message')")
	assertMatches(t, conn, selectByPhone1, `[[VARCHAR("u-1") INT64(1) VARCHAR("message")]]`)
	assertMatches(t, conn, selectIdxRows, `[[INT64(1) VARBINARY("\x1cU^f\xbfyE^")]]`)

	// Update: the old phone no longer matches, the new one does, and the index
	// entry carries the new phone with the same keyspace id.
	exec(t, conn, "update t7_xxhash set phone = 2 where uid = 'u-1'")
	assertMatches(t, conn, selectByPhone1, `[]`)
	assertMatches(t, conn, "select uid, phone, msg from t7_xxhash where phone = 2", `[[VARCHAR("u-1") INT64(2) VARCHAR("message")]]`)
	assertMatches(t, conn, selectIdxRows, `[[INT64(2) VARBINARY("\x1cU^f\xbfyE^")]]`)

	// Delete: both the row and its index entry are gone.
	exec(t, conn, "delete from t7_xxhash where uid = 'u-1'")
	assertMatches(t, conn, "select uid, phone, msg from t7_xxhash where uid = 'u-1'", `[]`)
	assertMatches(t, conn, selectIdxRows, `[]`)
}

func assertMatches(t *testing.T, conn *mysql.Conn, query, expected string) {
t.Helper()
qr := exec(t, conn, query)
Expand Down
6 changes: 3 additions & 3 deletions go/vt/vtgate/vindexes/binarymd5.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func (vind *BinaryMD5) NeedsVCursor() bool {
// Verify reports, for each position i, whether the MD5 hash of ids[i] equals
// ksids[i]. The returned slice has one entry per id; the error is always nil.
func (vind *BinaryMD5) Verify(_ VCursor, ids []sqltypes.Value, ksids [][]byte) ([]bool, error) {
	out := make([]bool, len(ids))
	for i := range ids {
		// Only the renamed helper is used; the earlier binHash call was a
		// leftover whose result was immediately overwritten.
		out[i] = bytes.Equal(vMD5Hash(ids[i].ToBytes()), ksids[i])
	}
	return out, nil
}
Expand All @@ -71,12 +71,12 @@ func (vind *BinaryMD5) Verify(_ VCursor, ids []sqltypes.Value, ksids [][]byte) (
// Map returns one destination per id: a key.DestinationKeyspaceID holding the
// MD5 hash of the id's bytes. The error is always nil.
func (vind *BinaryMD5) Map(cursor VCursor, ids []sqltypes.Value) ([]key.Destination, error) {
	out := make([]key.Destination, len(ids))
	for i, id := range ids {
		// Only the renamed helper is used; the earlier binHash call was a
		// leftover whose result was immediately overwritten.
		out[i] = key.DestinationKeyspaceID(vMD5Hash(id.ToBytes()))
	}
	return out, nil
}

// vMD5Hash returns the 16-byte MD5 digest of source as a freshly allocated
// slice. A nil or empty source yields the digest of the empty input.
func vMD5Hash(source []byte) []byte {
	sum := md5.Sum(source)
	// md5.Sum returns an array; slice it so callers get a []byte.
	return sum[:]
}
Expand Down
35 changes: 35 additions & 0 deletions go/vt/vtgate/vindexes/binarymd5_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package vindexes

import (
"fmt"
"reflect"
"testing"

Expand Down Expand Up @@ -88,3 +89,37 @@ func TestSQLValue(t *testing.T) {
t.Errorf("Map(%#v): %#v, want %#v", val, out, want)
}
}

// BenchmarkMD5Hash runs benchmarkMD5HashBytes as a sub-benchmark for a range
// of input sizes. Each input is filled with the byte pattern 0, 1, 2, ...
// (wrapping at 256).
func BenchmarkMD5Hash(b *testing.B) {
	type sizeCase struct {
		name string
		n    int
	}
	sizes := []sizeCase{
		{name: "8B", n: 8},
		{name: "32B", n: 32},
		{name: "64B", n: 64},
		{name: "512B", n: 512},
		{name: "1KB", n: 1000},
		{name: "4KB", n: 4000},
	}

	for _, sc := range sizes {
		payload := make([]byte, sc.n)
		for idx := range payload {
			payload[idx] = byte(idx)
		}

		label := fmt.Sprintf("md5Hash,direct,bytes,n=%s", sc.name)
		b.Run(label, func(b *testing.B) {
			benchmarkMD5HashBytes(b, payload)
		})
	}
}

// sinkMD5 receives every hash computed by benchmarkMD5HashBytes. Storing the
// result in a package-level variable presumably keeps the compiler from
// optimizing the hashed call away.
var sinkMD5 []byte

// benchmarkMD5HashBytes hashes input b.N times with vMD5Hash, reporting
// throughput based on len(input) and storing each result in sinkMD5.
func benchmarkMD5HashBytes(b *testing.B, input []byte) {
	size := int64(len(input))
	b.SetBytes(size)

	for remaining := b.N; remaining > 0; remaining-- {
		sinkMD5 = vMD5Hash(input)
	}
}
2 changes: 1 addition & 1 deletion go/vt/vtgate/vindexes/lookup_unicodeloosemd5_hash.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ func (lhu *LookupUnicodeLooseMD5HashUnique) MarshalJSON() ([]byte, error) {
}

func unicodeHashValue(value sqltypes.Value) (sqltypes.Value, error) {
hash, err := unicodeHash(value)
hash, err := unicodeHash(vMD5Hash, value)
if err != nil {
return sqltypes.NULL, err
}
Expand Down
89 changes: 89 additions & 0 deletions go/vt/vtgate/vindexes/unicode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
Copyright 2020 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vindexes

import (
"bytes"
"fmt"
"sync"
"unicode/utf8"

"vitess.io/vitess/go/sqltypes"

"golang.org/x/text/collate"
"golang.org/x/text/language"
)

// Shared functions for Unicode string normalization
// for Vindexes.

// unicodeHash normalizes key with a pooled collator and returns hashFunc
// applied to the resulting collation key. It returns an error when key cannot
// be normalized.
//
// hashFunc must not retain the slice it is given: that slice may point into
// the pooled buffer, which is reset and returned to the pool when unicodeHash
// returns.
func unicodeHash(hashFunc func([]byte) []byte, key sqltypes.Value) ([]byte, error) {
	collator := collatorPool.Get().(*pooledCollator)
	defer func() {
		// Collator.Key appends each generated key to the buffer. Without a
		// Reset the pooled buffer would retain every key ever produced and
		// keep growing for as long as the pair stays in the pool.
		collator.buf.Reset()
		collatorPool.Put(collator)
	}()

	norm, err := normalize(collator.col, collator.buf, key.ToBytes())
	if err != nil {
		return nil, err
	}
	// Hash before the deferred Reset/Put runs, while norm is still ours.
	return hashFunc(norm), nil
}

// normalize returns the collation key of in, computed by col using buf as
// scratch space, after stripping trailing spaces. It fails if in is not valid
// UTF-8.
func normalize(col *collate.Collator, buf *collate.Buffer, in []byte) ([]byte, error) {
	// The collator must never be handed malformed UTF-8, so reject it up front.
	if valid := utf8.Valid(in); !valid {
		return nil, fmt.Errorf("cannot normalize string containing invalid UTF-8: %q", string(in))
	}

	// MySQL ignores trailing spaces
	// (ref: http://dev.mysql.com/doc/refman/5.6/en/char.html),
	// so they are removed before the key is computed.
	trimmed := bytes.TrimRight(in, " ")

	// The collation key is what allows lexical comparisons between values.
	sortKey := col.Key(buf, trimmed)
	return sortKey, nil
}

// pooledCollator pairs a Collator and a Buffer.
// These pairs are pooled to avoid reallocating for every request,
// which would otherwise be required because they can't be used concurrently.
//
// Note that you must ensure no active references into the buffer remain
// before you return this pair back to the pool.
// That is, either do your processing on the result first, or make a copy.
type pooledCollator struct {
// col is the collator used to compute collation keys.
col *collate.Collator
// buf is the scratch buffer passed to col when computing a key.
buf *collate.Buffer
}

// collatorPool holds reusable *pooledCollator values; newPooledCollator
// creates a new one whenever the pool has none available.
var collatorPool = sync.Pool{New: newPooledCollator}

// newPooledCollator is the constructor used by collatorPool. It returns a
// *pooledCollator holding a loose English collator and a fresh buffer.
func newPooledCollator() interface{} {
	// Ref: http://www.unicode.org/reports/tr10/#Introduction.
	// Unicode appears to define a universal (default) ordering, which
	// individual locales are allowed to override, sometimes in conflicting
	// ways. The Go library insists on a locale, so English was picked on the
	// assumption that it leaves the universal order untouched; there was no
	// easy way found to verify that assumption.
	// Locale differences do not matter at level 1 anyway, because the
	// conservative comparison treats them all as equal.
	pc := new(pooledCollator)
	pc.col = collate.New(language.English, collate.Loose)
	pc.buf = new(collate.Buffer)
	return pc
}
Loading