diff --git a/cmd/explaintest/r/vitess_hash.result b/cmd/explaintest/r/vitess_hash.result new file mode 100644 index 0000000000000..1e20ff988c8f4 --- /dev/null +++ b/cmd/explaintest/r/vitess_hash.result @@ -0,0 +1,30 @@ +use test; +drop table if exists t; +create table t( +customer_id bigint, +id bigint, +expected_shard bigint unsigned, +computed_shard bigint unsigned null, +primary key (customer_id, id) +); +create index t_vitess_shard on t((vitess_hash(customer_id) >> 56)); +explain format = 'brief' select customer_id from t where (vitess_hash(customer_id) >> 56) = x'd6' ORDER BY id; +id estRows task access object operator info +Projection 10.00 root test.t.customer_id +└─Sort 10.00 root test.t.id + └─IndexLookUp 10.00 root + ├─IndexRangeScan(Build) 10.00 cop[tikv] table:t, index:t_vitess_shard(vitess_hash(`customer_id`) >> 56) range:[214,214], keep order:false, stats:pseudo + └─TableRowIDScan(Probe) 10.00 cop[tikv] table:t keep order:false, stats:pseudo +explain format = 'brief' select id from t where (vitess_hash(customer_id) >> 56) IN (x'e0', x'e1') AND id BETWEEN 2 AND 5 ORDER BY id; +id estRows task access object operator info +Projection 0.50 root test.t.id +└─Sort 0.50 root test.t.id + └─IndexLookUp 0.50 root + ├─IndexRangeScan(Build) 20.00 cop[tikv] table:t, index:t_vitess_shard(vitess_hash(`customer_id`) >> 56) range:[224,224], [225,225], keep order:false, stats:pseudo + └─Selection(Probe) 0.50 cop[tikv] ge(test.t.id, 2), le(test.t.id, 5) + └─TableRowIDScan 20.00 cop[tikv] table:t keep order:false, stats:pseudo +explain format = 'brief' select hex(vitess_hash(1123)) from t; +id estRows task access object operator info +Projection 10000.00 root 31B565D41BDF8CA->Column#7 +└─IndexReader 10000.00 root index:IndexFullScan + └─IndexFullScan 10000.00 cop[tikv] table:t, index:t_vitess_shard(vitess_hash(`customer_id`) >> 56) keep order:false, stats:pseudo diff --git a/cmd/explaintest/t/vitess_hash.test b/cmd/explaintest/t/vitess_hash.test new file mode 100644 
index 0000000000000..6fd221a44019d --- /dev/null +++ b/cmd/explaintest/t/vitess_hash.test @@ -0,0 +1,13 @@ +use test; +drop table if exists t; +create table t( + customer_id bigint, + id bigint, + expected_shard bigint unsigned, + computed_shard bigint unsigned null, + primary key (customer_id, id) +); +create index t_vitess_shard on t((vitess_hash(customer_id) >> 56)); +explain format = 'brief' select customer_id from t where (vitess_hash(customer_id) >> 56) = x'd6' ORDER BY id; +explain format = 'brief' select id from t where (vitess_hash(customer_id) >> 56) IN (x'e0', x'e1') AND id BETWEEN 2 AND 5 ORDER BY id; +explain format = 'brief' select hex(vitess_hash(1123)) from t; diff --git a/executor/show_test.go b/executor/show_test.go index 032c4d08839c8..a4a0b763337c7 100644 --- a/executor/show_test.go +++ b/executor/show_test.go @@ -1078,9 +1078,9 @@ func (s *testSuite5) TestShowBuiltin(c *C) { res := tk.MustQuery("show builtins;") c.Assert(res, NotNil) rows := res.Rows() - c.Assert(267, Equals, len(rows)) + c.Assert(268, Equals, len(rows)) c.Assert("abs", Equals, rows[0][0].(string)) - c.Assert("yearweek", Equals, rows[266][0].(string)) + c.Assert("yearweek", Equals, rows[267][0].(string)) } func (s *testSuite5) TestShowClusterConfig(c *C) { diff --git a/expression/builtin.go b/expression/builtin.go index 46c61220f8ce5..9c530f92949e1 100644 --- a/expression/builtin.go +++ b/expression/builtin.go @@ -787,6 +787,7 @@ var funcs = map[string]functionClass{ ast.ReleaseAllLocks: &releaseAllLocksFunctionClass{baseFunctionClass{ast.ReleaseAllLocks, 0, 0}}, ast.UUID: &uuidFunctionClass{baseFunctionClass{ast.UUID, 0, 0}}, ast.UUIDShort: &uuidShortFunctionClass{baseFunctionClass{ast.UUIDShort, 0, 0}}, + ast.VitessHash: &vitessHashFunctionClass{baseFunctionClass{ast.VitessHash, 1, 1}}, // get_lock() and release_lock() are parsed but do nothing. // It is used for preventing error in Ruby's activerecord migrations. 
diff --git a/expression/builtin_miscellaneous.go b/expression/builtin_miscellaneous.go index 25b4ddb3c4fbb..72f1b8e6ac2ed 100644 --- a/expression/builtin_miscellaneous.go +++ b/expression/builtin_miscellaneous.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/types/json" "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/vitess" "github.com/pingcap/tipb/go-tipb" ) @@ -51,6 +52,7 @@ var ( _ functionClass = &releaseAllLocksFunctionClass{} _ functionClass = &uuidFunctionClass{} _ functionClass = &uuidShortFunctionClass{} + _ functionClass = &vitessHashFunctionClass{} ) var ( @@ -73,6 +75,7 @@ var ( _ builtinFunc = &builtinIsIPv4MappedSig{} _ builtinFunc = &builtinIsIPv6Sig{} _ builtinFunc = &builtinUUIDSig{} + _ builtinFunc = &builtinVitessHashSig{} _ builtinFunc = &builtinNameConstIntSig{} _ builtinFunc = &builtinNameConstRealSig{} @@ -1046,3 +1049,48 @@ type uuidShortFunctionClass struct { func (c *uuidShortFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) { return nil, errFunctionNotExists.GenWithStackByArgs("FUNCTION", "UUID_SHORT") } + +type vitessHashFunctionClass struct { + baseFunctionClass +} + +func (c *vitessHashFunctionClass) getFunction(ctx sessionctx.Context, args []Expression) (builtinFunc, error) { + if err := c.verifyArgs(args); err != nil { + return nil, err + } + bf, err := newBaseBuiltinFuncWithTp(ctx, c.funcName, args, types.ETInt, types.ETInt) + if err != nil { + return nil, err + } + + bf.tp.Flen = 20 // max decimal digits of a 64-bit unsigned integer + bf.tp.Flag |= mysql.UnsignedFlag + types.SetBinChsClnFlag(bf.tp) + + sig := &builtinVitessHashSig{bf} + sig.setPbCode(tipb.ScalarFuncSig_VitessHash) + return sig, nil +} + +type builtinVitessHashSig struct { + baseBuiltinFunc +} + +func (b *builtinVitessHashSig) Clone() builtinFunc { + newSig := &builtinVitessHashSig{} + newSig.cloneFrom(&b.baseBuiltinFunc) + return newSig +} + +// evalInt evaluates VITESS_HASH(int64) for a single row; a NULL argument yields NULL. 
+func (b *builtinVitessHashSig) evalInt(row chunk.Row) (int64, bool, error) { + shardKeyInt, isNull, err := b.args[0].EvalInt(b.ctx, row) + if isNull || err != nil { + return 0, true, err + } + var hashed uint64 + if hashed, err = vitess.HashUint64(uint64(shardKeyInt)); err != nil { + return 0, true, err + } + return int64(hashed), false, nil +} diff --git a/expression/builtin_miscellaneous_vec.go b/expression/builtin_miscellaneous_vec.go index 8780920c13a04..8e0689be3aab8 100644 --- a/expression/builtin_miscellaneous_vec.go +++ b/expression/builtin_miscellaneous_vec.go @@ -27,6 +27,7 @@ import ( "github.com/pingcap/tidb/sessionctx/variable" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/chunk" + "github.com/pingcap/tidb/util/vitess" ) func (b *builtinInetNtoaSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) error { @@ -617,3 +618,38 @@ func (b *builtinReleaseLockSig) vecEvalInt(input *chunk.Chunk, result *chunk.Col } return nil } + +func (b *builtinVitessHashSig) vectorized() bool { + return true +} + +func (b *builtinVitessHashSig) vecEvalInt(input *chunk.Chunk, result *chunk.Column) error { + n := input.NumRows() + column, err := b.bufAllocator.get(types.ETInt, n) + if err != nil { + return err + } + defer b.bufAllocator.put(column) + + if err := b.args[0].VecEvalInt(b.ctx, input, column); err != nil { + return err + } + + result.ResizeInt64(n, false) + r64s := result.Uint64s() + result.MergeNulls(column) + + for i := 0; i < n; i++ { + if column.IsNull(i) { + continue + } + var uintKey = column.GetUint64(i) + var hash uint64 + if hash, err = vitess.HashUint64(uintKey); err != nil { + return err + } + r64s[i] = hash + } + + return nil +} diff --git a/expression/integration_test.go b/expression/integration_test.go index 6c2523fff6a03..c554524ab8b99 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -8998,3 +8998,75 @@ func (s *testIntegrationSerialSuite) TestIssue23805(c *C) { " UNIQUE KEY `idx_10` 
(`col_26`(5)));") tk.MustExec("insert ignore into tbl_5 set col_28 = 'ZmZIdSnq' , col_25 = '18:50:52.00' on duplicate key update col_26 = 'y';\n") } + +func (s *testIntegrationSuite) TestVitessHash(c *C) { + defer s.cleanEnv(c) + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t_int, t_blob, t_varchar;") + tk.MustExec("create table t_int(id int, a bigint unsigned null);") + tk.MustExec("insert into t_int values " + + "(1, 30375298039), " + + "(2, 1123), " + + "(3, 30573721600), " + + "(4, " + fmt.Sprintf("%d", uint64(math.MaxUint64)) + ")," + + "(5, 116)," + + "(6, null);") + + // Integers + tk.MustQuery("select hex(vitess_hash(a)) from t_int"). + Check(testkit.Rows( + "31265661E5F1133", + "31B565D41BDF8CA", + "1EFD6439F2050FFD", + "355550B2150E2451", + "1E1788FF0FDE093C", + "")) + + // Nested function sanity test + tk.MustQuery("select hex(vitess_hash(convert(a, decimal(8,4)))) from t_int where id = 5"). + Check(testkit.Rows("1E1788FF0FDE093C")) +} + +func (s *testIntegrationSuite) TestVitessHashMatchesVitessShards(c *C) { + defer s.cleanEnv(c) + tk := testkit.NewTestKit(c, s.store) + tk.MustExec("use test") + tk.MustExec("drop table if exists t;") + tk.MustExec("create table t(customer_id bigint, id bigint, expected_shard bigint unsigned, computed_shard bigint unsigned null, primary key (customer_id, id));") + + tk.MustExec("insert into t (customer_id, id, expected_shard) values " + + "(30370720100, 1, x'd6'), " + + "(30370670010, 2, x'd6'), " + + "(30370689320, 3, x'e1'), " + + "(30370693008, 4, x'e0'), " + + "(30370656005, 5, x'89'), " + + "(30370702638, 6, x'89'), " + + "(30370658809, 7, x'ce'), " + + "(30370665369, 8, x'cf'), " + + "(30370706138, 9, x'85'), " + + "(30370708769, 10, x'85'), " + + "(30370711915, 11, x'a3'), " + + "(30370712595, 12, x'a3'), " + + "(30370656340, 13, x'7d'), " + + "(30370660143, 14, x'7c'), " + + "(30371738450, 15, x'fc'), " + + "(30371683979, 16, x'fd'), " + + "(30370664597, 17, 
x'92'), " + + "(30370667361, 18, x'93'), " + + "(30370656406, 19, x'd2'), " + + "(30370716959, 20, x'd3'), " + + "(30375207698, 21, x'9a'), " + + "(30375168766, 22, x'9a'), " + + "(30370711813, 23, x'ca'), " + + "(30370721803, 24, x'ca'), " + + "(30370717957, 25, x'97'), " + + "(30370734969, 26, x'96'), " + + "(30375203572, 27, x'98'), " + + "(30375292643, 28, x'99'); ") + + // Sanity check the shards being computed correctly + tk.MustExec("update t set computed_shard = (vitess_hash(customer_id) >> 56);") + tk.MustQuery("select customer_id, id, hex(expected_shard), hex(computed_shard) from t where expected_shard <> computed_shard"). + Check(testkit.Rows()) +} diff --git a/go.mod b/go.mod index c9595e91666e6..1f9b7c41f6f27 100644 --- a/go.mod +++ b/go.mod @@ -53,7 +53,7 @@ require ( github.com/pingcap/parser v0.0.0-20210325072920-0d17053a8a69 github.com/pingcap/sysutil v0.0.0-20210221112134-a07bda3bde99 github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible - github.com/pingcap/tipb v0.0.0-20210309080453-72c4feaa6da7 + github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b github.com/prometheus/client_golang v1.5.1 github.com/prometheus/client_model v0.2.0 github.com/prometheus/common v0.9.1 @@ -79,7 +79,7 @@ require ( golang.org/x/mod v0.4.2 // indirect golang.org/x/net v0.0.0-20210323141857-08027d57d8cf golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 - golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 + golang.org/x/sys v0.0.0-20210324051608-47abb6519492 golang.org/x/text v0.3.5 golang.org/x/tools v0.1.0 google.golang.org/grpc v1.27.1 diff --git a/go.sum b/go.sum index c6d8028be4201..6e36beeed5cf9 100644 --- a/go.sum +++ b/go.sum @@ -474,8 +474,8 @@ github.com/pingcap/sysutil v0.0.0-20210221112134-a07bda3bde99/go.mod h1:EB/852NM github.com/pingcap/tidb-dashboard v0.0.0-20210312062513-eef5d6404638/go.mod h1:OzFN8H0EDMMqeulPhPMw2i2JaiZWOKFQ7zdRPhENNgo= github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible 
h1:ceznmu/lLseGHP/jKyOa/3u/5H3wtLLLqkH2V3ssSjg= github.com/pingcap/tidb-tools v4.0.9-0.20201127090955-2707c97b3853+incompatible/go.mod h1:XGdcy9+yqlDSEMTpOXnwf3hiTeqrV6MN/u1se9N8yIM= -github.com/pingcap/tipb v0.0.0-20210309080453-72c4feaa6da7 h1:j8MkWmy5tduhHVsdsgZJugN1U9OWTMSBQoZIpn8kqPc= -github.com/pingcap/tipb v0.0.0-20210309080453-72c4feaa6da7/go.mod h1:nsEhnMokcn7MRqd2J60yxpn/ac3ZH8A6GOJ9NslabUo= +github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b h1:sZHSH0mh8PcRbmZlsIqP7CEwnfFuBpmkGt5i9JStLWA= +github.com/pingcap/tipb v0.0.0-20210326161441-1164ca065d1b/go.mod h1:nsEhnMokcn7MRqd2J60yxpn/ac3ZH8A6GOJ9NslabUo= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -798,8 +798,8 @@ golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4 h1:EZ2mChiOa8udjfp6rRmswTbtZN/QzUQp4ptM4rnjHvc= -golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210324051608-47abb6519492 h1:Paq34FxTluEPvVyayQqMPgHm+vTOrIifmcYxFBx9TLg= +golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod 
h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/util/vitess/vitess_hash.go b/util/vitess/vitess_hash.go new file mode 100644 index 0000000000000..08e2d57f2c527 --- /dev/null +++ b/util/vitess/vitess_hash.go @@ -0,0 +1,42 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package vitess + +import ( + "crypto/cipher" + "crypto/des" + "encoding/binary" + + "github.com/pingcap/errors" +) + +var nullKeyBlock cipher.Block + +func init() { + var err error + nullKeyBlock, err = des.NewCipher(make([]byte, 8)) + if err != nil { + panic(errors.Trace(err)) + } +} + +// HashUint64 implements Vitess' method of calculating the hash used to determine a shard key range. +// It DES-encrypts the big-endian bytes of shardKey as one 64-bit block under an all-zero 64-bit key; the returned error is always nil. +func HashUint64(shardKey uint64) (uint64, error) { + var keybytes [8]byte + binary.BigEndian.PutUint64(keybytes[:], shardKey) + var hashed [8]byte + nullKeyBlock.Encrypt(hashed[:], keybytes[:]) + return binary.BigEndian.Uint64(hashed[:]), nil +} diff --git a/util/vitess/vitess_hash_test.go b/util/vitess/vitess_hash_test.go new file mode 100644 index 0000000000000..70a4b5aaf0b83 --- /dev/null +++ b/util/vitess/vitess_hash_test.go @@ -0,0 +1,64 @@ +// Copyright 2021 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package vitess + +import ( + "encoding/binary" + "encoding/hex" + "math" + "strings" + "testing" + + . "github.com/pingcap/check" + "github.com/pingcap/tidb/util/testleak" +) + +var _ = Suite(&testVitessSuite{}) + +func TestT(t *testing.T) { + TestingT(t) +} + +type testVitessSuite struct { +} + +func toHex(value uint64) string { + var keybytes [8]byte + binary.BigEndian.PutUint64(keybytes[:], value) + return strings.ToUpper(hex.EncodeToString(keybytes[:])) +} + +func (s *testVitessSuite) TestVitessHash(c *C) { + defer testleak.AfterTest(c)() + + hashed, err := HashUint64(30375298039) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "031265661E5F1133") + + hashed, err = HashUint64(1123) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "031B565D41BDF8CA") + + hashed, err = HashUint64(30573721600) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "1EFD6439F2050FFD") + + hashed, err = HashUint64(116) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "1E1788FF0FDE093C") + + hashed, err = HashUint64(math.MaxUint64) + c.Assert(err, IsNil) + c.Assert(toHex(hashed), Equals, "355550B2150E2451") +}