From f32d36e5ffb7fda6f8bcf719eb90371261395b11 Mon Sep 17 00:00:00 2001 From: xiongjiwei Date: Fri, 24 Jul 2020 17:16:03 +0800 Subject: [PATCH] cherry pick #18678 to release-4.0 Signed-off-by: ti-srebot --- ddl/serial_test.go | 31 ++++++++++-------- executor/seqtest/seq_executor_test.go | 20 +++++++----- expression/distsql_builtin_test.go | 2 ++ expression/expr_to_pb_test.go | 2 ++ util/collate/collate.go | 9 ++++- util/collate/collate_test.go | 8 +++++ util/collate/unicode_ci.go | 47 +++++++++++++++++++++++++++ 7 files changed, 96 insertions(+), 23 deletions(-) create mode 100644 util/collate/unicode_ci.go diff --git a/ddl/serial_test.go b/ddl/serial_test.go index 5b0449f9dd02d..684e795482c4b 100644 --- a/ddl/serial_test.go +++ b/ddl/serial_test.go @@ -1096,21 +1096,26 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) { // Column collation can be changed as long as there is no index defined. tk.MustExec("alter table t modify b varchar(10) collate utf8_general_ci") tk.MustExec("alter table t modify c varchar(10) collate utf8_bin") + tk.MustExec("alter table t modify c varchar(10) collate utf8_unicode_ci") tk.MustExec("alter table t charset utf8 collate utf8_general_ci") tk.MustExec("alter table t convert to charset utf8 collate utf8_bin") + tk.MustExec("alter table t convert to charset utf8 collate utf8_unicode_ci") tk.MustExec("alter table t convert to charset utf8 collate utf8_general_ci") + tk.MustExec("alter table t modify b varchar(10) collate utf8_unicode_ci") tk.MustExec("alter table t modify b varchar(10) collate utf8_bin") tk.MustExec("alter table t add index b_idx(b)") tk.MustExec("alter table t add index c_idx(c)") tk.MustGetErrMsg("alter table t modify b varchar(10) collate utf8_general_ci", "[ddl:8200]Unsupported modifying collation of column 'b' from 'utf8_bin' to 'utf8_general_ci' when index is defined on it.") tk.MustGetErrMsg("alter table t modify c varchar(10) collate utf8_bin", "[ddl:8200]Unsupported modifying collation of 
column 'c' from 'utf8_general_ci' to 'utf8_bin' when index is defined on it.") + tk.MustGetErrMsg("alter table t modify c varchar(10) collate utf8_unicode_ci", "[ddl:8200]Unsupported modifying collation of column 'c' from 'utf8_general_ci' to 'utf8_unicode_ci' when index is defined on it.") tk.MustGetErrMsg("alter table t convert to charset utf8 collate utf8_general_ci", "[ddl:8200]Unsupported converting collation of column 'b' from 'utf8_bin' to 'utf8_general_ci' when index is defined on it.") // Change to a compatible collation is allowed. tk.MustExec("alter table t modify c varchar(10) collate utf8mb4_general_ci") // Change the default collation of table is allowed. tk.MustExec("alter table t collate utf8mb4_general_ci") tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin") + tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_unicode_ci") // Change the default collation of database is allowed. tk.MustExec("alter database dct charset utf8mb4 collate utf8mb4_general_ci") } @@ -1124,27 +1129,27 @@ func (s *testSerialSuite) TestForbidUnsupportedCollations(c *C) { tk.MustGetErrMsg(sql, fmt.Sprintf("[ddl:1273]Unsupported collation when new collation is enabled: '%s'", coll)) } // Test default collation of database. 
- mustGetUnsupportedCollation("create database ucd charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") - mustGetUnsupportedCollation("create database ucd charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci") + mustGetUnsupportedCollation("create database ucd charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci") + mustGetUnsupportedCollation("create database ucd charset utf8 collate utf8_roman_ci", "utf8_roman_ci") tk.MustExec("create database ucd") - mustGetUnsupportedCollation("alter database ucd charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") - mustGetUnsupportedCollation("alter database ucd collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") + mustGetUnsupportedCollation("alter database ucd charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci") + mustGetUnsupportedCollation("alter database ucd collate utf8mb4_roman_ci", "utf8mb4_roman_ci") // Test default collation of table. tk.MustExec("use ucd") - mustGetUnsupportedCollation("create table t(a varchar(20)) charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") - mustGetUnsupportedCollation("create table t(a varchar(20)) collate utf8_unicode_ci", "utf8_unicode_ci") + mustGetUnsupportedCollation("create table t(a varchar(20)) charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci") + mustGetUnsupportedCollation("create table t(a varchar(20)) collate utf8_roman_ci", "utf8_roman_ci") tk.MustExec("create table t(a varchar(20)) collate utf8mb4_general_ci") - mustGetUnsupportedCollation("alter table t default collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") - mustGetUnsupportedCollation("alter table t convert to charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") + mustGetUnsupportedCollation("alter table t default collate utf8mb4_roman_ci", "utf8mb4_roman_ci") + mustGetUnsupportedCollation("alter table t convert to charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci") // Test collation of columns. 
- mustGetUnsupportedCollation("create table t1(a varchar(20)) collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") - mustGetUnsupportedCollation("create table t1(a varchar(20)) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci") + mustGetUnsupportedCollation("create table t1(a varchar(20)) collate utf8mb4_roman_ci", "utf8mb4_roman_ci") + mustGetUnsupportedCollation("create table t1(a varchar(20)) charset utf8 collate utf8_roman_ci", "utf8_roman_ci") tk.MustExec("create table t1(a varchar(20))") - mustGetUnsupportedCollation("alter table t1 modify a varchar(20) collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") - mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci") - mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci") + mustGetUnsupportedCollation("alter table t1 modify a varchar(20) collate utf8mb4_roman_ci", "utf8mb4_roman_ci") + mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci") + mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci") // TODO(bb7133): fix the following cases by setting charset from collate firstly. 
// mustGetUnsupportedCollation("create database ucd collate utf8mb4_unicode_ci", errMsgUnsupportedUnicodeCI) diff --git a/executor/seqtest/seq_executor_test.go b/executor/seqtest/seq_executor_test.go index 46cffc1a895b5..8660cc175500b 100644 --- a/executor/seqtest/seq_executor_test.go +++ b/executor/seqtest/seq_executor_test.go @@ -1237,8 +1237,10 @@ func (s *seqTestSuite) TestShowForNewCollations(c *C) { "latin1_bin latin1 47 Yes Yes 1", "utf8_bin utf8 83 Yes Yes 1", "utf8_general_ci utf8 33 Yes 1", + "utf8_unicode_ci utf8 192 Yes 1", "utf8mb4_bin utf8mb4 46 Yes Yes 1", "utf8mb4_general_ci utf8mb4 45 Yes 1", + "utf8mb4_unicode_ci utf8mb4 224 Yes 1", ) tk.MustQuery("show collation").Check(expectRows) tk.MustQuery("select * from information_schema.COLLATIONS").Check(expectRows) @@ -1253,15 +1255,15 @@ func (s *seqTestSuite) TestForbidUnsupportedCollations(c *C) { tk.MustGetErrMsg(sql, fmt.Sprintf("[ddl:1273]Unsupported collation when new collation is enabled: '%s'", coll)) } - mustGetUnsupportedCollation("select 'a' collate utf8_unicode_ci", "utf8_unicode_ci") - mustGetUnsupportedCollation("select cast('a' as char) collate utf8_unicode_ci", "utf8_unicode_ci") - mustGetUnsupportedCollation("set names utf8 collate utf8_unicode_ci", "utf8_unicode_ci") - mustGetUnsupportedCollation("set session collation_server = 'utf8_unicode_ci'", "utf8_unicode_ci") - mustGetUnsupportedCollation("set session collation_database = 'utf8_unicode_ci'", "utf8_unicode_ci") - mustGetUnsupportedCollation("set session collation_connection = 'utf8_unicode_ci'", "utf8_unicode_ci") - mustGetUnsupportedCollation("set global collation_server = 'utf8_unicode_ci'", "utf8_unicode_ci") - mustGetUnsupportedCollation("set global collation_database = 'utf8_unicode_ci'", "utf8_unicode_ci") - mustGetUnsupportedCollation("set global collation_connection = 'utf8_unicode_ci'", "utf8_unicode_ci") + mustGetUnsupportedCollation("select 'a' collate utf8_roman_ci", "utf8_roman_ci") + 
mustGetUnsupportedCollation("select cast('a' as char) collate utf8_roman_ci", "utf8_roman_ci") + mustGetUnsupportedCollation("set names utf8 collate utf8_roman_ci", "utf8_roman_ci") + mustGetUnsupportedCollation("set session collation_server = 'utf8_roman_ci'", "utf8_roman_ci") + mustGetUnsupportedCollation("set session collation_database = 'utf8_roman_ci'", "utf8_roman_ci") + mustGetUnsupportedCollation("set session collation_connection = 'utf8_roman_ci'", "utf8_roman_ci") + mustGetUnsupportedCollation("set global collation_server = 'utf8_roman_ci'", "utf8_roman_ci") + mustGetUnsupportedCollation("set global collation_database = 'utf8_roman_ci'", "utf8_roman_ci") + mustGetUnsupportedCollation("set global collation_connection = 'utf8_roman_ci'", "utf8_roman_ci") } func (s *seqTestSuite) TestAutoIncIDInRetry(c *C) { diff --git a/expression/distsql_builtin_test.go b/expression/distsql_builtin_test.go index 3d4e036e9d59d..a438ea80a65d7 100644 --- a/expression/distsql_builtin_test.go +++ b/expression/distsql_builtin_test.go @@ -54,6 +54,8 @@ func (s *testEvalSerialSuite) TestPBToExprWithNewCollation(c *C) { {"utf8mb4_general_ci", "utf8mb4_general_ci", 45, 45}, {"", "utf8mb4_bin", 46, 46}, {"some_error_collation", "utf8mb4_bin", 46, 46}, + {"utf8_unicode_ci", "utf8_unicode_ci", 192, 192}, + {"utf8mb4_unicode_ci", "utf8mb4_unicode_ci", 224, 224}, } for _, cs := range cases { diff --git a/expression/expr_to_pb_test.go b/expression/expr_to_pb_test.go index e65d393a3fb92..88135445a3d94 100644 --- a/expression/expr_to_pb_test.go +++ b/expression/expr_to_pb_test.go @@ -884,6 +884,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) { colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarString, 3), "utf8mb4_general_ci")) colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeString, 4), "utf8mb4_0900_ai_ci")) colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 5), "utf8_bin")) + colExprs = append(colExprs, 
columnCollation(dg.genColumn(mysql.TypeVarchar, 6), "utf8_unicode_ci")) pushed, _ := PushDownExprs(sc, colExprs, client, kv.UnSpecified) c.Assert(len(pushed), Equals, len(colExprs)) pbExprs, err := ExpressionsToPBList(sc, colExprs, client) @@ -894,6 +895,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) { "{\"tp\":201,\"val\":\"gAAAAAAAAAM=\",\"sig\":0,\"field_type\":{\"tp\":253,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-45,\"charset\":\"\"}}", "{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}", "{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}", + "{\"tp\":201,\"val\":\"gAAAAAAAAAY=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-192,\"charset\":\"\"}}", } for i, pbExpr := range pbExprs { c.Assert(pbExprs, NotNil) diff --git a/util/collate/collate.go b/util/collate/collate.go index ec99afcf2dfd5..32c73c3da617e 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -90,6 +90,8 @@ func CompatibleCollate(collate1, collate2 string) bool { return true } else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") { return true + } else if (collate1 == "utf8mb4_unicode_ci" || collate1 == "utf8_unicode_ci") && (collate2 == "utf8mb4_unicode_ci" || collate2 == "utf8_unicode_ci") { + return true } else { return collate1 == collate2 } @@ -217,7 +219,8 @@ func truncateTailingSpace(str string) string { // IsCICollation returns if the collation is case-sensitive func IsCICollation(collate string) bool { - return collate == "utf8_general_ci" || collate == "utf8mb4_general_ci" + return collate == "utf8_general_ci" || collate == "utf8mb4_general_ci" || + collate == "utf8_unicode_ci" || collate == "utf8mb4_unicode_ci" } func init() { @@ -238,4 
+241,8 @@ func init() { newCollatorIDMap[int(mysql.CollationNames["utf8mb4_general_ci"])] = &generalCICollator{} newCollatorMap["utf8_general_ci"] = &generalCICollator{} newCollatorIDMap[int(mysql.CollationNames["utf8_general_ci"])] = &generalCICollator{} + newCollatorMap["utf8mb4_unicode_ci"] = &unicodeCICollator{} + newCollatorIDMap[int(mysql.CollationNames["utf8mb4_unicode_ci"])] = &unicodeCICollator{} + newCollatorMap["utf8_unicode_ci"] = &unicodeCICollator{} + newCollatorIDMap[int(mysql.CollationNames["utf8_unicode_ci"])] = &unicodeCICollator{} } diff --git a/util/collate/collate_test.go b/util/collate/collate_test.go index 358807a60a5c6..446afb209e812 100644 --- a/util/collate/collate_test.go +++ b/util/collate/collate_test.go @@ -164,12 +164,16 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollator("utf8_bin"), FitsTypeOf, &binPaddingCollator{}) c.Assert(GetCollator("utf8mb4_general_ci"), FitsTypeOf, &generalCICollator{}) c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &generalCICollator{}) + c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &unicodeCICollator{}) + c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &unicodeCICollator{}) c.Assert(GetCollator("default_test"), FitsTypeOf, &binPaddingCollator{}) c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(46), FitsTypeOf, &binPaddingCollator{}) c.Assert(GetCollatorByID(83), FitsTypeOf, &binPaddingCollator{}) c.Assert(GetCollatorByID(45), FitsTypeOf, &generalCICollator{}) c.Assert(GetCollatorByID(33), FitsTypeOf, &generalCICollator{}) + c.Assert(GetCollatorByID(224), FitsTypeOf, &unicodeCICollator{}) + c.Assert(GetCollatorByID(192), FitsTypeOf, &unicodeCICollator{}) c.Assert(GetCollatorByID(9999), FitsTypeOf, &binPaddingCollator{}) SetNewCollationEnabledForTest(false) @@ -178,11 +182,15 @@ func (s *testCollateSuite) TestGetCollator(c *C) { c.Assert(GetCollator("utf8_bin"), FitsTypeOf, &binCollator{}) 
c.Assert(GetCollator("utf8mb4_general_ci"), FitsTypeOf, &binCollator{}) c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &binCollator{}) + c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &binCollator{}) + c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &binCollator{}) c.Assert(GetCollator("default_test"), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(46), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(83), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(45), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(33), FitsTypeOf, &binCollator{}) + c.Assert(GetCollatorByID(224), FitsTypeOf, &binCollator{}) + c.Assert(GetCollatorByID(192), FitsTypeOf, &binCollator{}) c.Assert(GetCollatorByID(9999), FitsTypeOf, &binCollator{}) } diff --git a/util/collate/unicode_ci.go b/util/collate/unicode_ci.go new file mode 100644 index 0000000000000..0492c851c881e --- /dev/null +++ b/util/collate/unicode_ci.go @@ -0,0 +1,47 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package collate + +type unicodeCICollator struct { +} + +// Compare implements Collator interface. It always returns 0 for now; this is a temporary placeholder until the real comparison is implemented. +func (uc *unicodeCICollator) Compare(a, b string) int { + return 0 +} + +// Key implements Collator interface. It always returns an empty byte slice for now; this is a temporary placeholder until the real key generation is implemented. +func (uc *unicodeCICollator) Key(str string) []byte { + return []byte{} +} + +// Pattern implements Collator interface.
+func (uc *unicodeCICollator) Pattern() WildcardPattern { + return &unicodePattern{} +} + +type unicodePattern struct { + patChars []rune + patTypes []byte +} + +// Compile implements WildcardPattern interface. It does nothing for now; this is a temporary placeholder until the real pattern compilation is implemented. +func (p *unicodePattern) Compile(patternStr string, escape byte) { + +} + +// DoMatch implements WildcardPattern interface. It always returns false for now; this is a temporary placeholder until the real matching is implemented. +func (p *unicodePattern) DoMatch(str string) bool { + return false +}