Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

charset: add utf8_unicode_ci and utf8mb4_unicode_ci interface (#18678) #22099

Merged
merged 6 commits into from
Jan 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions ddl/serial_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1096,21 +1096,26 @@ func (s *testSerialSuite) TestModifyingColumn4NewCollations(c *C) {
// Column collation can be changed as long as there is no index defined.
tk.MustExec("alter table t modify b varchar(10) collate utf8_general_ci")
tk.MustExec("alter table t modify c varchar(10) collate utf8_bin")
tk.MustExec("alter table t modify c varchar(10) collate utf8_unicode_ci")
tk.MustExec("alter table t charset utf8 collate utf8_general_ci")
tk.MustExec("alter table t convert to charset utf8 collate utf8_bin")
tk.MustExec("alter table t convert to charset utf8 collate utf8_unicode_ci")
tk.MustExec("alter table t convert to charset utf8 collate utf8_general_ci")
tk.MustExec("alter table t modify b varchar(10) collate utf8_unicode_ci")
tk.MustExec("alter table t modify b varchar(10) collate utf8_bin")

tk.MustExec("alter table t add index b_idx(b)")
tk.MustExec("alter table t add index c_idx(c)")
tk.MustGetErrMsg("alter table t modify b varchar(10) collate utf8_general_ci", "[ddl:8200]Unsupported modifying collation of column 'b' from 'utf8_bin' to 'utf8_general_ci' when index is defined on it.")
tk.MustGetErrMsg("alter table t modify c varchar(10) collate utf8_bin", "[ddl:8200]Unsupported modifying collation of column 'c' from 'utf8_general_ci' to 'utf8_bin' when index is defined on it.")
tk.MustGetErrMsg("alter table t modify c varchar(10) collate utf8_unicode_ci", "[ddl:8200]Unsupported modifying collation of column 'c' from 'utf8_general_ci' to 'utf8_unicode_ci' when index is defined on it.")
tk.MustGetErrMsg("alter table t convert to charset utf8 collate utf8_general_ci", "[ddl:8200]Unsupported converting collation of column 'b' from 'utf8_bin' to 'utf8_general_ci' when index is defined on it.")
// Change to a compatible collation is allowed.
tk.MustExec("alter table t modify c varchar(10) collate utf8mb4_general_ci")
// Change the default collation of table is allowed.
tk.MustExec("alter table t collate utf8mb4_general_ci")
tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin")
tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_unicode_ci")
// Change the default collation of database is allowed.
tk.MustExec("alter database dct charset utf8mb4 collate utf8mb4_general_ci")
}
Expand All @@ -1124,27 +1129,27 @@ func (s *testSerialSuite) TestForbidUnsupportedCollations(c *C) {
tk.MustGetErrMsg(sql, fmt.Sprintf("[ddl:1273]Unsupported collation when new collation is enabled: '%s'", coll))
}
// Test default collation of database.
mustGetUnsupportedCollation("create database ucd charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("create database ucd charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("create database ucd charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("create database ucd charset utf8 collate utf8_roman_ci", "utf8_roman_ci")
tk.MustExec("create database ucd")
mustGetUnsupportedCollation("alter database ucd charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter database ucd collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter database ucd charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("alter database ucd collate utf8mb4_roman_ci", "utf8mb4_roman_ci")

// Test default collation of table.
tk.MustExec("use ucd")
mustGetUnsupportedCollation("create table t(a varchar(20)) charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("create table t(a varchar(20)) collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("create table t(a varchar(20)) charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("create table t(a varchar(20)) collate utf8_roman_ci", "utf8_roman_ci")
tk.MustExec("create table t(a varchar(20)) collate utf8mb4_general_ci")
mustGetUnsupportedCollation("alter table t default collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter table t convert to charset utf8mb4 collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter table t default collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("alter table t convert to charset utf8mb4 collate utf8mb4_roman_ci", "utf8mb4_roman_ci")

// Test collation of columns.
mustGetUnsupportedCollation("create table t1(a varchar(20)) collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("create table t1(a varchar(20)) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("create table t1(a varchar(20)) collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("create table t1(a varchar(20)) charset utf8 collate utf8_roman_ci", "utf8_roman_ci")
tk.MustExec("create table t1(a varchar(20))")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) collate utf8mb4_roman_ci", "utf8mb4_roman_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci")
mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci")

// TODO(bb7133): fix the following cases by setting charset from collate firstly.
// mustGetUnsupportedCollation("create database ucd collate utf8mb4_unicode_ci", errMsgUnsupportedUnicodeCI)
Expand Down
20 changes: 11 additions & 9 deletions executor/seqtest/seq_executor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1237,8 +1237,10 @@ func (s *seqTestSuite) TestShowForNewCollations(c *C) {
"latin1_bin latin1 47 Yes Yes 1",
"utf8_bin utf8 83 Yes Yes 1",
"utf8_general_ci utf8 33 Yes 1",
"utf8_unicode_ci utf8 192 Yes 1",
"utf8mb4_bin utf8mb4 46 Yes Yes 1",
"utf8mb4_general_ci utf8mb4 45 Yes 1",
"utf8mb4_unicode_ci utf8mb4 224 Yes 1",
)
tk.MustQuery("show collation").Check(expectRows)
tk.MustQuery("select * from information_schema.COLLATIONS").Check(expectRows)
Expand All @@ -1253,15 +1255,15 @@ func (s *seqTestSuite) TestForbidUnsupportedCollations(c *C) {
tk.MustGetErrMsg(sql, fmt.Sprintf("[ddl:1273]Unsupported collation when new collation is enabled: '%s'", coll))
}

mustGetUnsupportedCollation("select 'a' collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("select cast('a' as char) collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("set names utf8 collate utf8_unicode_ci", "utf8_unicode_ci")
mustGetUnsupportedCollation("set session collation_server = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set session collation_database = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set session collation_connection = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set global collation_server = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set global collation_database = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("set global collation_connection = 'utf8_unicode_ci'", "utf8_unicode_ci")
mustGetUnsupportedCollation("select 'a' collate utf8_roman_ci", "utf8_roman_ci")
mustGetUnsupportedCollation("select cast('a' as char) collate utf8_roman_ci", "utf8_roman_ci")
mustGetUnsupportedCollation("set names utf8 collate utf8_roman_ci", "utf8_roman_ci")
mustGetUnsupportedCollation("set session collation_server = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set session collation_database = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set session collation_connection = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set global collation_server = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set global collation_database = 'utf8_roman_ci'", "utf8_roman_ci")
mustGetUnsupportedCollation("set global collation_connection = 'utf8_roman_ci'", "utf8_roman_ci")
}

func (s *seqTestSuite) TestAutoIncIDInRetry(c *C) {
Expand Down
2 changes: 2 additions & 0 deletions expression/distsql_builtin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ func (s *testEvalSerialSuite) TestPBToExprWithNewCollation(c *C) {
{"utf8mb4_general_ci", "utf8mb4_general_ci", 45, 45},
{"", "utf8mb4_bin", 46, 46},
{"some_error_collation", "utf8mb4_bin", 46, 46},
{"utf8_unicode_ci", "utf8_unicode_ci", 192, 192},
{"utf8mb4_unicode_ci", "utf8mb4_unicode_ci", 224, 224},
}

for _, cs := range cases {
Expand Down
2 changes: 2 additions & 0 deletions expression/expr_to_pb_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) {
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarString, 3), "utf8mb4_general_ci"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeString, 4), "utf8mb4_0900_ai_ci"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 5), "utf8_bin"))
colExprs = append(colExprs, columnCollation(dg.genColumn(mysql.TypeVarchar, 6), "utf8_unicode_ci"))
pushed, _ := PushDownExprs(sc, colExprs, client, kv.UnSpecified)
c.Assert(len(pushed), Equals, len(colExprs))
pbExprs, err := ExpressionsToPBList(sc, colExprs, client)
Expand All @@ -894,6 +895,7 @@ func (s *testEvaluatorSerialSuites) TestNewCollationsEnabled(c *C) {
"{\"tp\":201,\"val\":\"gAAAAAAAAAM=\",\"sig\":0,\"field_type\":{\"tp\":253,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-45,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAQ=\",\"sig\":0,\"field_type\":{\"tp\":254,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-255,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAU=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-83,\"charset\":\"\"}}",
"{\"tp\":201,\"val\":\"gAAAAAAAAAY=\",\"sig\":0,\"field_type\":{\"tp\":15,\"flag\":0,\"flen\":-1,\"decimal\":-1,\"collate\":-192,\"charset\":\"\"}}",
}
for i, pbExpr := range pbExprs {
c.Assert(pbExprs, NotNil)
Expand Down
9 changes: 8 additions & 1 deletion util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ func CompatibleCollate(collate1, collate2 string) bool {
return true
} else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") {
return true
} else if (collate1 == "utf8mb4_unicode_ci" || collate1 == "utf8_unicode_ci") && (collate2 == "utf8mb4_unicode_ci" || collate2 == "utf8_unicode_ci") {
return true
} else {
return collate1 == collate2
}
Expand Down Expand Up @@ -217,7 +219,8 @@ func truncateTailingSpace(str string) string {

// IsCICollation reports whether collate names a case-insensitive ("_ci")
// collation handled by this package.
//
// It returns true for the general_ci and unicode_ci collations of both the
// utf8 and utf8mb4 charsets, and false for every other name (including the
// empty string).
func IsCICollation(collate string) bool {
	switch collate {
	case "utf8_general_ci", "utf8mb4_general_ci",
		"utf8_unicode_ci", "utf8mb4_unicode_ci":
		return true
	default:
		return false
	}
}

func init() {
Expand All @@ -238,4 +241,8 @@ func init() {
newCollatorIDMap[int(mysql.CollationNames["utf8mb4_general_ci"])] = &generalCICollator{}
newCollatorMap["utf8_general_ci"] = &generalCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8_general_ci"])] = &generalCICollator{}
newCollatorMap["utf8mb4_unicode_ci"] = &unicodeCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8mb4_unicode_ci"])] = &unicodeCICollator{}
newCollatorMap["utf8_unicode_ci"] = &unicodeCICollator{}
newCollatorIDMap[int(mysql.CollationNames["utf8_unicode_ci"])] = &unicodeCICollator{}
}
8 changes: 8 additions & 0 deletions util/collate/collate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,16 @@ func (s *testCollateSuite) TestGetCollator(c *C) {
c.Assert(GetCollator("utf8_bin"), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollator("utf8mb4_general_ci"), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollator("default_test"), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(46), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollatorByID(83), FitsTypeOf, &binPaddingCollator{})
c.Assert(GetCollatorByID(45), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollatorByID(33), FitsTypeOf, &generalCICollator{})
c.Assert(GetCollatorByID(224), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollatorByID(192), FitsTypeOf, &unicodeCICollator{})
c.Assert(GetCollatorByID(9999), FitsTypeOf, &binPaddingCollator{})

SetNewCollationEnabledForTest(false)
Expand All @@ -178,11 +182,15 @@ func (s *testCollateSuite) TestGetCollator(c *C) {
c.Assert(GetCollator("utf8_bin"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8mb4_general_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8_general_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8mb4_unicode_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("utf8_unicode_ci"), FitsTypeOf, &binCollator{})
c.Assert(GetCollator("default_test"), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(63), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(46), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(83), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(45), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(33), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(224), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(192), FitsTypeOf, &binCollator{})
c.Assert(GetCollatorByID(9999), FitsTypeOf, &binCollator{})
}
47 changes: 47 additions & 0 deletions util/collate/unicode_ci.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package collate

// unicodeCICollator is the collator registered for the utf8_unicode_ci and
// utf8mb4_unicode_ci collations. It is currently a placeholder: the methods
// below do not yet perform any real comparison.
type unicodeCICollator struct{}

// Compare implements the Collator interface.
// Placeholder: it ignores both arguments and always returns 0 until the real
// comparison is implemented.
func (uc *unicodeCICollator) Compare(a, b string) int {
	return 0
}

// Key implements the Collator interface.
// Placeholder: it ignores str and always returns an empty, non-nil byte slice
// until the real sort-key generation is implemented.
func (uc *unicodeCICollator) Key(str string) []byte {
	return make([]byte, 0)
}

// Pattern implements the Collator interface.
// It hands back a freshly allocated, zero-valued unicodePattern on every call.
func (uc *unicodeCICollator) Pattern() WildcardPattern {
	return new(unicodePattern)
}

// unicodePattern is the WildcardPattern returned by unicodeCICollator.
// It is currently a placeholder and performs no matching.
type unicodePattern struct {
	// NOTE(review): neither field is read or written by the stub methods
	// below; presumably they will hold the compiled pattern's characters and
	// their per-character match kinds — confirm when implementing.
	patChars []rune
	patTypes []byte
}

// Compile implements the WildcardPattern interface.
// Placeholder: it ignores patternStr and escape and leaves the receiver
// untouched until the real compilation is implemented.
func (p *unicodePattern) Compile(patternStr string, escape byte) {
	// Intentionally empty.
}

// DoMatch implements the WildcardPattern interface.
// Placeholder: it ignores str and always reports no match until the real
// matching is implemented.
func (p *unicodePattern) DoMatch(str string) bool {
	return false
}