Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

charset: Support parsing CHARSET=utf8mb3 #37084

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cmd/explaintest/r/collation_misc_disabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ binary 1
gbk_bin 2
latin1_bin 1
utf8_bin 3
utf8mb3_general_ci 3
utf8mb4_bin 4
SELECT character_set_name, id, sortlen FROM information_schema.collations ORDER BY collation_name, id;
character_set_name id sortlen
Expand All @@ -99,6 +100,7 @@ binary 63 1
gbk 87 1
latin1 47 1
utf8 83 1
utf8mb3 33 1
utf8mb4 46 1
select * from information_schema.COLLATION_CHARACTER_SET_APPLICABILITY where COLLATION_NAME='utf8mb4_bin';
COLLATION_NAME CHARACTER_SET_NAME
Expand All @@ -110,9 +112,11 @@ binary binary binary 1
gbk Chinese Internal Code Specification gbk_bin 2
latin1 Latin1 latin1_bin 1
utf8 UTF-8 Unicode utf8_bin 3
utf8mb3 UTF-8 Unicode utf8mb3_general_ci 3
utf8mb4 UTF-8 Unicode utf8mb4_bin 4
show collation;
Collation Charset Id Default Compiled Sortlen
utf8mb3_general_ci utf8mb3 33 Yes Yes 1
utf8mb4_bin utf8mb4 46 Yes Yes 1
latin1_bin latin1 47 Yes Yes 1
binary binary 63 Yes Yes 1
Expand Down
2 changes: 2 additions & 0 deletions cmd/explaintest/r/collation_misc_enabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ binary 1
gbk_chinese_ci 2
latin1_bin 1
utf8_bin 3
utf8mb3_general_ci 3
utf8mb4_bin 4
SELECT character_set_name, id, sortlen FROM information_schema.collations ORDER BY collation_name, id;
character_set_name id sortlen
Expand All @@ -118,6 +119,7 @@ binary binary binary 1
gbk Chinese Internal Code Specification gbk_chinese_ci 2
latin1 Latin1 latin1_bin 1
utf8 UTF-8 Unicode utf8_bin 3
utf8mb3 UTF-8 Unicode utf8mb3_general_ci 3
utf8mb4 UTF-8 Unicode utf8mb4_bin 4
show collation;
Collation Charset Id Default Compiled Sortlen
Expand Down
7 changes: 7 additions & 0 deletions parser/charset/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ var supportedCollations = make([]*Collation, 0, len(supportedCollationNames))
// CharacterSetInfos contains all the supported charsets.
var CharacterSetInfos = map[string]*Charset{
CharsetUTF8: {CharsetUTF8, CollationUTF8, make(map[string]*Collation), "UTF-8 Unicode", 3},
CharsetUTF8MB3: {CharsetUTF8MB3, CollationUTF8MB3, make(map[string]*Collation), "UTF-8 Unicode", 3},
CharsetUTF8MB4: {CharsetUTF8MB4, CollationUTF8MB4, make(map[string]*Collation), "UTF-8 Unicode", 4},
CharsetASCII: {CharsetASCII, CollationASCII, make(map[string]*Collation), "US ASCII", 1},
CharsetLatin1: {CharsetLatin1, CollationLatin1, make(map[string]*Collation), "Latin1", 1},
Expand All @@ -67,6 +68,7 @@ var CharacterSetInfos = map[string]*Charset{
// All the names of supported collations should be in the following table.
var supportedCollationNames = map[string]struct{}{
CollationUTF8: {},
CollationUTF8MB3: {},
CollationUTF8MB4: {},
CollationASCII: {},
CollationLatin1: {},
Expand Down Expand Up @@ -204,6 +206,8 @@ const (
CollationBin = "binary"
// CollationUTF8 is the default collation for CharsetUTF8.
CollationUTF8 = "utf8_bin"
// CollationUTF8MB3 is the default collation for CharsetUTF8MB3.
CollationUTF8MB3 = "utf8mb3_general_ci"
// CollationUTF8MB4 is the default collation for CharsetUTF8MB4.
CollationUTF8MB4 = "utf8mb4_bin"
// CollationASCII is the default collation for CharsetASCII.
Expand All @@ -225,6 +229,8 @@ const (
CharsetLatin1 = "latin1"
// CharsetUTF8 is the default charset for string types.
CharsetUTF8 = "utf8"
// CharsetUTF8MB3 is another name for CharsetUTF8.
CharsetUTF8MB3 = "utf8mb3"
// CharsetUTF8MB4 represents 4 bytes utf8, which works the same way as utf8 in Go.
CharsetUTF8MB4 = "utf8mb4"
//revive:disable:exported
Expand Down Expand Up @@ -344,6 +350,7 @@ var collations = []*Collation{
{31, "latin1", "latin1_german2_ci", false},
{32, "armscii8", "armscii8_general_ci", true},
{33, "utf8", "utf8_general_ci", false},
{33, "utf8mb3", "utf8mb3_general_ci", true},
{34, "cp1250", "cp1250_czech_cs", false},
{35, "ucs2", "ucs2_general_ci", true},
{36, "cp866", "cp866_general_ci", true},
Expand Down
1 change: 1 addition & 0 deletions parser/charset/charset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func TestGetDefaultCollation(t *testing.T) {
}{
{"utf8", "utf8_bin", true},
{"UTF8", "utf8_bin", true},
{"utf8mb3", "utf8mb3_general_ci", true},
{"utf8mb4", "utf8mb4_bin", true},
{"ascii", "ascii_bin", true},
{"binary", "binary", true},
Expand Down