From 55c452774cb978273a5382c95c06829f73e5c5e9 Mon Sep 17 00:00:00 2001 From: Jason Mo Date: Thu, 5 May 2022 16:21:24 +0800 Subject: [PATCH 1/7] *: set latin1 to be an alias for utf8mb4 in TiDB --- ddl/ddl_api.go | 6 ++++-- parser/charset/encoding_latin1.go | 28 ++-------------------------- parser/mysql/charset.go | 5 +++-- util/collate/collate.go | 2 +- 4 files changed, 10 insertions(+), 31 deletions(-) diff --git a/ddl/ddl_api.go b/ddl/ddl_api.go index b0037aba2c99e..101dfb1b38b2a 100644 --- a/ddl/ddl_api.go +++ b/ddl/ddl_api.go @@ -4177,8 +4177,10 @@ func checkModifyCharsetAndCollation(toCharset, toCollate, origCharset, origColla if (origCharset == charset.CharsetUTF8 && toCharset == charset.CharsetUTF8MB4) || (origCharset == charset.CharsetUTF8 && toCharset == charset.CharsetUTF8) || - (origCharset == charset.CharsetUTF8MB4 && toCharset == charset.CharsetUTF8MB4) { - // TiDB only allow utf8 to be changed to utf8mb4, or changing the collation when the charset is utf8/utf8mb4. + (origCharset == charset.CharsetUTF8MB4 && toCharset == charset.CharsetUTF8MB4) || + (origCharset == charset.CharsetLatin1 && toCharset == charset.CharsetUTF8) || + (origCharset == charset.CharsetLatin1 && toCharset == charset.CharsetUTF8MB4) { + // TiDB only allow utf8/latin1 to be changed to utf8mb4, or changing the collation when the charset is utf8/utf8mb4/latin1. return nil } diff --git a/parser/charset/encoding_latin1.go b/parser/charset/encoding_latin1.go index 38f9bb601ac4e..d14f188c03a4c 100644 --- a/parser/charset/encoding_latin1.go +++ b/parser/charset/encoding_latin1.go @@ -14,20 +14,18 @@ package charset import ( - "bytes" - "golang.org/x/text/encoding" ) // EncodingLatin1Impl is the instance of encodingLatin1. -// TiDB uses utf8 implementation for latin1 charset because of the backward compatibility. +// In TiDB, latin1 is an alias for utf8, so uses utf8 implementation for latin1 var EncodingLatin1Impl = &encodingLatin1{encodingUTF8{encodingBase{enc: encoding.Nop}}} func init() { EncodingLatin1Impl.self = EncodingLatin1Impl } -// encodingLatin1 compatibles with latin1 in old version TiDB. +// encodingLatin1 compatibles with latin1 in old version TiDB type encodingLatin1 struct { encodingUTF8 } @@ -36,25 +34,3 @@ type encodingLatin1 struct { func (e *encodingLatin1) Name() string { return CharsetLatin1 } - -// Peek implements Encoding interface. -func (e *encodingLatin1) Peek(src []byte) []byte { - if len(src) == 0 { - return src - } - return src[:1] -} - -// IsValid implements Encoding interface. -func (e *encodingLatin1) IsValid(src []byte) bool { - return true -} - -// Tp implements Encoding interface. -func (e *encodingLatin1) Tp() EncodingTp { - return EncodingTpLatin1 -} - -func (e *encodingLatin1) Transform(dest *bytes.Buffer, src []byte, op Op) ([]byte, error) { - return src, nil -} diff --git a/parser/mysql/charset.go b/parser/mysql/charset.go index d3115df457cb8..8095e6fa6fd98 100644 --- a/parser/mysql/charset.go +++ b/parser/mysql/charset.go @@ -574,6 +574,7 @@ var CollationNames = map[string]uint8{ const ( UTF8Charset = "utf8" UTF8MB4Charset = "utf8mb4" + Latin1Charset = "latin1" DefaultCharset = UTF8MB4Charset // DefaultCollationID is utf8mb4_bin(46) DefaultCollationID = 46 @@ -592,9 +593,9 @@ const ( MaxBytesOfCharacter = 4 ) -// IsUTF8Charset checks if charset is utf8 or utf8mb4 +// IsUTF8Charset checks if charset is utf8, utf8mb4 or latin1 func IsUTF8Charset(charset string) bool { - return charset == UTF8Charset || charset == UTF8MB4Charset + return charset == UTF8Charset || charset == UTF8MB4Charset || charset == Latin1Charset } // RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition. diff --git a/util/collate/collate.go b/util/collate/collate.go index 171f7f7cfe491..e018215d92886 100644 --- a/util/collate/collate.go +++ b/util/collate/collate.go @@ -98,7 +98,7 @@ func NewCollationEnabled() bool { func CompatibleCollate(collate1, collate2 string) bool { if (collate1 == "utf8mb4_general_ci" || collate1 == "utf8_general_ci") && (collate2 == "utf8mb4_general_ci" || collate2 == "utf8_general_ci") { return true - } else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") { + } else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin" || collate1 == "latin1_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") { return true } else if (collate1 == "utf8mb4_unicode_ci" || collate1 == "utf8_unicode_ci") && (collate2 == "utf8mb4_unicode_ci" || collate2 == "utf8_unicode_ci") { return true From e8ef3e03545245c99178f3e1cb4239077856d5c4 Mon Sep 17 00:00:00 2001 From: Jason Mo Date: Thu, 5 May 2022 17:07:49 +0800 Subject: [PATCH 2/7] add test --- ddl/serial_test.go | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/ddl/serial_test.go b/ddl/serial_test.go index 821400f685952..aab2314b079c1 100644 --- a/ddl/serial_test.go +++ b/ddl/serial_test.go @@ -1102,6 +1102,32 @@ func TestModifyingColumn4NewCollations(t *testing.T) { tk.MustExec("alter database dct charset utf8mb4 collate utf8mb4_general_ci") } +func TestAlterTableCharsetAndCollate(t *testing.T) { + store, clean := testkit.CreateMockStore(t) + defer clean() + tk := testkit.NewTestKit(t, store) + + tk.MustExec("use test") + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a varchar(20), key i(a)) charset=latin1") + tk.MustGetErrMsg("alter table t convert to charset utf8 collate utf8_unicode_ci", "[ddl:8200]Unsupported converting collation of column 'a' from 'latin1_bin' to 'utf8_unicode_ci' when index is defined on it.") + tk.MustGetErrMsg("alter table t convert to charset utf8 collate utf8_general_ci", "[ddl:8200]Unsupported converting collation of column 'a' from 'latin1_bin' to 'utf8_general_ci' when index is defined on it.") + tk.MustExec("alter table t convert to charset utf8 collate utf8_bin") + tk.MustGetErrMsg("alter table t convert to charset latin1", "[ddl:8200]Unsupported modify charset from utf8 to latin1") + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a varchar(20)) charset=latin1") + tk.MustExec("alter table t convert to charset utf8 collate utf8_unicode_ci") + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a varchar(20)) charset=latin1") + tk.MustExec("alter table t convert to charset utf8 collate utf8_general_ci") + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a varchar(20)) charset=latin1") + tk.MustExec("alter table t convert to charset utf8 collate utf8_bin") +} + func TestForbidUnsupportedCollations(t *testing.T) { store, clean := testkit.CreateMockStore(t) defer clean() @@ -1133,10 +1159,6 @@ func TestForbidUnsupportedCollations(t *testing.T) { mustGetUnsupportedCollation("alter table t1 modify a varchar(20) collate utf8mb4_roman_ci", "utf8mb4_roman_ci") mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci") mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci") - - // TODO(bb7133): fix the following cases by setting charset from collate firstly. - // mustGetUnsupportedCollation("create database ucd collate utf8mb4_unicode_ci", errMsgUnsupportedUnicodeCI) - // mustGetUnsupportedCollation("alter table t convert to collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") } func TestCreateTableNoBlock(t *testing.T) { From b6fd70d13b3210895af97e5218784305ebb4b683 Mon Sep 17 00:00:00 2001 From: Jason Mo Date: Thu, 5 May 2022 18:09:53 +0800 Subject: [PATCH 3/7] fix test --- .../r/collation_misc_disabled.result | 22 +++++++++++++++---- .../r/collation_misc_enabled.result | 22 +++++++++++++++---- cmd/explaintest/t/collation_misc.test | 19 +++++++++++++--- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/cmd/explaintest/r/collation_misc_disabled.result b/cmd/explaintest/r/collation_misc_disabled.result index 33f0ebe26f701..ca900d32ddd25 100644 --- a/cmd/explaintest/r/collation_misc_disabled.result +++ b/cmd/explaintest/r/collation_misc_disabled.result @@ -14,13 +14,27 @@ select * from t; a t_value alter table t modify column a varchar(20) charset utf8; -Error 8200: Unsupported modify charset from latin1 to utf8 +select * from t; +a +t_value +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8mb4; -Error 8200: Unsupported modify charset from latin1 to utf8mb4 +select * from t; +a +t_value +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8 collate utf8_bin; -Error 8200: Unsupported modify charset from latin1 to utf8 +select * from t; +a +t_value +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8mb4 collate utf8mb4_general_ci; -Error 8200: Unsupported modify charset from latin1 to utf8mb4 alter table t modify column a varchar(20) charset utf8mb4 collate utf8bin; [ddl:1273]Unknown collation: 'utf8bin' alter table t collate LATIN1_GENERAL_CI charset utf8 collate utf8_bin; diff --git a/cmd/explaintest/r/collation_misc_enabled.result b/cmd/explaintest/r/collation_misc_enabled.result index 8f75d4e18d151..1abb539679148 100644 --- a/cmd/explaintest/r/collation_misc_enabled.result +++ b/cmd/explaintest/r/collation_misc_enabled.result @@ -14,13 +14,27 @@ select * from t; a t_value alter table t modify column a varchar(20) charset utf8; -Error 8200: Unsupported modify charset from latin1 to utf8 +select * from t; +a +t_value +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8mb4; -Error 8200: Unsupported modify charset from latin1 to utf8mb4 +select * from t; +a +t_value +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8 collate utf8_bin; -Error 8200: Unsupported modify charset from latin1 to utf8 +select * from t; +a +t_value +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8mb4 collate utf8mb4_general_ci; -Error 8200: Unsupported modify charset from latin1 to utf8mb4 alter table t modify column a varchar(20) charset utf8mb4 collate utf8bin; [ddl:1273]Unknown collation: 'utf8bin' alter table t collate LATIN1_GENERAL_CI charset utf8 collate utf8_bin; diff --git a/cmd/explaintest/t/collation_misc.test b/cmd/explaintest/t/collation_misc.test index 443c6c4106cc0..13b89f3f3aa49 100644 --- a/cmd/explaintest/t/collation_misc.test +++ b/cmd/explaintest/t/collation_misc.test @@ -12,12 +12,25 @@ create table t(a varchar(20) charset latin1); insert into t values ("t_value"); alter table t modify column a varchar(20) charset latin1; select * from t; ---error 8200 + alter table t modify column a varchar(20) charset utf8; ---error 8200 +select * from t; + +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8mb4; ---error 8200 +select * from t; + +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8 collate utf8_bin; +select * from t; + +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); --error 8200 alter table t modify column a varchar(20) charset utf8mb4 collate utf8mb4_general_ci; --error 1273 From 79a5adef82657688b8a54e71626f9eca7659900c Mon Sep 17 00:00:00 2001 From: Jason Mo Date: Fri, 6 May 2022 09:58:39 +0800 Subject: [PATCH 4/7] Revert "add test" This reverts commit e8ef3e03545245c99178f3e1cb4239077856d5c4. --- ddl/serial_test.go | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/ddl/serial_test.go b/ddl/serial_test.go index aab2314b079c1..821400f685952 100644 --- a/ddl/serial_test.go +++ b/ddl/serial_test.go @@ -1102,32 +1102,6 @@ func TestModifyingColumn4NewCollations(t *testing.T) { tk.MustExec("alter database dct charset utf8mb4 collate utf8mb4_general_ci") } -func TestAlterTableCharsetAndCollate(t *testing.T) { - store, clean := testkit.CreateMockStore(t) - defer clean() - tk := testkit.NewTestKit(t, store) - - tk.MustExec("use test") - tk.MustExec("drop table if exists t") - tk.MustExec("create table t(a varchar(20), key i(a)) charset=latin1") - tk.MustGetErrMsg("alter table t convert to charset utf8 collate utf8_unicode_ci", "[ddl:8200]Unsupported converting collation of column 'a' from 'latin1_bin' to 'utf8_unicode_ci' when index is defined on it.") - tk.MustGetErrMsg("alter table t convert to charset utf8 collate utf8_general_ci", "[ddl:8200]Unsupported converting collation of column 'a' from 'latin1_bin' to 'utf8_general_ci' when index is defined on it.") - tk.MustExec("alter table t convert to charset utf8 collate utf8_bin") - tk.MustGetErrMsg("alter table t convert to charset latin1", "[ddl:8200]Unsupported modify charset from utf8 to latin1") - - tk.MustExec("drop table if exists t") - tk.MustExec("create table t(a varchar(20)) charset=latin1") - tk.MustExec("alter table t convert to charset utf8 collate utf8_unicode_ci") - - tk.MustExec("drop table if exists t") - tk.MustExec("create table t(a varchar(20)) charset=latin1") - tk.MustExec("alter table t convert to charset utf8 collate utf8_general_ci") - - tk.MustExec("drop table if exists t") - tk.MustExec("create table t(a varchar(20)) charset=latin1") - tk.MustExec("alter table t convert to charset utf8 collate utf8_bin") -} - func TestForbidUnsupportedCollations(t *testing.T) { store, clean := testkit.CreateMockStore(t) defer clean() @@ -1159,6 +1133,10 @@ func TestForbidUnsupportedCollations(t *testing.T) { mustGetUnsupportedCollation("alter table t1 modify a varchar(20) collate utf8mb4_roman_ci", "utf8mb4_roman_ci") mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci") mustGetUnsupportedCollation("alter table t1 modify a varchar(20) charset utf8 collate utf8_roman_ci", "utf8_roman_ci") + + // TODO(bb7133): fix the following cases by setting charset from collate firstly. + // mustGetUnsupportedCollation("create database ucd collate utf8mb4_unicode_ci", errMsgUnsupportedUnicodeCI) + // mustGetUnsupportedCollation("alter table t convert to collate utf8mb4_unicode_ci", "utf8mb4_unicode_ci") } func TestCreateTableNoBlock(t *testing.T) { From d1eaecda5c489b5bec98809f756de1d00b57beef Mon Sep 17 00:00:00 2001 From: Jason Mo Date: Fri, 6 May 2022 10:09:08 +0800 Subject: [PATCH 5/7] fix test --- ddl/db_integration_test.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ddl/db_integration_test.go b/ddl/db_integration_test.go index 1e2762ce47a46..496f23413eee6 100644 --- a/ddl/db_integration_test.go +++ b/ddl/db_integration_test.go @@ -758,7 +758,7 @@ func TestChangingTableCharset(t *testing.T) { tk := testkit.NewTestKit(t, store) tk.MustExec("USE test") - tk.MustExec("create table t(a char(10)) charset latin1 collate latin1_bin") + tk.MustExec("create table t(a char(10), index i(a)) charset latin1 collate latin1_bin") tk.MustGetErrCode("alter table t charset gbk", errno.ErrUnsupportedDDLOperation) tk.MustGetErrCode("alter table t charset ''", errno.ErrUnknownCharacterSet) @@ -769,9 +769,15 @@ func TestChangingTableCharset(t *testing.T) { tk.MustGetErrCode("alter table t charset utf8 collate utf8mb4_bin;", errno.ErrCollationCharsetMismatch) tk.MustGetErrCode("alter table t charset utf8 collate utf8_bin collate utf8mb4_bin collate utf8_bin;", errno.ErrCollationCharsetMismatch) - tk.MustGetErrCode("alter table t charset utf8", errno.ErrUnsupportedDDLOperation) - tk.MustGetErrCode("alter table t charset utf8mb4", errno.ErrUnsupportedDDLOperation) - tk.MustGetErrCode("alter table t charset utf8mb4 collate utf8mb4_bin", errno.ErrUnsupportedDDLOperation) + tk.MustExec("alter table t charset utf8") + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a char(10), index i(a)) charset latin1 collate latin1_bin") + tk.MustExec("alter table t charset utf8mb4") + + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a char(10), index i(a)) charset latin1 collate latin1_bin") + tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin") tk.MustGetErrCode("alter table t charset latin1 charset utf8 charset utf8mb4 collate utf8_bin;", errno.ErrConflictingDeclarations) @@ -791,6 +797,13 @@ func TestChangingTableCharset(t *testing.T) { } checkCharset(charset.CharsetUTF8MB4, charset.CollationUTF8MB4) + tk.MustExec("drop table if exists t") + tk.MustExec("create table t(a varchar(20), key i(a)) charset=latin1") + tk.MustGetErrCode("alter table t convert to charset utf8 collate utf8_unicode_ci", errno.ErrUnsupportedDDLOperation) + tk.MustGetErrCode("alter table t convert to charset utf8 collate utf8_general_ci", errno.ErrUnsupportedDDLOperation) + tk.MustExec("alter table t convert to charset utf8 collate utf8_bin") + tk.MustGetErrCode("alter table t convert to charset latin1", errno.ErrUnsupportedDDLOperation) + // Test when column charset can not convert to the target charset. tk.MustExec("drop table t;") tk.MustExec("create table t(a varchar(10) character set ascii) charset utf8mb4") From b34242946045d487c609d9fdad1a8cff9e219334 Mon Sep 17 00:00:00 2001 From: Jason Mo Date: Fri, 6 May 2022 10:37:03 +0800 Subject: [PATCH 6/7] fix test --- expression/integration_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/expression/integration_test.go b/expression/integration_test.go index e3f2f0c628274..7dfef39299025 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -648,7 +648,7 @@ func TestStringBuiltin(t *testing.T) { result = tk.MustQuery("select ord('123'), ord(123), ord(''), ord('你好'), ord(NULL), ord('👍')") result.Check(testkit.Rows("49 49 0 14990752 4036989325")) result = tk.MustQuery("select ord(X''), ord(X'6161'), ord(X'e4bd'), ord(X'e4bda0'), ord(_ascii'你'), ord(_latin1'你')") - result.Check(testkit.Rows("0 97 228 228 228 228")) + result.Check(testkit.Rows("0 97 228 228 228 14990752")) // for space result = tk.MustQuery(`select space(0), space(2), space(-1), space(1.1), space(1.9)`) From f92dc77b7b9e71a6ca01a43713df1db2c0d60468 Mon Sep 17 00:00:00 2001 From: Hangjie Mo Date: Tue, 10 May 2022 11:02:04 +0800 Subject: [PATCH 7/7] follw comments --- .../r/collation_misc_disabled.result | 24 +++++++++++++++++++ .../r/collation_misc_enabled.result | 24 +++++++++++++++++++ cmd/explaintest/t/collation_misc.test | 18 ++++++++++++++ ddl/db_integration_test.go | 3 +++ parser/charset/encoding_latin1.go | 4 ++-- parser/mysql/charset.go | 2 +- 6 files changed, 72 insertions(+), 3 deletions(-) diff --git a/cmd/explaintest/r/collation_misc_disabled.result b/cmd/explaintest/r/collation_misc_disabled.result index ca900d32ddd25..20b3d9db10a9e 100644 --- a/cmd/explaintest/r/collation_misc_disabled.result +++ b/cmd/explaintest/r/collation_misc_disabled.result @@ -14,6 +14,8 @@ select * from t; a t_value alter table t modify column a varchar(20) charset utf8; +admin check table t; + select * from t; a t_value @@ -21,6 +23,8 @@ drop table t; create table t(a varchar(20) charset latin1); insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8mb4; +admin check table t; + select * from t; a t_value @@ -28,6 +32,8 @@ drop table t; create table t(a varchar(20) charset latin1); insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8 collate utf8_bin; +admin check table t; + select * from t; a t_value @@ -41,6 +47,24 @@ alter table t collate LATIN1_GENERAL_CI charset utf8 collate utf8_bin; Error 1302: Conflicting declarations: 'CHARACTER SET latin1' and 'CHARACTER SET utf8' alter table t collate LATIN1_GENERAL_CI collate UTF8MB4_UNICODE_ci collate utf8_bin; Error 1253: COLLATION 'utf8mb4_unicode_ci' is not valid for CHARACTER SET 'latin1' +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); +alter table t modify column a varchar(19) charset utf8mb4; +admin check table t; + +select * from t; +a +t_value +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); +alter table t modify column a varchar(19) charset utf8 collate utf8_bin; +admin check table t; + +select * from t; +a +t_value create database if not exists cd_test_utf8 CHARACTER SET utf8 COLLATE utf8_bin; create database if not exists cd_test_latin1 CHARACTER SET latin1 COLLATE latin1_swedish_ci; use cd_test_utf8; diff --git a/cmd/explaintest/r/collation_misc_enabled.result b/cmd/explaintest/r/collation_misc_enabled.result index 1abb539679148..687ea0486e046 100644 --- a/cmd/explaintest/r/collation_misc_enabled.result +++ b/cmd/explaintest/r/collation_misc_enabled.result @@ -14,6 +14,8 @@ select * from t; a t_value alter table t modify column a varchar(20) charset utf8; +admin check table t; + select * from t; a t_value @@ -21,6 +23,8 @@ drop table t; create table t(a varchar(20) charset latin1); insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8mb4; +admin check table t; + select * from t; a t_value @@ -28,6 +32,8 @@ drop table t; create table t(a varchar(20) charset latin1); insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8 collate utf8_bin; +admin check table t; + select * from t; a t_value @@ -41,6 +47,24 @@ alter table t collate LATIN1_GENERAL_CI charset utf8 collate utf8_bin; Error 1273: Unsupported collation when new collation is enabled: 'latin1_general_ci' alter table t collate LATIN1_GENERAL_CI collate UTF8MB4_UNICODE_ci collate utf8_bin; Error 1273: Unsupported collation when new collation is enabled: 'latin1_general_ci' +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); +alter table t modify column a varchar(19) charset utf8mb4; +admin check table t; + +select * from t; +a +t_value +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); +alter table t modify column a varchar(19) charset utf8 collate utf8_bin; +admin check table t; + +select * from t; +a +t_value create database if not exists cd_test_utf8 CHARACTER SET utf8 COLLATE utf8_bin; create database if not exists cd_test_latin1 CHARACTER SET latin1 COLLATE latin1_swedish_ci; Error 1273: Unsupported collation when new collation is enabled: 'latin1_swedish_ci' diff --git a/cmd/explaintest/t/collation_misc.test b/cmd/explaintest/t/collation_misc.test index 13b89f3f3aa49..e77fea4d783fd 100644 --- a/cmd/explaintest/t/collation_misc.test +++ b/cmd/explaintest/t/collation_misc.test @@ -14,18 +14,21 @@ alter table t modify column a varchar(20) charset latin1; select * from t; alter table t modify column a varchar(20) charset utf8; +admin check table t; select * from t; drop table t; create table t(a varchar(20) charset latin1); insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8mb4; +admin check table t; select * from t; drop table t; create table t(a varchar(20) charset latin1); insert into t values ("t_value"); alter table t modify column a varchar(20) charset utf8 collate utf8_bin; +admin check table t; select * from t; drop table t; @@ -40,6 +43,21 @@ alter table t collate LATIN1_GENERAL_CI charset utf8 collate utf8_bin; --error 1253, 1273 alter table t collate LATIN1_GENERAL_CI collate UTF8MB4_UNICODE_ci collate utf8_bin; +# ChangingCharsetToUtf8 with reorg +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); +alter table t modify column a varchar(19) charset utf8mb4; +admin check table t; +select * from t; + +drop table t; +create table t(a varchar(20) charset latin1); +insert into t values ("t_value"); +alter table t modify column a varchar(19) charset utf8 collate utf8_bin; +admin check table t; +select * from t; + # TestCharsetDatabase create database if not exists cd_test_utf8 CHARACTER SET utf8 COLLATE utf8_bin; --error 1273 diff --git a/ddl/db_integration_test.go b/ddl/db_integration_test.go index 496f23413eee6..2e460c6506ae2 100644 --- a/ddl/db_integration_test.go +++ b/ddl/db_integration_test.go @@ -770,14 +770,17 @@ func TestChangingTableCharset(t *testing.T) { tk.MustGetErrCode("alter table t charset utf8 collate utf8_bin collate utf8mb4_bin collate utf8_bin;", errno.ErrCollationCharsetMismatch) tk.MustExec("alter table t charset utf8") + tk.MustExec("admin check table t") tk.MustExec("drop table if exists t") tk.MustExec("create table t(a char(10), index i(a)) charset latin1 collate latin1_bin") tk.MustExec("alter table t charset utf8mb4") + tk.MustExec("admin check table t") tk.MustExec("drop table if exists t") tk.MustExec("create table t(a char(10), index i(a)) charset latin1 collate latin1_bin") tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin") + tk.MustExec("admin check table t") tk.MustGetErrCode("alter table t charset latin1 charset utf8 charset utf8mb4 collate utf8_bin;", errno.ErrConflictingDeclarations) diff --git a/parser/charset/encoding_latin1.go b/parser/charset/encoding_latin1.go index d14f188c03a4c..f1893484b9a6b 100644 --- a/parser/charset/encoding_latin1.go +++ b/parser/charset/encoding_latin1.go @@ -18,14 +18,14 @@ import ( ) // EncodingLatin1Impl is the instance of encodingLatin1. -// In TiDB, latin1 is an alias for utf8, so uses utf8 implementation for latin1 +// In TiDB, latin1 is an alias for utf8, so uses utf8 implementation for latin1. var EncodingLatin1Impl = &encodingLatin1{encodingUTF8{encodingBase{enc: encoding.Nop}}} func init() { EncodingLatin1Impl.self = EncodingLatin1Impl } -// encodingLatin1 compatibles with latin1 in old version TiDB +// encodingLatin1 compatibles with latin1 in old version TiDB. type encodingLatin1 struct { encodingUTF8 } diff --git a/parser/mysql/charset.go b/parser/mysql/charset.go index 8095e6fa6fd98..cb3666bb21b31 100644 --- a/parser/mysql/charset.go +++ b/parser/mysql/charset.go @@ -593,7 +593,7 @@ const ( MaxBytesOfCharacter = 4 ) -// IsUTF8Charset checks if charset is utf8, utf8mb4 or latin1 +// IsUTF8Charset checks if charset is utf8, utf8mb4 or latin1. func IsUTF8Charset(charset string) bool { return charset == UTF8Charset || charset == UTF8MB4Charset || charset == Latin1Charset }