Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: Support modify table/column charset from latin1 to utf8/utf8mb4 #34386

Merged
merged 10 commits into from
May 10, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions cmd/explaintest/r/collation_misc_disabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,27 @@ select * from t;
a
t_value
alter table t modify column a varchar(20) charset utf8;
Error 8200: Unsupported modify charset from latin1 to utf8
select * from t;
a
t_value
drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset utf8mb4;
Error 8200: Unsupported modify charset from latin1 to utf8mb4
select * from t;
a
t_value
drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset utf8 collate utf8_bin;
Error 8200: Unsupported modify charset from latin1 to utf8
select * from t;
a
t_value
drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset utf8mb4 collate utf8mb4_general_ci;
Error 8200: Unsupported modify charset from latin1 to utf8mb4
alter table t modify column a varchar(20) charset utf8mb4 collate utf8bin;
[ddl:1273]Unknown collation: 'utf8bin'
alter table t collate LATIN1_GENERAL_CI charset utf8 collate utf8_bin;
Expand Down
22 changes: 18 additions & 4 deletions cmd/explaintest/r/collation_misc_enabled.result
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,27 @@ select * from t;
a
t_value
alter table t modify column a varchar(20) charset utf8;
Error 8200: Unsupported modify charset from latin1 to utf8
select * from t;
a
t_value
drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset utf8mb4;
Error 8200: Unsupported modify charset from latin1 to utf8mb4
select * from t;
a
t_value
drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset utf8 collate utf8_bin;
Error 8200: Unsupported modify charset from latin1 to utf8
select * from t;
a
t_value
drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset utf8mb4 collate utf8mb4_general_ci;
Error 8200: Unsupported modify charset from latin1 to utf8mb4
alter table t modify column a varchar(20) charset utf8mb4 collate utf8bin;
[ddl:1273]Unknown collation: 'utf8bin'
alter table t collate LATIN1_GENERAL_CI charset utf8 collate utf8_bin;
Expand Down
19 changes: 16 additions & 3 deletions cmd/explaintest/t/collation_misc.test
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,25 @@ create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset latin1;
select * from t;
--error 8200

alter table t modify column a varchar(20) charset utf8;
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
--error 8200
select * from t;

drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset utf8mb4;
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
--error 8200
select * from t;

drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
alter table t modify column a varchar(20) charset utf8 collate utf8_bin;
select * from t;

drop table t;
create table t(a varchar(20) charset latin1);
insert into t values ("t_value");
--error 8200
alter table t modify column a varchar(20) charset utf8mb4 collate utf8mb4_general_ci;
--error 1273
Expand Down
21 changes: 17 additions & 4 deletions ddl/db_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ func TestChangingTableCharset(t *testing.T) {
tk := testkit.NewTestKit(t, store)

tk.MustExec("USE test")
tk.MustExec("create table t(a char(10)) charset latin1 collate latin1_bin")
tk.MustExec("create table t(a char(10), index i(a)) charset latin1 collate latin1_bin")

tk.MustGetErrCode("alter table t charset gbk", errno.ErrUnsupportedDDLOperation)
tk.MustGetErrCode("alter table t charset ''", errno.ErrUnknownCharacterSet)
Expand All @@ -769,9 +769,15 @@ func TestChangingTableCharset(t *testing.T) {
tk.MustGetErrCode("alter table t charset utf8 collate utf8mb4_bin;", errno.ErrCollationCharsetMismatch)
tk.MustGetErrCode("alter table t charset utf8 collate utf8_bin collate utf8mb4_bin collate utf8_bin;", errno.ErrCollationCharsetMismatch)

tk.MustGetErrCode("alter table t charset utf8", errno.ErrUnsupportedDDLOperation)
tk.MustGetErrCode("alter table t charset utf8mb4", errno.ErrUnsupportedDDLOperation)
tk.MustGetErrCode("alter table t charset utf8mb4 collate utf8mb4_bin", errno.ErrUnsupportedDDLOperation)
tk.MustExec("alter table t charset utf8")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add an `admin check table t` check.
Also, do we need to add some records?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. I think the case with data is covered by collation_misc.test.


tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a char(10), index i(a)) charset latin1 collate latin1_bin")
tk.MustExec("alter table t charset utf8mb4")

tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a char(10), index i(a)) charset latin1 collate latin1_bin")
tk.MustExec("alter table t charset utf8mb4 collate utf8mb4_bin")

tk.MustGetErrCode("alter table t charset latin1 charset utf8 charset utf8mb4 collate utf8_bin;", errno.ErrConflictingDeclarations)

Expand All @@ -791,6 +797,13 @@ func TestChangingTableCharset(t *testing.T) {
}
checkCharset(charset.CharsetUTF8MB4, charset.CollationUTF8MB4)

tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a varchar(20), key i(a)) charset=latin1")
tk.MustGetErrCode("alter table t convert to charset utf8 collate utf8_unicode_ci", errno.ErrUnsupportedDDLOperation)
tk.MustGetErrCode("alter table t convert to charset utf8 collate utf8_general_ci", errno.ErrUnsupportedDDLOperation)
tk.MustExec("alter table t convert to charset utf8 collate utf8_bin")
tk.MustGetErrCode("alter table t convert to charset latin1", errno.ErrUnsupportedDDLOperation)

// Test when column charset can not convert to the target charset.
tk.MustExec("drop table t;")
tk.MustExec("create table t(a varchar(10) character set ascii) charset utf8mb4")
Expand Down
6 changes: 4 additions & 2 deletions ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -4177,8 +4177,10 @@ func checkModifyCharsetAndCollation(toCharset, toCollate, origCharset, origColla

if (origCharset == charset.CharsetUTF8 && toCharset == charset.CharsetUTF8MB4) ||
(origCharset == charset.CharsetUTF8 && toCharset == charset.CharsetUTF8) ||
(origCharset == charset.CharsetUTF8MB4 && toCharset == charset.CharsetUTF8MB4) {
// TiDB only allow utf8 to be changed to utf8mb4, or changing the collation when the charset is utf8/utf8mb4.
(origCharset == charset.CharsetUTF8MB4 && toCharset == charset.CharsetUTF8MB4) ||
(origCharset == charset.CharsetLatin1 && toCharset == charset.CharsetUTF8) ||
(origCharset == charset.CharsetLatin1 && toCharset == charset.CharsetUTF8MB4) {
// TiDB only allows utf8/latin1 to be changed to utf8mb4 (and latin1 to utf8), or changing the collation when the charset is utf8/utf8mb4/latin1.
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ func TestStringBuiltin(t *testing.T) {
result = tk.MustQuery("select ord('123'), ord(123), ord(''), ord('你好'), ord(NULL), ord('👍')")
result.Check(testkit.Rows("49 49 0 14990752 <nil> 4036989325"))
result = tk.MustQuery("select ord(X''), ord(X'6161'), ord(X'e4bd'), ord(X'e4bda0'), ord(_ascii'你'), ord(_latin1'你')")
result.Check(testkit.Rows("0 97 228 228 228 228"))
result.Check(testkit.Rows("0 97 228 228 228 14990752"))

// for space
result = tk.MustQuery(`select space(0), space(2), space(-1), space(1.1), space(1.9)`)
Expand Down
28 changes: 2 additions & 26 deletions parser/charset/encoding_latin1.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,18 @@
package charset

import (
"bytes"

"golang.org/x/text/encoding"
)

// EncodingLatin1Impl is the instance of encodingLatin1.
// TiDB uses utf8 implementation for latin1 charset because of the backward compatibility.
// In TiDB, latin1 is an alias for utf8, so it uses the utf8 implementation for latin1.
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
var EncodingLatin1Impl = &encodingLatin1{encodingUTF8{encodingBase{enc: encoding.Nop}}}

// init stores EncodingLatin1Impl in its own self field.
// NOTE(review): presumably this lets code in the embedded base encoding reach
// the latin1-specific method overrides — confirm against encodingBase.
func init() {
EncodingLatin1Impl.self = EncodingLatin1Impl
}

// encodingLatin1 compatibles with latin1 in old version TiDB.
// encodingLatin1 is compatible with latin1 in old versions of TiDB.
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
type encodingLatin1 struct {
// Embedding encodingUTF8 promotes its methods onto encodingLatin1; only the
// methods redefined on *encodingLatin1 behave differently.
encodingUTF8
}
Expand All @@ -36,25 +34,3 @@ type encodingLatin1 struct {
// Name returns CharsetLatin1, the charset name reported for this encoding.
func (e *encodingLatin1) Name() string {
return CharsetLatin1
}

// Peek implements Encoding interface.
// It yields the leading byte of src as a one-byte slice, or src itself when
// src is empty.
func (e *encodingLatin1) Peek(src []byte) []byte {
	if len(src) > 0 {
		return src[:1]
	}
	return src
}

// IsValid implements Encoding interface.
// It reports true for every input; src is never inspected.
func (e *encodingLatin1) IsValid(src []byte) bool {
return true
}

// Tp implements Encoding interface.
// It returns the EncodingTpLatin1 constant as this encoding's type tag.
func (e *encodingLatin1) Tp() EncodingTp {
return EncodingTpLatin1
}

// Transform returns src unchanged together with a nil error.
// Neither dest nor op is used, so no bytes are converted or validated here.
func (e *encodingLatin1) Transform(dest *bytes.Buffer, src []byte, op Op) ([]byte, error) {
return src, nil
}
5 changes: 3 additions & 2 deletions parser/mysql/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,7 @@ var CollationNames = map[string]uint8{
const (
UTF8Charset = "utf8"
UTF8MB4Charset = "utf8mb4"
Latin1Charset = "latin1"
DefaultCharset = UTF8MB4Charset
// DefaultCollationID is utf8mb4_bin(46)
DefaultCollationID = 46
Expand All @@ -592,9 +593,9 @@ const (
MaxBytesOfCharacter = 4
)

// IsUTF8Charset checks if charset is utf8 or utf8mb4
// IsUTF8Charset checks if charset is utf8, utf8mb4 or latin1
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
func IsUTF8Charset(charset string) bool {
return charset == UTF8Charset || charset == UTF8MB4Charset
return charset == UTF8Charset || charset == UTF8MB4Charset || charset == Latin1Charset
}

// RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition.
Expand Down
2 changes: 1 addition & 1 deletion util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func NewCollationEnabled() bool {
func CompatibleCollate(collate1, collate2 string) bool {
if (collate1 == "utf8mb4_general_ci" || collate1 == "utf8_general_ci") && (collate2 == "utf8mb4_general_ci" || collate2 == "utf8_general_ci") {
return true
} else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") {
} else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin" || collate1 == "latin1_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") {
return true
} else if (collate1 == "utf8mb4_unicode_ci" || collate1 == "utf8_unicode_ci") && (collate2 == "utf8mb4_unicode_ci" || collate2 == "utf8_unicode_ci") {
return true
Expand Down