From 7eaea3059bf8b0678746f6d757dd4c9d083bf417 Mon Sep 17 00:00:00 2001 From: bb7133 Date: Sun, 5 May 2019 19:39:55 +0800 Subject: [PATCH] infoschema, domain, ddl: fix upper cased charset names (#10272) (#10354) --- ddl/db_integration_test.go | 47 +++++++++++++++++++++++++++++++++- ddl/db_test.go | 2 +- domain/domain.go | 1 + expression/integration_test.go | 9 +++---- expression/typeinfer_test.go | 8 +++--- go.mod | 2 +- go.sum | 4 +-- infoschema/builder.go | 16 ++++++++++++ 8 files changed, 74 insertions(+), 15 deletions(-) diff --git a/ddl/db_integration_test.go b/ddl/db_integration_test.go index 9ed6ca3fe300d..3a3ee36eb6481 100644 --- a/ddl/db_integration_test.go +++ b/ddl/db_integration_test.go @@ -267,7 +267,7 @@ func (s *testIntegrationSuite) TestChangingTableCharset(c *C) { if rs != nil { rs.Close() } - c.Assert(err.Error(), Equals, "Unknown charset gbk") + c.Assert(err.Error(), Equals, "[parser:1115]Unknown character set: 'gbk'") rs, err = tk.Exec("alter table t charset utf8 collate latin1_bin") if rs != nil { rs.Close() @@ -286,6 +286,51 @@ func (s *testIntegrationSuite) TestCaseInsensitiveCharsetAndCollate(c *C) { tk.MustExec("create table t2(id int) ENGINE=InnoDB DEFAULT CHARSET=Utf8 COLLATE=utf8_BIN;") tk.MustExec("create table t3(id int) ENGINE=InnoDB DEFAULT CHARSET=Utf8mb4 COLLATE=utf8MB4_BIN;") tk.MustExec("create table t4(id int) ENGINE=InnoDB DEFAULT CHARSET=Utf8mb4 COLLATE=utf8MB4_general_ci;") + + tk.MustExec("create table t5(a varchar(20)) ENGINE=InnoDB DEFAULT CHARSET=UTF8MB4 COLLATE=UTF8MB4_GENERAL_CI;") + tk.MustExec("insert into t5 values ('特克斯和凯科斯群岛')") + + db, ok := domain.GetDomain(tk.Se).InfoSchema().SchemaByName(model.NewCIStr("test_charset_collate")) + c.Assert(ok, IsTrue) + tbl := testGetTableByName(c, tk.Se, "test_charset_collate", "t5") + tblInfo := tbl.Meta().Clone() + c.Assert(tblInfo.Charset, Equals, "utf8mb4") + c.Assert(tblInfo.Columns[0].Charset, Equals, "utf8mb4") + + tblInfo.Version = model.TableInfoVersion2 + 
tblInfo.Charset = "UTF8MB4" + + updateTableInfo := func(tblInfo *model.TableInfo) { + mockCtx := mock.NewContext() + mockCtx.Store = s.store + err := mockCtx.NewTxn() + c.Assert(err, IsNil) + txn, err := mockCtx.Txn(true) + c.Assert(err, IsNil) + mt := meta.NewMeta(txn) + c.Assert(ok, IsTrue) + err = mt.UpdateTable(db.ID, tblInfo) + c.Assert(err, IsNil) + err = txn.Commit(context.Background()) + c.Assert(err, IsNil) + } + updateTableInfo(tblInfo) + tk.MustExec("alter table t5 add column b varchar(10);") // load latest schema. + + tblInfo = testGetTableByName(c, tk.Se, "test_charset_collate", "t5").Meta() + c.Assert(tblInfo.Charset, Equals, "utf8mb4") + c.Assert(tblInfo.Columns[0].Charset, Equals, "utf8mb4") + + // For model.TableInfoVersion3, all charsets / collations are assumed to be lower-cased already, so no case conversion is performed. + tblInfo = tblInfo.Clone() + tblInfo.Version = model.TableInfoVersion3 + tblInfo.Charset = "UTF8MB4" + updateTableInfo(tblInfo) + tk.MustExec("alter table t5 add column c varchar(10);") // load latest schema. 
+ + tblInfo = testGetTableByName(c, tk.Se, "test_charset_collate", "t5").Meta() + c.Assert(tblInfo.Charset, Equals, "UTF8MB4") + c.Assert(tblInfo.Columns[0].Charset, Equals, "utf8mb4") } func newStoreWithBootstrap() (kv.Storage, *domain.Domain, error) { diff --git a/ddl/db_test.go b/ddl/db_test.go index 66ac1a4474c47..a8b143c569fb8 100644 --- a/ddl/db_test.go +++ b/ddl/db_test.go @@ -4475,7 +4475,7 @@ func (s *testDBSuite) TestCheckTooBigFieldLength(c *C) { s.testErrorCode(c, "alter table tr_04 add column b varchar(20000) charset utf8mb4;", tmysql.ErrTooBigFieldlength) s.testErrorCode(c, "alter table tr_04 convert to character set utf8mb4;", tmysql.ErrTooBigFieldlength) s.testErrorCode(c, "create table tr_05 (id int, name varchar(30000), purchased date ) default charset=utf8 collate=utf8_bin;", tmysql.ErrTooBigFieldlength) - s.testErrorCode(c, "create table tr_05 (id int, name varchar(20000) charset utf8mb4, purchased date ) default charset=utf8 collate=utf8;", tmysql.ErrTooBigFieldlength) + s.testErrorCode(c, "create table tr_05 (id int, name varchar(20000) charset utf8mb4, purchased date ) default charset=utf8 collate=utf8_bin;", tmysql.ErrTooBigFieldlength) s.testErrorCode(c, "create table tr_05 (id int, name varchar(65536), purchased date ) default charset=latin1;", tmysql.ErrTooBigFieldlength) s.tk.MustExec("drop table if exists tr_05;") diff --git a/domain/domain.go b/domain/domain.go index 51bf77d284a1c..a3a5b23f33cc1 100644 --- a/domain/domain.go +++ b/domain/domain.go @@ -192,6 +192,7 @@ func (do *Domain) fetchSchemasWithTables(schemas []*model.DBInfo, m *meta.Meta, // schema is not public, can't be used outside. 
continue } + infoschema.ConvertCharsetCollateToLowerCaseIfNeed(tbl) di.Tables = append(di.Tables, tbl) } } diff --git a/expression/integration_test.go b/expression/integration_test.go index ba19e33bdb791..8461321ccc57b 100644 --- a/expression/integration_test.go +++ b/expression/integration_test.go @@ -898,7 +898,7 @@ func (s *testIntegrationSuite) TestStringBuiltin(c *C) { result.Check(testkit.Rows("'121' '0' '中文' ")) // for convert - result = tk.MustQuery(`select convert("123" using "866"), convert("123" using "binary"), convert("中文" using "binary"), convert("中文" using "utf8"), convert("中文" using "utf8mb4"), convert(cast("中文" as binary) using "utf8");`) + result = tk.MustQuery(`select convert("123" using "binary"), convert("123" using "binary"), convert("中文" using "binary"), convert("中文" using "utf8"), convert("中文" using "utf8mb4"), convert(cast("中文" as binary) using "utf8");`) result.Check(testkit.Rows("123 123 中文 中文 中文 中文")) // for insert @@ -2286,11 +2286,8 @@ func (s *testIntegrationSuite) TestBuiltin(c *C) { result.Check(testkit.Rows("ad\x01\x00Y")) result = tk.MustQuery("select char(97, null, 100, 256, 89 using ascii)") result.Check(testkit.Rows("ad\x01\x00Y")) - charRecordSet, err := tk.Exec("select char(97, null, 100, 256, 89 using tidb)") - c.Assert(err, IsNil) - c.Assert(charRecordSet, NotNil) - _, err = session.GetRows4Test(ctx, tk.Se, charRecordSet) - c.Assert(err.Error(), Equals, "unknown encoding: tidb") + _, err = tk.Exec("select char(97, null, 100, 256, 89 using tidb)") + c.Assert(err.Error(), Equals, "[parser:1115]Unknown character set: 'tidb'") // issue 3884 tk.MustExec("drop table if exists t") diff --git a/expression/typeinfer_test.go b/expression/typeinfer_test.go index c0226639c76bc..539fa5c90641e 100644 --- a/expression/typeinfer_test.go +++ b/expression/typeinfer_test.go @@ -455,10 +455,10 @@ func (s *testInferTypeSuite) createTestCase4StrFuncs() []typeInferTestCase { {"quote(c_float_d )", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 
26, types.UnspecifiedLength}, {"quote(c_double_d )", mysql.TypeVarString, charset.CharsetUTF8MB4, 0, 46, types.UnspecifiedLength}, - {"convert(c_double_d using c_text_d)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, - {"convert(c_binary using c_text_d)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, - {"convert(c_binary using c_binary)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, - {"convert(c_text_d using c_binary)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, + {"convert(c_double_d using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, + {"convert(c_binary using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, + {"convert(c_binary using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, + {"convert(c_text_d using utf8mb4)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, {"insert(c_varchar, c_int_d, c_int_d, c_varchar)", mysql.TypeLongBlob, charset.CharsetUTF8MB4, 0, mysql.MaxBlobWidth, types.UnspecifiedLength}, {"insert(c_varchar, c_int_d, c_int_d, c_binary)", mysql.TypeLongBlob, charset.CharsetBin, mysql.BinaryFlag, mysql.MaxBlobWidth, types.UnspecifiedLength}, diff --git a/go.mod b/go.mod index 156577fa10e47..23c407f104f0a 100644 --- a/go.mod +++ b/go.mod @@ -48,7 +48,7 @@ require ( github.com/pingcap/goleveldb v0.0.0-20171020084629-8d44bfdf1030 github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11 github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 - github.com/pingcap/parser v0.0.0-20190421035202-497ae72425c5 + github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7 github.com/pingcap/pd v2.1.0-rc.4+incompatible github.com/pingcap/tidb-tools 
v2.1.3-0.20190116051332-34c808eef588+incompatible github.com/pingcap/tipb v0.0.0-20180910045846-371b48b15d93 diff --git a/go.sum b/go.sum index 6c69633883e8f..592bae0edeb24 100644 --- a/go.sum +++ b/go.sum @@ -101,8 +101,8 @@ github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11 h1:e81flSfRbbMW5RU github.com/pingcap/kvproto v0.0.0-20190226063853-f6c0b7ffff11/go.mod h1:0gwbe1F2iBIjuQ9AH0DbQhL+Dpr5GofU8fgYyXk+ykk= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596 h1:t2OQTpPJnrPDGlvA+3FwJptMTt6MEPdzK1Wt99oaefQ= github.com/pingcap/log v0.0.0-20190307075452-bd41d9273596/go.mod h1:WpHUKhNZ18v116SvGrmjkA9CBhYmuUTKL+p8JC9ANEw= -github.com/pingcap/parser v0.0.0-20190421035202-497ae72425c5 h1:csU8uAxq5yIK6SMRtuYkiazo7cFP9iio9vSK1uh23nU= -github.com/pingcap/parser v0.0.0-20190421035202-497ae72425c5/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= +github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7 h1:cbTQGLE0X69qL2nrvtG9HP4u5sBdVGyoIJOhc+KtJXc= +github.com/pingcap/parser v0.0.0-20190505094039-595d728571a7/go.mod h1:1FNvfp9+J0wvc4kl8eGNh7Rqrxveg15jJoWo/a0uHwA= github.com/pingcap/pd v2.1.0-rc.4+incompatible h1:/buwGk04aHO5odk/+O8ZOXGs4qkUjYTJ2UpCJXna8NE= github.com/pingcap/pd v2.1.0-rc.4+incompatible/go.mod h1:nD3+EoYes4+aNNODO99ES59V83MZSI+dFbhyr667a0E= github.com/pingcap/tidb-tools v2.1.3-0.20190116051332-34c808eef588+incompatible h1:e9Gi/LP9181HT3gBfSOeSBA+5JfemuE4aEAhqNgoE4k= diff --git a/infoschema/builder.go b/infoschema/builder.go index 9a7abea0ec985..21d5ac881904d 100644 --- a/infoschema/builder.go +++ b/infoschema/builder.go @@ -16,6 +16,7 @@ package infoschema import ( "fmt" "sort" + "strings" "github.com/pingcap/errors" "github.com/pingcap/parser/charset" @@ -173,6 +174,7 @@ func (b *Builder) applyCreateTable(m *meta.Meta, dbInfo *model.DBInfo, tableID i fmt.Sprintf("(Table ID %d)", tableID), ) } + ConvertCharsetCollateToLowerCaseIfNeed(tblInfo) ConvertOldVersionUTF8ToUTF8MB4IfNeed(tblInfo) if alloc == nil { @@ -198,6 +200,20 @@ func 
(b *Builder) applyCreateTable(m *meta.Meta, dbInfo *model.DBInfo, tableID i return nil } +// ConvertCharsetCollateToLowerCaseIfNeed converts the charset / collation of a table and its columns to lower case, +// if the table's version is prior to TableInfoVersion3. +func ConvertCharsetCollateToLowerCaseIfNeed(tbInfo *model.TableInfo) { + if tbInfo.Version >= model.TableInfoVersion3 { + return + } + tbInfo.Charset = strings.ToLower(tbInfo.Charset) + tbInfo.Collate = strings.ToLower(tbInfo.Collate) + for _, col := range tbInfo.Columns { + col.Charset = strings.ToLower(col.Charset) + col.Collate = strings.ToLower(col.Collate) + } +} + // ConvertOldVersionUTF8ToUTF8MB4IfNeed convert old version UTF8 to UTF8MB4 if config.TreatOldVersionUTF8AsUTF8MB4 is enable. func ConvertOldVersionUTF8ToUTF8MB4IfNeed(tbInfo *model.TableInfo) { if !config.GetGlobalConfig().TreatOldVersionUTF8AsUTF8MB4 || tbInfo.Version >= model.TableInfoVersion2 {