Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: enhance column charset collation diff #595

Merged
merged 5 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 23 additions & 10 deletions go/mysql/collations/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ type colldefaults struct {
// Environment is a collation environment for a MySQL version, which contains
// a database of collations and defaults for that specific version.
type Environment struct {
version collver
byName map[string]Collation
byID map[ID]Collation
byCharset map[string]*colldefaults
unsupported map[string]ID
version collver
byName map[string]Collation
byID map[ID]Collation
byCharset map[string]*colldefaults
byCharsetName map[ID]string
unsupported map[string]ID
}

// LookupByName returns the collation with the given name. The collation
Expand Down Expand Up @@ -158,18 +159,21 @@ func NewEnvironment(serverVersion string) *Environment {

func makeEnv(version collver) *Environment {
env := &Environment{
version: version,
byName: make(map[string]Collation),
byID: make(map[ID]Collation),
byCharset: make(map[string]*colldefaults),
unsupported: make(map[string]ID),
version: version,
byName: make(map[string]Collation),
byID: make(map[ID]Collation),
byCharset: make(map[string]*colldefaults),
unsupported: make(map[string]ID),
byCharsetName: make(map[ID]string),
}

for collid, vi := range globalVersionInfo {
var ournames []string
var ourcharsets []string
for _, alias := range vi.alias {
if alias.mask&version != 0 {
ournames = append(ournames, alias.name)
ourcharsets = append(ourcharsets, alias.charset)
}
}
if len(ournames) == 0 {
Expand Down Expand Up @@ -206,6 +210,11 @@ func makeEnv(version collver) *Environment {
}
defaults.Binary = collation
}

for i, _ := range ournames {
cs := ourcharsets[i]
env.byCharsetName[collid] = cs
}
}

for from, to := range version.charsetAliases() {
Expand Down Expand Up @@ -300,3 +309,7 @@ func (env *Environment) ParseConnectionCharset(csname string) (uint8, error) {
}
return uint8(collid), nil
}

// LookupCharsetName returns the character set name recorded in this
// environment for the collation with the given ID. It returns an empty
// string when the environment holds no entry for that ID.
func (env *Environment) LookupCharsetName(coll ID) string {
	charset, found := env.byCharsetName[coll]
	if !found {
		return ""
	}
	return charset
}
769 changes: 392 additions & 377 deletions go/mysql/collations/mysqlversion.go

Large diffs are not rendered by default.

109 changes: 100 additions & 9 deletions go/vt/schemadiff/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ limitations under the License.
package schemadiff

import (
"fmt"
"strings"

"vitess.io/vitess/go/vt/sqlparser"
)

Expand Down Expand Up @@ -70,26 +70,117 @@ func NewModifyColumnDiffByDefinition(definition *sqlparser.ColumnDefinition) *Mo
}

type ColumnDefinitionEntity struct {
columnDefinition *sqlparser.ColumnDefinition
ColumnDefinition *sqlparser.ColumnDefinition
tableCharsetCollate *charsetCollate
}

func NewColumnDefinitionEntity(c *sqlparser.ColumnDefinition) *ColumnDefinitionEntity {
return &ColumnDefinitionEntity{columnDefinition: c}
func NewColumnDefinitionEntity(c *sqlparser.ColumnDefinition, tableCharsetCollate *charsetCollate) *ColumnDefinitionEntity {
return &ColumnDefinitionEntity{ColumnDefinition: c, tableCharsetCollate: tableCharsetCollate}
}

// ColumnDiff compares this column definition with another column definition, and sees what it takes to
// change this column to look like the other column.
// It returns a ModifyColumnDiff if changes are found, or nil if not.
// the other table may be of different name; its name is ignored.
func (c *ColumnDefinitionEntity) ColumnDiff(other *ColumnDefinitionEntity, _ *DiffHints) *ModifyColumnDiff {
if sqlparser.Equals.RefOfColumnDefinition(c.columnDefinition, other.columnDefinition) {
return nil
func (c *ColumnDefinitionEntity) ColumnDiff(other *ColumnDefinitionEntity, hints *DiffHints) (*ModifyColumnDiff, error) {
cClone := c // not real clone yet
otherClone := other // not real clone yet

if c.IsTextual() || other.IsTextual() {
cClone = c.Clone()
otherClone = other.Clone()
switch hints.ColumnCharsetCollateStrategy {
case ColumnCharsetCollateStrict:
if err := cClone.SetExplicitCharsetCollate(); err != nil {
return nil, err
}

if err := otherClone.SetExplicitCharsetCollate(); err != nil {
return nil, err
}

case ColumnCharsetCollateIgnoreAlways:
cClone.SetCharsetCollateEmpty()
otherClone.SetCharsetCollateEmpty()
}
}
if sqlparser.Equals.RefOfColumnDefinition(cClone.ColumnDefinition, otherClone.ColumnDefinition) {
return nil, nil
}

return NewModifyColumnDiffByDefinition(other.columnDefinition)
return NewModifyColumnDiffByDefinition(other.ColumnDefinition), nil
}

// IsTextual returns true when this column is of textual type, and is capable of having a character set property
func (c *ColumnDefinitionEntity) IsTextual() bool {
return charsetTypes[strings.ToLower(c.columnDefinition.Type.Type)]
return charsetTypes[strings.ToLower(c.ColumnDefinition.Type.Type)]
}

// Clone returns a new ColumnDefinitionEntity whose column definition is a
// copy produced by sqlparser.Clone, so that callers may modify the clone's
// definition independently. The tableCharsetCollate pointer is carried over
// as-is: the clone and the original refer to the same table charset/collation.
func (c *ColumnDefinitionEntity) Clone() *ColumnDefinitionEntity {
	return &ColumnDefinitionEntity{
		tableCharsetCollate: c.tableCharsetCollate,
		ColumnDefinition:    sqlparser.Clone(c.ColumnDefinition),
	}
}

// SetExplicitCharsetCollate enriches this column definition with collation and charset. Those may be
// already present, or perhaps just one of them is present (in which case we use the one to populate the other),
// or both might be missing, in which case we use the table's charset/collation.
// Normally in schemadiff we work the opposite way: we strive to have the minimal equivalent representation
// of a definition. But this function can be used (often in conjunction with Clone()) to enrich a column definition
// so as to have explicit and authoritative view on any particular column.
//
// Non-textual columns are left untouched. The column definition is modified in place.
// An error is returned when the missing half of the charset/collation pair cannot be determined.
func (c *ColumnDefinitionEntity) SetExplicitCharsetCollate() error {
	if !c.IsTextual() {
		return nil
	}
	col := c.ColumnDefinition

	if col.Type.Charset.Name != "" && col.Type.Options.Collate == "" {
		// Charset defined without collation. Assign the default collation for that charset.
		collation := defaultCharsetForCollation(col.Type.Charset.Name)
		if collation == "" {
			// Report the column's own charset: that is the value the lookup was made with.
			return fmt.Errorf("unable to determine collation for column %s with charset %s", col.Name, col.Type.Charset.Name)
		}
		col.Type.Options.Collate = collation
	}
	if col.Type.Charset.Name == "" && col.Type.Options.Collate != "" {
		// Column has explicit collation but no charset. We can infer the charset from the collation.
		collation := collationEnv.LookupByName(col.Type.Options.Collate)
		if collation == nil {
			return fmt.Errorf("unable to determine charset for column %s with collation %s", col.Name, col.Type.Options.Collate)
		}
		charset := collationEnv.LookupCharsetName(collation.ID())
		if charset == "" {
			return fmt.Errorf("unable to determine charset for column %s with collation %s", col.Name, collation.Name())
		}
		col.Type.Charset.Name = charset
	}
	if col.Type.Charset.Name == "" {
		// Still nothing? Then the column has neither charset nor collation (a lone collation
		// was resolved, or rejected, just above). Assign the table's charset/collation.
		if c.tableCharsetCollate == nil {
			return fmt.Errorf("unable to determine charset for column %s: no table charset/collation available", col.Name)
		}
		col.Type.Charset.Name = c.tableCharsetCollate.charset
		col.Type.Options.Collate = c.tableCharsetCollate.collate
		if col.Type.Options.Collate == "" {
			// The table has no explicit collation either: use the default for the table's charset.
			collation := defaultCharsetForCollation(c.tableCharsetCollate.charset)
			if collation == "" {
				return fmt.Errorf("unable to determine collation for column %s with charset %s", col.Name, c.tableCharsetCollate.charset)
			}
			col.Type.Options.Collate = collation
		}
	}
	return nil
}

// SetCharsetCollateEmpty strips charset and collation information from this
// column definition: the charset name and the collation are reset to the empty
// string and the charset's binary flag is cleared. Columns that are not
// textual are left unchanged. The column definition is modified in place.
func (c *ColumnDefinitionEntity) SetCharsetCollateEmpty() {
	if !c.IsTextual() {
		return
	}
	col := c.ColumnDefinition
	col.Type.Charset.Name = ""
	col.Type.Charset.Binary = false
	col.Type.Options.Collate = ""
}
164 changes: 164 additions & 0 deletions go/vt/schemadiff/column_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package schemadiff

import (
"github.com/stretchr/testify/assert"
"testing"
"vitess.io/vitess/go/vt/sqlparser"
)

// TestColumnCharset diffs pairs of CREATE TABLE statements under various
// table/column charset-collate strategies and compares the canonical form of
// the resulting diff with the expected ALTER TABLE statement. An empty
// expectation means no diff is expected.
func TestColumnCharset(t *testing.T) {
	testCases := []struct {
		schema1 string
		schema2 string
		hints   *DiffHints
		expect  string
	}{
		{
			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci `,

			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL COLLATE utf8mb4_0900_ai_ci,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci `,
			&DiffHints{TableCharsetCollateStrategy: TableCharsetCollateStrict, ColumnCharsetCollateStrategy: ColumnCharsetCollateStrict},
			"",
		},
		// TODO (possible enhancement): this is a complex case. Here `name` should keep
		// utf8mb4_0900_ai_ci, but table and column charset/collate are currently handled
		// separately, so it is hard to resolve.
		{
			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci `,

			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL COLLATE utf8mb4_0900_ai_ci,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci `,
			&DiffHints{TableCharsetCollateStrategy: TableCharsetCollateStrict, ColumnCharsetCollateStrategy: ColumnCharsetCollateStrict},
			"ALTER TABLE `b2` COLLATE utf8mb4_general_ci",
		},
		{
			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci `,

			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci `,
			&DiffHints{TableCharsetCollateStrategy: TableCharsetCollateStrict, ColumnCharsetCollateStrategy: ColumnCharsetCollateStrict},
			"ALTER TABLE `b2` MODIFY COLUMN `name` varchar(255) NOT NULL, COLLATE utf8mb4_general_ci",
		},
		{
			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci `,

			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci `,
			&DiffHints{TableCharsetCollateStrategy: TableCharsetCollateStrict, ColumnCharsetCollateStrategy: ColumnCharsetCollateIgnoreAlways},
			"ALTER TABLE `b2` COLLATE utf8mb4_general_ci",
		},
		{
			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL COLLATE utf8mb4_general_ci,
PRIMARY KEY (id)
) ENGINE=InnoDB `,

			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL COLLATE utf8mb4_0900_ai_ci,
PRIMARY KEY (id)
) ENGINE=InnoDB `,
			&DiffHints{TableCharsetCollateStrategy: TableCharsetCollateStrict, ColumnCharsetCollateStrategy: ColumnCharsetCollateIgnoreAlways},
			"",
		},
		{
			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL COLLATE utf8mb4_general_ci,
PRIMARY KEY (id)
) ENGINE=InnoDB `,

			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL COLLATE utf8mb4_0900_ai_ci,
PRIMARY KEY (id)
) ENGINE=InnoDB `,
			&DiffHints{TableCharsetCollateStrategy: TableCharsetCollateStrict, ColumnCharsetCollateStrategy: ColumnCharsetCollateStrict},
			"ALTER TABLE `b2` MODIFY COLUMN `name` varchar(255) COLLATE utf8mb4_0900_ai_ci NOT NULL",
		},
		{
			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
age1 int NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB `,

			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
age2 int NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB `,
			&DiffHints{TableCharsetCollateStrategy: TableCharsetCollateStrict, ColumnCharsetCollateStrategy: ColumnCharsetCollateStrict},
			"ALTER TABLE `b2` DROP COLUMN `age1`, ADD COLUMN `age2` int NOT NULL",
		},
		{
			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
age1 int NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB `,

			`CREATE TABLE b2 (
id int NOT NULL AUTO_INCREMENT,
name varchar(255) NOT NULL,
age2 int NOT NULL,
PRIMARY KEY (id)
) ENGINE=InnoDB `,
			&DiffHints{TableCharsetCollateStrategy: TableCharsetCollateStrict, ColumnCharsetCollateStrategy: ColumnCharsetCollateStrict, ColumnRenameStrategy: ColumnRenameHeuristicStatement},
			"ALTER TABLE `b2` RENAME COLUMN `age1` TO `age2`",
		},
	}

	for _, tc := range testCases {
		// A failed parse leaves a nil statement; skip the case instead of
		// panicking on the type assertion below.
		stmt1, err := sqlparser.Parse(tc.schema1)
		if !assert.NoError(t, err) {
			continue
		}
		create1, ok := stmt1.(*sqlparser.CreateTable)
		if !assert.True(t, ok, "schema1 is not a CREATE TABLE statement") {
			continue
		}
		from := &CreateTableEntity{CreateTable: create1}

		stmt2, err := sqlparser.Parse(tc.schema2)
		if !assert.NoError(t, err) {
			continue
		}
		create2, ok := stmt2.(*sqlparser.CreateTable)
		if !assert.True(t, ok, "schema2 is not a CREATE TABLE statement") {
			continue
		}
		to := &CreateTableEntity{CreateTable: create2}

		diff, err := from.Diff(to, tc.hints)
		if !assert.NoError(t, err) {
			continue
		}
		// Only render the statement when a diff was returned; "no diff" is
		// represented by the empty string in the expectations above.
		var diffStr string
		if diff != nil {
			diffStr = diff.CanonicalStatementString()
		}
		assert.Equal(t, tc.expect, diffStr)
	}
}
16 changes: 7 additions & 9 deletions go/vt/schemadiff/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1012,14 +1012,11 @@ func TestHints(t *testing.T) {
expectedDiff: false,
},
{
name: "TableCharsetCollateStrategy ignore always43",
schema1: "CREATE TABLE `t1` (\n `id` int NOT NULL AUTO_INCREMENT,\n `name` varchar(255) NOT NULL,\n PRIMARY KEY (`id`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ",
schema2: "CREATE TABLE `t1` (\n `id` int NOT NULL AUTO_INCREMENT,\n `name` varchar(255) COLLATE utf8mb4_general_ci NOT NULL,\n PRIMARY KEY (`id`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci",
hints: DiffHints{TableCharsetCollateStrategy: TableCharsetCollateIgnoreAlways},
// reason: table charset and collate is ignore; but column charset and collate is not equal.
// todo newborn22: now the function is not enable to set col charset and collate based on table's.
// diff:ALTER TABLE `t1` MODIFY COLUMN `name` varchar(255) COLLATE utf8mb4_general_ci NOT NULL
expectedDiff: true,
name: "TableCharsetCollateStrategy ignore always3",
schema1: "CREATE TABLE `t1` (\n `id` int NOT NULL AUTO_INCREMENT,\n `name` varchar(255) NOT NULL,\n PRIMARY KEY (`id`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci ",
schema2: "CREATE TABLE `t1` (\n `id` int NOT NULL AUTO_INCREMENT,\n `name` varchar(255) COLLATE utf8mb4_general_ci NOT NULL,\n PRIMARY KEY (`id`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci",
hints: DiffHints{TableCharsetCollateStrategy: TableCharsetCollateIgnoreAlways},
expectedDiff: false,
},
{
name: "TableCharsetCollateStrategy ignore always4",
Expand All @@ -1044,7 +1041,8 @@ func TestHints(t *testing.T) {
schema2: "CREATE TABLE `t1` (\n `id` int NOT NULL AUTO_INCREMENT,\n `name` varchar(255) NOT NULL,\n PRIMARY KEY (`id`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;",
hints: DiffHints{TableCharsetCollateStrategy: TableCharsetCollateIgnoreAlways},
// reason: table charset doesn't change, so column charset and collate doesn't need to change too.
expectedDiff: false,
// diff: ALTER TABLE `t1` MODIFY COLUMN `name` varchar(255) NOT NULL, because column charset will set as table's if not set explicitly.
expectedDiff: true,
},
{
name: "TableCharsetCollateStrategy ignore; column change because of table charset change",
Expand Down
1 change: 1 addition & 0 deletions go/vt/schemadiff/env.go
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package schemadiff
Loading
Loading