Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert column to destination charset in DML applications #27

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions go/logic/inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ func (this *Inspector) inspectOriginalAndGhostTables() (err error) {
this.migrationContext.MappedSharedColumns.SetEnumToTextConversion(column.Name)
this.migrationContext.MappedSharedColumns.SetEnumValues(column.Name, column.EnumValues)
}
if column.Name == mappedColumn.Name && column.Charset != mappedColumn.Charset {
this.migrationContext.MappedSharedColumns.SetCharsetConversion(column.Name, column.Charset, mappedColumn.Charset)
}
}

for _, column := range this.migrationContext.UniqueKey.Columns.Columns() {
Expand Down
4 changes: 4 additions & 0 deletions go/sql/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ func buildColumnsPreparedValues(columns *ColumnList) []string {
token = fmt.Sprintf("ELT(?, %s)", column.EnumValues)
} else if column.Type == JSONColumnType {
token = "convert(? using utf8mb4)"
} else if column.charsetConversion != nil {
token = fmt.Sprintf("convert(convert(? using %s) using %s)", column.charsetConversion.FromCharset, column.charsetConversion.ToCharset)
} else {
token = "?"
}
Expand Down Expand Up @@ -114,6 +116,8 @@ func BuildSetPreparedClause(columns *ColumnList) (result string, err error) {
setToken = fmt.Sprintf("%s=ELT(?, %s)", EscapeName(column.Name), column.EnumValues)
} else if column.Type == JSONColumnType {
setToken = fmt.Sprintf("%s=convert(? using utf8mb4)", EscapeName(column.Name))
} else if column.charsetConversion != nil {
setToken = fmt.Sprintf("%s=convert(convert(? using %s) using %s)", EscapeName(column.Name), column.charsetConversion.FromCharset, column.charsetConversion.ToCharset)
} else {
setToken = fmt.Sprintf("%s=?", EscapeName(column.Name))
}
Expand Down
14 changes: 14 additions & 0 deletions go/sql/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ type TimezoneConversion struct {
ToTimezone string
}

// CharsetConversion describes a character set conversion attached to a
// column: the charset its values start in and the charset they end up in.
type CharsetConversion struct {
	// ToCharset is the name of the destination character set.
	ToCharset string
	// FromCharset is the name of the source character set.
	FromCharset string
}

type Column struct {
Name string
IsUnsigned bool
Expand All @@ -40,6 +45,7 @@ type Column struct {
EnumValues string
timezoneConversion *TimezoneConversion
enumToTextConversion bool
charsetConversion *CharsetConversion
// add Octet length for binary type, fix bytes with suffix "00" get clipped in mysql binlog.
// https://github.com/github/gh-ost/issues/909
BinaryOctetLength uint
Expand Down Expand Up @@ -211,6 +217,14 @@ func (this *ColumnList) SetEnumValues(columnName string, enumValues string) {
this.GetColumn(columnName).EnumValues = enumValues
}

// SetCharsetConversion attaches a charset conversion to the column named
// columnName, recording fromCharset as the source and toCharset as the
// destination character set. Any previously stored conversion on that column
// is replaced.
func (this *ColumnList) SetCharsetConversion(columnName string, fromCharset string, toCharset string) {
	column := this.GetColumn(columnName)
	column.charsetConversion = &CharsetConversion{
		FromCharset: fromCharset,
		ToCharset:   toCharset,
	}
}

// IsCharsetConversion reports whether the column named columnName has a
// charset conversion attached to it.
func (this *ColumnList) IsCharsetConversion(columnName string) bool {
	column := this.GetColumn(columnName)
	return column.charsetConversion != nil
}

// String renders the list as its column names joined by commas, with no
// surrounding whitespace.
func (this *ColumnList) String() string {
	names := this.Names()
	return strings.Join(names, ",")
}
Expand Down
2 changes: 1 addition & 1 deletion localtests/convert-utf8mb4/create.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ create table gh_ost_test (
primary key(id)
) auto_increment=1;

insert into gh_ost_test values (null, 'átesting');
insert into gh_ost_test values (null, 'átesting', '', '');


insert into gh_ost_test values (null, 'Hello world, Καλημέρα κόσμε, コンニチハ', 'átesting0', 'initial');
Expand Down
24 changes: 24 additions & 0 deletions localtests/latin1text-to-utf8mb4/create.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- Test fixture: a latin1 table with a latin1 text column, seeded with one
-- non-ASCII value, plus a scheduled event that keeps issuing inserts,
-- updates and deletes against it for 60 seconds.
drop table if exists gh_ost_test;
create table gh_ost_test (
id int auto_increment,
t text charset latin1 collate latin1_swedish_ci,
primary key(id)
) auto_increment=1 charset latin1 collate latin1_swedish_ci;

-- Seed row: char(189) is a single byte above the 7-bit ASCII range.
insert into gh_ost_test values (null, char(189));

-- Recreate the event from scratch so reruns start from a clean state.
drop event if exists gh_ost_test;
-- Switch the client statement delimiter so the event body can contain
-- ordinary semicolon-terminated statements.
delimiter ;;
-- Fires once per second, starting now and stopping after 60 seconds.
-- The event is dropped when its schedule completes (not preserved).
create event gh_ost_test
on schedule every 1 second
starts current_timestamp
ends current_timestamp + interval 60 second
on completion not preserve
enable
do
begin
-- A pure-ASCII row (hex digest of a random number).
insert into gh_ost_test values (null, md5(rand()));
-- A row holding the same non-ASCII byte as the seed row.
insert into gh_ost_test values (null, char(189));
-- Rewrite the newest row to a different non-ASCII byte, then remove every
-- row holding that byte, so each tick produces an update and a delete.
update gh_ost_test set t=char(190) order by id desc limit 1;
delete from gh_ost_test where t=char(190);
end ;;
1 change: 1 addition & 0 deletions localtests/latin1text-to-utf8mb4/extra_args
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
--alter "convert to character set utf8mb4 collate utf8mb4_unicode_ci"
1 change: 1 addition & 0 deletions localtests/latin1text-to-utf8mb4/ignored_versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(5.5)