Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddl: check column data when execute alter table/column charset #34534

Closed
wants to merge 18 commits into from
64 changes: 64 additions & 0 deletions ddl/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/pingcap/tidb/meta/autoid"
"github.com/pingcap/tidb/metrics"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/parser/terror"
Expand Down Expand Up @@ -1556,6 +1557,61 @@ func updateChangingObjState(changingCol *model.ColumnInfo, changingIdxs []*model
}
}

func needCheckLatin1Convert(oldCol, newCol *model.ColumnInfo) bool {
return oldCol.GetCharset() == charset.CharsetLatin1 && newCol.GetCharset() != charset.CharsetLatin1
}

func (w *worker) checkLatin1Convert(job *model.Job, schema, t model.CIStr, oldCols []*model.ColumnInfo, newCols []*model.ColumnInfo) (err error) {
sctx, err := w.sessPool.get()
if err != nil {
return errors.Trace(err)
}
sctx.GetSessionVars().StmtCtx.TruncateAsWarning = false
sctx.GetSessionVars().StmtCtx.IgnoreTruncate = false
defer w.sessPool.put(sctx)

var buf strings.Builder
buf.WriteString("select ")
for i, col := range oldCols {
if i == 0 {
buf.WriteString(col.Name.L)
} else {
buf.WriteString("," + col.Name.L)
}
}
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
buf.WriteString(" from %n.%n where ")
paramsList := make([]interface{}, 0, 2+3*len(oldCols))
paramsList = append(paramsList, schema.L, t.L)
for i, col := range oldCols {
if i == 0 {
buf.WriteString("convert(%n using %n) != %n")
paramsList = append(paramsList, col.Name.L, newCols[i].GetCharset(), col.Name.L)
} else {
buf.WriteString(" or convert(%n using %n) != %n")
paramsList = append(paramsList, col.Name.L, newCols[i].GetCharset(), col.Name.L)
}
}
buf.WriteString(" limit 1")
rows, _, err := sctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(w.ddlJobCtx, nil, buf.String(), paramsList...)
if err != nil {
return errors.Trace(err)
}
rowCount := len(rows)
if rowCount != 0 {
row := rows[0]
for i, col := range oldCols {
_, err = table.CastValue(sctx, row.GetDatum(i, &col.FieldType), newCols[i], false, false)
if err != nil {
if job.ReorgMeta.SQLMode.HasStrictMode() {
return errors.Trace(err)
}
job.AddWarning(err)
}
}
}
return nil
}

// doModifyColumn updates the column information and reorders all columns. It does not support modifying column data.
func (w *worker) doModifyColumn(
d *ddlCtx, t *meta.Meta, job *model.Job, dbInfo *model.DBInfo, tblInfo *model.TableInfo,
Expand Down Expand Up @@ -1588,6 +1644,14 @@ func (w *worker) doModifyColumn(
}
}

if needCheckLatin1Convert(oldCol, newCol) {
err := w.checkLatin1Convert(job, dbInfo.Name, tblInfo.Name, []*model.ColumnInfo{oldCol}, []*model.ColumnInfo{newCol})
if err != nil {
job.State = model.JobStateRollingback
return ver, errors.Trace(err)
}
}

if err := adjustTableInfoAfterModifyColumn(tblInfo, newCol, oldCol, pos); err != nil {
job.State = model.JobStateRollingback
return ver, errors.Trace(err)
Expand Down
51 changes: 51 additions & 0 deletions ddl/db_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,57 @@ func TestChangingTableCharset(t *testing.T) {
}
}

func TestModifyInvalidColumnData(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
tk := testkit.NewTestKit(t, store)

tk.MustExec("use test")
tk.MustExec("set @@tidb_skip_utf8_check = 1")

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90)")
tk.MustGetErrCode("alter table t convert to charset utf8", errno.ErrTruncatedWrongValueForField)
tk.MustGetErrCode("alter table t modify column a varchar(20) charset utf8", errno.ErrTruncatedWrongValueForField)
tk.MustGetErrCode("alter table t modify column a varchar(19) charset utf8", errno.ErrTruncatedWrongValueForField)

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20), b varchar(20), c varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x3F, 0x3F, 0x90)")
tk.MustGetErrCode("alter table t convert to charset utf8", errno.ErrTruncatedWrongValueForField)

tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a char(20) charset latin1, b char(20), c char(20) charset latin1)")
tk.MustExec("insert into t values (0x70, 0x61, 0x90)")
tk.MustGetErrMsg("alter table t convert to charset utf8mb4", "[table:1366]Incorrect string value '\\x90' for column 'c'")

tk.MustExec("set sql_mode=''")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90)")
tk.MustExec("alter table t convert to charset utf8")
zimulala marked this conversation as resolved.
Show resolved Hide resolved
tk.MustQuery("show warnings;").Check(testkit.Rows("Warning 1366 Incorrect string value '\\x90' for column 'a'"))
tk.MustQuery("select hex(a) from t").Check(testkit.Rows("90"))
tk.MustQuery("show create table t;").Check(testkit.Rows(
"t CREATE TABLE `t` (\n `a` varchar(20) DEFAULT NULL\n) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin"))
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved

tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a char(20) charset latin1, b char(20), c char(20) charset latin1)")
tk.MustExec("insert into t values (0x70, 0x61, 0x90)")
tk.MustExec("insert into t values (0x90, 0x61, 0x70)")
tk.MustExec("alter table t convert to charset utf8mb4")
tk.MustQuery("show warnings;").Check(testkit.Rows("Warning 1366 Incorrect string value '\\x90' for column 'c'"))

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20), b varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90, 0x90)")
tk.MustExec("alter table t modify column a varchar(19) charset utf8")
tk.MustExec("alter table t modify column b varchar(20) charset utf8")
// change varchar(20) to varchar(19) will do reorg which uses '?' instead of invalid characters
tk.MustQuery("select hex(a), hex(b) from t").Check(testkit.Rows("3F 90"))
}

func TestModifyColumnOption(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
Expand Down
9 changes: 8 additions & 1 deletion ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -4975,14 +4975,21 @@ func (d *ddl) AlterTableCharsetAndCollate(ctx sessionctx.Context, ident ast.Iden
return nil
}

tzName, tzOffset := ddlutil.GetTimeZone(ctx)
job := &model.Job{
SchemaID: schema.ID,
TableID: tb.Meta().ID,
SchemaName: schema.Name.L,
TableName: tb.Meta().Name.L,
Type: model.ActionModifyTableCharsetAndCollate,
BinlogInfo: &model.HistoryInfo{},
Args: []interface{}{toCharset, toCollate, needsOverwriteCols},
ReorgMeta: &model.DDLReorgMeta{
SQLMode: ctx.GetSessionVars().SQLMode,
Warnings: make(map[errors.ErrorID]*terror.Error),
WarningsCount: make(map[errors.ErrorID]int64),
Location: &model.TimeZoneLocation{Name: tzName, Offset: tzOffset},
},
Args: []interface{}{toCharset, toCollate, needsOverwriteCols},
}
err = d.DoDDLJob(ctx, job)
err = d.callHookOnChanged(err)
Expand Down
2 changes: 1 addition & 1 deletion ddl/ddl_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ func (w *worker) runDDLJob(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64,
case model.ActionAddTablePartition:
ver, err = w.onAddTablePartition(d, t, job)
case model.ActionModifyTableCharsetAndCollate:
ver, err = onModifyTableCharsetAndCollate(d, t, job)
ver, err = w.onModifyTableCharsetAndCollate(d, t, job)
case model.ActionRecoverTable:
ver, err = w.onRecoverTable(d, t, job)
case model.ActionLockTable:
Expand Down
26 changes: 22 additions & 4 deletions ddl/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -1015,7 +1015,7 @@ func onModifyTableComment(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _
return ver, nil
}

func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) {
func (w *worker) onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) {
var toCharset, toCollate string
var needsOverwriteCols bool
if err := job.DecodeArgs(&toCharset, &toCollate, &needsOverwriteCols); err != nil {
Expand All @@ -1040,11 +1040,26 @@ func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ve
return ver, errors.Trace(err)
}

tblInfo.Charset = toCharset
tblInfo.Collate = toCollate

if needsOverwriteCols {
oldCols := make([]*model.ColumnInfo, 0, len(tblInfo.Columns))
newCols := make([]*model.ColumnInfo, 0, len(tblInfo.Columns))
// update column charset.
for _, col := range tblInfo.Columns {
newCol := col.Clone()
newCol.SetCharset(toCharset)
newCol.SetCollate(toCollate)
if field_types.HasCharset(&col.FieldType) && needCheckLatin1Convert(col, newCol) {
oldCols = append(oldCols, col)
newCols = append(newCols, newCol)
}
}
if len(oldCols) != 0 {
err := w.checkLatin1Convert(job, dbInfo.Name, tblInfo.Name, oldCols, newCols)
if err != nil {
job.State = model.JobStateCancelled
return ver, err
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
}
}
for _, col := range tblInfo.Columns {
if field_types.HasCharset(&col.FieldType) {
col.SetCharset(toCharset)
Expand All @@ -1056,6 +1071,9 @@ func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ve
}
}

tblInfo.Charset = toCharset
tblInfo.Collate = toCollate

ver, err = updateVersionAndTableInfo(d, t, job, tblInfo, true)
if err != nil {
return ver, errors.Trace(err)
Expand Down
11 changes: 11 additions & 0 deletions parser/model/ddl.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,17 @@ func (job *Job) GetWarnings() (map[errors.ErrorID]*terror.Error, map[errors.Erro
return job.ReorgMeta.Warnings, job.ReorgMeta.WarningsCount
}

// AddWarning add a warning for ddl job
func (job *Job) AddWarning(warning error) {
warn := errors.Cause(warning).(*terror.Error)
if _, ok := job.ReorgMeta.Warnings[warn.ID()]; !ok {
job.ReorgMeta.Warnings[warn.ID()] = warn
job.ReorgMeta.WarningsCount[warn.ID()] = 1
} else {
job.ReorgMeta.WarningsCount[warn.ID()]++
}
}

// Encode encodes job with json format.
// updateRawArgs is used to determine whether to update the raw args.
func (job *Job) Encode(updateRawArgs bool) ([]byte, error) {
Expand Down