Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddl: check column data when execute alter table/column charset #34534

Closed
wants to merge 18 commits into from
62 changes: 62 additions & 0 deletions ddl/column.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/pingcap/tidb/meta/autoid"
"github.com/pingcap/tidb/metrics"
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/mysql"
"github.com/pingcap/tidb/parser/terror"
Expand Down Expand Up @@ -1556,6 +1557,53 @@ func updateChangingObjState(changingCol *model.ColumnInfo, changingIdxs []*model
}
}

func needCheckLatin1Convert(oldCol, newCol *model.ColumnInfo) bool {
if oldCol.GetCharset() != newCol.GetCharset() {
return oldCol.GetCharset() == charset.CharsetLatin1
}
return false
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
}

func (w *worker) checkLatin1Convert(schema, t model.CIStr, oldCols []*model.ColumnInfo, newCols []*model.ColumnInfo) (err error) {
sctx, err := w.sessPool.get()
if err != nil {
return errors.Trace(err)
}
sctx.GetSessionVars().StmtCtx.TruncateAsWarning = false
sctx.GetSessionVars().StmtCtx.IgnoreTruncate = false
defer w.sessPool.put(sctx)

var buf strings.Builder
buf.WriteString("select * from %n.%n where ")
tangenta marked this conversation as resolved.
Show resolved Hide resolved
paramsList := make([]interface{}, 0, 2+3*len(oldCols))
paramsList = append(paramsList, schema.L, t.L)
for i, col := range oldCols {
if i == 0 {
buf.WriteString("convert(%n using %n) != %n")
paramsList = append(paramsList, col.Name.L, newCols[i].GetCharset(), col.Name.L)
} else {
buf.WriteString(" or convert(%n using %n) != %n")
paramsList = append(paramsList, col.Name.L, newCols[i].GetCharset(), col.Name.L)
}
}
buf.WriteString(" limit 1")
rows, _, err := sctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(w.ddlJobCtx, nil, buf.String(), paramsList...)
if err != nil {
return errors.Trace(err)
}
rowCount := len(rows)
if rowCount != 0 {
row := rows[0]
for i, col := range oldCols {
_, err = table.CastValue(sctx, row.GetDatum(col.Offset, &col.FieldType), newCols[i], false, false)
if err != nil {
return errors.Trace(err)
}
}
}
return nil
}

// doModifyColumn updates the column information and reorders all columns. It does not support modifying column data.
func (w *worker) doModifyColumn(
d *ddlCtx, t *meta.Meta, job *model.Job, dbInfo *model.DBInfo, tblInfo *model.TableInfo,
Expand Down Expand Up @@ -1588,6 +1636,20 @@ func (w *worker) doModifyColumn(
}
}

if needCheckLatin1Convert(oldCol, newCol) {
err := w.checkLatin1Convert(dbInfo.Name, tblInfo.Name, []*model.ColumnInfo{oldCol}, []*model.ColumnInfo{newCol})
if err != nil {
if job.ReorgMeta.SQLMode.HasStrictMode() {
job.State = model.JobStateRollingback
return ver, errors.Trace(err)
} else {
warn := errors.Cause(err).(*terror.Error)
job.ReorgMeta.Warnings[warn.ID()] = warn
job.ReorgMeta.WarningsCount[warn.ID()] = 1
}
}
}

if err := adjustTableInfoAfterModifyColumn(tblInfo, newCol, oldCol, pos); err != nil {
job.State = model.JobStateRollingback
return ver, errors.Trace(err)
Expand Down
40 changes: 40 additions & 0 deletions ddl/db_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -909,6 +909,46 @@ func TestChangingTableCharset(t *testing.T) {
}
}

func TestModifyInvalidColumnData(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
tk := testkit.NewTestKit(t, store)

tk.MustExec("use test")
tk.MustExec("set @@tidb_skip_utf8_check = 1")

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90)")
tk.MustGetErrCode("alter table t convert to charset utf8", errno.ErrTruncatedWrongValueForField)
tk.MustGetErrCode("alter table t modify column a varchar(20) charset utf8", errno.ErrTruncatedWrongValueForField)
tk.MustGetErrCode("alter table t modify column a varchar(19) charset utf8", errno.ErrTruncatedWrongValueForField)

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20), b varchar(20), c varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x3F, 0x3F, 0x90)")
tk.MustGetErrCode("alter table t convert to charset utf8", errno.ErrTruncatedWrongValueForField)

tk.MustExec("set sql_mode=''")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90)")
tk.MustExec("alter table t convert to charset utf8")
zimulala marked this conversation as resolved.
Show resolved Hide resolved
tk.MustQuery("show warnings;").Check(testkit.Rows("Warning 1366 Incorrect string value '\\x90' for column 'a'"))
tk.MustQuery("select hex(a) from t").Check(testkit.Rows("90"))
tk.MustQuery("show create table t;").Check(testkit.Rows(
"t CREATE TABLE `t` (\n `a` varchar(20) DEFAULT NULL\n) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin"))
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved

tk.MustExec("drop table if exists t")
tk.MustExec("create table t (a varchar(20), b varchar(20)) charset = latin1")
tk.MustExec("insert into t values (0x90, 0x90)")
tk.MustExec("alter table t modify column a varchar(19) charset utf8")
tk.MustExec("alter table t modify column b varchar(20) charset utf8")
// change varchar(20) to varchar(19) will do reorg which uses '?' instead of invalid characters
tk.MustQuery("select hex(a), hex(b) from t").Check(testkit.Rows("3F 90"))
tk.MustExec("set sql_mode=default")
}

func TestModifyColumnOption(t *testing.T) {
store, clean := testkit.CreateMockStore(t)
defer clean()
Expand Down
9 changes: 8 additions & 1 deletion ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -4975,14 +4975,21 @@ func (d *ddl) AlterTableCharsetAndCollate(ctx sessionctx.Context, ident ast.Iden
return nil
}

tzName, tzOffset := ddlutil.GetTimeZone(ctx)
job := &model.Job{
SchemaID: schema.ID,
TableID: tb.Meta().ID,
SchemaName: schema.Name.L,
TableName: tb.Meta().Name.L,
Type: model.ActionModifyTableCharsetAndCollate,
BinlogInfo: &model.HistoryInfo{},
Args: []interface{}{toCharset, toCollate, needsOverwriteCols},
ReorgMeta: &model.DDLReorgMeta{
SQLMode: ctx.GetSessionVars().SQLMode,
Warnings: make(map[errors.ErrorID]*terror.Error),
WarningsCount: make(map[errors.ErrorID]int64),
Location: &model.TimeZoneLocation{Name: tzName, Offset: tzOffset},
},
Args: []interface{}{toCharset, toCollate, needsOverwriteCols},
}
err = d.DoDDLJob(ctx, job)
err = d.callHookOnChanged(err)
Expand Down
2 changes: 1 addition & 1 deletion ddl/ddl_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -909,7 +909,7 @@ func (w *worker) runDDLJob(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64,
case model.ActionAddTablePartition:
ver, err = w.onAddTablePartition(d, t, job)
case model.ActionModifyTableCharsetAndCollate:
ver, err = onModifyTableCharsetAndCollate(d, t, job)
ver, err = w.onModifyTableCharsetAndCollate(d, t, job)
case model.ActionRecoverTable:
ver, err = w.onRecoverTable(d, t, job)
case model.ActionLockTable:
Expand Down
33 changes: 29 additions & 4 deletions ddl/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/pingcap/tidb/parser/ast"
"github.com/pingcap/tidb/parser/charset"
"github.com/pingcap/tidb/parser/model"
"github.com/pingcap/tidb/parser/terror"
field_types "github.com/pingcap/tidb/parser/types"
"github.com/pingcap/tidb/table"
"github.com/pingcap/tidb/table/tables"
Expand Down Expand Up @@ -1015,7 +1016,7 @@ func onModifyTableComment(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _
return ver, nil
}

func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) {
func (w *worker) onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ver int64, _ error) {
var toCharset, toCollate string
var needsOverwriteCols bool
if err := job.DecodeArgs(&toCharset, &toCollate, &needsOverwriteCols); err != nil {
Expand All @@ -1040,11 +1041,32 @@ func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ve
return ver, errors.Trace(err)
}

tblInfo.Charset = toCharset
tblInfo.Collate = toCollate

if needsOverwriteCols {
oldCols := make([]*model.ColumnInfo, 0, len(tblInfo.Columns))
newCols := make([]*model.ColumnInfo, 0, len(tblInfo.Columns))
// update column charset.
for _, col := range tblInfo.Columns {
newCol := col.Clone()
newCol.SetCharset(toCharset)
newCol.SetCollate(toCollate)
if field_types.HasCharset(&col.FieldType) && needCheckLatin1Convert(col, newCol) {
oldCols = append(oldCols, col)
newCols = append(newCols, newCol)
}
}
if len(oldCols) != 0 {
err := w.checkLatin1Convert(dbInfo.Name, tblInfo.Name, oldCols, newCols)
if err != nil {
if job.ReorgMeta.SQLMode.HasStrictMode() {
job.State = model.JobStateCancelled
return ver, err
} else {
warn := errors.Cause(err).(*terror.Error)
job.ReorgMeta.Warnings[warn.ID()] = warn
job.ReorgMeta.WarningsCount[warn.ID()] = 1
}
}
Defined2014 marked this conversation as resolved.
Show resolved Hide resolved
}
for _, col := range tblInfo.Columns {
if field_types.HasCharset(&col.FieldType) {
col.SetCharset(toCharset)
Expand All @@ -1056,6 +1078,9 @@ func onModifyTableCharsetAndCollate(d *ddlCtx, t *meta.Meta, job *model.Job) (ve
}
}

tblInfo.Charset = toCharset
tblInfo.Collate = toCollate

ver, err = updateVersionAndTableInfo(d, t, job, tblInfo, true)
if err != nil {
return ver, errors.Trace(err)
Expand Down