diff --git a/cmd/explaintest/r/new_character_set_builtin.result b/cmd/explaintest/r/new_character_set_builtin.result index 75a3b89e5d192..e0c6b33e521da 100644 --- a/cmd/explaintest/r/new_character_set_builtin.result +++ b/cmd/explaintest/r/new_character_set_builtin.result @@ -226,3 +226,24 @@ select md5(b) from t where md5(b) = 'a45d4af7b243e7f393fa09bed72ac73e'; md5(b) a45d4af7b243e7f393fa09bed72ac73e set @@tidb_enable_vectorized_expression = false; +drop table if exists t; +create table t (a char(20)); +insert into t values ('65'), ('123456'), ('123456789'); +select char(a using gbk), char(a using utf8), char(a) from t; +char(a using gbk) char(a using utf8) char(a) +A A A +釦 â@ â@ +NULL [Í [Í +select char(12345678 using gbk); +char(12345678 using gbk) +ç³°N +set @@tidb_enable_vectorized_expression = true; +select char(a using gbk), char(a using utf8), char(a) from t; +char(a using gbk) char(a using utf8) char(a) +A A A +釦 â@ â@ +NULL [Í [Í +select char(12345678 using gbk); +char(12345678 using gbk) +ç³°N +set @@tidb_enable_vectorized_expression = false; diff --git a/cmd/explaintest/t/new_character_set_builtin.test b/cmd/explaintest/t/new_character_set_builtin.test index 4258c5742e308..4260119b7994e 100644 --- a/cmd/explaintest/t/new_character_set_builtin.test +++ b/cmd/explaintest/t/new_character_set_builtin.test @@ -122,3 +122,13 @@ set @@tidb_enable_vectorized_expression = true; select md5(b) from t where md5(b) = 'a45d4af7b243e7f393fa09bed72ac73e'; set @@tidb_enable_vectorized_expression = false; +-- test for builtin function char() +drop table if exists t; +create table t (a char(20)); +insert into t values ('65'), ('123456'), ('123456789'); +select char(a using gbk), char(a using utf8), char(a) from t; +select char(12345678 using gbk); +set @@tidb_enable_vectorized_expression = true; +select char(a using gbk), char(a using utf8), char(a) from t; +select char(12345678 using gbk); +set @@tidb_enable_vectorized_expression = false; diff --git a/expression/builtin_string.go b/expression/builtin_string.go index b5b495321e16c..411431c7318f5 100644 --- a/expression/builtin_string.go +++ b/expression/builtin_string.go @@ -2446,8 +2446,14 @@ func (b *builtinCharSig) evalString(row chunk.Row) (string, bool, error) { } bigints = append(bigints, val) } - result := string(b.convertToBytes(bigints)) - return result, false, nil + + dBytes := b.convertToBytes(bigints) + resultBytes, err := charset.NewEncoding(b.tp.Charset).Decode(nil, dBytes) + if err != nil { + b.ctx.GetSessionVars().StmtCtx.AppendWarning(err) + return "", true, nil + } + return string(resultBytes), false, nil } type charLengthFunctionClass struct { diff --git a/expression/builtin_string_test.go b/expression/builtin_string_test.go index 6a98d367107e6..28d98d0215091 100644 --- a/expression/builtin_string_test.go +++ b/expression/builtin_string_test.go @@ -1430,34 +1430,37 @@ func TestChar(t *testing.T) { }() tbl := []struct { - str string - iNum int64 - fNum float64 - result string + str string + iNum int64 + fNum float64 + charset interface{} + result interface{} + warnings int }{ - {"65", 66, 67.5, "ABD"}, // float - {"65", 16740, 67.5, "AAdD"}, // large num - {"65", -1, 67.5, "A\xff\xff\xff\xffD"}, // nagtive int - {"a", -1, 67.5, "\x00\xff\xff\xff\xffD"}, // invalid 'a' + {"65", 66, 67.5, "utf8", "ABD", 0}, // float + {"65", 16740, 67.5, "utf8", "AAdD", 0}, // large num + {"65", -1, 67.5, nil, "A\xff\xff\xff\xffD", 0}, // nagtive int + {"a", -1, 67.5, nil, "\x00\xff\xff\xff\xffD", 0}, // invalid 'a' + // TODO: Uncomment it when issue #29685 be closed + // {"65", -1, 67.5, "utf8", nil, 1}, // with utf8, return nil + // {"a", -1, 67.5, "utf8", nil, 2}, // with utf8, return nil + // TODO: Uncomment it when gbk be added into charsetInfos + // {"1234567", 1234567, 1234567, "gbk", "謬謬謬", 0}, // test char for gbk + // {"123456789", 123456789, 123456789, "gbk", nil, 3}, // invalid 123456789 in gbk } for _, v := range tbl { - for _, char := range []interface{}{"utf8", nil} { - fc := funcs[ast.CharFunc] - f, err := fc.getFunction(ctx, datumsToConstants(types.MakeDatums(v.str, v.iNum, v.fNum, char))) - require.NoError(t, err) - require.NotNil(t, f) - r, err := evalBuiltinFunc(f, chunk.Row{}) - require.NoError(t, err) - trequire.DatumEqual(t, types.NewDatum(v.result), r) + fc := funcs[ast.CharFunc] + f, err := fc.getFunction(ctx, datumsToConstants(types.MakeDatums(v.str, v.iNum, v.fNum, v.charset))) + require.NoError(t, err) + require.NotNil(t, f) + r, err := evalBuiltinFunc(f, chunk.Row{}) + require.NoError(t, err) + trequire.DatumEqual(t, types.NewDatum(v.result), r) + if v.warnings != 0 { + warnings := ctx.GetSessionVars().StmtCtx.GetWarnings() + require.Equal(t, v.warnings, len(warnings)) } } - - fc := funcs[ast.CharFunc] - f, err := fc.getFunction(ctx, datumsToConstants(types.MakeDatums("65", 66, nil))) - require.NoError(t, err) - r, err := evalBuiltinFunc(f, chunk.Row{}) - require.NoError(t, err) - trequire.DatumEqual(t, types.NewDatum("AB"), r) } func TestCharLength(t *testing.T) { diff --git a/expression/builtin_string_vec.go b/expression/builtin_string_vec.go index 531d2379e1135..cf8df9c4278ed 100644 --- a/expression/builtin_string_vec.go +++ b/expression/builtin_string_vec.go @@ -2301,16 +2301,26 @@ func (b *builtinCharSig) vecEvalString(input *chunk.Chunk, result *chunk.Column) for i := 0; i < l-1; i++ { bufint[i] = buf[i].Int64s() } + var resultBytes []byte + enc := charset.NewEncoding(b.tp.Charset) for i := 0; i < n; i++ { bigints = bigints[0:0] for j := 0; j < l-1; j++ { if buf[j].IsNull(i) { + result.AppendNull() continue } bigints = append(bigints, bufint[j][i]) } - tempString := string(b.convertToBytes(bigints)) - result.AppendString(tempString) + dBytes := b.convertToBytes(bigints) + + resultBytes, err := enc.Decode(resultBytes, dBytes) + if err != nil { + b.ctx.GetSessionVars().StmtCtx.AppendWarning(err) + result.AppendNull() + continue + } + result.AppendString(string(resultBytes)) } return nil }