Skip to content

Commit

Permalink
let latin1 use UTF8MB3 instead of UTF8MB4
Browse files Browse the repository at this point in the history
  • Loading branch information
Defined2014 committed May 17, 2022
1 parent e255739 commit f7ae84b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
12 changes: 9 additions & 3 deletions parser/charset/encoding_latin1.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,22 @@ import (
)

// EncodingLatin1Impl is the instance of encodingLatin1.
// In TiDB, latin1 is an alias for utf8, so uses utf8 implementation for latin1.
var EncodingLatin1Impl = &encodingLatin1{encodingUTF8{encodingBase{enc: encoding.Nop}}}
// In TiDB, latin1 is an alias for utf8, so uses utf8mb3 implementation for latin1.
var EncodingLatin1Impl = &encodingUTF8MB3Strict{
encodingUTF8{
encodingBase{
enc: encoding.Nop,
},
},
}

// init stores EncodingLatin1Impl in its own self field.
// NOTE(review): presumably this lets methods promoted from the embedded base
// types call back into the outermost implementation — confirm against the
// definition of encodingBase, which is not shown here.
func init() {
EncodingLatin1Impl.self = EncodingLatin1Impl
}

// encodingLatin1 is compatible with latin1 in old versions of TiDB.
type encodingLatin1 struct {
encodingUTF8
encodingUTF8MB3Strict
}

// Name implements Encoding interface.
Expand Down
9 changes: 9 additions & 0 deletions parser/charset/encoding_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,15 @@ func TestEncodingValidate(t *testing.T) {
{charset.CharsetUTF8, oxfffefd, "???", 0, false},
{charset.CharsetUTF8, "中文" + oxfffefd, "中文???", 6, false},
{charset.CharsetUTF8, string(utf8.RuneError), "�", 3, true},
{charset.CharsetLatin1, "", "", 0, true},
{charset.CharsetLatin1, "qwerty", "qwerty", 6, true},
{charset.CharsetLatin1, "qwÊrty", "qwÊrty", 7, true},
{charset.CharsetLatin1, "qwÊ合法字符串", "qwÊ合法字符串", 19, true},
{charset.CharsetLatin1, "😂", "?", 0, false},
{charset.CharsetLatin1, "valid_str😂", "valid_str?", 9, false},
{charset.CharsetLatin1, oxfffefd, "???", 0, false},
{charset.CharsetLatin1, "中文" + oxfffefd, "中文???", 6, false},
{charset.CharsetLatin1, string(utf8.RuneError), "�", 3, true},
{charset.CharsetGBK, "", "", 0, true},
{charset.CharsetGBK, "asdf", "asdf", 4, true},
{charset.CharsetGBK, "中文", "中文", 6, true},
Expand Down

0 comments on commit f7ae84b

Please sign in to comment.