Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parser: revert latin1 as an alias for utf8mb4 #35025

Merged
merged 9 commits into from
May 30, 2022
2 changes: 1 addition & 1 deletion expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ func TestStringBuiltin(t *testing.T) {
result = tk.MustQuery("select ord('123'), ord(123), ord(''), ord('你好'), ord(NULL), ord('👍')")
result.Check(testkit.Rows("49 49 0 14990752 <nil> 4036989325"))
result = tk.MustQuery("select ord(X''), ord(X'6161'), ord(X'e4bd'), ord(X'e4bda0'), ord(_ascii'你'), ord(_latin1'你')")
result.Check(testkit.Rows("0 97 228 228 228 14990752"))
result.Check(testkit.Rows("0 97 228 228 228 228"))

// for space
result = tk.MustQuery(`select space(0), space(2), space(-1), space(1.1), space(1.9)`)
Expand Down
25 changes: 24 additions & 1 deletion parser/charset/encoding_latin1.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
package charset

import (
"bytes"
"golang.org/x/text/encoding"
)

// EncodingLatin1Impl is the instance of encodingLatin1.
// In TiDB, latin1 is an alias for utf8, so uses utf8 implementation for latin1.
// TiDB uses the utf8 implementation for the latin1 charset for backward compatibility.
var EncodingLatin1Impl = &encodingLatin1{encodingUTF8{encodingBase{enc: encoding.Nop}}}

func init() {
Expand All @@ -34,3 +35,25 @@ type encodingLatin1 struct {
// Name returns CharsetLatin1.
func (e *encodingLatin1) Name() string {
return CharsetLatin1
}

// Peek implements Encoding interface.
// It returns the leading byte of src as a one-byte slice, treating every byte
// as a complete character; an empty src is handed back unchanged.
func (e *encodingLatin1) Peek(src []byte) []byte {
	if len(src) > 0 {
		return src[:1]
	}
	return src
}

// IsValid implements Encoding interface.
// It reports true for every input; src is not inspected.
func (e *encodingLatin1) IsValid(src []byte) bool {
return true
}

// Tp implements Encoding interface.
// It always returns EncodingTpLatin1.
func (e *encodingLatin1) Tp() EncodingTp {
return EncodingTpLatin1
}

// Transform returns src unchanged together with a nil error.
// Neither dest nor op is used: no bytes are written to dest and no
// conversion or validation is performed on src.
func (e *encodingLatin1) Transform(dest *bytes.Buffer, src []byte, op Op) ([]byte, error) {
return src, nil
}
4 changes: 2 additions & 2 deletions parser/mysql/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,9 @@ const (
MaxBytesOfCharacter = 4
)

// IsUTF8Charset checks if charset is utf8, utf8mb4 or latin1.
// IsUTF8Charset checks if charset is utf8 or utf8mb4.
func IsUTF8Charset(charset string) bool {
return charset == UTF8Charset || charset == UTF8MB4Charset || charset == Latin1Charset
return charset == UTF8Charset || charset == UTF8MB4Charset
}

// RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition.
Expand Down