Skip to content

Commit

Permalink
parser: revert latin1 as an alias for utf8mb4 (#35025)
Browse files Browse the repository at this point in the history
ref #34008
  • Loading branch information
Defined2014 committed May 30, 2022
1 parent daa1691 commit b7eeb41
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 4 deletions.
2 changes: 1 addition & 1 deletion expression/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ func TestStringBuiltin(t *testing.T) {
result = tk.MustQuery("select ord('123'), ord(123), ord(''), ord('你好'), ord(NULL), ord('👍')")
result.Check(testkit.Rows("49 49 0 14990752 <nil> 4036989325"))
result = tk.MustQuery("select ord(X''), ord(X'6161'), ord(X'e4bd'), ord(X'e4bda0'), ord(_ascii'你'), ord(_latin1'你')")
result.Check(testkit.Rows("0 97 228 228 228 14990752"))
result.Check(testkit.Rows("0 97 228 228 228 228"))

// for space
result = tk.MustQuery(`select space(0), space(2), space(-1), space(1.1), space(1.9)`)
Expand Down
25 changes: 24 additions & 1 deletion parser/charset/encoding_latin1.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
package charset

import (
"bytes"
"golang.org/x/text/encoding"
)

// EncodingLatin1Impl is the instance of encodingLatin1.
// In TiDB, latin1 is an alias for utf8, so uses utf8 implementation for latin1.
// TiDB uses the utf8 implementation for the latin1 charset for backward compatibility.
var EncodingLatin1Impl = &encodingLatin1{encodingUTF8{encodingBase{enc: encoding.Nop}}}

func init() {
Expand All @@ -34,3 +35,25 @@ type encodingLatin1 struct {
func (e *encodingLatin1) Name() string {
// Report the latin1 charset name constant, which is declared outside this view.
return CharsetLatin1
}

// Peek implements Encoding interface.
// It yields a one-byte prefix of src when src is non-empty; an empty src is
// handed back unchanged.
func (e *encodingLatin1) Peek(src []byte) []byte {
	if len(src) > 0 {
		// Only the first byte is exposed as the next character.
		return src[:1]
	}
	return src
}

// IsValid implements Encoding interface.
// It reports true for every input; src is never inspected.
func (e *encodingLatin1) IsValid(src []byte) bool {
return true
}

// Tp implements Encoding interface.
// It identifies this encoding by returning EncodingTpLatin1.
func (e *encodingLatin1) Tp() EncodingTp {
return EncodingTpLatin1
}

// Transform returns src unchanged together with a nil error; dest and op are
// not used.
// NOTE(review): presumably satisfies the Encoding interface like the sibling
// methods in this file — confirm against the interface definition.
func (e *encodingLatin1) Transform(dest *bytes.Buffer, src []byte, op Op) ([]byte, error) {
return src, nil
}
4 changes: 2 additions & 2 deletions parser/mysql/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,9 @@ const (
MaxBytesOfCharacter = 4
)

// IsUTF8Charset checks if charset is utf8, utf8mb4 or latin1.
// IsUTF8Charset checks if charset is utf8 or utf8mb4.
func IsUTF8Charset(charset string) bool {
return charset == UTF8Charset || charset == UTF8MB4Charset || charset == Latin1Charset
return charset == UTF8Charset || charset == UTF8MB4Charset
}

// RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition.
Expand Down

0 comments on commit b7eeb41

Please sign in to comment.