Skip to content

Commit

Permalink
*: set latin1 to be an alias for utf8mb4 in TiDB
Browse files Browse the repository at this point in the history
  • Loading branch information
Defined2014 committed May 5, 2022
1 parent 4e21680 commit 55c4527
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 31 deletions.
6 changes: 4 additions & 2 deletions ddl/ddl_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -4177,8 +4177,10 @@ func checkModifyCharsetAndCollation(toCharset, toCollate, origCharset, origColla

if (origCharset == charset.CharsetUTF8 && toCharset == charset.CharsetUTF8MB4) ||
(origCharset == charset.CharsetUTF8 && toCharset == charset.CharsetUTF8) ||
(origCharset == charset.CharsetUTF8MB4 && toCharset == charset.CharsetUTF8MB4) {
// TiDB only allow utf8 to be changed to utf8mb4, or changing the collation when the charset is utf8/utf8mb4.
(origCharset == charset.CharsetUTF8MB4 && toCharset == charset.CharsetUTF8MB4) ||
(origCharset == charset.CharsetLatin1 && toCharset == charset.CharsetUTF8) ||
(origCharset == charset.CharsetLatin1 && toCharset == charset.CharsetUTF8MB4) {
// TiDB only allow utf8/latin1 to be changed to utf8mb4, or changing the collation when the charset is utf8/utf8mb4/latin1.
return nil
}

Expand Down
28 changes: 2 additions & 26 deletions parser/charset/encoding_latin1.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,18 @@
package charset

import (
"bytes"

"golang.org/x/text/encoding"
)

// EncodingLatin1Impl is the instance of encodingLatin1.
// TiDB uses utf8 implementation for latin1 charset because of the backward compatibility.
// In TiDB, latin1 is an alias for utf8, so uses utf8 implementation for latin1
var EncodingLatin1Impl = &encodingLatin1{encodingUTF8{encodingBase{enc: encoding.Nop}}}

func init() {
EncodingLatin1Impl.self = EncodingLatin1Impl
}

// encodingLatin1 compatibles with latin1 in old version TiDB.
// encodingLatin1 compatibles with latin1 in old version TiDB
type encodingLatin1 struct {
encodingUTF8
}
Expand All @@ -36,25 +34,3 @@ type encodingLatin1 struct {
func (e *encodingLatin1) Name() string {
return CharsetLatin1
}

// Peek implements Encoding interface.
func (e *encodingLatin1) Peek(src []byte) []byte {
if len(src) == 0 {
return src
}
return src[:1]
}

// IsValid implements Encoding interface.
func (e *encodingLatin1) IsValid(src []byte) bool {
return true
}

// Tp implements Encoding interface.
func (e *encodingLatin1) Tp() EncodingTp {
return EncodingTpLatin1
}

func (e *encodingLatin1) Transform(dest *bytes.Buffer, src []byte, op Op) ([]byte, error) {
return src, nil
}
5 changes: 3 additions & 2 deletions parser/mysql/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,7 @@ var CollationNames = map[string]uint8{
const (
UTF8Charset = "utf8"
UTF8MB4Charset = "utf8mb4"
Latin1Charset = "latin1"
DefaultCharset = UTF8MB4Charset
// DefaultCollationID is utf8mb4_bin(46)
DefaultCollationID = 46
Expand All @@ -592,9 +593,9 @@ const (
MaxBytesOfCharacter = 4
)

// IsUTF8Charset checks if charset is utf8 or utf8mb4
// IsUTF8Charset checks if charset is utf8, utf8mb4 or latin1
func IsUTF8Charset(charset string) bool {
return charset == UTF8Charset || charset == UTF8MB4Charset
return charset == UTF8Charset || charset == UTF8MB4Charset || charset == Latin1Charset
}

// RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition.
Expand Down
2 changes: 1 addition & 1 deletion util/collate/collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func NewCollationEnabled() bool {
func CompatibleCollate(collate1, collate2 string) bool {
if (collate1 == "utf8mb4_general_ci" || collate1 == "utf8_general_ci") && (collate2 == "utf8mb4_general_ci" || collate2 == "utf8_general_ci") {
return true
} else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") {
} else if (collate1 == "utf8mb4_bin" || collate1 == "utf8_bin" || collate1 == "latin1_bin") && (collate2 == "utf8mb4_bin" || collate2 == "utf8_bin") {
return true
} else if (collate1 == "utf8mb4_unicode_ci" || collate1 == "utf8_unicode_ci") && (collate2 == "utf8mb4_unicode_ci" || collate2 == "utf8_unicode_ci") {
return true
Expand Down

0 comments on commit 55c4527

Please sign in to comment.