Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

collations: fix sorting in UCA900 collations #12555

Merged
merged 2 commits into from
Mar 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions go/mysql/collations/internal/uca/iter_fast_900.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (it *FastIterator900) FastForward32(it2 *FastIterator900) int {

p1 := it.input
p2 := it2.input
var w1, w2 uint32
var w1, w2 uint16

for len(p1) >= 4 && len(p2) >= 4 {
dword1 := *(*uint32)(unsafe.Pointer(&p1[0]))
Expand All @@ -75,17 +75,20 @@ func (it *FastIterator900) FastForward32(it2 *FastIterator900) int {

if nonascii == 0 {
if dword1 != dword2 {
// Use the weight string fast tables for quick weight comparisons;
// see (*FastIterator900).NextWeightBlock64 for a description of
// the table format
table := it.fastTable
if w1, w2 = table[p1[0]], table[p2[0]]; w1 != w2 {
if w1, w2 = uint16(table[p1[0]]), uint16(table[p2[0]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[1]], table[p2[1]]; w1 != w2 {
if w1, w2 = uint16(table[p1[1]]), uint16(table[p2[1]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[2]], table[p2[2]]; w1 != w2 {
if w1, w2 = uint16(table[p1[2]]), uint16(table[p2[2]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[3]], table[p2[3]]; w1 != w2 {
if w1, w2 = uint16(table[p1[3]]), uint16(table[p2[3]]); w1 != w2 {
goto mismatch
}
}
Expand Down Expand Up @@ -114,7 +117,8 @@ mismatch:
it.unicode++
return 0
}
return int(w1) - int(w2)
// The weights must be byte-swapped before comparison because they're stored in big-endian byte order
return int(bits.ReverseBytes16(w1)) - int(bits.ReverseBytes16(w2))
}

// NextWeightBlock64 takes a byte slice of 16 bytes and fills it with the next
Expand Down
39 changes: 39 additions & 0 deletions go/mysql/collations/uca_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package collations
import (
"bytes"
"fmt"
"math/rand"
"sort"
"strings"
"sync"
Expand All @@ -27,6 +28,7 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"

"vitess.io/vitess/go/mysql/collations/internal/charset"
"vitess.io/vitess/go/vt/vthash"
Expand Down Expand Up @@ -916,6 +918,43 @@ func TestEqualities(t *testing.T) {
}
}

// TestUCACollationOrder checks that the listed 0900 collations order a small
// set of strings the same way strings.Compare does, both when comparing every
// pair directly and when sorting a shuffled copy of the set.
func TestUCACollationOrder(t *testing.T) {
	collationNames := []string{
		"utf8mb4_0900_ai_ci",
		"utf8mb4_0900_as_cs",
	}

	// Listed in ascending order; doubles as the expected result of the sort.
	expected := []string{
		"aaaa",
		"bbbb",
		"cccc",
		"dddd",
		"zzzz",
	}

	for _, name := range collationNames {
		coll := testcollation(t, name)

		// less reports whether a sorts strictly before b under the collation.
		less := func(a, b string) bool {
			return coll.Collate([]byte(a), []byte(b), false) < 0
		}

		// Every ordered pair (including a string against itself) must agree
		// with strings.Compare on the "strictly less than" relation.
		for _, lhs := range expected {
			for _, rhs := range expected {
				wantLess := strings.Compare(lhs, rhs) < 0
				require.Equalf(t, wantLess, less(lhs, rhs), "failed to compare %q vs %q", lhs, rhs)
			}
		}

		// Shuffle a copy of the input, then sort it with the collation and
		// make sure the original ordering is recovered.
		shuffled := slices.Clone(expected)
		for i := range shuffled {
			j := rand.Intn(i + 1)
			shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
		}
		slices.SortFunc(shuffled, less)
		require.Equal(t, expected, shuffled)
	}
}

func TestCaseChangeEqualities(t *testing.T) {
for _, teststr := range AllTestStrings {
str1 := []byte(teststr.Content)
Expand Down