Skip to content

Commit

Permalink
collations: fix sorting in UCA900 collations (#12555)
Browse files Browse the repository at this point in the history
* collations: fix sorting in UCA900 collations

When using the fast iterator to _compare_ two strings with a UCA
collation, we need to keep in mind that the weights in the collation are
in BIG ENDIAN (this is the output format for the weight strings, so we
store the weights this way), so comparing them directly will not result
in the proper collation order. They need to be byte-swapped before they
can be compared with an arithmetic operation!

Signed-off-by: Vicent Marti <vmg@strn.cat>

* collations: comment

Signed-off-by: Vicent Marti <vmg@strn.cat>

---------

Signed-off-by: Vicent Marti <vmg@strn.cat>
  • Loading branch information
vmg authored Mar 6, 2023
1 parent 9b60844 commit 041b1d7
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 6 deletions.
16 changes: 10 additions & 6 deletions go/mysql/collations/internal/uca/iter_fast_900.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (it *FastIterator900) FastForward32(it2 *FastIterator900) int {

p1 := it.input
p2 := it2.input
var w1, w2 uint32
var w1, w2 uint16

for len(p1) >= 4 && len(p2) >= 4 {
dword1 := *(*uint32)(unsafe.Pointer(&p1[0]))
Expand All @@ -75,17 +75,20 @@ func (it *FastIterator900) FastForward32(it2 *FastIterator900) int {

if nonascii == 0 {
if dword1 != dword2 {
// Use the weight string fast tables for quick weight comparisons;
// see (*FastIterator900).NextWeightBlock64 for a description of
// the table format
table := it.fastTable
if w1, w2 = table[p1[0]], table[p2[0]]; w1 != w2 {
if w1, w2 = uint16(table[p1[0]]), uint16(table[p2[0]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[1]], table[p2[1]]; w1 != w2 {
if w1, w2 = uint16(table[p1[1]]), uint16(table[p2[1]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[2]], table[p2[2]]; w1 != w2 {
if w1, w2 = uint16(table[p1[2]]), uint16(table[p2[2]]); w1 != w2 {
goto mismatch
}
if w1, w2 = table[p1[3]], table[p2[3]]; w1 != w2 {
if w1, w2 = uint16(table[p1[3]]), uint16(table[p2[3]]); w1 != w2 {
goto mismatch
}
}
Expand Down Expand Up @@ -114,7 +117,8 @@ mismatch:
it.unicode++
return 0
}
return int(w1) - int(w2)
// The weights must be byte-swapped before comparison because they're stored in big endian
return int(bits.ReverseBytes16(w1)) - int(bits.ReverseBytes16(w2))
}

// NextWeightBlock64 takes a byte slice of 16 bytes and fills it with the next
Expand Down
39 changes: 39 additions & 0 deletions go/mysql/collations/uca_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package collations
import (
"bytes"
"fmt"
"math/rand"
"sort"
"strings"
"sync"
Expand All @@ -27,6 +28,7 @@ import (

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"

"vitess.io/vitess/go/mysql/collations/charset"
"vitess.io/vitess/go/vt/vthash"
Expand Down Expand Up @@ -916,6 +918,43 @@ func TestEqualities(t *testing.T) {
}
}

// TestUCACollationOrder checks that the listed UCA 0900 collations order a set
// of plain ASCII strings the same way strings.Compare does, both for direct
// pairwise comparisons and when sorting a randomly permuted copy of the list.
func TestUCACollationOrder(t *testing.T) {
	// Already in ascending order; every entry differs from the others in its
	// first byte.
	expected := []string{"aaaa", "bbbb", "cccc", "dddd", "zzzz"}

	for _, name := range []string{"utf8mb4_0900_ai_ci", "utf8mb4_0900_as_cs"} {
		coll := testcollation(t, name)

		// less reports whether a collates strictly before b.
		less := func(a, b string) bool {
			return coll.Collate([]byte(a), []byte(b), false) < 0
		}

		// Every ordered pair (including a string against itself) must agree
		// with the bytewise comparison.
		for _, lhs := range expected {
			for _, rhs := range expected {
				bytewise := strings.Compare(lhs, rhs) < 0
				collated := less(lhs, rhs)
				require.Equalf(t, bytewise, collated, "failed to compare %q vs %q", lhs, rhs)
			}
		}

		// Randomly permute a copy, sort it with the collation, and expect the
		// original order back.
		shuffled := slices.Clone(expected)
		for i := range shuffled {
			j := rand.Intn(i + 1)
			shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
		}
		slices.SortFunc(shuffled, less)
		require.Equal(t, expected, shuffled)
	}
}

func TestCaseChangeEqualities(t *testing.T) {
for _, teststr := range AllTestStrings {
str1 := []byte(teststr.Content)
Expand Down

0 comments on commit 041b1d7

Please sign in to comment.