Skip to content

Commit

Permalink
sql/sem/tree: properly encode serialize physical values of enums
Browse files Browse the repository at this point in the history
The optimizer generates check constraints for enums. It does so by
writing these constraints as physical values. Unfortunately the way
we serialized these physical values was bogus if the enum's physical
value was more than one byte long. The reason was because of a difference
between string literal and byte string literals and their use of `\x`.

This fixes that by using hex byte string literals for these enum
values.

Fixes cockroachdb#61814

Release note (bug fix): Fixed a bug whereby enums which have large numbers of
values may cause unexpected errors when attempting to read from tables with
columns using that enum.
  • Loading branch information
ajwerner committed Mar 25, 2021
1 parent da3c3dc commit 5cf07c4
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 17 deletions.
2 changes: 1 addition & 1 deletion pkg/ccl/logictestccl/testdata/logic_test/regional_by_row
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ TABLE regional_by_row_table
├── tableoid oid [hidden] [system]
├── j_inverted_key bytes not null [virtual-inverted]
├── FAMILY fam_0_pk_pk2_a_b_j_crdb_region (pk, pk2, a, b, j, crdb_region)
├── CHECK (crdb_region IN (b'\x40':::@100054, b'\x80':::@100054, b'\xc0':::@100054))
├── CHECK (crdb_region IN (x'40':::@100054, x'80':::@100054, x'c0':::@100054))
├── PRIMARY INDEX primary
│ ├── crdb_region crdb_internal_region not null default (default_to_database_primary_region(gateway_region())::@100054) [hidden] (implicit)
│ ├── pk int not null
Expand Down
9 changes: 6 additions & 3 deletions pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package tree

import (
"bytes"
"encoding/hex"
"fmt"
"math"
"math/big"
Expand Down Expand Up @@ -1443,12 +1444,14 @@ func (d *DBytes) Format(ctx *FmtCtx) {
ctx.WriteString(`"\\x`)
writeAsHexString(ctx, d)
ctx.WriteString(`"`)
} else if f.HasFlags(fmtFormatByteLiterals) {
ctx.WriteByte('x')
ctx.WriteByte('\'')
_, _ = hex.NewEncoder(ctx).Write([]byte(*d))
ctx.WriteByte('\'')
} else {
withQuotes := !f.HasFlags(FmtFlags(lexbase.EncBareStrings))
if withQuotes {
if f.HasFlags(fmtFormatByteLiterals) {
ctx.WriteByte('b')
}
ctx.WriteByte('\'')
}
ctx.WriteString("\\x")
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/sem/tree/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ const (
fmtStaticallyFormatUserDefinedTypes

// fmtFormatByteLiterals instructs bytes to be formatted as byte literals
// rather than string literals. For example, the bytes \x40 will be formatted
// as b'\x40' rather than '\x40'.
// rather than string literals. For example, the bytes \x40ab will be formatted
// as x'40ab' rather than '\x40ab'.
fmtFormatByteLiterals
)

Expand Down
11 changes: 7 additions & 4 deletions pkg/sql/sem/tree/format_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,11 @@ func TestFormatExpr2(t *testing.T) {
// enum related code in tree expects that the length of
// PhysicalRepresentations is equal to the length of
// LogicalRepresentations.
PhysicalRepresentations: make([][]byte, len(enumMembers)),
IsMemberReadOnly: make([]bool, len(enumMembers)),
PhysicalRepresentations: [][]byte{
{0x42, 0x1},
{0x42},
},
IsMemberReadOnly: make([]bool, len(enumMembers)),
},
}
enumHi, err := tree.MakeDEnumFromLogicalRepresentation(enumType, enumMembers[0])
Expand Down Expand Up @@ -409,13 +412,13 @@ func TestFormatExpr2(t *testing.T) {
types.MakeTuple([]*types.T{enumType, enumType}),
enumHi, enumHello),
tree.FmtSerializable,
`(b'\x':::@100500, b'\x':::@100500)`,
`(x'4201':::@100500, x'42':::@100500)`,
},
{tree.NewDTuple(
types.MakeTuple([]*types.T{enumType, enumType}),
tree.DNull, enumHi),
tree.FmtSerializable,
`(NULL:::@100500, b'\x':::@100500)`,
`(NULL:::@100500, x'4201':::@100500)`,
},
}

Expand Down
14 changes: 7 additions & 7 deletions pkg/sql/table_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,30 +459,30 @@ func TestSerializedUDTsInTableDescriptor(t *testing.T) {
// Test a simple UDT as the default value.
{
"x greeting DEFAULT ('hello')",
`b'\x80':::@100053`,
`x'80':::@100053`,
getDefault,
},
{
"x greeting DEFAULT ('hello':::greeting)",
`b'\x80':::@100053`,
`x'80':::@100053`,
getDefault,
},
// Test when a UDT is used in a default value, but isn't the
// final type of the column.
{
"x INT DEFAULT (CASE WHEN 'hello'::greeting = 'hello'::greeting THEN 0 ELSE 1 END)",
`CASE WHEN b'\x80':::@100053 = b'\x80':::@100053 THEN 0:::INT8 ELSE 1:::INT8 END`,
`CASE WHEN x'80':::@100053 = x'80':::@100053 THEN 0:::INT8 ELSE 1:::INT8 END`,
getDefault,
},
{
"x BOOL DEFAULT ('hello'::greeting IS OF (greeting, greeting))",
`b'\x80':::@100053 IS OF (@100053, @100053)`,
`x'80':::@100053 IS OF (@100053, @100053)`,
getDefault,
},
// Test check constraints.
{
"x greeting, CHECK (x = 'hello')",
`x = b'\x80':::@100053`,
`x = x'80':::@100053`,
getCheck,
},
{
Expand All @@ -493,12 +493,12 @@ func TestSerializedUDTsInTableDescriptor(t *testing.T) {
// Test a computed column in the same cases as above.
{
"x greeting AS ('hello') STORED",
`b'\x80':::@100053`,
`x'80':::@100053`,
getComputed,
},
{
"x INT AS (CASE WHEN 'hello'::greeting = 'hello'::greeting THEN 0 ELSE 1 END) STORED",
`CASE WHEN b'\x80':::@100053 = b'\x80':::@100053 THEN 0:::INT8 ELSE 1:::INT8 END`,
`CASE WHEN x'80':::@100053 = x'80':::@100053 THEN 0:::INT8 ELSE 1:::INT8 END`,
getComputed,
},
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/tests/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ go_test(
size = "medium",
srcs = [
"bank_test.go",
"enum_test.go",
"hash_sharded_test.go",
"impure_builtin_test.go",
"inverted_index_test.go",
Expand Down Expand Up @@ -95,6 +96,7 @@ go_test(
"@com_github_cockroachdb_cockroach_go//crdb",
"@com_github_cockroachdb_datadriven//:datadriven",
"@com_github_cockroachdb_errors//:errors",
"@com_github_google_btree//:btree",
"@com_github_kr_pretty//:pretty",
"@com_github_lib_pq//:pq",
"@com_github_stretchr_testify//assert",
Expand Down
114 changes: 114 additions & 0 deletions pkg/sql/tests/enum_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package tests

import (
"context"
"fmt"
"math/rand"
"sort"
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/google/btree"
"github.com/stretchr/testify/require"
)

func TestLargeEnums(t *testing.T) {
defer leaktest.AfterTest(t)()

skip.UnderStress(t, "too slow")

ctx := context.Background()
tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{})
defer tc.Stopper().Stop(ctx)

// The idea here is that we're going to create a very large number of
// enum elements corresponding to non-negative integers and then we're
// going to add them to the enum in a random order. Then we're going to
// make sure that we can cast the integers to strings to the enums, order
// them as enums then cast them back to ints and make sure it's what we want.
// Ideally we'd make this number even bigger but it's slow enough as it is.
const N = 100

order := rand.Perm(N)
tdb := sqlutils.MakeSQLRunner(tc.ServerConn(0))

tdb.Exec(t, "CREATE TYPE e AS ENUM ()")
// Construct a single transaction to insert all the values; otherwise we'd
// have to wait for lots of versions and it would take a very long time.
var createEnumsQuery string
{
alreadyInserted := btree.New(8)
next := func(n int) (next int, ok bool) {
alreadyInserted.AscendGreaterOrEqual(intItem(n), func(i btree.Item) (wantMore bool) {
next, ok = int(i.(intItem)), true
return false
})
return next, ok
}
prev := func(n int) (prev int, ok bool) {
alreadyInserted.DescendLessOrEqual(intItem(n), func(i btree.Item) (wantMore bool) {
prev, ok = int(i.(intItem)), true
return false
})
return prev, ok
}
var buf strings.Builder
buf.WriteString("BEGIN;\n")
for i, n := range order {
fmt.Fprintf(&buf, "\tALTER TYPE e ADD VALUE '%d'", n)
if alreadyInserted.Len() == 0 {
buf.WriteString(";\n")
} else if next, ok := next(n); ok {
fmt.Fprintf(&buf, " BEFORE '%d';\n", next)
} else {
prev, ok := prev(n)
require.Truef(t, ok, "prev %v %v", n, order[:i])
fmt.Fprintf(&buf, " AFTER '%d';\n", prev)
}
alreadyInserted.ReplaceOrInsert(intItem(n))
}
buf.WriteString("COMMIT;")
createEnumsQuery = buf.String()
}
tdb.Exec(t, createEnumsQuery)

// Okay, now we have enum values for all of these numbers.
tdb.Exec(t, `CREATE TABLE t (i e PRIMARY KEY);`)
tdb.Exec(t, `
INSERT INTO t SELECT i
FROM (
SELECT i::STRING::e AS i
FROM generate_series(0, $1 - 1, 1) AS t (i)
);`, N)
rows := tdb.Query(t, "SELECT i::STRING::INT FROM t")
var read []int
for rows.Next() {
var i int
require.NoError(t, rows.Scan(&i))
read = append(read, i)
}
require.NoError(t, rows.Err())
require.Len(t, read, N)
require.Truef(t, sort.IntsAreSorted(read), "%v", read)
}

type intItem int

func (i intItem) Less(o btree.Item) bool {
return i < o.(intItem)
}

0 comments on commit 5cf07c4

Please sign in to comment.