Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

workload/tpcc: improve indexing to permit better partitioning #36854

Merged
merged 6 commits into from
May 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pkg/sql/opt/bench/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ var schemas = [...]string{
s_remote_cnt integer,
s_data varchar(50),
primary key (s_w_id, s_i_id),
index (s_i_id)
index stock_item_fk_idx (s_i_id)
)
`,
`
Expand All @@ -170,8 +170,8 @@ var schemas = [...]string{
ol_amount decimal(6,2),
ol_dist_info char(24),
primary key (ol_w_id, ol_d_id, ol_o_id DESC, ol_number),
index order_line_fk (ol_supply_w_id, ol_d_id),
foreign key (ol_supply_w_id, ol_d_id) references stock (s_w_id, s_i_id)
index order_line_fk (ol_supply_w_id, ol_i_id),
foreign key (ol_supply_w_id, ol_i_id) references stock (s_w_id, s_i_id)
)
`,
`
Expand Down
374 changes: 183 additions & 191 deletions pkg/sql/opt/xform/testdata/external/tpcc

Large diffs are not rendered by default.

200 changes: 98 additions & 102 deletions pkg/sql/opt/xform/testdata/external/tpcc-no-stats

Large diffs are not rendered by default.

106 changes: 73 additions & 33 deletions pkg/workload/tpcc/ddls.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
)

const (
// WAREHOUSE table.
tpccWarehouseSchema = `(
w_id integer not null primary key,
w_name varchar(10),
Expand All @@ -34,7 +35,9 @@ const (
w_tax decimal(4,4),
w_ytd decimal(12,2)
)`
tpccDistrictSchema = `(

// DISTRICT table.
tpccDistrictSchemaBase = `(
d_id integer not null,
d_w_id integer not null,
d_name varchar(10),
Expand All @@ -48,8 +51,11 @@ const (
d_next_o_id integer,
primary key (d_w_id, d_id)
)`
tpccDistrictSchemaInterleave = ` interleave in parent warehouse (d_w_id)`
tpccCustomerSchema = `(
tpccDistrictSchemaInterleaveSuffix = `
interleave in parent warehouse (d_w_id)`

// CUSTOMER table.
tpccCustomerSchemaBase = `(
c_id integer not null,
c_d_id integer not null,
c_w_id integer not null,
Expand All @@ -74,22 +80,27 @@ const (
primary key (c_w_id, c_d_id, c_id),
index customer_idx (c_w_id, c_d_id, c_last, c_first)
)`
tpccCustomerSchemaInterleave = ` interleave in parent district (c_w_id, c_d_id)`
// No PK necessary for this table.
tpccHistorySchema = `(
rowid uuid PRIMARY KEY DEFAULT gen_random_uuid(),
h_c_id integer,
h_c_d_id integer,
h_c_w_id integer,
h_d_id integer,
h_w_id integer,
tpccCustomerSchemaInterleaveSuffix = `
interleave in parent district (c_w_id, c_d_id)`

// HISTORY table.
tpccHistorySchemaBase = `(
rowid uuid not null default gen_random_uuid(),
h_c_id integer not null,
h_c_d_id integer not null,
h_c_w_id integer not null,
h_d_id integer not null,
h_w_id integer not null,
h_date timestamp,
h_amount decimal(6,2),
h_data varchar(24),
index (h_w_id, h_d_id),
index (h_c_w_id, h_c_d_id, h_c_id)
)`
tpccOrderSchema = `(
primary key (h_w_id, rowid)`
tpccHistorySchemaFkSuffix = `
index history_customer_fk_idx (h_c_w_id, h_c_d_id, h_c_id),
index history_district_fk_idx (h_w_id, h_d_id)`

// ORDER table.
tpccOrderSchemaBase = `(
o_id integer not null,
o_d_id integer not null,
o_w_id integer not null,
Expand All @@ -98,27 +109,37 @@ const (
o_carrier_id integer,
o_ol_cnt integer,
o_all_local integer,
primary key (o_w_id, o_d_id, o_id DESC),
unique index order_idx (o_w_id, o_d_id, o_carrier_id, o_id),
index (o_w_id, o_d_id, o_c_id)
primary key (o_w_id, o_d_id, o_id DESC),
unique index order_idx (o_w_id, o_d_id, o_c_id, o_id DESC)
)`
tpccOrderSchemaInterleave = ` interleave in parent district (o_w_id, o_d_id)`
tpccNewOrderSchema = `(
tpccOrderSchemaInterleaveSuffix = `
interleave in parent district (o_w_id, o_d_id)`

// NEW-ORDER table.
tpccNewOrderSchema = `(
no_o_id integer not null,
no_d_id integer not null,
no_w_id integer not null,
primary key (no_w_id, no_d_id, no_o_id)
)`
tpccNewOrderSchemaInterleave = ` interleave in parent "order" (no_w_id, no_d_id, no_o_id)`
tpccItemSchema = `(
// This natural-seeming interleave makes performance worse, because this
// table has a ton of churn and produces a lot of MVCC tombstones, which
// then will gum up the works of scans over the parent table.
// tpccNewOrderSchemaInterleaveSuffix = `
// interleave in parent "order" (no_w_id, no_d_id, no_o_id)`

// ITEM table.
tpccItemSchema = `(
i_id integer not null,
i_im_id integer,
i_name varchar(24),
i_price decimal(5,2),
i_data varchar(50),
primary key (i_id)
)`
tpccStockSchema = `(

// STOCK table.
tpccStockSchemaBase = `(
s_i_id integer not null,
s_w_id integer not null,
s_quantity integer,
Expand All @@ -136,11 +157,14 @@ const (
s_order_cnt integer,
s_remote_cnt integer,
s_data varchar(50),
primary key (s_w_id, s_i_id),
index (s_i_id)
)`
tpccStockSchemaInterleave = ` interleave in parent warehouse (s_w_id)`
tpccOrderLineSchema = `(
primary key (s_w_id, s_i_id)`
tpccStockSchemaFkSuffix = `
index stock_item_fk_idx (s_i_id)`
tpccStockSchemaInterleaveSuffix = `
interleave in parent warehouse (s_w_id)`

// ORDER-LINE table.
tpccOrderLineSchemaBase = `(
ol_o_id integer not null,
ol_d_id integer not null,
ol_w_id integer not null,
Expand All @@ -151,12 +175,28 @@ const (
ol_quantity integer,
ol_amount decimal(6,2),
ol_dist_info char(24),
primary key (ol_w_id, ol_d_id, ol_o_id DESC, ol_number),
index order_line_fk (ol_supply_w_id, ol_i_id)
)`
tpccOrderLineSchemaInterleave = ` interleave in parent "order" (ol_w_id, ol_d_id, ol_o_id)`
primary key (ol_w_id, ol_d_id, ol_o_id DESC, ol_number)`
tpccOrderLineSchemaFkSuffix = `
index order_line_stock_fk_idx (ol_supply_w_id, ol_i_id)`
tpccOrderLineSchemaInterleaveSuffix = `
interleave in parent "order" (ol_w_id, ol_d_id, ol_o_id)`
)

// maybeAddFkSuffix finishes a table schema definition. The base schema is
// expected to be left open (no closing parenthesis). When fks is true, a comma
// and the foreign-key suffix are appended to it; in either case the schema is
// then terminated with a newline, a tab, and the closing parenthesis.
func maybeAddFkSuffix(fks bool, base, suffix string) string {
	const endSchema = "\n\t)"
	schema := base
	if fks {
		// The suffix carries the extra index definitions, so it has to be
		// separated from the last element of the base schema by a comma.
		schema += "," + suffix
	}
	return schema + endSchema
}

// maybeAddInterleaveSuffix returns base with suffix appended when interleave
// is true, and base unchanged otherwise.
func maybeAddInterleaveSuffix(interleave bool, base, suffix string) string {
	if interleave {
		return base + suffix
	}
	return base
}

func scatterRanges(db *gosql.DB) error {
tables := []string{
`customer`,
Expand Down
3 changes: 0 additions & 3 deletions pkg/workload/tpcc/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ var (
const (
numWarehousesPerRange = 10
numItemsPerRange = 100

historyRanges = 1000
numHistoryValsPerRange uint64 = math.MaxUint64 / historyRanges
)

type generateLocals struct {
Expand Down
76 changes: 28 additions & 48 deletions pkg/workload/tpcc/partition.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,9 @@ package tpcc
import (
"bytes"
gosql "database/sql"
"encoding/binary"
"fmt"
"math"
"strings"

"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/pkg/errors"
"golang.org/x/exp/rand"
)
Expand Down Expand Up @@ -223,6 +220,14 @@ func partitionTable(
// partitionIndex partitions the given index of table on col by delegating to
// partitionObject with the "INDEX" object kind and a "table@index" target.
//
// An index that does not exist is not an error: the function returns nil
// without doing anything. This covers the indexes that are only created to
// back foreign keys, which are absent when foreign keys are disabled.
func partitionIndex(
	db *gosql.DB, p *partitioner, zones []string, table, index, col string, idx int,
) error {
	found, err := indexExists(db, table, index)
	if err != nil {
		return err
	}
	if !found {
		// Nothing to partition.
		return nil
	}
	target := fmt.Sprintf("%s@%s", table, index)
	return partitionObject(db, p, zones, "INDEX", target, col, table, idx)
}
Expand All @@ -243,24 +248,21 @@ func partitionOrder(db *gosql.DB, wPart *partitioner, zones []string) error {
if err := partitionTable(db, wPart, zones, `"order"`, "o_w_id", 0); err != nil {
return err
}
if err := partitionIndex(db, wPart, zones, `"order"`, "order_idx", "o_w_id", 1); err != nil {
return err
}
return partitionIndex(db, wPart, zones, `"order"`, "order_o_w_id_o_d_id_o_c_id_idx", "o_w_id", 2)
return partitionIndex(db, wPart, zones, `"order"`, "order_idx", "o_w_id", 1)
}

// partitionOrderLine partitions the order_line table on ol_w_id and then its
// stock foreign-key index on ol_supply_w_id, both with the warehouse
// partitioner.
func partitionOrderLine(db *gosql.DB, wPart *partitioner, zones []string) error {
	if err := partitionTable(db, wPart, zones, "order_line", "ol_w_id", 0); err != nil {
		return err
	}
	// The index name has to match the one in the order_line DDL
	// (order_line_stock_fk_idx). partitionIndex silently skips indexes that do
	// not exist, so a stale name would turn this call into a no-op.
	return partitionIndex(db, wPart, zones, "order_line", "order_line_stock_fk_idx", "ol_supply_w_id", 1)
}

// partitionStock partitions the stock table on s_w_id with the warehouse
// partitioner and then its item foreign-key index on s_i_id with the item
// partitioner.
func partitionStock(db *gosql.DB, wPart, iPart *partitioner, zones []string) error {
	if err := partitionTable(db, wPart, zones, "stock", "s_w_id", 0); err != nil {
		return err
	}
	// The index name has to match the one in the stock DDL (stock_item_fk_idx).
	// partitionIndex silently skips indexes that do not exist, so a stale name
	// would turn this call into a no-op.
	return partitionIndex(db, iPart, zones, "stock", "stock_item_fk_idx", "s_i_id", 1)
}

func partitionCustomer(db *gosql.DB, wPart *partitioner, zones []string) error {
Expand All @@ -271,48 +273,13 @@ func partitionCustomer(db *gosql.DB, wPart *partitioner, zones []string) error {
}

func partitionHistory(db *gosql.DB, wPart *partitioner, zones []string) error {
const maxVal = math.MaxUint64
temp := make([]byte, 16)
rowids := make([]uuid.UUID, wPart.parts+1)
for i := 0; i < wPart.parts; i++ {
var err error

// We're splitting the UUID rowid column evenly into N partitions. The
// column is sorted lexicographically on the bytes of the UUID which means
// we should put the partitioning values at the front of the UUID.
binary.BigEndian.PutUint64(temp, uint64(i)*(maxVal/uint64(wPart.parts)))
rowids[i], err = uuid.FromBytes(temp)
if err != nil {
return err
}
}

rowids[wPart.parts], _ = uuid.FromString("ffffffff-ffff-ffff-ffff-ffffffffffff")

var buf bytes.Buffer
buf.WriteString("ALTER TABLE history PARTITION BY RANGE (rowid) (\n")
for i := 0; i < wPart.parts; i++ {
fmt.Fprintf(&buf, " PARTITION p0_%d VALUES FROM ('%s') to ('%s')", i, rowids[i], rowids[i+1])
if i+1 < wPart.parts {
buf.WriteString(",")
}
buf.WriteString("\n")
}
buf.WriteString(")\n")
if _, err := db.Exec(buf.String()); err != nil {
return errors.Wrapf(err, "Couldn't exec %s", buf.String())
}

for i := 0; i < wPart.parts; i++ {
if err := configureZone(db, `history`, fmt.Sprintf("p0_%d", i), i, zones); err != nil {
return err
}
if err := partitionTable(db, wPart, zones, "history", "h_w_id", 0); err != nil {
return err
}

if err := partitionIndex(db, wPart, zones, "history", "history_h_w_id_h_d_id_idx", "h_w_id", 1); err != nil {
if err := partitionIndex(db, wPart, zones, "history", "history_customer_fk_idx", "h_c_w_id", 1); err != nil {
return err
}
return partitionIndex(db, wPart, zones, "history", "history_h_c_w_id_h_c_d_id_h_c_id_idx", "h_c_w_id", 2)
return partitionIndex(db, wPart, zones, "history", "history_district_fk_idx", "h_w_id", 2)
}

func partitionItem(db *gosql.DB, iPart *partitioner, zones []string) error {
Expand Down Expand Up @@ -369,3 +336,16 @@ func partitionCount(db *gosql.DB) (int, error) {
}
return count, nil
}

// indexExists reports whether an index named index is defined on table, by
// checking information_schema.statistics for at least one matching row.
//
// Callers may pass a quoted table identifier (partitionOrder passes
// `"order"`). The catalog lookup compares against the bare table name, so
// surrounding double quotes are stripped first; otherwise the index would be
// reported as missing and callers would silently skip partitioning it.
//
// It returns false together with the error if the query or the scan fails.
func indexExists(db *gosql.DB, table, index string) (bool, error) {
	table = strings.Trim(table, `"`)

	var exists bool
	if err := db.QueryRow(`
		SELECT count(*) > 0
		FROM information_schema.statistics
		WHERE table_name = $1
		AND index_name = $2
	`, table, index).Scan(&exists); err != nil {
		return false, err
	}
	return exists, nil
}
Loading