From 39c2ec8378397c7ce6687da1a9a164bd539f4f2a Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Wed, 10 Jul 2019 15:28:31 -0400 Subject: [PATCH] opt: add Index.PartitionByListPrefixes to catalog Add a catalog function that returns index regions of interest based on PARTITION BY LIST values. This information will be used for an "index skip scan". Informs #38031. Release note: None --- .../testdata/logic_test/partitioning | 169 +++++++++++++++++- pkg/sql/opt/cat/index.go | 34 ++++ pkg/sql/opt/cat/utils.go | 11 ++ pkg/sql/opt/testutils/testcat/create_table.go | 17 +- pkg/sql/opt/testutils/testcat/test_catalog.go | 64 +++++++ pkg/sql/opt/testutils/testcat/testdata/table | 76 ++++++++ pkg/sql/opt_catalog.go | 29 +++ pkg/sql/sem/tree/create.go | 18 +- 8 files changed, 402 insertions(+), 16 deletions(-) diff --git a/pkg/ccl/logictestccl/testdata/logic_test/partitioning b/pkg/ccl/logictestccl/testdata/logic_test/partitioning index 1ca7eaba4573..cf9dbf7e9329 100644 --- a/pkg/ccl/logictestccl/testdata/logic_test/partitioning +++ b/pkg/ccl/logictestccl/testdata/logic_test/partitioning @@ -1,4 +1,4 @@ -# LogicTest: local +# LogicTest: local-opt statement error syntax CREATE TABLE t (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY LIST () @@ -399,6 +399,21 @@ ok1 CREATE TABLE ok1 ( PARTITION p2 VALUES IN ((2)) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok1 +---- +TABLE ok1 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + ├── b int not null + └── partition by list prefixes + ├── (1) + └── (2) +scan ok1 + statement ok CREATE TABLE ok2 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY LIST (a) ( PARTITION p1 VALUES IN ((1)), @@ -419,6 +434,21 @@ ok2 CREATE TABLE ok2 ( PARTITION p2 VALUES IN ((2)) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok2 +---- +TABLE ok2 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + ├── b int not null + └── partition by list prefixes + ├── 
(1) + └── (2) +scan ok2 + statement ok CREATE TABLE ok3 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY LIST (a) ( PARTITION p1 VALUES IN (1), @@ -439,6 +469,20 @@ ok3 CREATE TABLE ok3 ( PARTITION p2 VALUES IN ((DEFAULT)) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok3 +---- +TABLE ok3 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + ├── b int not null + └── partition by list prefixes + └── (1) +scan ok3 + statement ok CREATE TABLE ok4 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY LIST (a, b) ( PARTITION p1 VALUES IN ((1, 1)), @@ -463,6 +507,22 @@ ok4 CREATE TABLE ok4 ( PARTITION p4 VALUES IN ((DEFAULT, DEFAULT)) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok4 +---- +TABLE ok4 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + ├── b int not null + └── partition by list prefixes + ├── (1, 1) + ├── (1) + └── (2, 3) +scan ok4 + statement ok CREATE TABLE ok5 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY LIST (a) ( PARTITION p1 VALUES IN (1) PARTITION BY LIST (b) ( @@ -475,6 +535,21 @@ CREATE TABLE ok5 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY LIST (a) PARTITION p3 VALUES IN (DEFAULT) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok5 +---- +TABLE ok5 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + ├── b int not null + └── partition by list prefixes + ├── (1) + └── (2) +scan ok5 + query TT SHOW CREATE TABLE ok5 ---- @@ -515,6 +590,18 @@ ok6 CREATE TABLE ok6 ( PARTITION p2 VALUES FROM (1) TO (2) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok6 +---- +TABLE ok6 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + └── b int not null +scan ok6 + statement ok CREATE TABLE ok7 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY RANGE (a) ( PARTITION p1 VALUES FROM ((0)) TO (((1))) @@ -533,6 +620,18 @@ ok7 CREATE TABLE ok7 ( PARTITION p1 
VALUES FROM (0) TO (1) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok7 +---- +TABLE ok7 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + └── b int not null +scan ok7 + statement ok CREATE TABLE ok8 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY RANGE (a) ( PARTITION p1 VALUES FROM (MINVALUE) TO (1), @@ -555,6 +654,18 @@ ok8 CREATE TABLE ok8 ( PARTITION p3 VALUES FROM (2) TO (MAXVALUE) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok8 +---- +TABLE ok8 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + └── b int not null +scan ok8 + statement ok CREATE TABLE ok9 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY RANGE (a, b) ( PARTITION p1 VALUES FROM (MINVALUE, MINVALUE) TO (1, MAXVALUE), @@ -579,6 +690,18 @@ ok9 CREATE TABLE ok9 ( PARTITION p4 VALUES FROM (3, MAXVALUE) TO (MAXVALUE, MAXVALUE) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok9 +---- +TABLE ok9 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + └── b int not null +scan ok9 + statement ok CREATE TABLE ok10 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY RANGE (a, b) ( PARTITION p1 VALUES FROM (MINVALUE, MINVALUE) TO (1, 1), @@ -605,6 +728,18 @@ ok10 CREATE TABLE ok10 ( PARTITION p5 VALUES FROM (3, 4) TO (MAXVALUE, MAXVALUE) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok10 +---- +TABLE ok10 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + └── b int not null +scan ok10 + statement ok CREATE TABLE ok11 (a INT, b INT, c INT, PRIMARY KEY (a, b, c)) PARTITION BY LIST (a) ( PARTITION p1 VALUES IN (1) PARTITION BY LIST (b) ( @@ -639,6 +774,22 @@ ok11 CREATE TABLE ok11 ( ) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok11 +---- +TABLE ok11 + ├── a int not null + ├── b int not null + ├── c int not null + └── INDEX primary + ├── a int not null + ├── b int not null + ├── c int not null + └── 
partition by list prefixes + ├── (1) + └── (6) +scan ok11 + statement ok CREATE TABLE IF NOT EXISTS ok12 (a INT, b INT, c INT, PRIMARY KEY (a, b)) PARTITION BY LIST (a) ( PARTITION pu VALUES IN (NULL), @@ -662,6 +813,22 @@ ok12 CREATE TABLE ok12 ( PARTITION p2 VALUES IN ((2)) ) +query T +EXPLAIN (OPT, CATALOG) SELECT * from ok12 +---- +TABLE ok12 + ├── a int not null + ├── b int not null + ├── c int + └── INDEX primary + ├── a int not null + ├── b int not null + └── partition by list prefixes + ├── (NULL) + ├── (1) + └── (2) +scan ok12 + # Verify that creating a partition that includes NULL does not change the # implicit NOT NULL contrainst of a primary key. statement error null value in column "a" violates not-null constraint diff --git a/pkg/sql/opt/cat/index.go b/pkg/sql/opt/cat/index.go index 790fc24902d0..9509bf106a27 100644 --- a/pkg/sql/opt/cat/index.go +++ b/pkg/sql/opt/cat/index.go @@ -128,6 +128,40 @@ type Index interface { // Span returns the KV span associated with the index. Span() roachpb.Span + + // PartitionByListPrefixes returns values that correspond to PARTITION BY LIST + // values. Specifically, it returns a list of tuples where each tuple contains + // values for a prefix of index columns (indicating a region of the index). + // Each tuple corresponds to a configured partition or subpartition. + // + // Note: this function decodes and allocates datums; use sparingly. + // + // Example: + // + // CREATE INDEX idx ON t(region,subregion,val) PARTITION BY LIST (region,subregion) ( + // PARTITION westcoast VALUES IN (('us', 'seattle'), ('us', 'cali')), + // PARTITION us VALUES IN (('us', DEFAULT)), + // PARTITION eu VALUES IN (('eu', DEFAULT)), + // PARTITION default VALUES IN (DEFAULT) + // ); + // + // PartitionByListPrefixes() returns + // ('us', 'seattle'), + // ('us', 'cali'), + // ('us'), + // ('eu'). + // + // The intended use of this function is for index skip scans. 
Each tuple + // corresponds to a region of the index that we can constrain further. In the + // example above: if we have a val=1 filter, instead of a full index scan we + // can skip most of the data under /us/cali and /us/seattle by scanning spans: + // [ - /us/cali ) + // [ /us/cali/1 - /us/cali/1 ] + // [ /us/cali\x00 - /us/seattle ) + // [ /us/seattle/1 - /us/seattle/1 ] + // [ /us/seattle\x00 - ] + // + PartitionByListPrefixes() []tree.Datums } // IndexColumn describes a single column that is part of an index definition. diff --git a/pkg/sql/opt/cat/utils.go b/pkg/sql/opt/cat/utils.go index 6ea38dffefb7..71dbba72c984 100644 --- a/pkg/sql/opt/cat/utils.go +++ b/pkg/sql/opt/cat/utils.go @@ -133,6 +133,9 @@ func FindTableColumnByName(tab Table, name tree.Name) int { // and testing. func FormatTable(cat Catalog, tab Table, tp treeprinter.Node) { child := tp.Childf("TABLE %s", tab.Name().TableName) + if tab.IsVirtualTable() { + child.Child("virtual table") + } var buf bytes.Buffer for i := 0; i < tab.DeletableColumnCount(); i++ { @@ -207,6 +210,14 @@ func formatCatalogIndex(tab Table, ord int, tp treeprinter.Node) { } FormatZone(idx.Zone(), child) + + partPrefixes := idx.PartitionByListPrefixes() + if len(partPrefixes) != 0 { + c := child.Child("partition by list prefixes") + for i := range partPrefixes { + c.Child(partPrefixes[i].String()) + } + } } // formatColPrefix returns a string representation of a list of columns. 
The diff --git a/pkg/sql/opt/testutils/testcat/create_table.go b/pkg/sql/opt/testutils/testcat/create_table.go index 20cb0a1c0167..66fcd79048b5 100644 --- a/pkg/sql/opt/testutils/testcat/create_table.go +++ b/pkg/sql/opt/testutils/testcat/create_table.go @@ -133,6 +133,12 @@ func (tc *Catalog) CreateTable(stmt *tree.CreateTable) *Table { } else if !tab.IsVirtual { tab.addPrimaryColumnIndex("rowid") } + if stmt.PartitionBy != nil { + if len(tab.Indexes) == 0 { + panic("cannot partition virtual table") + } + tab.Indexes[0].partitionBy = stmt.PartitionBy + } // Add check constraints. for _, def := range stmt.Defs { @@ -368,11 +374,12 @@ func (tt *Table) addColumn(def *tree.ColumnTableDef) { func (tt *Table) addIndex(def *tree.IndexTableDef, typ indexType) *Index { idx := &Index{ - IdxName: tt.makeIndexName(def.Name, typ), - Unique: typ != nonUniqueIndex, - Inverted: def.Inverted, - IdxZone: &config.ZoneConfig{}, - table: tt, + IdxName: tt.makeIndexName(def.Name, typ), + Unique: typ != nonUniqueIndex, + Inverted: def.Inverted, + IdxZone: &config.ZoneConfig{}, + table: tt, + partitionBy: def.PartitionBy, } // Look for name suffixes indicating this is a mutation index. diff --git a/pkg/sql/opt/testutils/testcat/test_catalog.go b/pkg/sql/opt/testutils/testcat/test_catalog.go index 0061597cb178..7cbaa257cfc5 100644 --- a/pkg/sql/opt/testutils/testcat/test_catalog.go +++ b/pkg/sql/opt/testutils/testcat/test_catalog.go @@ -690,6 +690,10 @@ type Index struct { // table is a back reference to the table this index is on. table *Table + + // partitionBy is the partitioning clause that corresponds to this index. Used + // to implement PartitionByListPrefixes. + partitionBy *tree.PartitionBy } // ID is part of the cat.Index interface. @@ -752,6 +756,66 @@ func (ti *Index) Span() roachpb.Span { panic("not implemented") } +// PartitionByListPrefixes is part of the cat.Index interface. 
+func (ti *Index) PartitionByListPrefixes() []tree.Datums {
+	p := ti.partitionBy
+	if p == nil || len(p.List) == 0 {
+		return nil
+	}
+	var res []tree.Datums
+	semaCtx := tree.MakeSemaContext()
+	evalCtx := tree.MakeTestingEvalContext(cluster.MakeTestingClusterSettings())
+	for i := range p.Fields {
+		if i >= len(ti.Columns) || p.Fields[i] != ti.Columns[i].ColName() {
+			panic("partition by columns must be a prefix of the index columns")
+		}
+	}
+	for i := range p.List {
+		// Exprs contains a list of values.
+		for _, e := range p.List[i].Exprs {
+			var vals []tree.Expr
+			switch t := e.(type) {
+			case *tree.Tuple:
+				vals = t.Exprs
+			default:
+				vals = []tree.Expr{e}
+			}
+			// Cut off at the first DEFAULT, if present. The break is required:
+			// range keeps iterating over the original length, so indexing the
+			// truncated slice again (e.g. for (DEFAULT, DEFAULT)) would panic.
+			for i := range vals {
+				if _, ok := vals[i].(tree.DefaultVal); ok {
+					vals = vals[:i]
+					break
+				}
+			}
+			if len(vals) == 0 {
+				continue
+			}
+			d := make(tree.Datums, len(vals))
+			for i := range vals {
+				c := tree.CastExpr{Expr: vals[i], Type: ti.Columns[i].DatumType()}
+				cTyped, err := c.TypeCheck(&semaCtx, nil)
+				if err != nil {
+					panic(err)
+				}
+				d[i], err = cTyped.Eval(&evalCtx)
+				if err != nil {
+					panic(err)
+				}
+			}
+			// TODO(radu): split into multiple prefixes if Subpartition is also by list.
+			// Note that this functionality should be kept in sync with the real catalog
+			// implementation (opt_catalog.go).
+
+			res = append(res, d)
+		}
+	}
+	return res
+}
+
 // Column implements the cat.Column interface for testing purposes.
type Column struct { Ordinal int diff --git a/pkg/sql/opt/testutils/testcat/testdata/table b/pkg/sql/opt/testutils/testcat/testdata/table index 8f0b6f1897b1..8f0be78a2974 100644 --- a/pkg/sql/opt/testutils/testcat/testdata/table +++ b/pkg/sql/opt/testutils/testcat/testdata/table @@ -86,3 +86,79 @@ TABLE a └── INDEX a_a_key ├── a int └── rowid int not null default (unique_rowid()) [hidden] (storing) + +exec-ddl +CREATE TABLE system.vtable (a INT, b INT) +---- + +exec-ddl +SHOW CREATE system.vtable +---- +TABLE vtable + ├── virtual table + ├── a int + └── b int + +exec-ddl +CREATE TABLE part1 (a INT PRIMARY KEY, b INT) PARTITION BY LIST (a) ( + PARTITION p1 VALUES IN (1), + PARTITION p2 VALUES IN (3, 4, 5), + PARTITION p3 VALUES IN (DEFAULT) +) +---- + +exec-ddl +SHOW CREATE part1 +---- +TABLE part1 + ├── a int not null + ├── b int + └── INDEX primary + ├── a int not null + └── partition by list prefixes + ├── (1) + ├── (3) + ├── (4) + └── (5) + +exec-ddl +CREATE TABLE part2 ( + a STRING, + b STRING, + c INT, + PRIMARY KEY(a,b,c), + INDEX (c) PARTITION BY LIST (c) ( + PARTITION pi1 VALUES IN (1), + PARTITION pi2 VALUES IN (3, 4) + ) +) PARTITION BY LIST (a, b) ( + PARTITION p1 VALUES IN (('foo', 'bar'), ('foo', 'baz'), ('qux', 'qux')), + PARTITION p2 VALUES IN (('waldo', DEFAULT)), + PARTITION p3 VALUES IN (DEFAULT) +) +---- + +exec-ddl +SHOW CREATE part2 +---- +TABLE part2 + ├── a string not null + ├── b string not null + ├── c int not null + ├── INDEX primary + │ ├── a string not null + │ ├── b string not null + │ ├── c int not null + │ └── partition by list prefixes + │ ├── ('foo', 'bar') + │ ├── ('foo', 'baz') + │ ├── ('qux', 'qux') + │ └── ('waldo') + └── INDEX secondary + ├── c int not null + ├── a string not null + ├── b string not null + └── partition by list prefixes + ├── (1) + ├── (3) + └── (4) diff --git a/pkg/sql/opt_catalog.go b/pkg/sql/opt_catalog.go index a7bbf6ffdd6c..12077df57d0c 100644 --- a/pkg/sql/opt_catalog.go +++ b/pkg/sql/opt_catalog.go @@ 
-1015,6 +1015,35 @@ func (oi *optIndex) Ordinal() int {
 	return oi.indexOrdinal
 }
 
+// PartitionByListPrefixes is part of the cat.Index interface.
+func (oi *optIndex) PartitionByListPrefixes() []tree.Datums {
+	partitioning := &oi.desc.Partitioning
+	if len(partitioning.List) == 0 {
+		return nil
+	}
+	var alloc sqlbase.DatumAlloc
+	prefixes := make([]tree.Datums, 0, len(partitioning.List))
+	for i := range partitioning.List {
+		for _, encoded := range partitioning.List[i].Values {
+			tuple, _, err := sqlbase.DecodePartitionTuple(
+				&alloc, &oi.tab.desc.TableDescriptor, oi.desc, partitioning,
+				encoded, nil, /* prefixDatums */
+			)
+			if err != nil {
+				panic(errors.NewAssertionErrorWithWrappedErrf(err, "while decoding partition tuple"))
+			}
+			// Skip tuples that decode to no datums (the DEFAULT case).
+			if len(tuple.Datums) == 0 {
+				continue
+			}
+			prefixes = append(prefixes, tuple.Datums)
+			// TODO(radu): split into multiple prefixes if Subpartition is also by list.
+			// Keep this in sync with the test catalog implementation (test_catalog.go).
+		}
+	}
+	return prefixes
+}
+
 type optTableStat struct {
 	createdAt time.Time
 	columnOrdinals []int
diff --git a/pkg/sql/sem/tree/create.go b/pkg/sql/sem/tree/create.go
index 35006c17577c..f37c40c8ac96 100644
--- a/pkg/sql/sem/tree/create.go
+++ b/pkg/sql/sem/tree/create.go
@@ -156,9 +156,11 @@ type TableDef interface {
 	SetName(name Name)
 }
 
-func (*ColumnTableDef) tableDef() {}
-func (*IndexTableDef) tableDef() {}
-func (*FamilyTableDef) tableDef() {}
+func (*ColumnTableDef) tableDef() {}
+func (*IndexTableDef) tableDef() {}
+func (*FamilyTableDef) tableDef() {}
+func (*ForeignKeyConstraintTableDef) tableDef() {}
+func (*CheckConstraintTableDef) tableDef() {}
 
 // TableDefs represents a list of table definitions.
type TableDefs []TableDef @@ -569,7 +571,9 @@ type ConstraintTableDef interface { constraintTableDef() } -func (*UniqueConstraintTableDef) constraintTableDef() {} +func (*UniqueConstraintTableDef) constraintTableDef() {} +func (*ForeignKeyConstraintTableDef) constraintTableDef() {} +func (*CheckConstraintTableDef) constraintTableDef() {} // UniqueConstraintTableDef represents a unique constraint within a CREATE // TABLE statement. @@ -715,12 +719,6 @@ func (node *ForeignKeyConstraintTableDef) SetName(name Name) { node.Name = name } -func (*ForeignKeyConstraintTableDef) tableDef() {} -func (*ForeignKeyConstraintTableDef) constraintTableDef() {} - -func (*CheckConstraintTableDef) tableDef() {} -func (*CheckConstraintTableDef) constraintTableDef() {} - // CheckConstraintTableDef represents a check constraint within a CREATE // TABLE statement. type CheckConstraintTableDef struct {