Skip to content

Commit e59b3ab

Browse files
committed
sql: support partial stats at extremes without specifying columns
This commit adds support for collecting partial statistics at extremes without specifying a column, such as: `CREATE STATISTICS my_stat FROM t USING EXTREMES`. This will collect partial stats on a default set of columns, which are all single columns that are prefixes of forward indexes.

Closes: #127832

Release note (sql): Partial stats at extremes can now be collected on all valid columns of a table using the `CREATE STATISTICS <stat_name> FROM <table_name> USING EXTREMES` syntax, without an `ON <col_name>` clause. Valid columns are all single-column prefixes of a forward index.
1 parent 9284f8c commit e59b3ab

File tree

3 files changed

+126
-20
lines changed

3 files changed

+126
-20
lines changed

pkg/sql/create_stats.go

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
"github.com/cockroachdb/cockroach/pkg/util/hlc"
3939
"github.com/cockroachdb/cockroach/pkg/util/log"
4040
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
41+
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
4142
"github.com/cockroachdb/errors"
4243
"github.com/cockroachdb/redact"
4344
)
@@ -77,7 +78,7 @@ func StubTableStats(
7778
) ([]*stats.TableStatisticProto, error) {
7879
colStats, err := createStatsDefaultColumns(
7980
context.Background(), desc, false /* virtColEnabled */, false, /* multiColEnabled */
80-
nonIndexColHistogramBuckets, nil, /* evalCtx */
81+
false /* partialStats */, nonIndexColHistogramBuckets, nil, /* evalCtx */
8182
)
8283
if err != nil {
8384
return nil, err
@@ -249,7 +250,13 @@ func (n *createStatsNode) makeJobRecord(ctx context.Context) (*jobs.Record, erro
249250
}
250251
defaultHistogramBuckets := stats.GetDefaultHistogramBuckets(n.p.ExecCfg().SV(), tableDesc)
251252
if colStats, err = createStatsDefaultColumns(
252-
ctx, tableDesc, virtColEnabled, multiColEnabled, defaultHistogramBuckets, n.p.EvalContext(),
253+
ctx,
254+
tableDesc,
255+
virtColEnabled,
256+
multiColEnabled,
257+
n.Options.UsingExtremes,
258+
defaultHistogramBuckets,
259+
n.p.EvalContext(),
253260
); err != nil {
254261
return nil, err
255262
}
@@ -366,13 +373,17 @@ const maxNonIndexCols = 100
366373
// predicate expressions are also likely to appear in query filters, so stats
367374
// are collected for those columns as well.
368375
//
376+
// If partialStats is true, we only collect statistics on single columns that
377+
// are prefixes of forward indexes. Partial statistic creation only supports
378+
// these columns.
379+
//
369380
// In addition to the index columns, we collect stats on up to maxNonIndexCols
370381
// other columns from the table. We only collect histograms for index columns,
371382
// plus any other boolean or enum columns (where the "histogram" is tiny).
372383
func createStatsDefaultColumns(
373384
ctx context.Context,
374385
desc catalog.TableDescriptor,
375-
virtColEnabled, multiColEnabled bool,
386+
virtColEnabled, multiColEnabled, partialStats bool,
376387
defaultHistogramBuckets uint32,
377388
evalCtx *eval.Context,
378389
) ([]jobspb.CreateStatsDetails_ColStat, error) {
@@ -472,6 +483,23 @@ func createStatsDefaultColumns(
472483
return nil
473484
}
474485

486+
// Only collect statistics on single columns that are prefixes of forward
487+
// indexes for partial statistics.
488+
if partialStats {
489+
for _, idx := range desc.ActiveIndexes() {
490+
if idx.GetType() != descpb.IndexDescriptor_FORWARD || idx.IsPartial() {
491+
continue
492+
}
493+
if idx.NumKeyColumns() != 0 {
494+
colID := idx.GetKeyColumnID(0)
495+
if err := addIndexColumnStatsIfNotExists(colID, false /* isInverted */); err != nil {
496+
return nil, err
497+
}
498+
}
499+
}
500+
return colStats, nil
501+
}
502+
475503
// Add column stats for the primary key.
476504
primaryIdx := desc.GetPrimaryIndex()
477505
for i := 0; i < primaryIdx.NumKeyColumns(); i++ {
@@ -690,13 +718,33 @@ func (r *createStatsResumer) Resume(ctx context.Context, execCtx interface{}) er
690718
}
691719

692720
dsp := innerP.DistSQLPlanner()
693-
planCtx := dsp.NewPlanningCtx(ctx, innerEvalCtx, innerP, txn.KV(), FullDistribution)
694721
// CREATE STATS flow doesn't produce any rows and only emits the
695722
// metadata, so we can use a nil rowContainerHelper.
696723
resultWriter := NewRowResultWriter(nil /* rowContainer */)
697-
if err := dsp.planAndRunCreateStats(
698-
ctx, innerEvalCtx, planCtx, innerP.SemaCtx(), txn.KV(), r.job, resultWriter,
699-
); err != nil {
724+
725+
var err error
726+
if details.UsingExtremes {
727+
for _, colStat := range details.ColumnStats {
728+
// Plan and run partial stats on multiple columns separately since each
729+
// partial stat collection will use a different index and have different
730+
// plans.
731+
singleColDetails := protoutil.Clone(&details).(*jobspb.CreateStatsDetails)
732+
singleColDetails.ColumnStats = []jobspb.CreateStatsDetails_ColStat{colStat}
733+
planCtx := dsp.NewPlanningCtx(ctx, innerEvalCtx, innerP, txn.KV(), FullDistribution)
734+
if err = dsp.planAndRunCreateStats(
735+
ctx, innerEvalCtx, planCtx, innerP.SemaCtx(), txn.KV(), resultWriter, r.job.ID(), *singleColDetails,
736+
); err != nil {
737+
break
738+
}
739+
}
740+
} else {
741+
planCtx := dsp.NewPlanningCtx(ctx, innerEvalCtx, innerP, txn.KV(), FullDistribution)
742+
err = dsp.planAndRunCreateStats(
743+
ctx, innerEvalCtx, planCtx, innerP.SemaCtx(), txn.KV(), resultWriter, r.job.ID(), details,
744+
)
745+
}
746+
747+
if err != nil {
700748
// Check if this was a context canceled error and restart if it was.
701749
if grpcutil.IsContextCanceled(err) {
702750
return jobs.MarkAsRetryJobError(err)

pkg/sql/distsql_plan_stats.go

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ import (
1515
"math"
1616
"time"
1717

18-
"github.com/cockroachdb/cockroach/pkg/jobs"
1918
"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
2019
"github.com/cockroachdb/cockroach/pkg/kv"
2120
"github.com/cockroachdb/cockroach/pkg/settings"
@@ -274,13 +273,10 @@ func (dsp *DistSQLPlanner) createPartialStatsPlan(
274273
jobID jobspb.JobID,
275274
details jobspb.CreateStatsDetails,
276275
) (*PhysicalPlan, error) {
277-
278-
// Currently, we limit the number of requests for partial statistics
279-
// stats at a given point in time to 1.
280-
// TODO (faizaanmadhani): Add support for multiple distinct requested
281-
// partial stats in one job.
276+
// Partial stats collections on multiple columns create different plans,
277+
// so we only support one requested stat at a time here.
282278
if len(reqStats) > 1 {
283-
return nil, pgerror.Newf(pgcode.FeatureNotSupported, "cannot process multiple partial statistics at once")
279+
return nil, errors.AssertionFailedf("only one partial statistic can be requested at a time")
284280
}
285281

286282
reqStat := reqStats[0]
@@ -729,13 +725,13 @@ func (dsp *DistSQLPlanner) planAndRunCreateStats(
729725
planCtx *PlanningCtx,
730726
semaCtx *tree.SemaContext,
731727
txn *kv.Txn,
732-
job *jobs.Job,
733728
resultWriter *RowResultWriter,
729+
jobId jobspb.JobID,
730+
details jobspb.CreateStatsDetails,
734731
) error {
735732
ctx = logtags.AddTag(ctx, "create-stats-distsql", nil)
736733

737-
details := job.Details().(jobspb.CreateStatsDetails)
738-
physPlan, err := dsp.createPlanForCreateStats(ctx, planCtx, semaCtx, job.ID(), details)
734+
physPlan, err := dsp.createPlanForCreateStats(ctx, planCtx, semaCtx, jobId, details)
739735
if err != nil {
740736
return err
741737
}

pkg/sql/logictest/testdata/logic_test/distsql_stats

Lines changed: 65 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2503,9 +2503,6 @@ sdn NULL 8 2
25032503
sdnp (a IS NULL) OR ((a < 0:::INT8) OR (a > 5:::INT8)) 2 2
25042504

25052505
# Verify errors.
2506-
statement error pq: cannot process multiple partial statistics at once
2507-
CREATE STATISTICS abcd_defaults FROM abcd USING EXTREMES;
2508-
25092506
statement error pq: multi-column partial statistics are not currently supported
25102507
CREATE STATISTICS abcd_a_b ON a, c FROM abcd USING EXTREMES;
25112508

@@ -3354,3 +3351,68 @@ upper_bound range_rows distinct_range_rows equal_rows
33543351
'paid' 0 0 1
33553352
'dispatched' 0 0 1
33563353
'delivered' 0 0 1
3354+
3355+
# Verify that partial stats are collected on single column prefixes of forward
3356+
# indexes when columns are unspecified.
3357+
statement ok
3358+
SET enable_create_stats_using_extremes = on
3359+
3360+
statement ok
3361+
CREATE TABLE pstat_allindex (
3362+
a INT,
3363+
b INT,
3364+
c INT,
3365+
d INT,
3366+
j JSONB,
3367+
PRIMARY KEY (a),
3368+
INDEX(b, c),
3369+
INDEX(b, c, d),
3370+
INDEX(d),
3371+
INVERTED INDEX (j)
3372+
);
3373+
3374+
statement ok
3375+
INSERT INTO pstat_allindex VALUES
3376+
(1, 1, 1, 1, '{"1": "1"}'),
3377+
(2, 2, 2, 2, '{"2": "2"}'),
3378+
(3, 3, 3, 3, '{"3": "3"}'),
3379+
(4, 4, 4, 4, '{"4": "4"}');
3380+
3381+
statement ok
3382+
CREATE STATISTICS pstat_allindex_full FROM pstat_allindex;
3383+
3384+
statement ok
3385+
INSERT INTO pstat_allindex VALUES
3386+
(5, 5, 5, 5, '{"5": "5"}'),
3387+
(6, 6, 6, 6, '{"6": "6"}'),
3388+
(7, 7, 7, 7, '{"7": "7"}'),
3389+
(8, 8, 8, 8, '{"8": "8"}');
3390+
3391+
statement ok
3392+
CREATE STATISTICS pstat_allindex_partial FROM pstat_allindex USING EXTREMES;
3393+
3394+
query TTIII colnames
3395+
SELECT
3396+
statistics_name,
3397+
column_names,
3398+
row_count,
3399+
distinct_count,
3400+
null_count
3401+
FROM
3402+
[SHOW STATISTICS FOR TABLE pstat_allindex]
3403+
ORDER BY statistics_name, column_names::STRING
3404+
----
3405+
statistics_name column_names row_count distinct_count null_count
3406+
pstat_allindex_full {a} 4 4 0
3407+
pstat_allindex_full {b,c,d} 4 4 0
3408+
pstat_allindex_full {b,c} 4 4 0
3409+
pstat_allindex_full {b} 4 4 0
3410+
pstat_allindex_full {c} 4 4 0
3411+
pstat_allindex_full {d} 4 4 0
3412+
pstat_allindex_full {j} 4 4 0
3413+
pstat_allindex_partial {a} 4 4 0
3414+
pstat_allindex_partial {b} 4 4 0
3415+
pstat_allindex_partial {d} 4 4 0
3416+
3417+
statement ok
3418+
RESET enable_create_stats_using_extremes

0 commit comments

Comments
 (0)