Skip to content

Commit

Permalink
This is an automated cherry-pick of pingcap#48315
Browse files Browse the repository at this point in the history
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
  • Loading branch information
tangenta authored and ti-chi-bot committed Nov 8, 2023
1 parent 5051428 commit c4239f5
Show file tree
Hide file tree
Showing 3 changed files with 339 additions and 10 deletions.
25 changes: 15 additions & 10 deletions pkg/planner/core/find_best_task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1084,12 +1084,7 @@ func (ds *DataSource) findBestTask(prop *property.PhysicalProperty, planCounter
}

ds.storeTask(prop, t)
if ds.SampleInfo != nil && !t.invalid() {
if _, ok := t.plan().(*PhysicalTableSample); !ok {
warning := expression.ErrInvalidTableSample.GenWithStackByArgs("plan not supported")
ds.SCtx().GetSessionVars().StmtCtx.AppendWarning(warning)
}
}
err = validateTableSamplePlan(ds, t, err)
}()

t, err = ds.tryToGetDualTask()
Expand Down Expand Up @@ -2243,10 +2238,8 @@ func (ds *DataSource) convertToSampleTable(prop *property.PhysicalProperty,
return invalidTask, nil
}
if candidate.isMatchProp {
// TableSample on partition table can't keep order.
if ds.tableInfo.GetPartitionInfo() != nil {
return invalidTask, nil
}
// Disable keep order property for sample table path.
return invalidTask, nil
}
p := PhysicalTableSample{
TableSampleInfo: ds.SampleInfo,
Expand Down Expand Up @@ -2618,3 +2611,15 @@ func pushDownNot(ctx sessionctx.Context, conds []expression.Expression) []expres
}
return conds
}

func validateTableSamplePlan(ds *DataSource, t task, err error) error {
if err != nil {
return err
}
if ds.SampleInfo != nil && !t.invalid() {
if _, ok := t.plan().(*PhysicalTableSample); !ok {
return expression.ErrInvalidTableSample.GenWithStackByArgs("plan not supported")
}
}
return nil
}
191 changes: 191 additions & 0 deletions tests/integrationtest/r/executor/sample.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
set @@global.tidb_scatter_region=1
drop table if exists t;
set tidb_enable_clustered_index = on;
create table t (a varchar(255) primary key, b bigint);
insert into t values ('b', 100), ('y', 100);
split table t between ('a') and ('z') regions 2;
select a from t tablesample regions();
a
b
y
drop table t;
create table t (a varchar(255), b int, c decimal, primary key (a, b, c));
split table t between ('a', 0, 0) and ('z', 100, 100) regions 2;
insert into t values ('b', 10, 100), ('y', 100, 10);
select * from t tablesample regions();
a b c
b 10 100
y 100 10
drop table t;
create table t (a bigint primary key, b int default 10);
split table t between (1) and (100000) regions 4;
insert into t(a) values (200), (25600), (50300), (99900), (99901);
select a from t tablesample regions();
a
200
25600
50300
99900
drop table t;
create table t (a bigint, b int default 10);
split table t between (0) and (100000) regions 4;
insert into t(a) values (1), (2), (3);
select a from t tablesample regions();
a
1
set tidb_enable_clustered_index=default;
drop table if exists t;
create table t (a int, b varchar(255));
insert into t values (1, 'abc');
create view v as select * from t;
select * from v tablesample regions();
Error 8128 (HY000): Invalid TABLESAMPLE: Unsupported TABLESAMPLE in views
select * from information_schema.tables tablesample regions();
Error 8128 (HY000): Invalid TABLESAMPLE: Unsupported TABLESAMPLE in virtual tables
select a from t tablesample system();
Error 8128 (HY000): Invalid TABLESAMPLE: Only supports REGIONS sampling method
select a from t tablesample bernoulli(10 percent);
Error 8128 (HY000): Invalid TABLESAMPLE: Only supports REGIONS sampling method
select a from t as t1 tablesample regions(), t as t2 tablesample system();
Error 8128 (HY000): Invalid TABLESAMPLE: Only supports REGIONS sampling method
select a from t tablesample ();
Error 8128 (HY000): Invalid TABLESAMPLE: Only supports REGIONS sampling method
drop table if exists t;
create table t (a int, b varchar(255));
insert into t values (1, 'abc');
select _tidb_rowid from t tablesample regions();
_tidb_rowid
1
select a, _tidb_rowid from t tablesample regions();
a _tidb_rowid
1 1
select _tidb_rowid, b from t tablesample regions();
_tidb_rowid b
1 abc
select b, _tidb_rowid, a from t tablesample regions();
b _tidb_rowid a
abc 1 1
drop table if exists t;
create table t (a int, b varchar(255), primary key (a)) partition by hash(a) partitions 2;
insert into t values (1, '1'), (2, '2'), (3, '3');
select count(*) from t tablesample regions();
count(*)
2
delete from t;
insert into t values (1, '1');
select count(*) from t partition (p0) tablesample regions();
count(*)
0
select count(*) from t partition (p1) tablesample regions();
count(*)
1
drop table if exists t;
create table t (a int, b int, unique key idx(a)) partition by range (a) (
partition p0 values less than (0),
partition p1 values less than (10),
partition p2 values less than (30),
partition p3 values less than (maxvalue));
insert into t values (2, 2), (31, 31), (12, 12);
select _tidb_rowid from t tablesample regions() order by _tidb_rowid;
_tidb_rowid
1
2
3
drop table if exists t;
create table t (a int primary key, b int as (a + 1), c int as (b + 1), d int as (c + 1));
split table t between (0) and (10000) regions 4;
insert into t(a) values (1), (2), (2999), (4999), (9999);
select a from t tablesample regions();
a
1
2999
9999
select c from t tablesample regions();
c
3
3001
10001
select a, b from t tablesample regions();
a b
1 2
2999 3000
9999 10000
select d, c from t tablesample regions();
d c
4 3
3002 3001
10002 10001
select a, d from t tablesample regions();
a d
1 4
2999 3002
9999 10002
drop table if exists t;
create table t (a int primary key);
split table t between (0) and (40000) regions 4;
insert into t values (1), (1000), (10002);
select * from t tablesample regions();
a
1
10002
begin;
insert into t values (20006), (50000);
select * from t tablesample regions();
a
1
10002
delete from t where a = 1;
select * from t tablesample regions();
a
1
10002
commit;
select * from t tablesample regions();
a
1000
10002
20006
50000
drop table if exists t;
create table t (a int primary key);
split table t between (0) and (40000) regions 4;
insert into t values (1), (1000), (10002);
begin;
select * from t tablesample regions();
a
1
10002
insert into t values (20006), (50000);
select * from t tablesample regions();
a
1
10002
commit;
select * from t tablesample regions();
a
1
10002
20006
50000
drop table if exists t;
create table t (a int primary key, b int, c varchar(255));
split table t between (0) and (10000) regions 5;
insert into t values (1000, 1, '1'), (1001, 1, '1'), (2100, 2, '2'), (4500, 3, '3');
create index idx_0 on t (b);
select a from t tablesample regions() order by a;
a
1000
2100
4500
select a from t use index (idx_0) tablesample regions() order by a;
Error 8128 (HY000): Invalid TABLESAMPLE: plan not supported
DROP TABLE IF EXISTS a;
CREATE TABLE a (pk bigint unsigned primary key clustered, v text);
INSERT INTO a WITH RECURSIVE b(pk) AS (SELECT 1 UNION ALL SELECT pk+1 FROM b WHERE pk < 1000) SELECT pk, 'a' FROM b;
INSERT INTO a WITH RECURSIVE b(pk) AS (SELECT 1 UNION ALL SELECT pk+1 FROM b WHERE pk < 1000) SELECT pk + (1<<63), 'b' FROM b;
SPLIT TABLE a BY (500);
SELECT * FROM a TABLESAMPLE REGIONS() ORDER BY pk;
pk v
500 a
9223372036854775809 b
set @@global.tidb_scatter_region=default;
133 changes: 133 additions & 0 deletions tests/integrationtest/t/executor/sample.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
set @@global.tidb_scatter_region=1

# TestTableSampleSchema
drop table if exists t;
set tidb_enable_clustered_index = on;
create table t (a varchar(255) primary key, b bigint);
insert into t values ('b', 100), ('y', 100);
split table t between ('a') and ('z') regions 2;
select a from t tablesample regions();
drop table t;
create table t (a varchar(255), b int, c decimal, primary key (a, b, c));
split table t between ('a', 0, 0) and ('z', 100, 100) regions 2;
insert into t values ('b', 10, 100), ('y', 100, 10);
select * from t tablesample regions();
drop table t;
create table t (a bigint primary key, b int default 10);
split table t between (1) and (100000) regions 4;
insert into t(a) values (200), (25600), (50300), (99900), (99901);
select a from t tablesample regions();
drop table t;
create table t (a bigint, b int default 10);
split table t between (0) and (100000) regions 4;
insert into t(a) values (1), (2), (3);
select a from t tablesample regions();
set tidb_enable_clustered_index=default;

# TestTableSampleInvalid
drop table if exists t;
create table t (a int, b varchar(255));
insert into t values (1, 'abc');
create view v as select * from t;
-- error 8128
select * from v tablesample regions();
-- error 8128
select * from information_schema.tables tablesample regions();
-- error 8128
select a from t tablesample system();
-- error 8128
select a from t tablesample bernoulli(10 percent);
-- error 8128
select a from t as t1 tablesample regions(), t as t2 tablesample system();
-- error 8128
select a from t tablesample ();

# TestTableSampleWithTiDBRowID
drop table if exists t;
create table t (a int, b varchar(255));
insert into t values (1, 'abc');
select _tidb_rowid from t tablesample regions();
select a, _tidb_rowid from t tablesample regions();
select _tidb_rowid, b from t tablesample regions();
select b, _tidb_rowid, a from t tablesample regions();

# TestTableSampleWithPartition
drop table if exists t;
create table t (a int, b varchar(255), primary key (a)) partition by hash(a) partitions 2;
insert into t values (1, '1'), (2, '2'), (3, '3');
select count(*) from t tablesample regions();
delete from t;
insert into t values (1, '1');
select count(*) from t partition (p0) tablesample regions();
select count(*) from t partition (p1) tablesample regions();
## Test https://github.com/pingcap/tidb/issues/27349
drop table if exists t;
create table t (a int, b int, unique key idx(a)) partition by range (a) (
partition p0 values less than (0),
partition p1 values less than (10),
partition p2 values less than (30),
partition p3 values less than (maxvalue));
insert into t values (2, 2), (31, 31), (12, 12);
select _tidb_rowid from t tablesample regions() order by _tidb_rowid;

# TestTableSampleGeneratedColumns
drop table if exists t;
create table t (a int primary key, b int as (a + 1), c int as (b + 1), d int as (c + 1));
split table t between (0) and (10000) regions 4;
insert into t(a) values (1), (2), (2999), (4999), (9999);
select a from t tablesample regions();
select c from t tablesample regions();
select a, b from t tablesample regions();
select d, c from t tablesample regions();
select a, d from t tablesample regions();

# TestTableSampleUnionScanIgnorePendingKV
drop table if exists t;
create table t (a int primary key);
split table t between (0) and (40000) regions 4;
insert into t values (1), (1000), (10002);
select * from t tablesample regions();
begin;
insert into t values (20006), (50000);
select * from t tablesample regions();
delete from t where a = 1;
select * from t tablesample regions();
commit;
select * from t tablesample regions();

# TestTableSampleTransactionConsistency
drop table if exists t;
create table t (a int primary key);
split table t between (0) and (40000) regions 4;
insert into t values (1), (1000), (10002);
begin;
select * from t tablesample regions();

connect (conn1, localhost, root,, executor__sample);
insert into t values (20006), (50000);
connection default;

select * from t tablesample regions();
commit;
select * from t tablesample regions();
disconnect conn1;

# TestTableSampleNotSupportedPlanWarning
drop table if exists t;
create table t (a int primary key, b int, c varchar(255));
split table t between (0) and (10000) regions 5;
insert into t values (1000, 1, '1'), (1001, 1, '1'), (2100, 2, '2'), (4500, 3, '3');
create index idx_0 on t (b);
select a from t tablesample regions() order by a;
-- error 8128
select a from t use index (idx_0) tablesample regions() order by a;

# TestTableSampleUnsignedIntHandle
DROP TABLE IF EXISTS a;
CREATE TABLE a (pk bigint unsigned primary key clustered, v text);
INSERT INTO a WITH RECURSIVE b(pk) AS (SELECT 1 UNION ALL SELECT pk+1 FROM b WHERE pk < 1000) SELECT pk, 'a' FROM b;
INSERT INTO a WITH RECURSIVE b(pk) AS (SELECT 1 UNION ALL SELECT pk+1 FROM b WHERE pk < 1000) SELECT pk + (1<<63), 'b' FROM b;
SPLIT TABLE a BY (500);
SELECT * FROM a TABLESAMPLE REGIONS() ORDER BY pk;

set @@global.tidb_scatter_region=default;

0 comments on commit c4239f5

Please sign in to comment.