Skip to content

Commit

Permalink
[yugabyte#21980] YSQL: Do not skip fetching rows for YB Bitmap Table …
Browse files Browse the repository at this point in the history
…Scan

Summary:
Bitmap scans try to optimize cases where the actual tuples are not required by avoiding fetching the rows from the table.

This was added in PG to support the COUNT(*) use case, but would also optimize returning system attributes that aren't stored in DocDB. In both of these cases, we already have everything we need locally: an exact number of ybctids, or the system attributes. Sending a request is a waste of time.

However, there are a few edge cases / optimization to this that make it a bit more complicated. For now, disable this optimisation so that we can do it right later, at a priority that makes sense. That follow up is tracked by yugabyte#22044
Jira: DB-10901

Test Plan:
```
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressYbBitmapScans'
```

Reviewers: amartsinchyk, tnayak

Reviewed By: amartsinchyk

Subscribers: yql

Tags: #jenkins-ready

Differential Revision: https://phorge.dev.yugabyte.com/D34137
  • Loading branch information
timothy-e committed Apr 19, 2024
1 parent 225247a commit 03fd372
Show file tree
Hide file tree
Showing 12 changed files with 48 additions and 70 deletions.
34 changes: 1 addition & 33 deletions src/postgres/src/backend/executor/nodeYbBitmapTablescan.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,24 +80,10 @@ YbBitmapTableNext(YbBitmapTableScanState *node)

if (!node->ss.ss_currentScanDesc)
node->ss.ss_currentScanDesc = CreateYbBitmapTableScanDesc(node);
scandesc = node->ss.ss_currentScanDesc;

scandesc = node->ss.ss_currentScanDesc;
ybScan = scandesc->ybscan;

/*
* Special case: if we don't need the results (e.g. COUNT), just return as
* many null values as we have ybctids.
*/
if (node->can_skip_fetch && !node->recheck_required && !node->work_mem_exceeded)
{
if (++node->skipped_tuples > yb_tbm_get_size(ybtbm))
return ExecClearTuple(slot);
/*
* If we don't have to fetch the tuple, just return nulls.
*/
return ExecStoreAllNullTuple(slot);
}

/*
* If the bitmaps have exceeded work_mem just select everything from the
* main table. The correct remote filters have already been applied.
Expand Down Expand Up @@ -245,7 +231,6 @@ CreateYbBitmapTableScanDesc(YbBitmapTableScanState *scanstate)
PushdownExprs *yb_pushdown;
HeapScanDesc scandesc;
YbBitmapTableScan *plan = (YbBitmapTableScan *) scanstate->ss.ps.plan;
bool has_targets;

yb_pushdown = YbInstantiatePushdownParams(
scanstate->work_mem_exceeded ? &plan->fallback_pushdown
Expand Down Expand Up @@ -277,23 +262,6 @@ CreateYbBitmapTableScanDesc(YbBitmapTableScanState *scanstate)
scandesc->rs_cblock = InvalidBlockNumber;
scandesc->ybscan = ybScan;

/*
* We can potentially skip sending a request to the table if we do not need
* any columns of the table, either for checking non-indexable quals or for
* returning data. This test is a bit simplistic, as it checks the
* stronger condition that there's no qual or return tlist at all. But in
* most cases it's probably not worth working harder than that.
*
* The PG version of this test looked only at qual and tlist. In Yugabyte,
* the target list from PGGate is more accurate and not much more work to
* look at, so look at that instead.
*/
HandleYBStatus(YBCPgDmlHasRegularTargets(ybScan->handle, &has_targets));

scanstate->can_skip_fetch = (plan->scan.plan.qual == NIL &&
plan->rel_pushdown.quals == NIL &&
!has_targets && !scanstate->recheck_required);

if (scanstate->recheck_required && !scanstate->work_mem_exceeded)
{
PushdownExprs *recheck_pushdown = YbInstantiatePushdownParams(
Expand Down
5 changes: 0 additions & 5 deletions src/postgres/src/include/nodes/execnodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1628,11 +1628,7 @@ typedef struct BitmapHeapScanState
* work_mem_exceeded if we've exceeded work_mem, internally switch to
* seq scan
* average_ybctid_bytes an estimate of the average ybctid size
* can_skip_fetch can we potentially skip tuple fetches in this scan?
* skipped_tuples how many tuples have we skipped fetching?
* recheck_pushdown if index recheck is required, check these remote
* quals
* fallback_pushdown if work_mem is exceeded, check these remote quals
* ----------------
*/
typedef struct YbBitmapTableScanState
Expand All @@ -1647,7 +1643,6 @@ typedef struct YbBitmapTableScanState
bool recheck_required;
bool work_mem_exceeded;
size_t average_ybctid_bytes;
bool can_skip_fetch;
int skipped_tuples;
} YbBitmapTableScanState;

Expand Down
22 changes: 22 additions & 0 deletions src/postgres/src/test/regress/expected/yb_bitmap_scan_joins.out
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,28 @@ SELECT joina.a FROM joina WHERE NOT EXISTS (SELECT FROM joinb WHERE joinb.c >= j
20
(4 rows)

--
-- System Table Join where we don't require any values from the Bitmap table
--
/*+ NestLoop(c ns) SeqScan(c) BitmapScan(ns) */ EXPLAIN (ANALYZE, COSTS OFF)
SELECT c.relname FROM pg_class c, pg_namespace ns WHERE ns.oid = c.relnamespace AND c.relname = 'pg_class';
QUERY PLAN
---------------------------------------------------------------------------------
Nested Loop (actual rows=1 loops=1)
-> Seq Scan on pg_class c (actual rows=1 loops=1)
Storage Filter: (relname = 'pg_class'::name)
-> YB Bitmap Table Scan on pg_namespace ns (actual rows=1 loops=1)
-> Bitmap Index Scan on pg_namespace_oid_index (actual rows=1 loops=1)
Index Cond: (oid = c.relnamespace)
(6 rows)

/*+ NestLoop(c ns) SeqScan(c) BitmapScan(ns) */
SELECT c.relname FROM pg_class c, pg_namespace ns WHERE ns.oid = c.relnamespace AND c.relname = 'pg_class';
relname
----------
pg_class
(1 row)

RESET yb_explain_hide_non_deterministic_fields;
RESET enable_bitmapscan;
RESET yb_prefer_bnl;
26 changes: 16 additions & 10 deletions src/postgres/src/test/regress/expected/yb_bitmap_scans.out
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ SELECT unique1, unique2 FROM tenk3 WHERE unique1 < 100 or unique2 IS NULL;
(12 rows)

--
-- test cases where we can skip fetching the table rows
-- test cases where we could skip fetching the table rows (TODO: #22044)
--
-- this query does not need a recheck, so we don't need to fetch the rows for the COUNT(*)
/*+ BitmapScan(tenk1) */ EXPLAIN (ANALYZE, DIST, COSTS OFF)
Expand All @@ -410,6 +410,8 @@ SELECT COUNT(*) FROM tenk1 WHERE unique1 < 2000 OR unique2 < 2000;
---------------------------------------------------------------------------------
Aggregate (actual rows=1 loops=1)
-> YB Bitmap Table Scan on tenk1 (actual rows=3594 loops=1)
Storage Table Read Requests: 4
Storage Table Rows Scanned: 3594
-> BitmapOr (actual rows=3594 loops=1)
-> Bitmap Index Scan on tenk1_unique1 (actual rows=2000 loops=1)
Index Cond: (unique1 < 2000)
Expand All @@ -419,11 +421,11 @@ SELECT COUNT(*) FROM tenk1 WHERE unique1 < 2000 OR unique2 < 2000;
Index Cond: (unique2 < 2000)
Storage Index Read Requests: 2
Storage Index Rows Scanned: 2000
Storage Read Requests: 4
Storage Rows Scanned: 4000
Storage Read Requests: 8
Storage Rows Scanned: 7594
Storage Write Requests: 0
Storage Flush Requests: 0
(15 rows)
(17 rows)

/*+ BitmapScan(tenk1) */
SELECT COUNT(*) FROM tenk1 WHERE unique1 < 2000 OR unique2 < 2000;
Expand Down Expand Up @@ -589,6 +591,8 @@ SELECT 1 FROM tenk1 WHERE unique1 < 5 OR unique2 < 5;
QUERY PLAN
------------------------------------------------------------------------
YB Bitmap Table Scan on tenk1 (actual rows=10 loops=1)
Storage Table Read Requests: 1
Storage Table Rows Scanned: 10
-> BitmapOr (actual rows=10 loops=1)
-> Bitmap Index Scan on tenk1_unique1 (actual rows=5 loops=1)
Index Cond: (unique1 < 5)
Expand All @@ -598,11 +602,11 @@ SELECT 1 FROM tenk1 WHERE unique1 < 5 OR unique2 < 5;
Index Cond: (unique2 < 5)
Storage Index Read Requests: 1
Storage Index Rows Scanned: 5
Storage Read Requests: 2
Storage Rows Scanned: 10
Storage Read Requests: 3
Storage Rows Scanned: 20
Storage Write Requests: 0
Storage Flush Requests: 0
(14 rows)
(16 rows)

/*+ BitmapScan(tenk1) */
SELECT 1 FROM tenk1 WHERE unique1 < 5 OR unique2 < 5;
Expand All @@ -625,6 +629,8 @@ SELECT random() FROM tenk1 WHERE unique1 < 5 OR unique2 < 5;
QUERY PLAN
------------------------------------------------------------------------
YB Bitmap Table Scan on tenk1 (actual rows=10 loops=1)
Storage Table Read Requests: 1
Storage Table Rows Scanned: 10
-> BitmapOr (actual rows=10 loops=1)
-> Bitmap Index Scan on tenk1_unique1 (actual rows=5 loops=1)
Index Cond: (unique1 < 5)
Expand All @@ -634,11 +640,11 @@ SELECT random() FROM tenk1 WHERE unique1 < 5 OR unique2 < 5;
Index Cond: (unique2 < 5)
Storage Index Read Requests: 1
Storage Index Rows Scanned: 5
Storage Read Requests: 2
Storage Rows Scanned: 10
Storage Read Requests: 3
Storage Rows Scanned: 20
Storage Write Requests: 0
Storage Flush Requests: 0
(14 rows)
(16 rows)

--
-- test primary key queries
Expand Down
8 changes: 8 additions & 0 deletions src/postgres/src/test/regress/sql/yb_bitmap_scan_joins.sql
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,14 @@ SELECT joina.a FROM joina WHERE NOT EXISTS (SELECT FROM joinb WHERE joinb.c >= j
SELECT joina.a FROM joina WHERE NOT EXISTS (SELECT FROM joinb WHERE joinb.c >= joina.b) ORDER BY joina.a;
SELECT joina.a FROM joina WHERE NOT EXISTS (SELECT FROM joinb WHERE joinb.c >= joina.b) ORDER BY joina.a;

--
-- System Table Join where we don't require any values from the Bitmap table
--
/*+ NestLoop(c ns) SeqScan(c) BitmapScan(ns) */ EXPLAIN (ANALYZE, COSTS OFF)
SELECT c.relname FROM pg_class c, pg_namespace ns WHERE ns.oid = c.relnamespace AND c.relname = 'pg_class';
/*+ NestLoop(c ns) SeqScan(c) BitmapScan(ns) */
SELECT c.relname FROM pg_class c, pg_namespace ns WHERE ns.oid = c.relnamespace AND c.relname = 'pg_class';

RESET yb_explain_hide_non_deterministic_fields;
RESET enable_bitmapscan;
RESET yb_prefer_bnl;
2 changes: 1 addition & 1 deletion src/postgres/src/test/regress/sql/yb_bitmap_scans.sql
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ DELETE FROM tenk3 WHERE unique2 IS NULL OR unique1 < 1000;
SELECT unique1, unique2 FROM tenk3 WHERE unique1 < 100 or unique2 IS NULL;

--
-- test cases where we can skip fetching the table rows
-- test cases where we could skip fetching the table rows (TODO: #22044)
--
-- this query does not need a recheck, so we don't need to fetch the rows for the COUNT(*)
/*+ BitmapScan(tenk1) */ EXPLAIN (ANALYZE, DIST, COSTS OFF)
Expand Down
6 changes: 0 additions & 6 deletions src/yb/yql/pggate/pg_dml.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@ Status PgDml::AppendTargetPB(PgExpr *target) {

if (target->is_system()) {
has_system_targets_ = true;
} else if (!is_aggregate) {
has_regular_targets_ = true;
}

if (is_aggregate) {
Expand Down Expand Up @@ -465,10 +463,6 @@ Result<bool> PgDml::GetNextRow(PgTuple *pg_tuple) {
return false;
}

bool PgDml::has_regular_targets() const {
return has_regular_targets_;
}

bool PgDml::has_aggregate_targets() const {
return has_aggregate_targets_;
}
Expand Down
3 changes: 0 additions & 3 deletions src/yb/yql/pggate/pg_dml.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,6 @@ class PgDml : public PgStatement {
// key, or neither.
Result<YBCPgColumnInfo> GetColumnInfo(int attr_num) const;

bool has_regular_targets() const;

bool has_aggregate_targets() const;

bool has_system_targets() const;
Expand Down Expand Up @@ -192,7 +190,6 @@ class PgDml : public PgStatement {
// - "targets_" are either selected or returned expressions by DML statements.
PgTable target_;
std::vector<PgFetchedTarget*> targets_;
bool has_regular_targets_ = false;
bool has_aggregate_targets_ = false;
bool has_system_targets_ = false;

Expand Down
4 changes: 0 additions & 4 deletions src/yb/yql/pggate/pggate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1333,10 +1333,6 @@ Status PgApiImpl::DmlAppendTarget(PgStatement *handle, PgExpr *target) {
return down_cast<PgDml*>(handle)->AppendTarget(target);
}

Result<bool> PgApiImpl::DmlHasRegularTargets(PgStatement *handle) {
return down_cast<PgDml*>(handle)->has_regular_targets();
}

Result<bool> PgApiImpl::DmlHasSystemTargets(PgStatement *handle) {
return down_cast<PgDml*>(handle)->has_system_targets();
}
Expand Down
2 changes: 0 additions & 2 deletions src/yb/yql/pggate/pggate.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,6 @@ class PgApiImpl {
// All DML statements
Status DmlAppendTarget(PgStatement *handle, PgExpr *expr);

Result<bool> DmlHasRegularTargets(PgStatement *handle);

Result<bool> DmlHasSystemTargets(PgStatement *handle);

Status DmlAppendQual(PgStatement *handle, PgExpr *expr, bool is_primary);
Expand Down
4 changes: 0 additions & 4 deletions src/yb/yql/pggate/ybc_pggate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1203,10 +1203,6 @@ YBCStatus YBCPgDmlAppendTarget(YBCPgStatement handle, YBCPgExpr target) {
return ToYBCStatus(pgapi->DmlAppendTarget(handle, target));
}

YBCStatus YBCPgDmlHasRegularTargets(YBCPgStatement handle, bool *has_targets) {
return ExtractValueFromResult(pgapi->DmlHasRegularTargets(handle), has_targets);
}

YBCStatus YBCPgDmlHasSystemTargets(YBCPgStatement handle, bool *has_system_cols) {
return ExtractValueFromResult(pgapi->DmlHasSystemTargets(handle), has_system_cols);
}
Expand Down
2 changes: 0 additions & 2 deletions src/yb/yql/pggate/ybc_pggate.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,8 +434,6 @@ YBCStatus YBCPgBackfillIndex(
// - INSERT / UPDATE / DELETE ... RETURNING target_expr1, target_expr2, ...
YBCStatus YBCPgDmlAppendTarget(YBCPgStatement handle, YBCPgExpr target);

YBCStatus YBCPgDmlHasRegularTargets(YBCPgStatement handle, bool *has_targets);

// Check if any statement target is a system column reference.
YBCStatus YBCPgDmlHasSystemTargets(YBCPgStatement handle, bool *has_system_cols);

Expand Down

0 comments on commit 03fd372

Please sign in to comment.