[FIXME] Wrong results on main branch for INDF query #180

Merged: 1 commit, Sep 1, 2023
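For context, "INDF" is shorthand for IS NOT DISTINCT FROM, an equality comparison that treats two NULLs as equal. A minimal sketch of the kind of query affected, with hypothetical table names not taken from this PR:

-- NULL = NULL evaluates to NULL, so a plain equi-join never matches NULL keys;
-- IS NOT DISTINCT FROM treats NULLs as equal, so the (NULL, NULL) pair must match.
create table t1 (a int);
create table t2 (b int);
insert into t1 values (1), (NULL);
insert into t2 values (1), (NULL);
select * from t1 join t2 on t1.a is not distinct from t2.b;
-- expected result: (1, 1) and (NULL, NULL)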
14 changes: 12 additions & 2 deletions src/backend/executor/nodeHash.c
@@ -2192,7 +2192,12 @@ bool
 ExecScanHashBucket(HashState *hashState, HashJoinState *hjstate,
                    ExprContext *econtext)
 {
-    ExprState *hjclauses = hjstate->hashclauses;
+    /*
+     * Greenplum specific behavior.
+     * Using hashqualclauses to support hash join on 'IS NOT DISTINCT FROM'
+     * as well as '='.
+     */
+    ExprState *hjclauses = hjstate->hashqualclauses;
     HashJoinTable hashtable = hjstate->hj_HashTable;
     HashJoinTuple hashTuple = hjstate->hj_CurTuple;
     uint32 hashvalue = hjstate->hj_CurHashValue;
@@ -2253,7 +2258,12 @@ bool
 ExecParallelScanHashBucket(HashState *hashState, HashJoinState *hjstate,
                            ExprContext *econtext)
 {
-    ExprState *hjclauses = hjstate->hashclauses;
+    /*
+     * Greenplum specific behavior.
+     * Using hashqualclauses to support hash join on 'IS NOT DISTINCT FROM'
+     * as well as '='.
+     */
+    ExprState *hjclauses = hjstate->hashqualclauses;
     HashJoinTable hashtable = hjstate->hj_HashTable;
     HashJoinTuple hashTuple = hjstate->hj_CurTuple;
     uint32 hashvalue = hjstate->hj_CurHashValue;
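The substance of the fix above: hjstate->hashclauses is the upstream PostgreSQL field holding the ordinary '=' hash clauses, while the Greenplum-specific hashqualclauses, per the added comment, also carries the IS NOT DISTINCT FROM variants. Probing hash buckets with only the '=' clauses silently drops NULL-key matches, which is the wrong-result symptom in the title. The underlying NULL semantics can be checked directly; a minimal illustration, not part of this PR:

select NULL = NULL;                     -- NULL: '=' can never match a NULL key
select NULL is not distinct from NULL;  -- true: an INDF hash join must match NULL keys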
106 changes: 52 additions & 54 deletions src/test/regress/expected/groupingsets.out
@@ -2102,78 +2102,76 @@ set enable_sort = true;
set enable_hashagg = false;
set jit_above_cost = 0;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text)
-from gs_data_1 group by cube (g1000, g100,g10);
+select g1000, g100, g10, sum(g::numeric), count(*), max(g::text) from
+(select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+from generate_series(0,199999) g) s
+group by cube (g1000,g100,g10);
QUERY PLAN
---------------------------------------------------------------
-Finalize GroupAggregate
-Group Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Gather Motion 3:1 (slice1; segments: 3)
-Merge Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Sort
-Sort Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Partial GroupAggregate
-Group Key: g1000, g100, g10
-Group Key: g1000, g100
-Group Key: g1000
-Group Key: ()
-Sort Key: g100, g10
-Group Key: g100, g10
-Group Key: g100
-Sort Key: g10, g1000
-Group Key: g10, g1000
-Group Key: g10
--> Sort
-Sort Key: g1000, g100, g10
--> Seq Scan on gs_data_1
+----------------------------------------------------------------
+GroupAggregate
+Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 1000)), ((g.g % 100))
+Group Key: ((g.g % 1000))
+Group Key: ()
+Sort Key: ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 100))
+Sort Key: ((g.g % 10)), ((g.g % 1000))
+Group Key: ((g.g % 10)), ((g.g % 1000))
+Group Key: ((g.g % 10))
+-> Sort
+Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
+-> Function Scan on generate_series g
Optimizer: Postgres query optimizer
-(21 rows)
+(15 rows)

create table gs_group_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text)
-from gs_data_1 group by cube (g1000, g100,g10);
+select g1000, g100, g10, sum(g::numeric), count(*), max(g::text) from
+(select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+from generate_series(0,199999) g) s
+group by cube (g1000,g100,g10) distributed by (g1000);
-- Produce results with hash aggregation.
set enable_hashagg = true;
set enable_sort = false;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text)
-from gs_data_1 group by cube (g1000, g100,g10);
+select g1000, g100, g10, sum(g::numeric), count(*), max(g::text) from
+(select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+from generate_series(0,199999) g) s
+group by cube (g1000,g100,g10);
QUERY PLAN
---------------------------------------------------------------
-Gather Motion 3:1 (slice1; segments: 3)
--> Finalize HashAggregate
-Group Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Redistribute Motion 3:3 (slice2; segments: 3)
-Hash Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Partial MixedAggregate
-Hash Key: g1000, g100, g10
-Hash Key: g1000, g100
-Hash Key: g1000
-Hash Key: g100, g10
-Hash Key: g100
-Hash Key: g10, g1000
-Hash Key: g10
-Group Key: ()
--> Seq Scan on gs_data_1
+----------------------------------------------------------------
+GroupAggregate
+Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 1000)), ((g.g % 100))
+Group Key: ((g.g % 1000))
+Group Key: ()
+Sort Key: ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 100))
+Sort Key: ((g.g % 10)), ((g.g % 1000))
+Group Key: ((g.g % 10)), ((g.g % 1000))
+Group Key: ((g.g % 10))
+-> Sort
+Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
+-> Function Scan on generate_series g
Optimizer: Postgres query optimizer
-(16 rows)
+(15 rows)

create table gs_hash_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text)
-from gs_data_1 group by cube (g1000, g100,g10);
+select g1000, g100, g10, sum(g::numeric), count(*), max(g::text) from
+(select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+from generate_series(0,199999) g) s
+group by cube (g1000,g100,g10) distributed by (g1000);
set jit_above_cost to default;
set enable_sort = true;
set work_mem to default;
WARNING: "work_mem": setting is deprecated, and may be removed in a future release.
--- GPDB_12_MERGE_FIXME: the following comparison query has an ORCA plan that
--- relies on "IS NOT DISTINCT FROM" Hash Join, a variant that we likely have
--- lost during the merge with upstream Postgres 12. Disable ORCA for this query
-SET optimizer TO off;
--- Compare results
+-- Compare results of ORCA plan that relies on "IS NOT DISTINCT FROM" HASH Join
(select * from gs_hash_1 except select * from gs_group_1)
union all
(select * from gs_group_1 except select * from gs_hash_1);
-g100 | g10 | sum | count | max
-------+-----+-----+-------+-----
+g1000 | g100 | g10 | sum | count | max
+-------+------+-----+-----+-------+-----
(0 rows)

-RESET optimizer;
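A note on the comparison idiom in the test above: (A except B) union all (B except A) computes the symmetric difference of two result sets, so "(0 rows)" certifies that the sort-based gs_group_1 and hash-based gs_hash_1 results agree. A tiny self-contained illustration, with hypothetical CTE names:

with a(x) as (values (1), (2)),
     b(x) as (values (2), (1))
(select x from a except select x from b)
union all
(select x from b except select x from a);
-- returns 0 rows: a and b hold the same rows

Note that except compares sets rather than multisets, so duplicate row multiplicities go unchecked; the idiom is nevertheless the standard way to diff two query results.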
106 changes: 52 additions & 54 deletions src/test/regress/expected/groupingsets_optimizer.out
@@ -2248,78 +2248,76 @@ set enable_sort = true;
set enable_hashagg = false;
set jit_above_cost = 0;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text)
-from gs_data_1 group by cube (g1000, g100,g10);
+select g1000, g100, g10, sum(g::numeric), count(*), max(g::text) from
+(select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+from generate_series(0,199999) g) s
+group by cube (g1000,g100,g10);
QUERY PLAN
---------------------------------------------------------------
-Finalize GroupAggregate
-Group Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Gather Motion 3:1 (slice1; segments: 3)
-Merge Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Sort
-Sort Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Partial GroupAggregate
-Group Key: g1000, g100, g10
-Group Key: g1000, g100
-Group Key: g1000
-Group Key: ()
-Sort Key: g100, g10
-Group Key: g100, g10
-Group Key: g100
-Sort Key: g10, g1000
-Group Key: g10, g1000
-Group Key: g10
--> Sort
-Sort Key: g1000, g100, g10
--> Seq Scan on gs_data_1
+----------------------------------------------------------------
+GroupAggregate
+Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 1000)), ((g.g % 100))
+Group Key: ((g.g % 1000))
+Group Key: ()
+Sort Key: ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 100))
+Sort Key: ((g.g % 10)), ((g.g % 1000))
+Group Key: ((g.g % 10)), ((g.g % 1000))
+Group Key: ((g.g % 10))
+-> Sort
+Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
+-> Function Scan on generate_series g
Optimizer: Postgres query optimizer
-(21 rows)
+(15 rows)

create table gs_group_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text)
-from gs_data_1 group by cube (g1000, g100,g10);
+select g1000, g100, g10, sum(g::numeric), count(*), max(g::text) from
+(select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+from generate_series(0,199999) g) s
+group by cube (g1000,g100,g10) distributed by (g1000);
-- Produce results with hash aggregation.
set enable_hashagg = true;
set enable_sort = false;
explain (costs off)
-select g100, g10, sum(g::numeric), count(*), max(g::text)
-from gs_data_1 group by cube (g1000, g100,g10);
+select g1000, g100, g10, sum(g::numeric), count(*), max(g::text) from
+(select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+from generate_series(0,199999) g) s
+group by cube (g1000,g100,g10);
QUERY PLAN
---------------------------------------------------------------
-Gather Motion 3:1 (slice1; segments: 3)
--> Finalize HashAggregate
-Group Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Redistribute Motion 3:3 (slice2; segments: 3)
-Hash Key: g1000, g100, g10, (GROUPINGSET_ID())
--> Partial MixedAggregate
-Hash Key: g1000, g100, g10
-Hash Key: g1000, g100
-Hash Key: g1000
-Hash Key: g100, g10
-Hash Key: g100
-Hash Key: g10, g1000
-Hash Key: g10
-Group Key: ()
--> Seq Scan on gs_data_1
+----------------------------------------------------------------
+GroupAggregate
+Group Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 1000)), ((g.g % 100))
+Group Key: ((g.g % 1000))
+Group Key: ()
+Sort Key: ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 100)), ((g.g % 10))
+Group Key: ((g.g % 100))
+Sort Key: ((g.g % 10)), ((g.g % 1000))
+Group Key: ((g.g % 10)), ((g.g % 1000))
+Group Key: ((g.g % 10))
+-> Sort
+Sort Key: ((g.g % 1000)), ((g.g % 100)), ((g.g % 10))
+-> Function Scan on generate_series g
Optimizer: Postgres query optimizer
-(16 rows)
+(15 rows)

create table gs_hash_1 as
-select g100, g10, sum(g::numeric), count(*), max(g::text)
-from gs_data_1 group by cube (g1000, g100,g10);
+select g1000, g100, g10, sum(g::numeric), count(*), max(g::text) from
+(select g%1000 as g1000, g%100 as g100, g%10 as g10, g
+from generate_series(0,199999) g) s
+group by cube (g1000,g100,g10) distributed by (g1000);
set jit_above_cost to default;
set enable_sort = true;
set work_mem to default;
WARNING: "work_mem": setting is deprecated, and may be removed in a future release.
--- GPDB_12_MERGE_FIXME: the following comparison query has an ORCA plan that
--- relies on "IS NOT DISTINCT FROM" Hash Join, a variant that we likely have
--- lost during the merge with upstream Postgres 12. Disable ORCA for this query
-SET optimizer TO off;
--- Compare results
+-- Compare results of ORCA plan that relies on "IS NOT DISTINCT FROM" HASH Join
(select * from gs_hash_1 except select * from gs_group_1)
union all
(select * from gs_group_1 except select * from gs_hash_1);
-g100 | g10 | sum | count | max
-------+-----+-----+-------+-----
+g1000 | g100 | g10 | sum | count | max
+-------+------+-----+-----+-------+-----
(0 rows)

-RESET optimizer;