Skip to content

Commit

Permalink
[AQUMV] Compute Aggregations on Materialized Views.
Browse files Browse the repository at this point in the history
Support origin query has aggregations, compute Aggregations
on materilized views whose query don't have Aggegations itself.

create incremental materialized view mv as
  select c1 as mc1, c2 as mc2, c3 as mc3
  from t1 where c1 > 90;

Origin query:

select count(c1)+1, sum(c2) filter (where c2 > 95), stddev(c3)
  from t1 where c1 > 90;

Could be rewritten to:

select count(mc1)+1, sum(mc2) filter (where mc2 > 95), stddev(mc3)
  from mv;

All aggregate functions including count(*) are supported
in AQUMV which is not limited to IVM's current aggregate
functions: count, sum, avg.
Complex expressions have aggregations, and aggregations
with Filter clause are also supported.

Authored-by: Zhang Mingli avamingli@gmail.com
  • Loading branch information
avamingli committed Dec 6, 2023
1 parent 5a663d7 commit 0b06fd2
Show file tree
Hide file tree
Showing 3 changed files with 341 additions and 7 deletions.
26 changes: 19 additions & 7 deletions src/backend/optimizer/plan/aqumv.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

/*-------------------------------------------------------------------------
*
* aqumv.c
Expand Down Expand Up @@ -112,11 +111,11 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel

bool can_not_use_mv = (parse->commandType != CMD_SELECT) ||
(parse->rowMarks != NIL) ||
parse->hasAggs ||
parse->hasWindowFuncs ||
parse->hasDistinctOn ||
/* Group By without agg could be possible though IMMV doesn't support it yet. */
(parse->groupClause != NIL) ||
(parse->havingQual != NULL) ||
parse->hasModifyingCTE ||
parse->sortClause ||
(parse->parentStmtType == PARENTSTMTTYPE_REFRESH_MATVIEW) ||
Expand Down Expand Up @@ -277,8 +276,11 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel
subroot->plan_params = NIL;
subroot->outer_params = NULL;
subroot->init_plans = NIL;
subroot->agginfos = NIL;
subroot->aggtransinfos = NIL;
if (!parse->hasAggs)
{
subroot->agginfos = NIL;
subroot->aggtransinfos = NIL;
}
subroot->parse = mvQuery;

/*
Expand Down Expand Up @@ -335,6 +337,13 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel
if(!aqumv_process_targetlist(context, parse->targetList, &mv_final_tlist))
continue;

/*
* We have successfully processed target list, all columns in Aggrefs could be
* computed from mvQuery.
* It's safe to set hasAggs here.
*/
mvQuery->hasAggs = parse->hasAggs;

/*
* AQUMV
* Process all quals to conjunctive normal form.
Expand Down Expand Up @@ -459,6 +468,10 @@ aqumv_init_context(List *view_tlist, List *mv_tlist)
{
i++;
TargetEntry* tle = lfirst_node(TargetEntry, lc);

if (tle->resjunk)
continue;

expr = tle->expr;
if(IsA(expr, Var))
{
Expand All @@ -478,8 +491,6 @@ aqumv_init_context(List *view_tlist, List *mv_tlist)
return context;
}



/*
* Process varno after we eliminate mv's actions("old" and "new" relation)
* Correct rindex and all varnos with a delta.
Expand Down Expand Up @@ -724,8 +735,9 @@ static Node *aqumv_adjust_sub_matched_expr_mutator(Node *node, aqumv_equivalent_
PVC_RECURSE_WINDOWFUNCS |
PVC_INCLUDE_PLACEHOLDERS);

/* Keep TargentEntry expr no changed in case for count(*) */
if (expr_vars == NIL)
return (Node *)node_expr;
return is_targetEntry ? node : (Node *)node_expr;
list_free(expr_vars);

/* Try match with mv_pure_vars_index, but do not disturb already rewrited exprs(Var->location = -2) */
Expand Down
254 changes: 254 additions & 0 deletions src/test/regress/expected/aqumv.out
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,260 @@ select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1
Optimizer: Postgres query optimizer
(7 rows)

abort;
--
-- Support origin query with aggregations.
-- Compute Aggregations from mv.
--
begin;
create table aqumv_t2(c1 int, c2 int, c3 int) distributed by (c1);
insert into aqumv_t2 select i, i+1, i+2 from generate_series(1, 100) i;
insert into aqumv_t2 values (91, NULL, 95);
analyze aqumv_t2;
create incremental materialized view aqumv_mvt2_0 as
select c1 as mc1, c2 as mc2, c3 as mc3
from aqumv_t2 where c1 > 90;
analyze aqumv_mvt2_0;
-- test aggregation functions supported in IVM.
set local enable_answer_query_using_materialized_views = off;
explain(costs off, verbose)
select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90;
QUERY PLAN
-----------------------------------------------------------------------------------
Finalize Aggregate
Output: count(c1), sum(c2), avg(c3)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(c1)), (PARTIAL sum(c2)), (PARTIAL avg(c3))
-> Partial Aggregate
Output: PARTIAL count(c1), PARTIAL sum(c2), PARTIAL avg(c3)
-> Seq Scan on public.aqumv_t2
Output: c1, c2, c3
Filter: (aqumv_t2.c1 > 90)
Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
Optimizer: Postgres query optimizer
(11 rows)

select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90;
count | sum | avg
-------+-----+---------------------
11 | 965 | 97.2727272727272727
(1 row)

set local enable_answer_query_using_materialized_views = on;
explain(costs off, verbose)
select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90;
QUERY PLAN
----------------------------------------------------------------------------------
Finalize Aggregate
Output: count(mc1), sum(mc2), avg(mc3)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(mc1)), (PARTIAL sum(mc2)), (PARTIAL avg(mc3))
-> Partial Aggregate
Output: PARTIAL count(mc1), PARTIAL sum(mc2), PARTIAL avg(mc3)
-> Seq Scan on public.aqumv_mvt2_0
Output: mc1, mc2, mc3
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(10 rows)

select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90;
count | sum | avg
-------+-----+---------------------
11 | 965 | 97.2727272727272727
(1 row)

-- test complex expressions have AGG.
set local enable_answer_query_using_materialized_views = off;
explain(costs off, verbose)
select count(c1) + 1 from aqumv_t2 where c1 > 90;
QUERY PLAN
-----------------------------------------------------------------------------------
Finalize Aggregate
Output: (count(c1) + 1)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(c1))
-> Partial Aggregate
Output: PARTIAL count(c1)
-> Seq Scan on public.aqumv_t2
Output: c1, c2, c3
Filter: (aqumv_t2.c1 > 90)
Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
Optimizer: Postgres query optimizer
(11 rows)

select count(c1) + 1 from aqumv_t2 where c1 > 90;
?column?
----------
12
(1 row)

set local enable_answer_query_using_materialized_views = on;
explain(costs off, verbose)
select count(c1) + 1 from aqumv_t2 where c1 > 90;
QUERY PLAN
----------------------------------------------------------------------------------
Finalize Aggregate
Output: (count(mc1) + 1)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(mc1))
-> Partial Aggregate
Output: PARTIAL count(mc1)
-> Seq Scan on public.aqumv_mvt2_0
Output: mc1, mc2, mc3
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(10 rows)

select count(c1) + 1 from aqumv_t2 where c1 > 90;
?column?
----------
12
(1 row)

-- test AGG FILTER.
set local enable_answer_query_using_materialized_views = off;
explain(costs off, verbose)
select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90;
QUERY PLAN
-----------------------------------------------------------------------------------
Finalize Aggregate
Output: sum(c2), sum(c2) FILTER (WHERE (c2 > 95))
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL sum(c2)), (PARTIAL sum(c2) FILTER (WHERE (c2 > 95)))
-> Partial Aggregate
Output: PARTIAL sum(c2), PARTIAL sum(c2) FILTER (WHERE (c2 > 95))
-> Seq Scan on public.aqumv_t2
Output: c1, c2, c3
Filter: (aqumv_t2.c1 > 90)
Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
Optimizer: Postgres query optimizer
(11 rows)

select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90;
sum | sum
-----+-----
965 | 591
(1 row)

set local enable_answer_query_using_materialized_views = on;
explain(costs off, verbose)
select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90;
QUERY PLAN
------------------------------------------------------------------------------------
Finalize Aggregate
Output: sum(mc2), sum(mc2) FILTER (WHERE (mc2 > 95))
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL sum(mc2)), (PARTIAL sum(mc2) FILTER (WHERE (mc2 > 95)))
-> Partial Aggregate
Output: PARTIAL sum(mc2), PARTIAL sum(mc2) FILTER (WHERE (mc2 > 95))
-> Seq Scan on public.aqumv_mvt2_0
Output: mc1, mc2, mc3
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(10 rows)

select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90;
sum | sum
-----+-----
965 | 591
(1 row)

-- test AGG functions which are not supported in IVM now, but could work in AQUMV.
set local enable_answer_query_using_materialized_views = off;
explain(costs off, verbose)
select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90;
QUERY PLAN
-----------------------------------------------------------------------------------
Finalize Aggregate
Output: max(c1), min(c3), stddev(c2)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL max(c1)), (PARTIAL min(c3)), (PARTIAL stddev(c2))
-> Partial Aggregate
Output: PARTIAL max(c1), PARTIAL min(c3), PARTIAL stddev(c2)
-> Seq Scan on public.aqumv_t2
Output: c1, c2, c3
Filter: (aqumv_t2.c1 > 90)
Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
Optimizer: Postgres query optimizer
(11 rows)

select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90;
max | min | stddev
-----+-----+--------------------
100 | 93 | 3.0276503540974917
(1 row)

set local enable_answer_query_using_materialized_views = on;
explain(costs off, verbose)
select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90;
QUERY PLAN
----------------------------------------------------------------------------------
Finalize Aggregate
Output: max(mc1), min(mc3), stddev(mc2)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL max(mc1)), (PARTIAL min(mc3)), (PARTIAL stddev(mc2))
-> Partial Aggregate
Output: PARTIAL max(mc1), PARTIAL min(mc3), PARTIAL stddev(mc2)
-> Seq Scan on public.aqumv_mvt2_0
Output: mc1, mc2, mc3
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(10 rows)

select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90;
max | min | stddev
-----+-----+--------------------
100 | 93 | 3.0276503540974917
(1 row)

-- test count(*)
set local enable_answer_query_using_materialized_views = off;
explain(costs off, verbose)
select count(c2), count(*) from aqumv_t2 where c1 > 90;
QUERY PLAN
-----------------------------------------------------------------------------------
Finalize Aggregate
Output: count(c2), count(*)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(c2)), (PARTIAL count(*))
-> Partial Aggregate
Output: PARTIAL count(c2), PARTIAL count(*)
-> Seq Scan on public.aqumv_t2
Output: c1, c2, c3
Filter: (aqumv_t2.c1 > 90)
Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off'
Optimizer: Postgres query optimizer
(11 rows)

select count(c2), count(*) from aqumv_t2 where c1 > 90;
count | count
-------+-------
10 | 11
(1 row)

set local enable_answer_query_using_materialized_views = on;
explain(costs off, verbose)
select count(c2), count(*) from aqumv_t2 where c1 > 90;
QUERY PLAN
----------------------------------------------------------------------------------
Finalize Aggregate
Output: count(mc2), count(*)
-> Gather Motion 3:1 (slice1; segments: 3)
Output: (PARTIAL count(mc2)), (PARTIAL count(*))
-> Partial Aggregate
Output: PARTIAL count(mc2), PARTIAL count(*)
-> Seq Scan on public.aqumv_mvt2_0
Output: mc1, mc2, mc3
Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off'
Optimizer: Postgres query optimizer
(10 rows)

select count(c2), count(*) from aqumv_t2 where c1 > 90;
count | count
-------+-------
10 | 11
(1 row)

abort;
reset optimizer;
reset enable_answer_query_using_materialized_views;
Expand Down
Loading

0 comments on commit 0b06fd2

Please sign in to comment.