From 0b06fd22b5ccccbf4965f8f5277e6680c3a2b615 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Tue, 5 Dec 2023 16:42:23 +0800 Subject: [PATCH] [AQUMV] Compute Aggregations on Materialized Views. Support origin query has aggregations, compute Aggregations on materilized views whose query don't have Aggegations itself. create incremental materialized view mv as select c1 as mc1, c2 as mc2, c3 as mc3 from t1 where c1 > 90; Origin query: select count(c1)+1, sum(c2) filter (where c2 > 95), stddev(c3) from t1 where c1 > 90; Could be rewritten to: select count(mc1)+1, sum(mc2) filter (where mc2 > 95), stddev(mc3) from mv; All aggregate functions including count(*) are supported in AQUMV which is not limited to IVM's current aggregate functions: count, sum, avg. Complex expressions have aggregations, and aggregations with Filter clause are also supported. Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/optimizer/plan/aqumv.c | 26 ++- src/test/regress/expected/aqumv.out | 254 ++++++++++++++++++++++++++++ src/test/regress/sql/aqumv.sql | 68 ++++++++ 3 files changed, 341 insertions(+), 7 deletions(-) diff --git a/src/backend/optimizer/plan/aqumv.c b/src/backend/optimizer/plan/aqumv.c index 24e0a9f9bbc..76bb64c0cc7 100644 --- a/src/backend/optimizer/plan/aqumv.c +++ b/src/backend/optimizer/plan/aqumv.c @@ -1,4 +1,3 @@ - /*------------------------------------------------------------------------- * * aqumv.c @@ -112,11 +111,11 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel bool can_not_use_mv = (parse->commandType != CMD_SELECT) || (parse->rowMarks != NIL) || - parse->hasAggs || parse->hasWindowFuncs || parse->hasDistinctOn || /* Group By without agg could be possible though IMMV doesn't support it yet. */ (parse->groupClause != NIL) || + (parse->havingQual != NULL) || parse->hasModifyingCTE || parse->sortClause || (parse->parentStmtType == PARENTSTMTTYPE_REFRESH_MATVIEW) || @@ -277,8 +276,11 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel subroot->plan_params = NIL; subroot->outer_params = NULL; subroot->init_plans = NIL; - subroot->agginfos = NIL; - subroot->aggtransinfos = NIL; + if (!parse->hasAggs) + { + subroot->agginfos = NIL; + subroot->aggtransinfos = NIL; + } subroot->parse = mvQuery; /* @@ -335,6 +337,13 @@ answer_query_using_materialized_views(PlannerInfo *root, RelOptInfo *current_rel if(!aqumv_process_targetlist(context, parse->targetList, &mv_final_tlist)) continue; + /* + * We have successfully processed target list, all columns in Aggrefs could be + * computed from mvQuery. + * It's safe to set hasAggs here. + */ + mvQuery->hasAggs = parse->hasAggs; + /* * AQUMV * Process all quals to conjunctive normal form. @@ -459,6 +468,10 @@ aqumv_init_context(List *view_tlist, List *mv_tlist) { i++; TargetEntry* tle = lfirst_node(TargetEntry, lc); + + if (tle->resjunk) + continue; + expr = tle->expr; if(IsA(expr, Var)) { @@ -478,8 +491,6 @@ aqumv_init_context(List *view_tlist, List *mv_tlist) return context; } - - /* * Process varno after we eliminate mv's actions("old" and "new" relation) * Correct rindex and all varnos with a delta. @@ -724,8 +735,9 @@ static Node *aqumv_adjust_sub_matched_expr_mutator(Node *node, aqumv_equivalent_ PVC_RECURSE_WINDOWFUNCS | PVC_INCLUDE_PLACEHOLDERS); + /* Keep TargentEntry expr no changed in case for count(*) */ if (expr_vars == NIL) - return (Node *)node_expr; + return is_targetEntry ? node : (Node *)node_expr; list_free(expr_vars); /* Try match with mv_pure_vars_index, but do not disturb already rewrited exprs(Var->location = -2) */ diff --git a/src/test/regress/expected/aqumv.out b/src/test/regress/expected/aqumv.out index b0d0c56e917..37f99d7e925 100644 --- a/src/test/regress/expected/aqumv.out +++ b/src/test/regress/expected/aqumv.out @@ -731,6 +731,260 @@ select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 Optimizer: Postgres query optimizer (7 rows) +abort; +-- +-- Support origin query with aggregations. +-- Compute Aggregations from mv. +-- +begin; +create table aqumv_t2(c1 int, c2 int, c3 int) distributed by (c1); +insert into aqumv_t2 select i, i+1, i+2 from generate_series(1, 100) i; +insert into aqumv_t2 values (91, NULL, 95); +analyze aqumv_t2; +create incremental materialized view aqumv_mvt2_0 as + select c1 as mc1, c2 as mc2, c3 as mc3 + from aqumv_t2 where c1 > 90; +analyze aqumv_mvt2_0; +-- test aggregation functions supported in IVM. +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90; + QUERY PLAN +----------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(c1), sum(c2), avg(c3) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(c1)), (PARTIAL sum(c2)), (PARTIAL avg(c3)) + -> Partial Aggregate + Output: PARTIAL count(c1), PARTIAL sum(c2), PARTIAL avg(c3) + -> Seq Scan on public.aqumv_t2 + Output: c1, c2, c3 + Filter: (aqumv_t2.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(11 rows) + +select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90; + count | sum | avg +-------+-----+--------------------- + 11 | 965 | 97.2727272727272727 +(1 row) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90; + QUERY PLAN +---------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(mc1), sum(mc2), avg(mc3) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(mc1)), (PARTIAL sum(mc2)), (PARTIAL avg(mc3)) + -> Partial Aggregate + Output: PARTIAL count(mc1), PARTIAL sum(mc2), PARTIAL avg(mc3) + -> Seq Scan on public.aqumv_mvt2_0 + Output: mc1, mc2, mc3 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90; + count | sum | avg +-------+-----+--------------------- + 11 | 965 | 97.2727272727272727 +(1 row) + +-- test complex expressions have AGG. +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select count(c1) + 1 from aqumv_t2 where c1 > 90; + QUERY PLAN +----------------------------------------------------------------------------------- + Finalize Aggregate + Output: (count(c1) + 1) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(c1)) + -> Partial Aggregate + Output: PARTIAL count(c1) + -> Seq Scan on public.aqumv_t2 + Output: c1, c2, c3 + Filter: (aqumv_t2.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(11 rows) + +select count(c1) + 1 from aqumv_t2 where c1 > 90; + ?column? +---------- + 12 +(1 row) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(c1) + 1 from aqumv_t2 where c1 > 90; + QUERY PLAN +---------------------------------------------------------------------------------- + Finalize Aggregate + Output: (count(mc1) + 1) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(mc1)) + -> Partial Aggregate + Output: PARTIAL count(mc1) + -> Seq Scan on public.aqumv_mvt2_0 + Output: mc1, mc2, mc3 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select count(c1) + 1 from aqumv_t2 where c1 > 90; + ?column? +---------- + 12 +(1 row) + +-- test AGG FILTER. +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90; + QUERY PLAN +----------------------------------------------------------------------------------- + Finalize Aggregate + Output: sum(c2), sum(c2) FILTER (WHERE (c2 > 95)) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL sum(c2)), (PARTIAL sum(c2) FILTER (WHERE (c2 > 95))) + -> Partial Aggregate + Output: PARTIAL sum(c2), PARTIAL sum(c2) FILTER (WHERE (c2 > 95)) + -> Seq Scan on public.aqumv_t2 + Output: c1, c2, c3 + Filter: (aqumv_t2.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(11 rows) + +select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90; + sum | sum +-----+----- + 965 | 591 +(1 row) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90; + QUERY PLAN +------------------------------------------------------------------------------------ + Finalize Aggregate + Output: sum(mc2), sum(mc2) FILTER (WHERE (mc2 > 95)) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL sum(mc2)), (PARTIAL sum(mc2) FILTER (WHERE (mc2 > 95))) + -> Partial Aggregate + Output: PARTIAL sum(mc2), PARTIAL sum(mc2) FILTER (WHERE (mc2 > 95)) + -> Seq Scan on public.aqumv_mvt2_0 + Output: mc1, mc2, mc3 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90; + sum | sum +-----+----- + 965 | 591 +(1 row) + +-- test AGG functions which are not supported in IVM now, but could work in AQUMV. +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90; + QUERY PLAN +----------------------------------------------------------------------------------- + Finalize Aggregate + Output: max(c1), min(c3), stddev(c2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL max(c1)), (PARTIAL min(c3)), (PARTIAL stddev(c2)) + -> Partial Aggregate + Output: PARTIAL max(c1), PARTIAL min(c3), PARTIAL stddev(c2) + -> Seq Scan on public.aqumv_t2 + Output: c1, c2, c3 + Filter: (aqumv_t2.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(11 rows) + +select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90; + max | min | stddev +-----+-----+-------------------- + 100 | 93 | 3.0276503540974917 +(1 row) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90; + QUERY PLAN +---------------------------------------------------------------------------------- + Finalize Aggregate + Output: max(mc1), min(mc3), stddev(mc2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL max(mc1)), (PARTIAL min(mc3)), (PARTIAL stddev(mc2)) + -> Partial Aggregate + Output: PARTIAL max(mc1), PARTIAL min(mc3), PARTIAL stddev(mc2) + -> Seq Scan on public.aqumv_mvt2_0 + Output: mc1, mc2, mc3 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90; + max | min | stddev +-----+-----+-------------------- + 100 | 93 | 3.0276503540974917 +(1 row) + +-- test count(*) +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select count(c2), count(*) from aqumv_t2 where c1 > 90; + QUERY PLAN +----------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(c2), count(*) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(c2)), (PARTIAL count(*)) + -> Partial Aggregate + Output: PARTIAL count(c2), PARTIAL count(*) + -> Seq Scan on public.aqumv_t2 + Output: c1, c2, c3 + Filter: (aqumv_t2.c1 > 90) + Settings: enable_answer_query_using_materialized_views = 'off', optimizer = 'off' + Optimizer: Postgres query optimizer +(11 rows) + +select count(c2), count(*) from aqumv_t2 where c1 > 90; + count | count +-------+------- + 10 | 11 +(1 row) + +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(c2), count(*) from aqumv_t2 where c1 > 90; + QUERY PLAN +---------------------------------------------------------------------------------- + Finalize Aggregate + Output: count(mc2), count(*) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL count(mc2)), (PARTIAL count(*)) + -> Partial Aggregate + Output: PARTIAL count(mc2), PARTIAL count(*) + -> Seq Scan on public.aqumv_mvt2_0 + Output: mc1, mc2, mc3 + Settings: enable_answer_query_using_materialized_views = 'on', optimizer = 'off' + Optimizer: Postgres query optimizer +(10 rows) + +select count(c2), count(*) from aqumv_t2 where c1 > 90; + count | count +-------+------- + 10 | 11 +(1 row) + abort; reset optimizer; reset enable_answer_query_using_materialized_views; diff --git a/src/test/regress/sql/aqumv.sql b/src/test/regress/sql/aqumv.sql index 2da54655430..4d81dee5226 100644 --- a/src/test/regress/sql/aqumv.sql +++ b/src/test/regress/sql/aqumv.sql @@ -222,6 +222,74 @@ explain(verbose, costs off) select sqrt(abs(abs(c2) - c1 - 1) + abs(c2)) from aqumv_t1 where c1 > 30 and c1 < 40 and sqrt(abs(c2)) > 5.8; abort; + +-- +-- Support origin query with aggregations. +-- Compute Aggregations from mv. +-- +begin; +create table aqumv_t2(c1 int, c2 int, c3 int) distributed by (c1); +insert into aqumv_t2 select i, i+1, i+2 from generate_series(1, 100) i; +insert into aqumv_t2 values (91, NULL, 95); +analyze aqumv_t2; + +create incremental materialized view aqumv_mvt2_0 as + select c1 as mc1, c2 as mc2, c3 as mc3 + from aqumv_t2 where c1 > 90; +analyze aqumv_mvt2_0; + +-- test aggregation functions supported in IVM. +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90; +select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90; +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90; +select count(c1), sum(c2), avg(c3) from aqumv_t2 where c1 > 90; + +-- test complex expressions have AGG. +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select count(c1) + 1 from aqumv_t2 where c1 > 90; +select count(c1) + 1 from aqumv_t2 where c1 > 90; +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(c1) + 1 from aqumv_t2 where c1 > 90; +select count(c1) + 1 from aqumv_t2 where c1 > 90; + +-- test AGG FILTER. +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90; +select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90; +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90; +select sum(c2), sum(c2) filter (where c2 > 95) from aqumv_t2 where c1 > 90; + +-- test AGG functions which are not supported in IVM now, but could work in AQUMV. +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90; +select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90; +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90; +select max(c1), min(c3), stddev(c2) from aqumv_t2 where c1 > 90; + +-- test count(*) +set local enable_answer_query_using_materialized_views = off; +explain(costs off, verbose) +select count(c2), count(*) from aqumv_t2 where c1 > 90; +select count(c2), count(*) from aqumv_t2 where c1 > 90; +set local enable_answer_query_using_materialized_views = on; +explain(costs off, verbose) +select count(c2), count(*) from aqumv_t2 where c1 > 90; +select count(c2), count(*) from aqumv_t2 where c1 > 90; + +abort; + reset optimizer; reset enable_answer_query_using_materialized_views; drop table aqumv_t1 cascade;