diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index fc47d104871336..8413d6fa45657f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -241,11 +241,14 @@ public enum RuleType { MATERIALIZED_INDEX_FILTER_PROJECT_SCAN(RuleTypeClass.REWRITE), OLAP_SCAN_PARTITION_PRUNE(RuleTypeClass.REWRITE), + + OLAP_SCAN_WITH_PROJECT_PARTITION_PRUNE(RuleTypeClass.REWRITE), FILE_SCAN_PARTITION_PRUNE(RuleTypeClass.REWRITE), PUSH_CONJUNCTS_INTO_JDBC_SCAN(RuleTypeClass.REWRITE), PUSH_CONJUNCTS_INTO_ODBC_SCAN(RuleTypeClass.REWRITE), PUSH_CONJUNCTS_INTO_ES_SCAN(RuleTypeClass.REWRITE), OLAP_SCAN_TABLET_PRUNE(RuleTypeClass.REWRITE), + OLAP_SCAN_WITH_PROJECT_TABLET_PRUNE(RuleTypeClass.REWRITE), PUSH_AGGREGATE_TO_OLAP_SCAN(RuleTypeClass.REWRITE), EXTRACT_SINGLE_TABLE_EXPRESSION_FROM_DISJUNCTION(RuleTypeClass.REWRITE), HIDE_ONE_ROW_RELATION_UNDER_UNION(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java index 816dc4b645fd03..60df874f2a1004 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java @@ -21,17 +21,21 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.PartitionItem; +import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.rules.expression.rules.PartitionPruner; import org.apache.doris.nereids.rules.expression.rules.PartitionPruner.PartitionTableType; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalEmptyRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.util.Utils; import org.apache.doris.qe.ConnectContext; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import java.util.ArrayList; @@ -45,59 +49,75 @@ * Used to prune partition of olap scan, should execute after SwapProjectAndFilter, MergeConsecutiveFilters, * MergeConsecutiveProjects and all predicate push down related rules. */ -public class PruneOlapScanPartition extends OneRewriteRuleFactory { +public class PruneOlapScanPartition implements RewriteRuleFactory { + private Plan prunePartitions(CascadesContext ctx, + LogicalOlapScan scan, LogicalFilter originalFilter) { + OlapTable table = scan.getTable(); + Set partitionColumnNameSet = Utils.execWithReturnVal(table::getPartitionColumnNames); + if (partitionColumnNameSet.isEmpty()) { + return originalFilter; + } - @Override - public Rule build() { - return logicalFilter(logicalOlapScan()).when(p -> !p.child().isPartitionPruned()).thenApply(ctx -> { - LogicalFilter filter = ctx.root; - LogicalOlapScan scan = filter.child(); - OlapTable table = scan.getTable(); - Set partitionColumnNameSet = Utils.execWithReturnVal(table::getPartitionColumnNames); - if (partitionColumnNameSet.isEmpty()) { - return filter; - } + List output = scan.getOutput(); + Map scanOutput = Maps.newHashMapWithExpectedSize(output.size() * 2); + for (Slot slot : output) { + scanOutput.put(slot.getName().toLowerCase(), slot); + } - List output = scan.getOutput(); - Map scanOutput = Maps.newHashMapWithExpectedSize(output.size() * 2); - for (Slot slot : output) { - scanOutput.put(slot.getName().toLowerCase(), slot); + PartitionInfo partitionInfo = table.getPartitionInfo(); + List partitionColumns = partitionInfo.getPartitionColumns(); + List partitionSlots = new ArrayList<>(partitionColumns.size()); + for (Column column : partitionColumns) { + Slot slot = scanOutput.get(column.getName().toLowerCase()); + if (slot == null) { + return originalFilter; + } else { + partitionSlots.add(slot); } + } - PartitionInfo partitionInfo = table.getPartitionInfo(); - List partitionColumns = partitionInfo.getPartitionColumns(); - List partitionSlots = new ArrayList<>(partitionColumns.size()); - for (Column column : partitionColumns) { - Slot slot = scanOutput.get(column.getName().toLowerCase()); - if (slot == null) { - return filter; - } else { - partitionSlots.add(slot); - } - } + List manuallySpecifiedPartitions = scan.getManuallySpecifiedPartitions(); - List manuallySpecifiedPartitions = scan.getManuallySpecifiedPartitions(); + Map idToPartitions; + if (manuallySpecifiedPartitions.isEmpty()) { + idToPartitions = partitionInfo.getIdToItem(false); + } else { + Map allPartitions = partitionInfo.getAllPartitions(); + idToPartitions = allPartitions.keySet().stream() + .filter(id -> manuallySpecifiedPartitions.contains(id)) + .collect(Collectors.toMap(Function.identity(), id -> allPartitions.get(id))); + } + List prunedPartitions = PartitionPruner.prune( + partitionSlots, originalFilter.getPredicate(), idToPartitions, ctx, + PartitionTableType.OLAP); + if (prunedPartitions.isEmpty()) { + return new LogicalEmptyRelation( + ConnectContext.get().getStatementContext().getNextRelationId(), + originalFilter.getOutput()); + } - Map idToPartitions; - if (manuallySpecifiedPartitions.isEmpty()) { - idToPartitions = partitionInfo.getIdToItem(false); - } else { - Map allPartitions = partitionInfo.getAllPartitions(); - idToPartitions = allPartitions.keySet().stream() - .filter(id -> manuallySpecifiedPartitions.contains(id)) - .collect(Collectors.toMap(Function.identity(), id -> allPartitions.get(id))); - } - List prunedPartitions = PartitionPruner.prune( - partitionSlots, filter.getPredicate(), idToPartitions, ctx.cascadesContext, - PartitionTableType.OLAP); + LogicalOlapScan rewrittenScan = scan.withSelectedPartitionIds(prunedPartitions); + if (originalFilter.child() instanceof LogicalProject) { + LogicalProject rewrittenProject + = (LogicalProject) originalFilter.child() + .withChildren(ImmutableList.of(rewrittenScan)); + return new LogicalFilter<>(originalFilter.getConjuncts(), rewrittenProject); + } + return originalFilter.withChildren(ImmutableList.of(rewrittenScan)); + } - if (prunedPartitions.isEmpty()) { - return new LogicalEmptyRelation( - ConnectContext.get().getStatementContext().getNextRelationId(), - filter.getOutput()); - } - LogicalOlapScan rewrittenScan = scan.withSelectedPartitionIds(prunedPartitions); - return new LogicalFilter<>(filter.getConjuncts(), rewrittenScan); - }).toRule(RuleType.OLAP_SCAN_PARTITION_PRUNE); + @Override + public List buildRules() { + return ImmutableList.of( + logicalFilter(logicalOlapScan()).when(p -> !p.child().isPartitionPruned()).thenApply(ctx -> { + return prunePartitions(ctx.cascadesContext, ctx.root.child(), ctx.root); + }).toRule(RuleType.OLAP_SCAN_PARTITION_PRUNE), + + logicalFilter(logicalProject(logicalOlapScan())) + .when(p -> !p.child().child().isPartitionPruned()) + .when(p -> p.child().hasPushedDownToProjectionFunctions()).thenApply(ctx -> { + return prunePartitions(ctx.cascadesContext, ctx.root.child().child(), ctx.root); + }).toRule(RuleType.OLAP_SCAN_WITH_PROJECT_PARTITION_PRUNE) + ); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java index e2981635605008..c4468ca81202fa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java @@ -27,7 +27,10 @@ import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionColumnFilterConverter; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.planner.HashDistributionPruner; import org.apache.doris.planner.PartitionColumnFilter; @@ -46,34 +49,50 @@ /** * prune bucket */ -public class PruneOlapScanTablet extends OneRewriteRuleFactory { - +public class PruneOlapScanTablet implements RewriteRuleFactory { @Override - public Rule build() { - return logicalFilter(logicalOlapScan()) - .then(filter -> { - LogicalOlapScan olapScan = filter.child(); - OlapTable table = olapScan.getTable(); - Builder selectedTabletIdsBuilder = ImmutableList.builder(); - if (olapScan.getSelectedTabletIds().isEmpty()) { - for (Long id : olapScan.getSelectedPartitionIds()) { - Partition partition = table.getPartition(id); - MaterializedIndex index = partition.getIndex(olapScan.getSelectedIndexId()); - selectedTabletIdsBuilder - .addAll(getSelectedTabletIds(filter.getConjuncts(), index, - olapScan.getSelectedIndexId() == olapScan.getTable() - .getBaseIndexId(), - partition.getDistributionInfo())); - } - } else { - selectedTabletIdsBuilder.addAll(olapScan.getSelectedTabletIds()); - } - List selectedTabletIds = selectedTabletIdsBuilder.build(); - if (new HashSet(selectedTabletIds).equals(new HashSet(olapScan.getSelectedTabletIds()))) { - return null; - } - return filter.withChildren(olapScan.withSelectedTabletIds(selectedTabletIds)); - }).toRule(RuleType.OLAP_SCAN_TABLET_PRUNE); + public List buildRules() { + return ImmutableList.of( + logicalFilter(logicalOlapScan()) + .then(filter -> { + return pruneTablets(filter.child(), filter); + }).toRule(RuleType.OLAP_SCAN_TABLET_PRUNE), + + logicalFilter(logicalProject(logicalOlapScan())) + .when(p -> p.child().hasPushedDownToProjectionFunctions()).then(filter -> { + return pruneTablets(filter.child().child(), filter); + }).toRule(RuleType.OLAP_SCAN_WITH_PROJECT_TABLET_PRUNE) + ); + } + + private Plan pruneTablets(LogicalOlapScan olapScan, LogicalFilter originalFilter) { + OlapTable table = olapScan.getTable(); + Builder selectedTabletIdsBuilder = ImmutableList.builder(); + if (olapScan.getSelectedTabletIds().isEmpty()) { + for (Long id : olapScan.getSelectedPartitionIds()) { + Partition partition = table.getPartition(id); + MaterializedIndex index = partition.getIndex(olapScan.getSelectedIndexId()); + selectedTabletIdsBuilder + .addAll(getSelectedTabletIds(originalFilter.getConjuncts(), index, + olapScan.getSelectedIndexId() == olapScan.getTable() + .getBaseIndexId(), + partition.getDistributionInfo())); + } + } else { + selectedTabletIdsBuilder.addAll(olapScan.getSelectedTabletIds()); + } + List selectedTabletIds = selectedTabletIdsBuilder.build(); + if (new HashSet(selectedTabletIds).equals(new HashSet(olapScan.getSelectedTabletIds()))) { + return null; + } + LogicalOlapScan rewrittenScan = olapScan.withSelectedTabletIds(selectedTabletIds); + if (originalFilter.child() instanceof LogicalProject) { + LogicalProject rewrittenProject + = (LogicalProject) originalFilter.child() + .withChildren(ImmutableList.of(rewrittenScan)); + return new LogicalFilter<>(originalFilter.getConjuncts(), rewrittenProject); + } + return originalFilter.withChildren(rewrittenScan); } private Collection getSelectedTabletIds(Set expressions, diff --git a/regression-test/data/variant_p0/select_partition.out b/regression-test/data/variant_p0/select_partition.out new file mode 100644 index 00000000000000..939cddc82c487b --- /dev/null +++ b/regression-test/data/variant_p0/select_partition.out @@ -0,0 +1,29 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 {"a":1} + +-- !sql -- +7 {"a":2} + +-- !sql -- +1 {"a":1} +7 {"a":2} + +-- !sql -- +7 {"a":2} + +-- !sql -- +16 {"a":3} + +-- !sql -- +16 {"a":3} + +-- !sql -- +16 {"a":3} + +-- !sql -- +1 {"a":1} + +-- !sql -- +6 {"x":"123"} + diff --git a/regression-test/suites/variant_p0/select_partition.groovy b/regression-test/suites/variant_p0/select_partition.groovy new file mode 100644 index 00000000000000..a057e3b9a1dd3d --- /dev/null +++ b/regression-test/suites/variant_p0/select_partition.groovy @@ -0,0 +1,152 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("query_on_specific_partition") { + sql "SET enable_nereids_planner=true" + + sql """ + DROP TABLE IF EXISTS t_p; + """ + + sql """ + CREATE TABLE t_p ( + id BIGINT, + var VARIANT + ) DUPLICATE KEY(`id`) + PARTITION BY RANGE(`id`) + ( + PARTITION `p1` VALUES LESS THAN ('5'), + PARTITION `p2` VALUES LESS THAN ('10') + ) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_num"="1" + ); + """ + + sql """ALTER TABLE t_p ADD TEMPORARY PARTITION tp1 VALUES [("15"), ("20"));""" + + sql """INSERT INTO t_p VALUES(1, '{"a" : 1}')""" + sql """INSERT INTO t_p VALUES(7, '{"a" : 2}')""" + sql """INSERT INTO t_p TEMPORARY PARTITION(tp1) values(16, '{"a" : 3}');""" + + sql "SET enable_fallback_to_original_planner=false" + + qt_sql "SELECT * FROM t_p PARTITION p1" + + qt_sql "SELECT * FROM t_p PARTITION p2" + + order_qt_sql "SELECT * FROM t_p PARTITIONS (p2, p1)" + + order_qt_sql "SELECT * FROM t_p PARTITIONS (p2, p1) WHERE id > 1 and cast(var['a'] as int) > 0" + + qt_sql """select * from t_p temporary partition(tp1);""" + + qt_sql """select * from t_p temporary partitions(tp1);""" + + qt_sql """select * from t_p temporary partition tp1;""" + + sql """ + CREATE TABLE IF NOT EXISTS test_iot ( + `test_int` int NOT NULL, + `test_var` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`test_int`) + PARTITION BY LIST (`test_int`) + ( + PARTITION p1 VALUES IN ("1","2","3"), + PARTITION p2 VALUES IN ("4","5","6") + ) + DISTRIBUTED BY HASH(`test_int`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", + "storage_format" = "V2" + ) + """ + + sql """ + INSERT INTO test_iot VALUES(1,'{"a" : 1}'),(4, '{"a" : 2}'); + """ + + qt_sql """ + SELECT * FROM test_iot PARTITION p1; + """ + +// temporary partition test + sql """ + DROP TABLE IF EXISTS ut_p; + """ + + sql """ + CREATE TABLE ut_p ( + id BIGINT, + var VARIANT + ) unique KEY(`id`) + PARTITION BY RANGE(`id`) + ( + PARTITION `p1` VALUES LESS THAN ('5'), + PARTITION `p2` VALUES LESS THAN ('10') + ) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_num"="1" + ); + """ + + sql """ALTER TABLE ut_p ADD TEMPORARY PARTITION tp1 VALUES [("5"), ("7"));""" + + sql """INSERT INTO ut_p TEMPORARY PARTITION(tp1) values(6, '{"x" : "123"}');""" + sql """INSERT INTO ut_p values(6, '{"x" : 456}');""" + sql """INSERT INTO ut_p values(3, '{"x" : 789}');""" + + sql "set enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + qt_sql """select * from ut_p temporary partitions(tp1);""" + + explain { + sql "select * from ut_p temporary partitions(tp1);" + contains "partitions=1/2 (tp1)" + } + + explain { + sql """select * from ut_p temporary partitions(tp1) where cast(var['a'] as int) > 0""" + contains "partitions=1/2 (tp1)" + } + + explain { + sql "select * from ut_p temporary partitions(tp1) where cast(var['a'] as int) > 0" + contains "partitions=1/2 (tp1)" + } + + explain { + sql "select * from ut_p partitions(p2) where cast(var['a'] as int) > 0" + contains "partitions=1/2 (p2)" + } + + explain { + sql "select * from ut_p temporary partitions(tp1) where id = 8" + contains "VEMPTYSET" + } + + explain { + sql "select * from ut_p where id = 3 and cast(var['a'] as int) = 789" + contains "partitions=1/2 (p1)" + contains "tablets=1/3" + } +}