HIVE-12543: Disable Hive ConstantPropagate optimizer when CBO has optimized the plan (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
jcamachor committed Feb 17, 2016
1 parent f8f50ab commit a6d9bf7
Showing 135 changed files with 2,170 additions and 1,451 deletions.
@@ -440,7 +440,7 @@ STAGE PLANS:
alias: hbase_pushdown
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: (CASE WHEN ((key < '90')) THEN (2) ELSE (4) END > 3) (type: boolean)
predicate: (key >= '90') (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
@@ -312,7 +312,7 @@ STAGE PLANS:
alias: hbase_pushdown
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Filter Operator
predicate: (CASE WHEN ((key = 90)) THEN (2) ELSE (4) END > 3) (type: boolean)
predicate: (key <> 90) (type: boolean)
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: key (type: int), value (type: string)
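The two predicate rewrites above are pure logical simplifications. In the first hunk, CASE WHEN (key < '90') THEN 2 ELSE 4 END returns 2 only when key < '90', so comparing the result with > 3 succeeds exactly when the ELSE value 4 is produced, i.e. when key >= '90'; the second hunk folds the analogous key = 90 case to key <> 90. A standalone sanity check of the first equivalence, written as plain Java for illustration (not Hive code) and ignoring SQL NULL handling, which does not apply to the non-null HBase row key:

```java
public class CaseFoldCheck {
  public static void main(String[] args) {
    for (String key : new String[] {"10", "50", "89", "90", "91"}) {
      // Original plan predicate: CASE WHEN (key < '90') THEN (2) ELSE (4) END > 3
      boolean original = (key.compareTo("90") < 0 ? 2 : 4) > 3;
      // Folded plan predicate: key >= '90'
      boolean folded = key.compareTo("90") >= 0;
      System.out.println(key + ": original=" + original + ", folded=" + folded);
    }
  }
}
```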
4 changes: 2 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java
@@ -25,11 +25,11 @@
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.ql.exec.NodeUtils.Function;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.mapred.OutputCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Multimap;
@@ -17,8 +17,8 @@

import java.io.Serializable;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -29,8 +29,6 @@
import java.util.Stack;

import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
@@ -56,6 +54,7 @@
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
@@ -72,10 +71,6 @@
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFNvl;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNot;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
@@ -88,19 +83,20 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;

@@ -492,9 +488,9 @@ private static void propagate(GenericUDF udf, List<ExprNodeDesc> newExprs, RowSc
return;
}
// If both sides are constants, there is nothing to propagate
ExprNodeColumnDesc c = getColumnExpr(lOperand);
ExprNodeColumnDesc c = ExprNodeDescUtils.getColumnExpr(lOperand);
if (null == c) {
c = getColumnExpr(rOperand);
c = ExprNodeDescUtils.getColumnExpr(rOperand);
}
if (null == c) {
// we need a column expression on other side.
@@ -527,13 +523,6 @@ private static void propagate(GenericUDF udf, List<ExprNodeDesc> newExprs, RowSc
}
}

private static ExprNodeColumnDesc getColumnExpr(ExprNodeDesc expr) {
while (FunctionRegistry.isOpCast(expr)) {
expr = expr.getChildren().get(0);
}
return (expr instanceof ExprNodeColumnDesc) ? (ExprNodeColumnDesc)expr : null;
}

private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc> newExprs,
Operator<? extends Serializable> op) throws UDFArgumentException {
if (udf instanceof GenericUDFOPEqual) {
@@ -623,9 +612,9 @@ private static ExprNodeDesc shortcutFunction(GenericUDF udf, List<ExprNodeDesc>
// Try to fold (key <op> 86) and (key is not null) to (key <op> 86)
// where <op> can be "=", ">=", "<=", ">", "<".
// Note: (key <> 86) and (key is not null) cannot be folded
ExprNodeColumnDesc colDesc = getColumnExpr(childExpr.getChildren().get(0));
ExprNodeColumnDesc colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(0));
if (null == colDesc) {
colDesc = getColumnExpr(childExpr.getChildren().get(1));
colDesc = ExprNodeDescUtils.getColumnExpr(childExpr.getChildren().get(1));
}
if (colDesc != null) {
compareExprs.add(colDesc);
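The comment above describes folding a conjunction such as (key = 86) and (key is not null) down to (key = 86): under SQL three-valued logic, a comparison of key with a non-NULL constant is never TRUE when key is NULL, so the IS NOT NULL conjunct cannot let any additional rows through the filter. A toy illustration of that reasoning in plain Java (the real code operates on ExprNodeDesc trees, not on values):

```java
import java.util.Arrays;
import java.util.List;

public class NotNullFoldCheck {
  public static void main(String[] args) {
    List<Integer> keys = Arrays.asList(86, 42, null);
    for (Integer key : keys) {
      // (key = 86) AND (key IS NOT NULL): the extra conjunct never changes which rows pass,
      // because the comparison alone already rejects a NULL key.
      boolean withIsNotNull = (key != null && key == 86) && (key != null);
      boolean folded = (key != null && key == 86);
      System.out.println(key + ": withIsNotNull=" + withIsNotNull + ", folded=" + folded);
    }
  }
}
```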
@@ -26,8 +26,6 @@
import java.util.Map;
import java.util.Stack;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
@@ -59,12 +57,15 @@
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicListDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* This optimization looks for expressions of the kind "x IN (RS[n])". If such
@@ -77,7 +78,7 @@ public class DynamicPartitionPruningOptimization implements NodeProcessor {
static final private Logger LOG = LoggerFactory.getLogger(DynamicPartitionPruningOptimization.class
.getName());

public static class DynamicPartitionPrunerProc implements NodeProcessor {
private static class DynamicPartitionPrunerProc implements NodeProcessor {

/**
* process simply remembers all the dynamic partition pruning expressions
@@ -130,27 +131,6 @@ public Iterator<DynamicListContext> iterator() {
}
}

private String extractColName(ExprNodeDesc root) {
if (root instanceof ExprNodeColumnDesc) {
return ((ExprNodeColumnDesc) root).getColumn();
} else {
if (root.getChildren() == null) {
return null;
}

String column = null;
for (ExprNodeDesc d: root.getChildren()) {
String candidate = extractColName(d);
if (column != null && candidate != null) {
return null;
} else if (candidate != null) {
column = candidate;
}
}
return column;
}
}

@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
throws SemanticException {
@@ -191,16 +171,16 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Obje

// collect the dynamic pruning conditions
removerContext.dynLists.clear();
walkExprTree(desc.getPredicate(), removerContext);
collectDynamicPruningConditions(desc.getPredicate(), removerContext);

for (DynamicListContext ctx : removerContext) {
String column = extractColName(ctx.parent);
String column = ExprNodeDescUtils.extractColName(ctx.parent);

if (ts != null && column != null) {
Table table = ts.getConf().getTableMetadata();

if (table != null && table.isPartitionKey(column)) {
String columnType = table.getPartColByName(column).getType();
String columnType = table.getPartColByName(column).getType();
String alias = ts.getConf().getAlias();
PrunedPartitionList plist = parseContext.getPrunedPartitions(alias, ts);
if (LOG.isDebugEnabled()) {
@@ -212,6 +192,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Obje
}
}
}
// If partKey is a constant, we can check whether the partitions
// have been already filtered
if (plist == null || plist.getPartitions().size() != 0) {
LOG.info("Dynamic partitioning: " + table.getCompleteName() + "." + column);
generateEventOperatorPlan(ctx, parseContext, ts, column, columnType);
@@ -253,7 +235,7 @@ private void cleanTableScanFilters(TableScanOperator ts) throws SemanticExceptio

// collect the dynamic pruning conditions
removerContext.dynLists.clear();
walkExprTree(ts.getConf().getFilterExpr(), removerContext);
collectDynamicPruningConditions(ts.getConf().getFilterExpr(), removerContext);

for (DynamicListContext ctx : removerContext) {
// remove the condition by replacing it with "true"
@@ -345,7 +327,7 @@ private void generateEventOperatorPlan(DynamicListContext ctx, ParseContext pars
}
}

private Map<Node, Object> walkExprTree(ExprNodeDesc pred, NodeProcessorCtx ctx)
private Map<Node, Object> collectDynamicPruningConditions(ExprNodeDesc pred, NodeProcessorCtx ctx)
throws SemanticException {

// create a walker which walks the tree in a DFS manner while maintaining
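The body of the renamed collectDynamicPruningConditions (formerly walkExprTree) is collapsed above, ending at the comment about a DFS walker. The sketch below is only a guess at the standard Hive rule-walker pattern that comment suggests — dispatching DynamicPartitionPrunerProc on ExprNodeDynamicListDesc nodes — and is not the actual method body from this commit:

```java
// Assumed sketch, not the committed code. Relies on the org.apache.hadoop.hive.ql.lib
// walker classes (Rule, RuleRegExp, NodeProcessor, Dispatcher, DefaultRuleDispatcher,
// GraphWalker, DefaultGraphWalker, Node) plus the java.util collections.
private Map<Node, Object> collectDynamicPruningConditions(ExprNodeDesc pred, NodeProcessorCtx ctx)
    throws SemanticException {
  // Fire DynamicPartitionPrunerProc on every ExprNodeDynamicListDesc found in the predicate.
  Map<Rule, NodeProcessor> exprRules = new LinkedHashMap<Rule, NodeProcessor>();
  exprRules.put(new RuleRegExp("R1", ExprNodeDynamicListDesc.class.getName() + "%"),
      new DynamicPartitionPrunerProc());

  // The dispatcher picks the matching processor; the walker does the DFS over the expression tree.
  Dispatcher disp = new DefaultRuleDispatcher(null, exprRules, ctx);
  GraphWalker egw = new DefaultGraphWalker(disp);

  List<Node> startNodes = new ArrayList<Node>();
  startNodes.add(pred);

  HashMap<Node, Object> outputMap = new HashMap<Node, Object>();
  egw.startWalking(startNodes, outputMap);
  return outputMap;
}
```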
23 changes: 13 additions & 10 deletions ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -23,6 +23,7 @@
import java.util.Set;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverterPostProc;
import org.apache.hadoop.hive.ql.optimizer.correlation.CorrelationOptimizer;
import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
@@ -102,16 +103,15 @@ public void initialize(HiveConf hiveConf) {
transformations.add(new PredicatePushDown());
} else if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD) &&
pctx.getContext().isCboSucceeded()) {
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
transformations.add(new ConstantPropagate());
}
transformations.add(new SyntheticJoinPredicate());
transformations.add(new SimplePredicatePushDown());
transformations.add(new RedundantDynamicPruningConditionsRemoval());
}

if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
// We run constant propagation twice because after predicate pushdown, filter expressions
// are combined and may become eligible for reduction (like is not null filter).
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION) &&
!pctx.getContext().isCboSucceeded()) {
// We run constant propagation twice because after predicate pushdown, filter expressions
// are combined and may become eligible for reduction (like is not null filter).
transformations.add(new ConstantPropagate());
}

@@ -129,10 +129,13 @@ public void initialize(HiveConf hiveConf) {
/* Add list bucketing pruner. */
transformations.add(new ListBucketingPruner());
}
if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) {
// PartitionPruner may create more folding opportunities, run ConstantPropagate again.
transformations.add(new ConstantPropagate());
}
}
if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTPPD)
&& HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) ||
(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)
&& pctx.getContext().isCboSucceeded())) {
// PartitionPruner may create more folding opportunities, run ConstantPropagate again.
transformations.add(new ConstantPropagate());
}

if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY) ||
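Read together, the new condition lines in the two Optimizer.java hunks above gate the remaining full ConstantPropagate passes on whether CBO produced the plan. The schematic below restates just that gating, with plain booleans standing in for the HiveConf.ConfVars.HIVEOPTPPD and HIVEOPTCONSTANTPROPAGATION lookups and for pctx.getContext().isCboSucceeded(); it is not the actual Optimizer.initialize() code and ignores the collapsed parts of the file:

```java
import java.util.ArrayList;
import java.util.List;

public class ConstantPropagateGating {

  static List<String> fullFoldingPasses(boolean ppd, boolean constProp, boolean cboSucceeded) {
    List<String> passes = new ArrayList<>();
    if (constProp && !cboSucceeded) {
      // Second pass after predicate pushdown: now limited to plans that CBO did not optimize.
      passes.add("after predicate pushdown");
    }
    if ((ppd && constProp) || (constProp && cboSucceeded)) {
      // PartitionPruner may create more folding opportunities, so this pass runs on both paths.
      passes.add("after partition pruning");
    }
    return passes;
  }

  public static void main(String[] args) {
    System.out.println("CBO-optimized plan: " + fullFoldingPasses(true, true, true));
    System.out.println("non-CBO plan:       " + fullFoldingPasses(true, true, false));
  }
}
```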
Remaining changed files not shown.
