Skip to content

Commit 7e3780d

Browse files
committed
HIVE-29312: Concatenate equality conditions in AND nodes
1 parent c054a47 commit 7e3780d

File tree

5 files changed

+357
-184
lines changed

5 files changed

+357
-184
lines changed

standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java

Lines changed: 74 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,11 @@
104104
import org.apache.hadoop.hive.metastore.model.MTableColumnStatistics;
105105
import org.apache.hadoop.hive.metastore.model.MWMResourcePlan;
106106
import org.apache.hadoop.hive.metastore.parser.ExpressionTree;
107+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Condition;
107108
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.FilterBuilder;
108109
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode;
109110
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator;
111+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.MultiAndLeafNode;
110112
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator;
111113
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode;
112114
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeVisitor;
@@ -1368,6 +1370,9 @@ private static String generateSqlFilter(String catName, String dbName, String ta
13681370
PartitionFilterGenerator visitor = new PartitionFilterGenerator(
13691371
catName, dbName, tableName, partitionKeys,
13701372
params, joins, dbHasJoinCastBug, defaultPartName, dbType, schema);
1373+
TreeNode flattened = PartFilterExprUtil.flattenAndExpressions(tree.getRoot());
1374+
tree.setRoot(flattened);
1375+
13711376
tree.accept(visitor);
13721377
if (visitor.filterBuffer.hasError()) {
13731378
LOG.info("Unable to push down SQL filter: " + visitor.filterBuffer.getErrorMessage());
@@ -1422,14 +1427,6 @@ private static enum FilterType {
14221427
this.clazz = clazz;
14231428
}
14241429

1425-
public Set<String> getType() {
1426-
return colTypes;
1427-
}
1428-
1429-
public Class<?> getClazz() {
1430-
return clazz;
1431-
}
1432-
14331430
public static FilterType fromType(String colTypeStr) {
14341431
for (FilterType filterType : FilterType.values()) {
14351432
if (filterType.colTypes.contains(colTypeStr)) {
@@ -1451,24 +1448,69 @@ public static FilterType fromClass(Object value){
14511448

14521449
@Override
14531450
public void visit(LeafNode node) throws MetaException {
1454-
int partColCount = partitionKeys.size();
1455-
int partColIndex = LeafNode.getPartColIndexForFilter(node.keyName, partitionKeys, filterBuffer);
1451+
String filter = visitCondition(node.getCondition(), true);
14561452
if (filterBuffer.hasError()) {
14571453
return;
14581454
}
14591455

1456+
filterBuffer.append("(" + filter + ")");
1457+
}
1458+
1459+
@Override
1460+
public void visit(MultiAndLeafNode node) throws MetaException {
1461+
StringBuilder filterBuilder = new StringBuilder();
1462+
List<String> partValues = new ArrayList<>(Collections.nCopies(partitionKeys.size(), null));
1463+
boolean hasEqualCondition = false;
1464+
for (Condition condition : node.getConditions()) {
1465+
boolean isEqual = Operator.isEqualOperator(condition.getOperator());
1466+
if (isEqual) {
1467+
hasEqualCondition = true;
1468+
int partColIndex = getPartColIndexForFilter(condition.getKeyName(), partitionKeys, filterBuffer);
1469+
if (filterBuffer.hasError()) {
1470+
return;
1471+
}
1472+
String partValue = partValues.get(partColIndex);
1473+
String nodeValueStr = condition.getValue().toString();
1474+
if (partValue != null && !partValue.equals(nodeValueStr)) {
1475+
// Conflicting equal conditions for the same partition key - the filter is unsatisfiable.
1476+
filterBuffer.append("(1 = 0)");
1477+
return;
1478+
}
1479+
partValues.set(partColIndex, nodeValueStr);
1480+
}
1481+
if (!filterBuilder.isEmpty()) {
1482+
filterBuilder.append(" and ");
1483+
}
1484+
filterBuilder.append(visitCondition(condition, !isEqual));
1485+
}
1486+
// Concatenate equality conditions to match a longer index prefix.
1487+
if (hasEqualCondition) {
1488+
String partName = Warehouse.makePartName(partitionKeys, partValues, "%");
1489+
filterBuilder.append(" and " + PARTITIONS + ".\"PART_NAME\" like ?");
1490+
params.add(partName);
1491+
}
1492+
1493+
filterBuffer.append("(" + filterBuilder.toString() + ")");
1494+
}
1495+
1496+
private String visitCondition(Condition condition, boolean addPartNameFilter) throws MetaException {
1497+
int partColIndex = getPartColIndexForFilter(condition.getKeyName(), partitionKeys, filterBuffer);
1498+
if (filterBuffer.hasError()) {
1499+
return null;
1500+
}
1501+
14601502
FieldSchema partCol = partitionKeys.get(partColIndex);
14611503
String colTypeStr = ColumnType.getTypeName(partCol.getType());
14621504
FilterType colType = FilterType.fromType(colTypeStr);
14631505
if (colType == FilterType.Invalid) {
14641506
filterBuffer.setError("Filter pushdown not supported for type " + colTypeStr);
1465-
return;
1507+
return null;
14661508
}
1467-
FilterType valType = FilterType.fromClass(node.value);
1468-
Object nodeValue = node.value;
1509+
Object nodeValue = condition.getValue();
1510+
FilterType valType = FilterType.fromClass(nodeValue);
14691511
if (valType == FilterType.Invalid) {
1470-
filterBuffer.setError("Filter pushdown not supported for value " + node.value.getClass());
1471-
return;
1512+
filterBuffer.setError("Filter pushdown not supported for value " + nodeValue.getClass());
1513+
return null;
14721514
}
14731515

14741516
String nodeValue0 = "?";
@@ -1487,7 +1529,7 @@ public void visit(LeafNode node) throws MetaException {
14871529
} else if (colType == FilterType.Timestamp) {
14881530
if (dbType.isDERBY() || dbType.isMYSQL()) {
14891531
filterBuffer.setError("Filter pushdown on timestamp not supported for " + dbType.dbType);
1490-
return;
1532+
return null;
14911533
}
14921534
try {
14931535
MetaStoreUtils.convertStringToTimestamp((String) nodeValue);
@@ -1506,15 +1548,15 @@ public void visit(LeafNode node) throws MetaException {
15061548
// to be coerced?). Let the expression evaluation sort this one out, not metastore.
15071549
filterBuffer.setError("Cannot push down filter for "
15081550
+ colTypeStr + " column and value " + nodeValue.getClass());
1509-
return;
1551+
return null;
15101552
}
15111553

15121554
if (joins.isEmpty()) {
15131555
// There's a fixed number of partition cols that we might have filters on. To avoid
15141556
// joining multiple times for one column (if there are several filters on it), we will
15151557
// keep numCols elements in the list, one for each column; we will fill it with nulls,
15161558
// put each join at a corresponding index when necessary, and remove nulls in the end.
1517-
for (int i = 0; i < partColCount; ++i) {
1559+
for (int i = 0; i < partitionKeys.size(); ++i) {
15181560
joins.add(null);
15191561
}
15201562
}
@@ -1527,7 +1569,8 @@ public void visit(LeafNode node) throws MetaException {
15271569
// Build the filter and add parameters linearly; we are traversing leaf nodes LTR.
15281570
String tableValue = "\"FILTER" + partColIndex + "\".\"PART_KEY_VAL\"";
15291571

1530-
if (node.isReverseOrder && nodeValue != null) {
1572+
boolean isReverseOrder = condition.isReverseOrder();
1573+
if (isReverseOrder && nodeValue != null) {
15311574
params.add(nodeValue);
15321575
}
15331576
String tableColumn = tableValue;
@@ -1559,22 +1602,23 @@ public void visit(LeafNode node) throws MetaException {
15591602
tableValue += " then " + tableValue0 + " else null end)";
15601603
}
15611604

1562-
if (!node.isReverseOrder && nodeValue != null) {
1605+
if (!isReverseOrder && nodeValue != null) {
15631606
params.add(nodeValue);
15641607
}
15651608

15661609
// The following syntax is required for using LIKE clause wildcards '_' and '%' as literals.
1567-
if (node.operator == Operator.LIKE) {
1610+
Operator operator = condition.getOperator();
1611+
if (operator == Operator.LIKE) {
15681612
nodeValue0 = nodeValue0 + " ESCAPE '\\' ";
15691613
}
1570-
String filter = node.isReverseOrder
1571-
? nodeValue0 + " " + node.operator.getSqlOp() + " " + tableValue
1572-
: tableValue + " " + node.operator.getSqlOp() + " " + nodeValue0;
1614+
String filter = isReverseOrder
1615+
? nodeValue0 + " " + operator.getSqlOp() + " " + tableValue
1616+
: tableValue + " " + operator.getSqlOp() + " " + nodeValue0;
15731617
// For equals and not-equals filter, we can add partition name filter to improve performance.
1574-
boolean isOpEquals = Operator.isEqualOperator(node.operator);
1575-
boolean isOpNotEqual = Operator.isNotEqualOperator(node.operator);
1576-
String nodeValueStr = node.value.toString();
1577-
if (StringUtils.isNotEmpty(nodeValueStr) && (isOpEquals || isOpNotEqual)) {
1618+
boolean isOpEquals = Operator.isEqualOperator(operator);
1619+
boolean isOpNotEqual = Operator.isNotEqualOperator(operator);
1620+
String nodeValueStr = condition.getValue().toString();
1621+
if (addPartNameFilter && StringUtils.isNotEmpty(nodeValueStr) && (isOpEquals || isOpNotEqual)) {
15781622
Map<String, String> partKeyToVal = new HashMap<>();
15791623
partKeyToVal.put(partCol.getName(), nodeValueStr);
15801624
String escapedNameFragment = Warehouse.makePartName(partKeyToVal, false);
@@ -1583,6 +1627,7 @@ public void visit(LeafNode node) throws MetaException {
15831627
// match PART_NAME by like clause.
15841628
escapedNameFragment += "%";
15851629
}
1630+
int partColCount = partitionKeys.size();
15861631
if (colType != FilterType.Date && partColCount == 1) {
15871632
// Case where partition column type is not date and there is no other partition columns
15881633
params.add(escapedNameFragment);
@@ -1604,8 +1649,7 @@ public void visit(LeafNode node) throws MetaException {
16041649
filter += " and " + PARTITIONS + ".\"PART_NAME\"" + (isOpEquals ? " like ? " : " not like ? ");
16051650
}
16061651
}
1607-
1608-
filterBuffer.append("(" + filter + ")");
1652+
return filter;
16091653
}
16101654
}
16111655

standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,24 @@
1818

1919
package org.apache.hadoop.hive.metastore;
2020

21+
import java.util.ArrayList;
22+
import java.util.List;
23+
24+
import org.apache.hadoop.conf.Configuration;
25+
import org.apache.hadoop.hive.metastore.api.MetaException;
2126
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
2227
import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
28+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree;
29+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.BaseLeafNode;
30+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Condition;
31+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LeafNode;
32+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.LogicalOperator;
33+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.MultiAndLeafNode;
34+
import org.apache.hadoop.hive.metastore.parser.ExpressionTree.TreeNode;
2335
import org.apache.hadoop.hive.metastore.parser.PartFilterParser;
2436
import org.apache.hadoop.hive.metastore.utils.JavaUtils;
2537
import org.slf4j.Logger;
2638
import org.slf4j.LoggerFactory;
27-
import org.apache.hadoop.conf.Configuration;
28-
import org.apache.hadoop.hive.metastore.api.MetaException;
29-
import org.apache.hadoop.hive.metastore.parser.ExpressionTree;
3039

3140
/**
3241
* Utility functions for working with partition filter expressions
@@ -116,4 +125,59 @@ private static ExpressionTree makeExpressionTree(String filter) throws MetaExcep
116125
public static ExpressionTree parseFilterTree(String filter) throws MetaException {
117126
return PartFilterParser.parseFilter(filter);
118127
}
128+
129+
public static TreeNode buildTreeFromNodes(List<? extends TreeNode> nodes, LogicalOperator operator) {
130+
// The 'nodes' list is expected to have at least one element.
131+
// If the list is empty, the lexer parsing would have failed.
132+
assert !nodes.isEmpty() ;
133+
if (nodes.size() == 1) {
134+
return nodes.get(0);
135+
}
136+
TreeNode root = new TreeNode(nodes.get(0), operator, nodes.get(1));
137+
for (int i = 2; i < nodes.size(); ++i) {
138+
TreeNode tmp = new TreeNode(root, operator, nodes.get(i));
139+
root = tmp;
140+
}
141+
return root;
142+
}
143+
144+
/**
145+
* Flatten all AND-connected leaf nodes in the given expression tree
146+
* into MultiAndLeafNodes for more efficient evaluation.
147+
*/
148+
public static TreeNode flattenAndExpressions(TreeNode node) {
149+
if (node == null || node instanceof BaseLeafNode) {
150+
return node;
151+
}
152+
TreeNode left = flattenAndExpressions(node.getLhs());
153+
TreeNode right = flattenAndExpressions(node.getRhs());
154+
if (node.getAndOr() == LogicalOperator.AND) {
155+
List<Condition> flatConditions = new ArrayList<>();
156+
List<TreeNode> orNodes = new ArrayList<>();
157+
flattenConditions(left, flatConditions, orNodes);
158+
flattenConditions(right, flatConditions, orNodes);
159+
if (!flatConditions.isEmpty()) {
160+
TreeNode andNode = flatConditions.size() == 1 ?
161+
new LeafNode(flatConditions.get(0)) :
162+
new MultiAndLeafNode(flatConditions);
163+
orNodes.add(andNode);
164+
}
165+
return buildTreeFromNodes(orNodes, LogicalOperator.AND);
166+
}
167+
return new TreeNode(left, node.getAndOr(), right);
168+
}
169+
170+
private static void flattenConditions(TreeNode node, List<Condition> flatConditions, List<TreeNode> orNodes) {
171+
if (node == null) {
172+
return;
173+
}
174+
if (node instanceof BaseLeafNode leaf) {
175+
flatConditions.addAll(leaf.getConditions());
176+
} else if (node.getAndOr() == LogicalOperator.AND) {
177+
flattenConditions(node.getLhs(), flatConditions, orNodes);
178+
flattenConditions(node.getRhs(), flatConditions, orNodes);
179+
} else {
180+
orNodes.add(node);
181+
}
182+
}
119183
}

0 commit comments

Comments
 (0)