Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,51 @@
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.thrift.TColumnAccessPath;

import lombok.AllArgsConstructor;
import lombok.Data;

import java.util.List;

/**
 * AccessPathInfo holds the pruned data type of a column together with the access paths
 * recorded for it.
 *
 * <p>The all-args constructor and the accessors are written out explicitly, so
 * {@code @AllArgsConstructor} must not be present: Lombok would generate a second constructor
 * with the same signature. {@code @Data} is kept so that {@code equals}, {@code hashCode} and
 * {@code toString} are still generated; Lombok skips accessors that already exist.
 */
@Data
public class AccessPathInfo {
    /** Path component used for "all elements" of an array, or for a map accessed by key. */
    public static final String ACCESS_ALL = "*";
    /** Path component that selects the keys of a map. */
    public static final String ACCESS_MAP_KEYS = "KEYS";
    /** Path component that selects the values of a map. */
    public static final String ACCESS_MAP_VALUES = "VALUES";

    private DataType prunedType;
    // allAccessPaths records every access path, both predicate and non-predicate ones,
    // while predicateAccessPaths only contains the predicate access paths.
    // e.g. select element_at(s, 'name') from tbl where element_at(s, 'id') = 1
    //      the allAccessPaths is: ["s.name", "s.id"]
    //      the predicateAccessPaths is: ["s.id"]
    private List<TColumnAccessPath> allAccessPaths;
    private List<TColumnAccessPath> predicateAccessPaths;

    /**
     * Creates an AccessPathInfo.
     *
     * @param prunedType the data type after pruning
     * @param allAccessPaths all access paths, predicate and non-predicate
     * @param predicateAccessPaths only the predicate access paths
     */
    public AccessPathInfo(DataType prunedType, List<TColumnAccessPath> allAccessPaths,
            List<TColumnAccessPath> predicateAccessPaths) {
        this.prunedType = prunedType;
        this.allAccessPaths = allAccessPaths;
        this.predicateAccessPaths = predicateAccessPaths;
    }

    /** Returns the data type after pruning. */
    public DataType getPrunedType() {
        return prunedType;
    }

    /** Replaces the pruned data type. */
    public void setPrunedType(DataType prunedType) {
        this.prunedType = prunedType;
    }

    /** Returns all access paths, predicate and non-predicate. The list is not copied. */
    public List<TColumnAccessPath> getAllAccessPaths() {
        return allAccessPaths;
    }

    /** Replaces the list of all access paths. The list is stored as given, not copied. */
    public void setAllAccessPaths(List<TColumnAccessPath> allAccessPaths) {
        this.allAccessPaths = allAccessPaths;
    }

    /** Returns only the predicate access paths. The list is not copied. */
    public List<TColumnAccessPath> getPredicateAccessPaths() {
        return predicateAccessPaths;
    }

    /** Replaces the list of predicate access paths. The list is stored as given, not copied. */
    public void setPredicateAccessPaths(List<TColumnAccessPath> predicateAccessPaths) {
        this.predicateAccessPaths = predicateAccessPaths;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

package org.apache.doris.nereids.rules.rewrite;

import org.apache.doris.analysis.AccessPathInfo;
import org.apache.doris.nereids.StatementContext;
import org.apache.doris.nereids.rules.rewrite.AccessPathExpressionCollector.CollectorContext;
import org.apache.doris.nereids.rules.rewrite.NestedColumnPruning.DataTypeAccessTree;
Expand Down Expand Up @@ -117,7 +118,7 @@ public Void visitArrayItemSlot(ArrayItemSlot arrayItemSlot, CollectorContext con
if (nameToLambdaArguments.isEmpty()) {
return null;
}
context.accessPathBuilder.addPrefix("*");
context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_ALL);
Expression argument = nameToLambdaArguments.peek().get(arrayItemSlot.getName());
if (argument == null) {
return null;
Expand Down Expand Up @@ -157,7 +158,7 @@ public Void visitElementAt(ElementAt elementAt, CollectorContext context) {
List<Expression> arguments = elementAt.getArguments();
Expression first = arguments.get(0);
if (first.getDataType().isArrayType() || first.getDataType().isMapType()) {
context.accessPathBuilder.addPrefix("*");
context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_ALL);
continueCollectAccessPath(first, context);

for (int i = 1; i < arguments.size(); i++) {
Expand Down Expand Up @@ -200,39 +201,39 @@ public Void visitStructElement(StructElement structElement, CollectorContext con
/**
 * Collects the access path for a map-keys expression.
 *
 * <p>A fresh {@link CollectorContext} is created from the incoming statement context and bottom
 * filter, so whatever path was pending in the incoming context is not carried over. The
 * {@code KEYS} marker is added exactly once before descending into the map argument.
 */
@Override
public Void visitMapKeys(MapKeys mapKeys, CollectorContext context) {
    CollectorContext keysContext = new CollectorContext(context.statementContext, context.bottomFilter);
    keysContext.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_MAP_KEYS);
    return continueCollectAccessPath(mapKeys.getArgument(0), keysContext);
}

@Override
public Void visitMapValues(MapValues mapValues, CollectorContext context) {
LinkedList<String> suffixPath = context.accessPathBuilder.accessPath;
if (!suffixPath.isEmpty() && suffixPath.get(0).equals("*")) {
if (!suffixPath.isEmpty() && suffixPath.get(0).equals(AccessPathInfo.ACCESS_ALL)) {
CollectorContext removeStarContext
= new CollectorContext(context.statementContext, context.bottomFilter);
removeStarContext.accessPathBuilder.accessPath.addAll(suffixPath.subList(1, suffixPath.size()));
removeStarContext.accessPathBuilder.addPrefix("VALUES");
removeStarContext.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_MAP_VALUES);
return continueCollectAccessPath(mapValues.getArgument(0), removeStarContext);
}
context.accessPathBuilder.addPrefix("VALUES");
context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_MAP_VALUES);
return continueCollectAccessPath(mapValues.getArgument(0), context);
}

/**
 * Collects the access path for a map-contains-key expression: prefixes the {@code KEYS} marker
 * exactly once, then continues collecting on the map argument.
 */
@Override
public Void visitMapContainsKey(MapContainsKey mapContainsKey, CollectorContext context) {
    context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_MAP_KEYS);
    return continueCollectAccessPath(mapContainsKey.getArgument(0), context);
}

/**
 * Collects the access path for a map-contains-value expression: prefixes the {@code VALUES}
 * marker exactly once, then continues collecting on the map argument.
 */
@Override
public Void visitMapContainsValue(MapContainsValue mapContainsValue, CollectorContext context) {
    context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_MAP_VALUES);
    return continueCollectAccessPath(mapContainsValue.getArgument(0), context);
}

/**
 * Collects the access path for a map-contains-entry expression: prefixes the {@code *} marker
 * exactly once, then continues collecting on the map argument.
 */
@Override
public Void visitMapContainsEntry(MapContainsEntry mapContainsEntry, CollectorContext context) {
    context.accessPathBuilder.addPrefix(AccessPathInfo.ACCESS_ALL);
    return continueCollectAccessPath(mapContainsEntry.getArgument(0), context);
}

Expand Down Expand Up @@ -398,7 +399,7 @@ private Void collectArrayPathInLambda(Lambda lambda, CollectorContext context) {
}

List<String> path = context.accessPathBuilder.getPathList();
if (!path.isEmpty() && path.get(0).equals("*")) {
if (!path.isEmpty() && path.get(0).equals(AccessPathInfo.ACCESS_ALL)) {
context.accessPathBuilder.removePrefix();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,11 +244,18 @@ private static Map<Integer, AccessPathInfo> pruneDataType(

/** DataTypeAccessTree */
public static class DataTypeAccessTree {
// The data type at this level of the tree.
private DataType type;
// Whether this node is the root column.
private boolean isRoot;
// When accessing 's.a.b', nodes 's' and 'a' have accessPartialChild set and node 'b' has accessAll set.
private boolean accessPartialChild;
private boolean accessAll;
// Reserved for future use: accessing only the metadata of the column,
// e.g. `is not null` only needs the column's offsets and does not need to read the data.
private TAccessPathType pathType;
// The children of this column, keyed by child name. For example, if column s is `struct<a:int, b:int>`,
// node 's' has two children: 'a' and 'b'.
private Map<String, DataTypeAccessTree> children = new LinkedHashMap<>();

public DataTypeAccessTree(DataType type, TAccessPathType pathType) {
Expand All @@ -261,6 +268,30 @@ public DataTypeAccessTree(boolean isRoot, DataType type, TAccessPathType pathTyp
this.pathType = pathType;
}

/** Returns the data type at this level of the tree. */
public DataType getType() {
    return this.type;
}

/** Returns whether this node is the root column. */
public boolean isRoot() {
    return this.isRoot;
}

/** Returns the {@code accessPartialChild} flag of this node. */
public boolean isAccessPartialChild() {
    return this.accessPartialChild;
}

/** Returns the {@code accessAll} flag of this node. */
public boolean isAccessAll() {
    return this.accessAll;
}

/** Returns the access path type stored on this node. */
public TAccessPathType getPathType() {
    return this.pathType;
}

/** Returns the child nodes keyed by child name. The internal map is returned as is, not copied. */
public Map<String, DataTypeAccessTree> getChildren() {
    return this.children;
}

/** pruneCastType */
public DataType pruneCastType(DataTypeAccessTree origin, DataTypeAccessTree cast) {
if (type instanceof StructType) {
Expand Down Expand Up @@ -297,8 +328,16 @@ public DataType pruneCastType(DataTypeAccessTree origin, DataTypeAccessTree cast
);
} else if (type instanceof MapType) {
return MapType.of(
children.get("KEYS").pruneCastType(origin.children.get("KEYS"), cast.children.get("KEYS")),
children.get("VALUES").pruneCastType(origin.children.get("VALUES"), cast.children.get("VALUES"))
children.get(AccessPathInfo.ACCESS_MAP_KEYS)
.pruneCastType(
origin.children.get(AccessPathInfo.ACCESS_MAP_KEYS),
cast.children.get(AccessPathInfo.ACCESS_MAP_KEYS)
),
children.get(AccessPathInfo.ACCESS_MAP_VALUES)
.pruneCastType(
origin.children.get(AccessPathInfo.ACCESS_MAP_VALUES),
cast.children.get(AccessPathInfo.ACCESS_MAP_VALUES)
)
);
} else {
return cast.type;
Expand Down Expand Up @@ -327,7 +366,7 @@ public boolean replacePathByAnotherTree(DataTypeAccessTree cast, List<String> pa
cast.children.values().iterator().next(), path, index + 1);
} else if (cast.type instanceof MapType) {
String fieldName = path.get(index);
return children.get("VALUES").replacePathByAnotherTree(
return children.get(AccessPathInfo.ACCESS_MAP_VALUES).replacePathByAnotherTree(
cast.children.get(fieldName), path, index + 1
);
}
Expand Down Expand Up @@ -358,33 +397,33 @@ public void setAccessByPath(List<String> path, int accessIndex, TAccessPathType
}
return;
} else if (this.type.isArrayType()) {
DataTypeAccessTree child = children.get("*");
if (path.get(accessIndex).equals("*")) {
DataTypeAccessTree child = children.get(AccessPathInfo.ACCESS_ALL);
if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_ALL)) {
// enter this array and skip next *
child.setAccessByPath(path, accessIndex + 1, pathType);
}
return;
} else if (this.type.isMapType()) {
String fieldName = path.get(accessIndex);
if (fieldName.equals("*")) {
if (fieldName.equals(AccessPathInfo.ACCESS_ALL)) {
// access value by the key, so we should access key and access value, then prune the value's type.
// e.g. map_column['id'] should access the keys, and access the values
DataTypeAccessTree keysChild = children.get("KEYS");
DataTypeAccessTree valuesChild = children.get("VALUES");
DataTypeAccessTree keysChild = children.get(AccessPathInfo.ACCESS_MAP_KEYS);
DataTypeAccessTree valuesChild = children.get(AccessPathInfo.ACCESS_MAP_VALUES);
keysChild.accessAll = true;
valuesChild.setAccessByPath(path, accessIndex + 1, pathType);
return;
} else if (fieldName.equals("KEYS")) {
} else if (fieldName.equals(AccessPathInfo.ACCESS_MAP_KEYS)) {
// only access the keys and not need enter keys, because it must be primitive type.
// e.g. map_keys(map_column)
DataTypeAccessTree keysChild = children.get("KEYS");
DataTypeAccessTree keysChild = children.get(AccessPathInfo.ACCESS_MAP_KEYS);
keysChild.accessAll = true;
return;
} else if (fieldName.equals("VALUES")) {
} else if (fieldName.equals(AccessPathInfo.ACCESS_MAP_VALUES)) {
// only access the values without keys, and maybe prune the value's data type.
// e.g. map_values(map_columns)[0] will access the array of values first,
// and then access the array, so the access path is ['VALUES', '*']
DataTypeAccessTree valuesChild = children.get("VALUES");
DataTypeAccessTree valuesChild = children.get(AccessPathInfo.ACCESS_MAP_VALUES);
valuesChild.setAccessByPath(path, accessIndex + 1, pathType);
return;
}
Expand All @@ -411,10 +450,10 @@ public static DataTypeAccessTree of(DataType type, TAccessPathType pathType) {
root.children.put(kv.getKey().toLowerCase(), of(kv.getValue().getDataType(), pathType));
}
} else if (type instanceof ArrayType) {
root.children.put("*", of(((ArrayType) type).getItemType(), pathType));
root.children.put(AccessPathInfo.ACCESS_ALL, of(((ArrayType) type).getItemType(), pathType));
} else if (type instanceof MapType) {
root.children.put("KEYS", of(((MapType) type).getKeyType(), pathType));
root.children.put("VALUES", of(((MapType) type).getValueType(), pathType));
root.children.put(AccessPathInfo.ACCESS_MAP_KEYS, of(((MapType) type).getKeyType(), pathType));
root.children.put(AccessPathInfo.ACCESS_MAP_VALUES, of(((MapType) type).getValueType(), pathType));
}
return root;
}
Expand All @@ -440,17 +479,17 @@ public Optional<DataType> pruneDataType() {
}
}
} else if (type instanceof ArrayType) {
Optional<DataType> childDataType = children.get("*").pruneDataType();
Optional<DataType> childDataType = children.get(AccessPathInfo.ACCESS_ALL).pruneDataType();
if (childDataType.isPresent()) {
accessedChildren.add(Pair.of("*", childDataType.get()));
accessedChildren.add(Pair.of(AccessPathInfo.ACCESS_ALL, childDataType.get()));
}
} else if (type instanceof MapType) {
DataType prunedValueType = children.get("VALUES")
DataType prunedValueType = children.get(AccessPathInfo.ACCESS_MAP_VALUES)
.pruneDataType()
.orElse(((MapType) type).getValueType());
// can not prune keys but can prune values
accessedChildren.add(Pair.of("KEYS", ((MapType) type).getKeyType()));
accessedChildren.add(Pair.of("VALUES", prunedValueType));
accessedChildren.add(Pair.of(AccessPathInfo.ACCESS_MAP_KEYS, ((MapType) type).getKeyType()));
accessedChildren.add(Pair.of(AccessPathInfo.ACCESS_MAP_VALUES, prunedValueType));
}
if (accessedChildren.isEmpty()) {
return Optional.of(type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.catalog.Column;
import org.apache.doris.common.Pair;
import org.apache.doris.datasource.iceberg.IcebergExternalTable;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.properties.OrderKey;
import org.apache.doris.nereids.rules.rewrite.NestedColumnPruning.DataTypeAccessTree;
import org.apache.doris.nereids.trees.expressions.ArrayItemReference;
Expand Down Expand Up @@ -542,8 +543,10 @@ private <C extends Collection<E>, E extends Expression> Pair<Boolean, C> replace
ImmutableCollection.Builder<E> newExprs;
if (expressions instanceof List) {
newExprs = ImmutableList.builder();
} else {
} else if (expressions instanceof Set) {
newExprs = ImmutableSet.builder();
} else {
throw new AnalysisException("Unsupported expression type: " + expressions.getClass());
}

boolean changed = false;
Expand Down Expand Up @@ -688,7 +691,7 @@ private void replaceIcebergAccessPathToId(List<String> originPath, int index, Da
originPath, index + 1, ((ArrayType) type).getItemType(), column.getChildren().get(0)
);
} else if (type instanceof MapType) {
if (fieldName.equals("*") || fieldName.equals("VALUES")) {
if (fieldName.equals(AccessPathInfo.ACCESS_ALL) || fieldName.equals(AccessPathInfo.ACCESS_MAP_VALUES)) {
replaceIcebergAccessPathToId(
originPath, index + 1, ((MapType) type).getValueType(), column.getChildren().get(1)
);
Expand Down
Loading
Loading