Skip to content

Commit d9f6a3a

Browse files
committed
prune nested column through lateral view
1 parent 09df533 commit d9f6a3a

File tree

4 files changed

+246
-2
lines changed

4 files changed

+246
-2
lines changed

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathExpressionCollector.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,9 +449,14 @@ public TAccessPathType getType() {
449449
public void setType(TAccessPathType type) {
450450
this.type = type;
451451
}
452+
453+
public AccessPathBuilder getAccessPathBuilder() {
454+
return accessPathBuilder;
455+
}
452456
}
453457

454-
private static class AccessPathBuilder {
458+
/** Mutable builder for an access path: an ordered list of path components that can be extended at either end. */
459+
public static class AccessPathBuilder {
455460
private LinkedList<String> accessPath;
456461

457462
public AccessPathBuilder() {
@@ -463,6 +468,16 @@ public AccessPathBuilder addPrefix(String prefix) {
463468
return this;
464469
}
465470

471+
public AccessPathBuilder addSuffix(String suffix) {
472+
accessPath.addLast(suffix);
473+
return this;
474+
}
475+
476+
public AccessPathBuilder addSuffix(List<String> suffix) {
477+
accessPath.addAll(suffix);
478+
return this;
479+
}
480+
466481
public AccessPathBuilder removePrefix() {
467482
accessPath.removeFirst();
468483
return this;

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AccessPathPlanCollector.java

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,21 @@
1717

1818
package org.apache.doris.nereids.rules.rewrite;
1919

20+
import org.apache.doris.analysis.AccessPathInfo;
2021
import org.apache.doris.nereids.StatementContext;
2122
import org.apache.doris.nereids.rules.rewrite.AccessPathExpressionCollector.CollectAccessPathResult;
23+
import org.apache.doris.nereids.rules.rewrite.AccessPathExpressionCollector.CollectorContext;
2224
import org.apache.doris.nereids.trees.expressions.Alias;
2325
import org.apache.doris.nereids.trees.expressions.Expression;
2426
import org.apache.doris.nereids.trees.expressions.NamedExpression;
2527
import org.apache.doris.nereids.trees.expressions.Slot;
2628
import org.apache.doris.nereids.trees.expressions.functions.Function;
29+
import org.apache.doris.nereids.trees.expressions.functions.generator.Explode;
30+
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeMap;
31+
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeMapOuter;
32+
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeOuter;
33+
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode;
34+
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter;
2735
import org.apache.doris.nereids.trees.plans.Plan;
2836
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEAnchor;
2937
import org.apache.doris.nereids.trees.plans.logical.LogicalCTEConsumer;
@@ -60,8 +68,112 @@ public Map<Slot, List<CollectAccessPathResult>> collect(Plan root, StatementCont
6068

6169
@Override
6270
public Void visitLogicalGenerate(LogicalGenerate<? extends Plan> generate, StatementContext context) {
63-
for (Function generator : generate.getGenerators()) {
71+
List<Function> generators = generate.getGenerators();
72+
List<Slot> output = generate.getGeneratorOutput();
6473

74+
AccessPathExpressionCollector exprCollector
75+
= new AccessPathExpressionCollector(context, allSlotToAccessPaths, false);
76+
for (int i = 0; i < output.size(); i++) {
77+
Slot generatorOutput = output.get(i);
78+
Function function = generators.get(i);
79+
Collection<CollectAccessPathResult> accessPaths = allSlotToAccessPaths.get(
80+
generatorOutput.getExprId().asInt());
81+
if (function instanceof Explode || function instanceof ExplodeOuter) {
82+
if (accessPaths.isEmpty()) {
83+
// use the whole column
84+
for (Expression child : function.children()) {
85+
exprCollector.collect(child);
86+
}
87+
} else {
88+
for (CollectAccessPathResult accessPath : accessPaths) {
89+
List<String> path = accessPath.getPath();
90+
if (function.arity() == 1) {
91+
// $c$1.VALUES.b
92+
CollectorContext argumentContext = new CollectorContext(context, false);
93+
argumentContext.setType(accessPath.getType());
94+
argumentContext.getAccessPathBuilder()
95+
.addSuffix(AccessPathInfo.ACCESS_ALL)
96+
.addSuffix(path.subList(1, path.size()));
97+
function.child(0).accept(exprCollector, argumentContext);
98+
continue;
99+
} else if (path.size() >= 2) {
100+
// $c$1.col1.VALUES.b will be extract 'col1'
101+
String colName = path.get(1);
102+
// extract '1' in 'col1'
103+
int colIndex = Integer.parseInt(colName.substring("col".length())) - 1;
104+
CollectorContext argumentContext = new CollectorContext(context, false);
105+
argumentContext.setType(accessPath.getType());
106+
argumentContext.getAccessPathBuilder()
107+
.addSuffix(AccessPathInfo.ACCESS_ALL)
108+
.addSuffix(path.subList(2, path.size()));
109+
function.child(colIndex).accept(exprCollector, argumentContext);
110+
continue;
111+
}
112+
// use the whole column
113+
for (Expression child : function.children()) {
114+
exprCollector.collect(child);
115+
}
116+
}
117+
}
118+
} else if (function instanceof ExplodeMap || function instanceof ExplodeMapOuter) {
119+
if (accessPaths.isEmpty()) {
120+
// use the whole column
121+
for (Expression child : function.children()) {
122+
exprCollector.collect(child);
123+
}
124+
} else {
125+
for (CollectAccessPathResult accessPath : accessPaths) {
126+
List<String> path = accessPath.getPath();
127+
if (path.size() >= 2) {
128+
if (path.get(1).equalsIgnoreCase("col1")) {
129+
// key
130+
for (Expression child : function.children()) {
131+
CollectorContext argumentContext = new CollectorContext(context, false);
132+
argumentContext.setType(accessPath.getType());
133+
argumentContext.getAccessPathBuilder()
134+
.addSuffix(AccessPathInfo.ACCESS_MAP_KEYS)
135+
.addSuffix(path.subList(2, path.size()));
136+
child.accept(exprCollector, argumentContext);
137+
}
138+
continue;
139+
} else if (path.get(1).equalsIgnoreCase("col2")) {
140+
// value
141+
for (Expression child : function.children()) {
142+
CollectorContext argumentContext = new CollectorContext(context, false);
143+
argumentContext.setType(accessPath.getType());
144+
argumentContext.getAccessPathBuilder()
145+
.addSuffix(AccessPathInfo.ACCESS_MAP_VALUES)
146+
.addSuffix(path.subList(2, path.size()));
147+
child.accept(exprCollector, argumentContext);
148+
}
149+
continue;
150+
}
151+
}
152+
// use the whole column
153+
exprCollector.collect(function.child(0));
154+
}
155+
}
156+
} else if (function instanceof PosExplode || function instanceof PosExplodeOuter) {
157+
if (accessPaths.isEmpty()) {
158+
// use the whole column
159+
for (Expression child : function.children()) {
160+
exprCollector.collect(child);
161+
}
162+
} else {
163+
for (CollectAccessPathResult accessPath : accessPaths) {
164+
List<String> path = accessPath.getPath();
165+
// $c$1.pos or // $c$1.col
166+
CollectorContext argumentContext = new CollectorContext(context, false);
167+
argumentContext.setType(accessPath.getType());
168+
argumentContext.getAccessPathBuilder()
169+
.addSuffix(AccessPathInfo.ACCESS_ALL)
170+
.addSuffix(path.subList(Math.min(path.size(), 2), path.size()));
171+
function.child(0).accept(exprCollector, argumentContext);
172+
}
173+
}
174+
} else {
175+
exprCollector.collect(function);
176+
}
65177
}
66178
return generate.child().accept(this, context);
67179
}

fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/SlotTypeReplacer.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,12 @@ public Plan visitLogicalGenerate(LogicalGenerate<? extends Plan> generate, Void
325325

326326
Pair<Boolean, List<Function>> replacedGenerators
327327
= replaceExpressions(generate.getGenerators(), false, false);
328+
for (int i = 0; i < replacedGenerators.second.size(); i++) {
329+
DataType dataType = replacedGenerators.second.get(i).getDataType();
330+
replacedDataTypes.put(generate.getGeneratorOutput().get(i).getExprId().asInt(),
331+
new AccessPathInfo(dataType, null, null)
332+
);
333+
}
328334
Pair<Boolean, List<Slot>> replacedGeneratorOutput
329335
= replaceExpressions(generate.getGeneratorOutput(), false, false);
330336
if (replacedGenerators.first || replacedGeneratorOutput.first) {

fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PruneNestedColumnTest.java

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,117 @@ public void testCte() throws Throwable {
384384
);
385385
}
386386

387+
@Test
388+
public void testExplode() throws Exception {
389+
assertColumn("select 100 from tbl lateral view explode(s.data) t as item",
390+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
391+
ImmutableList.of(path("s", "data")),
392+
ImmutableList.of()
393+
);
394+
395+
assertColumn("select item from tbl lateral view explode(s.data) t as item",
396+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
397+
ImmutableList.of(path("s", "data", "*")),
398+
ImmutableList.of()
399+
);
400+
401+
assertColumn("select map_values(item)[1].a from tbl lateral view explode(s.data) t as item",
402+
"struct<data:array<map<int,struct<a:int>>>>",
403+
ImmutableList.of(path("s", "data", "*", "VALUES", "a")),
404+
ImmutableList.of()
405+
);
406+
407+
assertColumn("select map_values(item)[1].b from tbl lateral view explode(s.data) t as item",
408+
"struct<data:array<map<int,struct<b:double>>>>",
409+
ImmutableList.of(path("s", "data", "*", "VALUES", "b")),
410+
ImmutableList.of()
411+
);
412+
413+
assertColumn("select map_keys(item) from tbl lateral view explode(s.data) t as item",
414+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
415+
ImmutableList.of(path("s", "data", "*", "KEYS")),
416+
ImmutableList.of()
417+
);
418+
419+
assertColumn("select map_keys(item), map_values(item)[1].b from tbl lateral view explode(s.data) t as item",
420+
"struct<data:array<map<int,struct<b:double>>>>",
421+
ImmutableList.of(path("s", "data", "*", "KEYS"), path("s", "data", "*", "VALUES", "b")),
422+
ImmutableList.of()
423+
);
424+
425+
assertColumn("select map_values(item1)[1].b, map_values(item2)[1].a from tbl lateral view explode(s.data, s.data) t as item1, item2",
426+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
427+
ImmutableList.of(path("s", "data", "*", "VALUES", "a"), path("s", "data", "*", "VALUES", "b")),
428+
ImmutableList.of()
429+
);
430+
}
431+
432+
@Test
433+
public void testExplodeMap() throws Exception {
434+
assertColumn("select 100 from tbl lateral view explode_map(s.data[1]) t as item",
435+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
436+
ImmutableList.of(path("s", "data", "*")),
437+
ImmutableList.of()
438+
);
439+
440+
assertColumn("select item from tbl lateral view explode_map(s.data[1]) t as item",
441+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
442+
ImmutableList.of(path("s", "data", "*")),
443+
ImmutableList.of()
444+
);
445+
446+
assertColumn("select item.col1 from tbl lateral view explode_map(s.data[1]) t as item",
447+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
448+
ImmutableList.of(path("s", "data", "*", "KEYS")),
449+
ImmutableList.of()
450+
);
451+
452+
assertColumn("select item.col2.a from tbl lateral view explode_map(s.data[1]) t as item",
453+
"struct<data:array<map<int,struct<a:int>>>>",
454+
ImmutableList.of(path("s", "data", "*", "VALUES", "a")),
455+
ImmutableList.of()
456+
);
457+
458+
assertColumn("select item.col2.b from tbl lateral view explode_map(s.data[1]) t as item",
459+
"struct<data:array<map<int,struct<b:double>>>>",
460+
ImmutableList.of(path("s", "data", "*", "VALUES", "b")),
461+
ImmutableList.of()
462+
);
463+
464+
assertColumn("select item.col1, item.col2.b from tbl lateral view explode_map(s.data[1]) t as item",
465+
"struct<data:array<map<int,struct<b:double>>>>",
466+
ImmutableList.of(path("s", "data", "*", "KEYS"), path("s", "data", "*", "VALUES", "b")),
467+
ImmutableList.of()
468+
);
469+
470+
assertColumn("select k, v.b from tbl lateral view explode_map(s.data[1]) t as k, v",
471+
"struct<data:array<map<int,struct<b:double>>>>",
472+
ImmutableList.of(path("s", "data", "*", "KEYS"), path("s", "data", "*", "VALUES", "b")),
473+
ImmutableList.of()
474+
);
475+
}
476+
477+
@Test
478+
public void testPosExplode() throws Exception {
479+
assertColumn("select 100 from tbl lateral view posexplode(s.data) t as item",
480+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
481+
ImmutableList.of(path("s", "data")),
482+
ImmutableList.of()
483+
);
484+
485+
assertColumn("select item from tbl lateral view posexplode(s.data) t as item",
486+
"struct<data:array<map<int,struct<a:int,b:double>>>>",
487+
ImmutableList.of(path("s", "data", "*")),
488+
ImmutableList.of()
489+
);
490+
491+
assertColumn("select item.col[1].a from tbl lateral view posexplode(s.data) t as item",
492+
"struct<data:array<map<int,struct<a:int>>>>",
493+
ImmutableList.of(path("s", "data", "*", "*", "a")),
494+
ImmutableList.of()
495+
);
496+
}
497+
387498
@Test
388499
public void testUnion() throws Throwable {
389500
assertColumn("select coalesce(struct_element(s, 'city'), 'abc') from (select s from tbl union all select null)a",

0 commit comments

Comments
 (0)