
Commit 0e287d4

HIVE-28938: Error in LATERAL VIEW with non native tables due to incorrect virtual columns (#5798)
1 parent 1eb1a8c commit 0e287d4
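In brief: the CBO path (CalcitePlanner#genTableLogicalPlan) and the non-CBO path (SemanticAnalyzer#genTablePlan) computed different virtual-column sets for non-native tables (HBase, Kudu, Iceberg), so the two planners could disagree on a table's row schema, and queries such as LATERAL VIEW over an Iceberg table failed. The fix moves the rules into a new Table#getVirtualColumns helper and has both planners delegate to it; the q.out updates below are the resulting plan changes, and the new Iceberg qtests cover the LATERAL VIEW repro and the virtual columns shown by EXPLAIN FORMATTED.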

9 files changed: +124 -55 lines changed

hbase-handler/src/test/results/positive/hbase_queries.q.out

Lines changed: 17 additions & 17 deletions

@@ -151,14 +151,14 @@ STAGE PLANS:
 predicate: UDFToDouble(key) is not null (type: boolean)
 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
 Select Operator
-expressions: key (type: int)
+expressions: UDFToDouble(key) (type: double)
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
-key expressions: UDFToDouble(_col0) (type: double)
+key expressions: _col0 (type: double)
 null sort order: z
 sort order: +
-Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+Map-reduce partition columns: _col0 (type: double)
 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
 TableScan
 alias: src
@@ -168,29 +168,29 @@ STAGE PLANS:
 predicate: UDFToDouble(key) is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double)
+outputColumnNames: _col0, _col1, _col2
+Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
-key expressions: UDFToDouble(_col0) (type: double)
+key expressions: _col2 (type: double)
 null sort order: z
 sort order: +
-Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+Map-reduce partition columns: _col2 (type: double)
+Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string), _col1 (type: string)
 Reduce Operator Tree:
 Join Operator
 condition map:
 Inner Join 0 to 1
 keys:
-0 UDFToDouble(_col0) (type: double)
-1 UDFToDouble(_col0) (type: double)
+0 _col0 (type: double)
+1 _col2 (type: double)
 outputColumnNames: _col1, _col2
-Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 550 Data size: 102300 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: _col1 (type: string), _col2 (type: string)
 outputColumnNames: _col0, _col1
-Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 550 Data size: 102300 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
 table:
@@ -206,20 +206,20 @@ STAGE PLANS:
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
-Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 550 Data size: 102300 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 Execution mode: vectorized
 Reduce Operator Tree:
 Select Operator
 expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string)
 outputColumnNames: _col0, _col1
-Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 550 Data size: 102300 Basic stats: COMPLETE Column stats: NONE
 Limit
 Number of rows: 20
-Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 20 Data size: 3720 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
-Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: NONE
+Statistics: Num rows: 20 Data size: 3720 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
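The plan updates here (and in the Kudu file below) follow one pattern: the UDFToDouble(key) cast is now projected as its own output column by the Select Operator and referenced directly as the join key, instead of being re-evaluated as UDFToDouble(_col0) in each Reduce Output Operator and in the Join Operator; the Data size statistics grow accordingly for the extra projected column.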
Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+-- A simple explain formatted test for an iceberg table to check virtual columns in the JSON output.
+create external table test (a int, b int) stored by iceberg;
+explain formatted select * from test;
Lines changed: 6 additions & 0 deletions

@@ -0,0 +1,6 @@
+create external table test(id int, arr array<string>) stored by iceberg;
+insert into test values (1, array("a", "b")), (2, array("c", "d")), (3, array("e", "f"));
+
+select * from test
+lateral view explode(arr) tbl1 as name
+lateral view explode(arr) tbl2 as name1;
Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+PREHOOK: query: create external table test (a int, b int) stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: create external table test (a int, b int) stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: explain formatted select * from test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain formatted select * from test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"test\"\n ],\n \"table:alias\": \"test\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 8.0,\n \"rowType\": {\n \"fields\": [\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"a\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"b\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"PARTITION__SPEC__ID\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"PARTITION__HASH\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"FILE__PATH\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"ROW__POSITION\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"PARTITION__PROJECTION\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"SNAPSHOT__ID\"\n }\n ],\n \"nullable\": false\n },\n \"colStats\": [\n {\n \"name\": \"a\",\n \"ndv\": 1,\n \"minValue\": -9223372036854775808,\n \"maxValue\": 9223372036854775807\n },\n {\n \"name\": \"b\",\n \"ndv\": 1,\n \"minValue\": -9223372036854775808,\n \"maxValue\": 9223372036854775807\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"a\",\n \"b\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT `a`, `b`\nFROM `default`.`test`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"test","columns:":["a","b"],"database:":"default","table:":"test","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"a (type: int), b (type: int)","columnExprMap:":{"_col0":"a","_col1":"b"},"outputColumnNames:":["_col0","_col1"],"OperatorId:":"SEL_1","children":{"ListSink":{"OperatorId:":"LIST_SINK_3"}}}}}}}}}}
Lines changed: 40 additions & 0 deletions

@@ -0,0 +1,40 @@
+PREHOOK: query: create external table test(id int, arr array<string>) stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: create external table test(id int, arr array<string>) stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: insert into test values (1, array("a", "b")), (2, array("c", "d")), (3, array("e", "f"))
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test
+POSTHOOK: query: insert into test values (1, array("a", "b")), (2, array("c", "d")), (3, array("e", "f"))
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test
+PREHOOK: query: select * from test
+lateral view explode(arr) tbl1 as name
+lateral view explode(arr) tbl2 as name1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from test
+lateral view explode(arr) tbl1 as name
+lateral view explode(arr) tbl2 as name1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 ["a","b"] a a
+1 ["a","b"] a b
+1 ["a","b"] b a
+1 ["a","b"] b b
+2 ["c","d"] c c
+2 ["c","d"] c d
+2 ["c","d"] d c
+2 ["c","d"] d d
+3 ["e","f"] e e
+3 ["e","f"] e f
+3 ["e","f"] f e
+3 ["e","f"] f f

kudu-handler/src/test/results/positive/kudu_complex_queries.q.out

Lines changed: 13 additions & 13 deletions

@@ -91,15 +91,15 @@ STAGE PLANS:
 predicate: UDFToDouble(key) is not null (type: boolean)
 Statistics: Num rows: 309 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
-expressions: key (type: int)
+expressions: UDFToDouble(key) (type: double)
 outputColumnNames: _col0
-Statistics: Num rows: 309 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
+Statistics: Num rows: 309 Data size: 2472 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
-key expressions: UDFToDouble(_col0) (type: double)
+key expressions: _col0 (type: double)
 null sort order: z
 sort order: +
-Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-Statistics: Num rows: 309 Data size: 1236 Basic stats: COMPLETE Column stats: COMPLETE
+Map-reduce partition columns: _col0 (type: double)
+Statistics: Num rows: 309 Data size: 2472 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Map 4
 Map Operator Tree:
@@ -111,15 +111,15 @@ STAGE PLANS:
 predicate: UDFToDouble(key) is not null (type: boolean)
 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
-expressions: key (type: string), value (type: string)
-outputColumnNames: _col0, _col1
-Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+expressions: key (type: string), value (type: string), UDFToDouble(key) (type: double)
+outputColumnNames: _col0, _col1, _col2
+Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
-key expressions: UDFToDouble(_col0) (type: double)
+key expressions: _col2 (type: double)
 null sort order: z
 sort order: +
-Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+Map-reduce partition columns: _col2 (type: double)
+Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: string), _col1 (type: string)
 Execution mode: vectorized
 Reducer 2
@@ -128,8 +128,8 @@ STAGE PLANS:
 condition map:
 Inner Join 0 to 1
 keys:
-0 UDFToDouble(_col0) (type: double)
-1 UDFToDouble(_col0) (type: double)
+0 _col0 (type: double)
+1 _col2 (type: double)
 outputColumnNames: _col1, _col2
 Statistics: Num rows: 488 Data size: 86864 Basic stats: COMPLETE Column stats: COMPLETE
 Top N Key Operator

ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java

Lines changed: 19 additions & 0 deletions

@@ -61,6 +61,7 @@
 import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
 import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.TableSpec;
@@ -85,6 +86,8 @@

 import com.google.common.base.Preconditions;

+import static org.apache.commons.lang3.StringUtils.isBlank;
+
 /**
  * A Hive Table: is a fundamental unit of data in Hive that shares a common schema/DDL.
  *
@@ -1364,4 +1367,20 @@ public SourceTable createSourceTable() {
     sourceTable.setDeletedCount(0L);
     return sourceTable;
   }
+
+  public List<VirtualColumn> getVirtualColumns(HiveConf conf) {
+    List<VirtualColumn> virtualColumns = new ArrayList<>();
+    if (!isNonNative()) {
+      virtualColumns.addAll(VirtualColumn.getRegistry(conf));
+    }
+    if (isNonNative() && AcidUtils.isNonNativeAcidTable(this)) {
+      virtualColumns.addAll(getStorageHandler().acidVirtualColumns());
+    }
+    if (isNonNative() && getStorageHandler().areSnapshotsSupported() &&
+        isBlank(getMetaTable())) {
+      virtualColumns.add(VirtualColumn.SNAPSHOT_ID);
+    }
+
+    return virtualColumns;
+  }
 }
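To make the three branches of the new helper easier to follow, here is a minimal, self-contained sketch of the same decision table. The boolean parameters are hypothetical stand-ins for isNonNative(), AcidUtils.isNonNativeAcidTable(table), getStorageHandler().areSnapshotsSupported(), and isBlank(getMetaTable()); the real method returns Hive VirtualColumn objects, not strings.

import java.util.ArrayList;
import java.util.List;

// Sketch of the selection rules in Table#getVirtualColumns (stand-in types only).
public class VirtualColumnRules {

  static List<String> virtualColumns(boolean nonNative, boolean nonNativeAcid,
                                     boolean snapshotsSupported, boolean hasMetaTable) {
    List<String> cols = new ArrayList<>();
    if (!nonNative) {
      // Native tables: the full registry (e.g. BLOCK__OFFSET__INSIDE__FILE,
      // INPUT__FILE__NAME, ...).
      cols.add("<registry>");
    }
    if (nonNative && nonNativeAcid) {
      // Non-native ACID tables: handler-provided ACID virtual columns.
      cols.add("<acidVirtualColumns>");
    }
    if (nonNative && snapshotsSupported && !hasMetaTable) {
      // Snapshot-capable non-native table queried directly, not via a meta table.
      cols.add("SNAPSHOT__ID");
    }
    return cols;
  }

  public static void main(String[] args) {
    // An Iceberg-like table: non-native, ACID, snapshot-capable, no meta table.
    System.out.println(virtualColumns(true, true, true, false));
    // -> [<acidVirtualColumns>, SNAPSHOT__ID]
    // A native table gets the registry only.
    System.out.println(virtualColumns(false, false, false, false));
    // -> [<registry>]
  }
}

Note that only the first branch can fire for a native table, while the second and third branches apply to non-native tables and are not mutually exclusive.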

ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java

Lines changed: 8 additions & 18 deletions

@@ -366,7 +366,6 @@

 import javax.sql.DataSource;

-import static org.apache.commons.lang3.StringUtils.isBlank;
 import static org.apache.hadoop.hive.ql.optimizer.calcite.HiveMaterializedViewASTSubQueryRewriteShuttle.getMaterializedViewByAST;
 import static org.apache.hadoop.hive.ql.metadata.RewriteAlgorithm.ANY;

@@ -3094,23 +3093,14 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException {
     final TableType tableType = obtainTableType(tabMetaData);

     // 3.3 Add column info corresponding to virtual columns
-    List<VirtualColumn> virtualCols = new ArrayList<>();
-    if (tableType == TableType.NATIVE) {
-      virtualCols = VirtualColumn.getRegistry(conf);
-      if (AcidUtils.isNonNativeAcidTable(tabMetaData)) {
-        virtualCols.addAll(tabMetaData.getStorageHandler().acidVirtualColumns());
-      }
-      if (tabMetaData.isNonNative() && tabMetaData.getStorageHandler().areSnapshotsSupported() &&
-          isBlank(tabMetaData.getMetaTable())) {
-        virtualCols.add(VirtualColumn.SNAPSHOT_ID);
-      }
-      for (VirtualColumn vc : virtualCols) {
-        colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true,
-            vc.getIsHidden());
-        rr.put(tableAlias, vc.getName().toLowerCase(), colInfo);
-        cInfoLst.add(colInfo);
-      }
-    }
+    List<VirtualColumn> virtualCols = tabMetaData.getVirtualColumns(conf);
+
+    virtualCols
+        .forEach(vc ->
+            rr.put(tableAlias, vc.getName().toLowerCase(),
+                new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden())
+            )
+        );

     // 4. Build operator
     Map<String, String> tabPropsFromQuery = qb.getTabPropsForAlias(tableAlias);

ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java

Lines changed: 1 addition & 7 deletions

@@ -11988,13 +11988,7 @@ private Operator genTablePlan(String alias, QB qb) throws SemanticException {
     }

     // put virtual columns into RowResolver.
-    List<VirtualColumn> vcList = new ArrayList<>();
-    if (!tab.isNonNative()) {
-      vcList.addAll(VirtualColumn.getRegistry(conf));
-    }
-    if (tab.isNonNative() && AcidUtils.isNonNativeAcidTable(tab)) {
-      vcList.addAll(tab.getStorageHandler().acidVirtualColumns());
-    }
+    List<VirtualColumn> vcList = tab.getVirtualColumns(conf);

     vcList.forEach(vc -> rwsch.put(alias, vc.getName().toLowerCase(), new ColumnInfo(vc.getName(),
         vc.getTypeInfo(), alias, true, vc.getIsHidden()
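Design note: each planner previously kept its own inline copy of these rules, and the copies had drifted. The CBO path gated everything behind tableType == TableType.NATIVE (including a snapshot branch that itself required isNonNative(), so non-native tables got no virtual columns there at all), while this non-CBO path added ACID virtual columns for non-native ACID tables but never SNAPSHOT_ID. With both paths delegating to Table#getVirtualColumns, the two row schemas agree by construction, which is exactly what the Iceberg LATERAL VIEW qtest above exercises.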
