diff --git a/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q b/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q index cb7b7ae9ae88..02e05b3671ab 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q @@ -3,58 +3,105 @@ set hive.stats.autogather=true; set hive.stats.column.autogather=true; +-- Create source table +drop table if exists src_ice; +create external table src_ice( + a int, + b string, + c int) +stored by iceberg; + +insert into src_ice values + (1, 'one', 50), + (2, 'two', 51), + (2, 'two', 51), + (2, 'two', 51), + (3, 'three', 52), + (4, 'four', 53), + (5, 'five', 54), + (111, 'one', 55), + (333, 'two', 56); + +-- Test hive.iceberg.stats.source = iceberg set hive.iceberg.stats.source=iceberg; + + +-- Test NON-PARTITIONED table with hive.iceberg.stats.source=iceberg drop table if exists tbl_ice_puffin; -create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2'); -insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56); -insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56); -explain select * from tbl_ice_puffin order by a, b, c; -select * from tbl_ice_puffin order by a, b, c; -desc formatted tbl_ice_puffin b; -update tbl_ice_puffin set b='two' where b='one' or b='three'; +create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg; + +insert into tbl_ice_puffin select * from src_ice; +insert into tbl_ice_puffin select * from src_ice; + +select count(*) from tbl_ice_puffin; +EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin; +desc formatted tbl_ice_puffin B; + +update tbl_ice_puffin + set b='two' where b='one' or b='three'; + analyze table tbl_ice_puffin compute statistics for columns; -explain select * from tbl_ice_puffin order by a, b, c; -select * from tbl_ice_puffin order by a, b, c; + select count(*) from tbl_ice_puffin; -desc formatted tbl_ice_puffin b; +EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin; +desc formatted tbl_ice_puffin B; + + +-- Test PARTITIONED table with hive.iceberg.stats.source=iceberg +drop table tbl_ice_puffin; +create external table tbl_ice_puffin( + a int, + b string +) +partitioned by (c int) +stored by iceberg; +insert overwrite table tbl_ice_puffin select * from src_ice; +delete from tbl_ice_puffin where a <= 2; --- Test if hive.iceberg.stats.source is empty +analyze table tbl_ice_puffin compute statistics for columns A, C; + +select count(*) from tbl_ice_puffin; +EXPLAIN select min(a), max(c) from tbl_ice_puffin; +desc formatted tbl_ice_puffin C; + +set hive.stats.fetch.column.stats=true; +EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a); + + +-- Test hive.iceberg.stats.source is empty set hive.iceberg.stats.source= ; -drop table if exists tbl_ice_puffin; -create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2'); -insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56); -explain select * from tbl_ice_puffin order by a, b, c; +drop table tbl_ice_puffin; +create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg; + +insert into tbl_ice_puffin select * from src_ice; -set hive.iceberg.stats.source=iceberg; -drop table if exists tbl_ice_puffin; -create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2'); -insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56); -explain select * from tbl_ice_puffin order by a, b, c; -select * from tbl_ice_puffin order by a, b, c; select count(*) from tbl_ice_puffin; -desc formatted tbl_ice_puffin a; +EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin; +desc formatted tbl_ice_puffin A; +-- Test hive.iceberg.stats.source = metastore set hive.iceberg.stats.source=metastore; drop table if exists tbl_ice; -create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2'); -insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56); -explain select * from tbl_ice order by a, b, c; -select * from tbl_ice order by a, b, c; -select count(*) from tbl_ice; +create external table tbl_ice( + a int, + b string, + c int) +stored by iceberg; -set hive.iceberg.stats.source=iceberg; -delete from tbl_ice_puffin where a = 2; -analyze table tbl_ice_puffin compute statistics for columns A, C; -explain select * from tbl_ice_puffin order by a, b, c; -select count(*) from tbl_ice_puffin; -desc formatted tbl_ice_puffin C; +insert into tbl_ice select * from src_ice; -create table t1 (a int) stored by iceberg tblproperties ('format-version'='2'); -create table t2 (b int) stored by iceberg tblproperties ('format-version'='2'); -describe formatted t1; -describe formatted t2; -explain select * from t1 join t2 on t1.a = t2.b; \ No newline at end of file +select count(*) from tbl_ice; +EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice; +desc formatted tbl_ice A; diff --git a/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats.q b/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats.q index 93269513f53e..c2d2b1b15a22 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats.q @@ -1,21 +1,47 @@ -- Mask random uuid --! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ +set hive.fetch.task.conversion=none; + set hive.stats.autogather=true; set hive.stats.column.autogather=true; set hive.iceberg.stats.source=iceberg; + drop table if exists tbl_ice_puffin; -create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2'); -insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56); +create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg; + +insert into tbl_ice_puffin values + (1, 'one', 50), + (2, 'two', 51), + (2, 'two', 51), + (2, 'two', 51), + (3, 'three', 52), + (4, 'four', 53), + (5, 'five', 54), + (111, 'one', 55), + (333, 'two', 56); + +explain select * from tbl_ice_puffin; desc formatted tbl_ice_puffin a; +desc formatted tbl_ice_puffin b; desc formatted tbl_ice_puffin c; -explain select * from tbl_ice_puffin order by a, b, c; -insert into tbl_ice_puffin values (1000, 'one', 1000), (5000, 'two', 5000); + +insert into tbl_ice_puffin values (1000, 'six', 1000), (5000, 'two', 5000); + +explain select * from tbl_ice_puffin; desc formatted tbl_ice_puffin a; +desc formatted tbl_ice_puffin b; desc formatted tbl_ice_puffin c; -explain select * from tbl_ice_puffin order by a, b, c; -insert into tbl_ice_puffin values (10, 'one', 100000), (5000, 'two', 510000); -explain select * from tbl_ice_puffin order by a, b, c; + +insert into tbl_ice_puffin values (10, 'six', 100000), (5000, 'two', 510000); + +explain select * from tbl_ice_puffin; desc formatted tbl_ice_puffin a; +desc formatted tbl_ice_puffin b; desc formatted tbl_ice_puffin c; + -- Result: a = (min: 1, max: 5000) , c =(min: 50, max: 51000) \ No newline at end of file diff --git a/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q b/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q index 243b358eb53f..a32045bc0d0e 100644 --- a/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q +++ b/iceberg/iceberg-handler/src/test/queries/positive/puffin_col_stats_with_time_travel.q @@ -1,25 +1,33 @@ set hive.fetch.task.conversion=none; -create external table default.tbl_ice_puffin_time_travel(a int, b string, c int) stored by iceberg; -insert into default.tbl_ice_puffin_time_travel values (1, 'one', 50), (2, 'two', 51); -alter table default.tbl_ice_puffin_time_travel create tag checkpoint; +create external table tbl_ice_puffin_time_travel( + a int, + b string, + c int) +stored by iceberg; -explain select * from default.tbl_ice_puffin_time_travel; +insert into tbl_ice_puffin_time_travel values + (1, 'one', 50), + (2, 'two', 51); + +alter table tbl_ice_puffin_time_travel create tag checkpoint; + +explain select * from tbl_ice_puffin_time_travel; explain select * from default.tbl_ice_puffin_time_travel.tag_checkpoint; insert into tbl_ice_puffin_time_travel values -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null); + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null); -explain select * from default.tbl_ice_puffin_time_travel; +explain select * from tbl_ice_puffin_time_travel; explain select * from default.tbl_ice_puffin_time_travel.tag_checkpoint; diff --git a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out index d85fc67aab87..6810c72c93e8 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out @@ -1,38 +1,103 @@ +PREHOOK: query: drop table if exists src_ice +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: drop table if exists src_ice +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: create external table src_ice( + a int, + b string, + c int) +stored by iceberg +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_ice +POSTHOOK: query: create external table src_ice( + a int, + b string, + c int) +stored by iceberg +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_ice +PREHOOK: query: insert into src_ice values + (1, 'one', 50), + (2, 'two', 51), + (2, 'two', 51), + (2, 'two', 51), + (3, 'three', 52), + (4, 'four', 53), + (5, 'five', 54), + (111, 'one', 55), + (333, 'two', 56) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@src_ice +POSTHOOK: query: insert into src_ice values + (1, 'one', 50), + (2, 'two', 51), + (2, 'two', 51), + (2, 'two', 51), + (3, 'three', 52), + (4, 'four', 53), + (5, 'five', 54), + (111, 'one', 55), + (333, 'two', 56) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@src_ice PREHOOK: query: drop table if exists tbl_ice_puffin PREHOOK: type: DROPTABLE PREHOOK: Output: database:default POSTHOOK: query: drop table if exists tbl_ice_puffin POSTHOOK: type: DROPTABLE POSTHOOK: Output: database:default -PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +PREHOOK: query: create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +POSTHOOK: query: create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +PREHOOK: query: insert into tbl_ice_puffin select * from src_ice PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@src_ice PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +POSTHOOK: query: insert into tbl_ice_puffin select * from src_ice POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@src_ice POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +PREHOOK: query: insert into tbl_ice_puffin select * from src_ice PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@src_ice PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +POSTHOOK: query: insert into tbl_ice_puffin select * from src_ice POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@src_ice POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +PREHOOK: query: select count(*) from tbl_ice_puffin PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +POSTHOOK: query: select count(*) from tbl_ice_puffin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Output: hdfs://### HDFS PATH ### +18 +PREHOOK: query: EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### @@ -40,55 +105,39 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=18 width=95) - Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=18 width=95) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=18 width=95) - default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] + Reducer 3 vectorized + File Output Operator [FS_20] + Group By Operator [GBY_19] (rows=1 width=16) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","count(VALUE._col1)","max(VALUE._col2)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=16) + Output:["_col0","_col1","_col2"],aggregations:["min(_col1)","count(_col0)","max(_col2)"] + Group By Operator [GBY_16] (rows=5 width=95) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_14] (rows=5 width=95) + Output:["_col0","_col1","_col3"],aggregations:["min(a)","max(c)"],keys:b + Select Operator [SEL_13] (rows=18 width=95) + Output:["a","b","c"] + TableScan [TS_0] (rows=18 width=95) + default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] -PREHOOK: query: select * from tbl_ice_puffin order by a, b, c -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: hdfs://### HDFS PATH ### -1 one 50 -1 one 50 -2 two 51 -2 two 51 -2 two 51 -2 two 51 -2 two 51 -2 two 51 -3 three 52 -3 three 52 -4 four 53 -4 four 53 -5 five 54 -5 five 54 -111 one 55 -111 one 55 -333 two 56 -333 two 56 -PREHOOK: query: desc formatted tbl_ice_puffin b +PREHOOK: query: desc formatted tbl_ice_puffin B PREHOOK: type: DESCTABLE PREHOOK: Input: default@tbl_ice_puffin -POSTHOOK: query: desc formatted tbl_ice_puffin b +POSTHOOK: query: desc formatted tbl_ice_puffin B POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_puffin -col_name b +col_name B data_type string min max @@ -101,12 +150,14 @@ num_falses bit_vector comment COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} -PREHOOK: query: update tbl_ice_puffin set b='two' where b='one' or b='three' +PREHOOK: query: update tbl_ice_puffin + set b='two' where b='one' or b='three' PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: default@tbl_ice_puffin PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: update tbl_ice_puffin set b='two' where b='one' or b='three' +POSTHOOK: query: update tbl_ice_puffin + set b='two' where b='one' or b='three' POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: default@tbl_ice_puffin @@ -121,11 +172,20 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### -PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +PREHOOK: query: select count(*) from tbl_ice_puffin PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +POSTHOOK: query: select count(*) from tbl_ice_puffin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Output: hdfs://### HDFS PATH ### +18 +PREHOOK: query: EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### @@ -133,64 +193,39 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=18 width=95) - Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=18 width=95) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=18 width=95) - default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] + Reducer 3 vectorized + File Output Operator [FS_20] + Group By Operator [GBY_19] (rows=1 width=16) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","count(VALUE._col1)","max(VALUE._col2)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=16) + Output:["_col0","_col1","_col2"],aggregations:["min(_col1)","count(_col0)","max(_col2)"] + Group By Operator [GBY_16] (rows=3 width=95) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_14] (rows=3 width=95) + Output:["_col0","_col1","_col3"],aggregations:["min(a)","max(c)"],keys:b + Select Operator [SEL_13] (rows=18 width=95) + Output:["a","b","c"] + TableScan [TS_0] (rows=18 width=95) + default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] -PREHOOK: query: select * from tbl_ice_puffin order by a, b, c -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: hdfs://### HDFS PATH ### -1 two 50 -1 two 50 -2 two 51 -2 two 51 -2 two 51 -2 two 51 -2 two 51 -2 two 51 -3 two 52 -3 two 52 -4 four 53 -4 four 53 -5 five 54 -5 five 54 -111 two 55 -111 two 55 -333 two 56 -333 two 56 -PREHOOK: query: select count(*) from tbl_ice_puffin -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select count(*) from tbl_ice_puffin -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: hdfs://### HDFS PATH ### -18 -PREHOOK: query: desc formatted tbl_ice_puffin b +PREHOOK: query: desc formatted tbl_ice_puffin B PREHOOK: type: DESCTABLE PREHOOK: Input: default@tbl_ice_puffin -POSTHOOK: query: desc formatted tbl_ice_puffin b +POSTHOOK: query: desc formatted tbl_ice_puffin B POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_puffin -col_name b +col_name B data_type string min max @@ -203,91 +238,225 @@ num_falses bit_vector HL comment COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} -PREHOOK: query: drop table if exists tbl_ice_puffin +PREHOOK: query: drop table tbl_ice_puffin PREHOOK: type: DROPTABLE PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: database:default PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: drop table if exists tbl_ice_puffin +POSTHOOK: query: drop table tbl_ice_puffin POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +PREHOOK: query: create external table tbl_ice_puffin( + a int, + b string +) +partitioned by (c int) +stored by iceberg PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +POSTHOOK: query: create external table tbl_ice_puffin( + a int, + b string +) +partitioned by (c int) +stored by iceberg POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +PREHOOK: query: insert overwrite table tbl_ice_puffin select * from src_ice PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@src_ice PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +POSTHOOK: query: insert overwrite table tbl_ice_puffin select * from src_ice POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@src_ice +POSTHOOK: Output: default@tbl_ice_puffin +PREHOOK: query: delete from tbl_ice_puffin where a <= 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: delete from tbl_ice_puffin where a <= 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: analyze table tbl_ice_puffin compute statistics for columns A, C +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: default@tbl_ice_puffin +PREHOOK: Output: default@tbl_ice_puffin@c=52 +PREHOOK: Output: default@tbl_ice_puffin@c=53 +PREHOOK: Output: default@tbl_ice_puffin@c=54 +PREHOOK: Output: default@tbl_ice_puffin@c=55 +PREHOOK: Output: default@tbl_ice_puffin@c=56 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: analyze table tbl_ice_puffin compute statistics for columns A, C +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +POSTHOOK: Output: default@tbl_ice_puffin@c=52 +POSTHOOK: Output: default@tbl_ice_puffin@c=53 +POSTHOOK: Output: default@tbl_ice_puffin@c=54 +POSTHOOK: Output: default@tbl_ice_puffin@c=55 +POSTHOOK: Output: default@tbl_ice_puffin@c=56 +POSTHOOK: Output: hdfs://### HDFS PATH ### +PREHOOK: query: select count(*) from tbl_ice_puffin PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +POSTHOOK: query: select count(*) from tbl_ice_puffin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 +PREHOOK: query: EXPLAIN select min(a), max(c) from tbl_ice_puffin +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN select min(a), max(c) from tbl_ice_puffin POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=9 width=95) - Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=9 width=95) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=95) - default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] + File Output Operator [FS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0","_col1"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_9] + Group By Operator [GBY_8] (rows=1 width=8) + Output:["_col0","_col1"],aggregations:["min(a)","max(c)"] + Select Operator [SEL_7] (rows=5 width=8) + Output:["a","c"] + TableScan [TS_0] (rows=5 width=8) + default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","c"] -PREHOOK: query: drop table if exists tbl_ice_puffin +PREHOOK: query: desc formatted tbl_ice_puffin C +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tbl_ice_puffin +POSTHOOK: query: desc formatted tbl_ice_puffin C +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tbl_ice_puffin +col_name C +data_type int +min 52 +max 56 +num_nulls 0 +distinct_count 5 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +PREHOOK: query: EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_ice +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_ice +POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 3 vectorized + File Output Operator [FS_37] + Group By Operator [GBY_36] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_11] + Group By Operator [GBY_10] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_29] (rows=6 width=8) + Conds:RS_32._col0=RS_35._col0(Inner) + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_32] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=9 width=4) + Output:["_col0"] + Filter Operator [FIL_30] (rows=9 width=4) + predicate:a is not null + TableScan [TS_0] (rows=9 width=4) + default@src_ice,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["a"] + <-Map 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=5 width=4) + Output:["_col0"] + Filter Operator [FIL_33] (rows=5 width=4) + predicate:a is not null + TableScan [TS_3] (rows=5 width=4) + default@tbl_ice_puffin,t2,Tbl:COMPLETE,Col:COMPLETE,Output:["a"] + +PREHOOK: query: drop table tbl_ice_puffin PREHOOK: type: DROPTABLE PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: database:default PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: drop table if exists tbl_ice_puffin +POSTHOOK: query: drop table tbl_ice_puffin POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +PREHOOK: query: create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +POSTHOOK: query: create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +PREHOOK: query: insert into tbl_ice_puffin select * from src_ice PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@src_ice PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +POSTHOOK: query: insert into tbl_ice_puffin select * from src_ice POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@src_ice POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +PREHOOK: query: select count(*) from tbl_ice_puffin PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +POSTHOOK: query: select count(*) from tbl_ice_puffin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Output: hdfs://### HDFS PATH ### +9 +PREHOOK: query: EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice_puffin POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### @@ -295,55 +464,39 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=9 width=95) - Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=9 width=95) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=95) - default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] + Reducer 3 vectorized + File Output Operator [FS_20] + Group By Operator [GBY_19] (rows=1 width=16) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","count(VALUE._col1)","max(VALUE._col2)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=16) + Output:["_col0","_col1","_col2"],aggregations:["min(_col1)","count(_col0)","max(_col2)"] + Group By Operator [GBY_16] (rows=5 width=95) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_14] (rows=5 width=95) + Output:["_col0","_col1","_col3"],aggregations:["min(a)","max(c)"],keys:b + Select Operator [SEL_13] (rows=9 width=95) + Output:["a","b","c"] + TableScan [TS_0] (rows=9 width=95) + default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] -PREHOOK: query: select * from tbl_ice_puffin order by a, b, c -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: hdfs://### HDFS PATH ### -1 one 50 -2 two 51 -2 two 51 -2 two 51 -3 three 52 -4 four 53 -5 five 54 -111 one 55 -333 two 56 -PREHOOK: query: select count(*) from tbl_ice_puffin -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select count(*) from tbl_ice_puffin -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: hdfs://### HDFS PATH ### -9 -PREHOOK: query: desc formatted tbl_ice_puffin a +PREHOOK: query: desc formatted tbl_ice_puffin A PREHOOK: type: DESCTABLE PREHOOK: Input: default@tbl_ice_puffin -POSTHOOK: query: desc formatted tbl_ice_puffin a +POSTHOOK: query: desc formatted tbl_ice_puffin A POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@tbl_ice_puffin -col_name a +col_name A data_type int min 1 max 333 @@ -362,67 +515,30 @@ PREHOOK: Output: database:default POSTHOOK: query: drop table if exists tbl_ice POSTHOOK: type: DROPTABLE POSTHOOK: Output: database:default -PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +PREHOOK: query: create external table tbl_ice( + a int, + b string, + c int) +stored by iceberg PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tbl_ice -POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +POSTHOOK: query: create external table tbl_ice( + a int, + b string, + c int) +stored by iceberg POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_ice -PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +PREHOOK: query: insert into tbl_ice select * from src_ice PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@src_ice PREHOOK: Output: default@tbl_ice -POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +POSTHOOK: query: insert into tbl_ice select * from src_ice POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@src_ice POSTHOOK: Output: default@tbl_ice -PREHOOK: query: explain select * from tbl_ice order by a, b, c -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice order by a, b, c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice -POSTHOOK: Output: hdfs://### HDFS PATH ### -Plan optimized by CBO. - -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) - -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=9 width=95) - Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=9 width=95) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=95) - default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] - -PREHOOK: query: select * from tbl_ice order by a, b, c -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select * from tbl_ice order by a, b, c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice -POSTHOOK: Output: hdfs://### HDFS PATH ### -1 one 50 -2 two 51 -2 two 51 -2 two 51 -3 three 52 -4 four 53 -5 five 54 -111 one 55 -333 two 56 PREHOOK: query: select count(*) from tbl_ice PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice @@ -432,225 +548,60 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice POSTHOOK: Output: hdfs://### HDFS PATH ### 9 -PREHOOK: query: delete from tbl_ice_puffin where a = 2 +PREHOOK: query: EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: delete from tbl_ice_puffin where a = 2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: analyze table tbl_ice_puffin compute statistics for columns A, C -PREHOOK: type: ANALYZE_TABLE -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: default@tbl_ice_puffin -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: analyze table tbl_ice_puffin compute statistics for columns A, C -POSTHOOK: type: ANALYZE_TABLE -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: default@tbl_ice_puffin -POSTHOOK: Output: hdfs://### HDFS PATH ### -PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Input: default@tbl_ice PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +POSTHOOK: query: EXPLAIN select min(a), count(distinct b), max(c) from tbl_ice POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Input: default@tbl_ice POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=6 width=192) - Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=6 width=192) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=6 width=192) - default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:PARTIAL,Output:["a","b","c"] + Reducer 3 vectorized + File Output Operator [FS_20] + Group By Operator [GBY_19] (rows=1 width=16) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","count(VALUE._col1)","max(VALUE._col2)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_18] + Group By Operator [GBY_17] (rows=1 width=16) + Output:["_col0","_col1","_col2"],aggregations:["min(_col1)","count(_col0)","max(_col2)"] + Group By Operator [GBY_16] (rows=5 width=95) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_14] (rows=5 width=95) + Output:["_col0","_col1","_col3"],aggregations:["min(a)","max(c)"],keys:b + Select Operator [SEL_13] (rows=9 width=95) + Output:["a","b","c"] + TableScan [TS_0] (rows=9 width=95) + default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] -PREHOOK: query: select count(*) from tbl_ice_puffin -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: select count(*) from tbl_ice_puffin -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: hdfs://### HDFS PATH ### -6 -PREHOOK: query: desc formatted tbl_ice_puffin C +PREHOOK: query: desc formatted tbl_ice A PREHOOK: type: DESCTABLE -PREHOOK: Input: default@tbl_ice_puffin -POSTHOOK: query: desc formatted tbl_ice_puffin C +PREHOOK: Input: default@tbl_ice +POSTHOOK: query: desc formatted tbl_ice A POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@tbl_ice_puffin -col_name C +POSTHOOK: Input: default@tbl_ice +col_name A data_type int -min 50 -max 56 +min 1 +max 333 num_nulls 0 -distinct_count 6 +distinct_count 7 avg_col_len max_col_len num_trues num_falses bit_vector HL comment -COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"c\":\"true\"}} -PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t1 -POSTHOOK: query: create table t1 (a int) stored by iceberg tblproperties ('format-version'='2') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t1 -PREHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@t2 -POSTHOOK: query: create table t2 (b int) stored by iceberg tblproperties ('format-version'='2') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@t2 -PREHOOK: query: describe formatted t1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@t1 -POSTHOOK: query: describe formatted t1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@t1 -# col_name data_type comment -a int - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: EXTERNAL_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}} - EXTERNAL TRUE - bucketing_version 2 - current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]} - format-version 2 - iceberg.orc.files.only false - metadata_location hdfs://### HDFS PATH ### - numFiles 0 - numRows 0 - parquet.compression zstd - rawDataSize 0 - serialization.format 1 - snapshot-count 0 - storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler - table_type ICEBERG - totalSize #Masked# -#### A masked pattern was here #### - uuid #Masked# - write.delete.mode merge-on-read - write.merge.mode merge-on-read - write.metadata.delete-after-commit.enabled true - write.update.mode merge-on-read - -# Storage Information -SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe -InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat -OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat -Compressed: No -Sort Columns: [] -PREHOOK: query: describe formatted t2 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@t2 -POSTHOOK: query: describe formatted t2 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@t2 -# col_name data_type comment -b int - -# Detailed Table Information -Database: default -#### A masked pattern was here #### -Retention: 0 -#### A masked pattern was here #### -Table Type: EXTERNAL_TABLE -Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}} - EXTERNAL TRUE - bucketing_version 2 - current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"b\",\"required\":false,\"type\":\"int\"}]} - format-version 2 - iceberg.orc.files.only false - metadata_location hdfs://### HDFS PATH ### - numFiles 0 - numRows 0 - parquet.compression zstd - rawDataSize 0 - serialization.format 1 - snapshot-count 0 - storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler - table_type ICEBERG - totalSize #Masked# -#### A masked pattern was here #### - uuid #Masked# - write.delete.mode merge-on-read - write.merge.mode merge-on-read - write.metadata.delete-after-commit.enabled true - write.update.mode merge-on-read - -# Storage Information -SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe -InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat -OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat -Compressed: No -Sort Columns: [] -PREHOOK: query: explain select * from t1 join t2 on t1.a = t2.b -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from t1 join t2 on t1.a = t2.b -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Output: hdfs://### HDFS PATH ### -Plan optimized by CBO. - -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) - -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 - File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_25] (rows=1 width=4) - Conds:RS_28._col0=RS_31._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_28] - PartitionCols:_col0 - Select Operator [SEL_27] (rows=1 width=4) - Output:["_col0"] - Filter Operator [FIL_26] (rows=1 width=4) - predicate:a is not null - TableScan [TS_0] (rows=1 width=4) - default@t1,t1,Tbl:COMPLETE,Col:NONE,Output:["a"] - <-Map 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_31] - PartitionCols:_col0 - Select Operator [SEL_30] (rows=1 width=4) - Output:["_col0"] - Filter Operator [FIL_29] (rows=1 width=4) - predicate:b is not null - TableScan [TS_3] (rows=1 width=4) - default@t2,t2,Tbl:COMPLETE,Col:NONE,Output:["b"] - +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out index 8bdfc189462e..651ee8c48c36 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/puffin_col_stats_with_time_travel.q.out @@ -1,30 +1,42 @@ -PREHOOK: query: create external table default.tbl_ice_puffin_time_travel(a int, b string, c int) stored by iceberg +PREHOOK: query: create external table tbl_ice_puffin_time_travel( + a int, + b string, + c int) +stored by iceberg PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tbl_ice_puffin_time_travel -POSTHOOK: query: create external table default.tbl_ice_puffin_time_travel(a int, b string, c int) stored by iceberg +POSTHOOK: query: create external table tbl_ice_puffin_time_travel( + a int, + b string, + c int) +stored by iceberg POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_ice_puffin_time_travel -PREHOOK: query: insert into default.tbl_ice_puffin_time_travel values (1, 'one', 50), (2, 'two', 51) +PREHOOK: query: insert into tbl_ice_puffin_time_travel values + (1, 'one', 50), + (2, 'two', 51) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@tbl_ice_puffin_time_travel -POSTHOOK: query: insert into default.tbl_ice_puffin_time_travel values (1, 'one', 50), (2, 'two', 51) +POSTHOOK: query: insert into tbl_ice_puffin_time_travel values + (1, 'one', 50), + (2, 'two', 51) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice_puffin_time_travel -PREHOOK: query: alter table default.tbl_ice_puffin_time_travel create tag checkpoint +PREHOOK: query: alter table tbl_ice_puffin_time_travel create tag checkpoint PREHOOK: type: ALTERTABLE_CREATETAG PREHOOK: Input: default@tbl_ice_puffin_time_travel -POSTHOOK: query: alter table default.tbl_ice_puffin_time_travel create tag checkpoint +POSTHOOK: query: alter table tbl_ice_puffin_time_travel create tag checkpoint POSTHOOK: type: ALTERTABLE_CREATETAG POSTHOOK: Input: default@tbl_ice_puffin_time_travel -PREHOOK: query: explain select * from default.tbl_ice_puffin_time_travel +PREHOOK: query: explain select * from tbl_ice_puffin_time_travel PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin_time_travel #### A masked pattern was here #### -POSTHOOK: query: explain select * from default.tbl_ice_puffin_time_travel +POSTHOOK: query: explain select * from tbl_ice_puffin_time_travel POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin_time_travel #### A masked pattern was here #### @@ -106,42 +118,42 @@ STAGE PLANS: ListSink PREHOOK: query: insert into tbl_ice_puffin_time_travel values -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null) + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@tbl_ice_puffin_time_travel POSTHOOK: query: insert into tbl_ice_puffin_time_travel values -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null), -(null, null, null) + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null), + (null, null, null) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice_puffin_time_travel -PREHOOK: query: explain select * from default.tbl_ice_puffin_time_travel +PREHOOK: query: explain select * from tbl_ice_puffin_time_travel PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin_time_travel #### A masked pattern was here #### -POSTHOOK: query: explain select * from default.tbl_ice_puffin_time_travel +POSTHOOK: query: explain select * from tbl_ice_puffin_time_travel POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin_time_travel #### A masked pattern was here #### diff --git a/iceberg/iceberg-handler/src/test/results/positive/puffin_col_stats.q.out b/iceberg/iceberg-handler/src/test/results/positive/puffin_col_stats.q.out index 6a133a0e4fa4..47b4dd3dcd08 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/puffin_col_stats.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/puffin_col_stats.q.out @@ -4,22 +4,69 @@ PREHOOK: Output: database:default POSTHOOK: query: drop table if exists tbl_ice_puffin POSTHOOK: type: DROPTABLE POSTHOOK: Output: database:default -PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +PREHOOK: query: create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg tblproperties ('format-version'='2') +POSTHOOK: query: create external table tbl_ice_puffin( + a int, + b string, + c int) +stored by iceberg POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +PREHOOK: query: insert into tbl_ice_puffin values + (1, 'one', 50), + (2, 'two', 51), + (2, 'two', 51), + (2, 'two', 51), + (3, 'three', 52), + (4, 'four', 53), + (5, 'five', 54), + (111, 'one', 55), + (333, 'two', 56) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 55), (333, 'two', 56) +POSTHOOK: query: insert into tbl_ice_puffin values + (1, 'one', 50), + (2, 'two', 51), + (2, 'two', 51), + (2, 'two', 51), + (3, 'three', 52), + (4, 'four', 53), + (5, 'five', 54), + (111, 'one', 55), + (333, 'two', 56) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice_puffin +PREHOOK: query: explain select * from tbl_ice_puffin +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl_ice_puffin +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain select * from tbl_ice_puffin +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl_ice_puffin +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=9 width=95) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=9 width=95) + default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] + PREHOOK: query: desc formatted tbl_ice_puffin a PREHOOK: type: DESCTABLE PREHOOK: Input: default@tbl_ice_puffin @@ -39,6 +86,25 @@ num_falses bit_vector HL comment COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +PREHOOK: query: desc formatted tbl_ice_puffin b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tbl_ice_puffin +POSTHOOK: query: desc formatted tbl_ice_puffin b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tbl_ice_puffin +col_name b +data_type string +min +max +num_nulls 0 +distinct_count 5 +avg_col_len 3.4444444444444446 +max_col_len 5 +num_trues +num_falses +bit_vector HL +comment +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: desc formatted tbl_ice_puffin c PREHOOK: type: DESCTABLE PREHOOK: Input: default@tbl_ice_puffin @@ -58,42 +124,35 @@ num_falses bit_vector HL comment COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} -PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +PREHOOK: query: insert into tbl_ice_puffin values (1000, 'six', 1000), (5000, 'two', 5000) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl_ice_puffin +POSTHOOK: query: insert into tbl_ice_puffin values (1000, 'six', 1000), (5000, 'two', 5000) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl_ice_puffin +PREHOOK: query: explain select * from tbl_ice_puffin PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +POSTHOOK: query: explain select * from tbl_ice_puffin POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) - Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=9 width=95) + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=11 width=95) Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=9 width=95) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=95) - default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] + TableScan [TS_0] (rows=11 width=95) + default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] -PREHOOK: query: insert into tbl_ice_puffin values (1000, 'one', 1000), (5000, 'two', 5000) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: insert into tbl_ice_puffin values (1000, 'one', 1000), (5000, 'two', 5000) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@tbl_ice_puffin PREHOOK: query: desc formatted tbl_ice_puffin a PREHOOK: type: DESCTABLE PREHOOK: Input: default@tbl_ice_puffin @@ -113,6 +172,25 @@ num_falses bit_vector comment COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +PREHOOK: query: desc formatted tbl_ice_puffin b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tbl_ice_puffin +POSTHOOK: query: desc formatted tbl_ice_puffin b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tbl_ice_puffin +col_name b +data_type string +min +max +num_nulls 0 +distinct_count 6 +avg_col_len 3.4444444444444446 +max_col_len 5 +num_trues +num_falses +bit_vector +comment +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: desc formatted tbl_ice_puffin c PREHOOK: type: DESCTABLE PREHOOK: Input: default@tbl_ice_puffin @@ -132,69 +210,34 @@ num_falses bit_vector comment COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} -PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl_ice_puffin -PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl_ice_puffin -POSTHOOK: Output: hdfs://### HDFS PATH ### -Plan optimized by CBO. - -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) - -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=11 width=95) - Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=11 width=95) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=11 width=95) - default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] - -PREHOOK: query: insert into tbl_ice_puffin values (10, 'one', 100000), (5000, 'two', 510000) +PREHOOK: query: insert into tbl_ice_puffin values (10, 'six', 100000), (5000, 'two', 510000) PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@tbl_ice_puffin -POSTHOOK: query: insert into tbl_ice_puffin values (10, 'one', 100000), (5000, 'two', 510000) +POSTHOOK: query: insert into tbl_ice_puffin values (10, 'six', 100000), (5000, 'two', 510000) POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl_ice_puffin -PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +PREHOOK: query: explain select * from tbl_ice_puffin PREHOOK: type: QUERY PREHOOK: Input: default@tbl_ice_puffin PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c +POSTHOOK: query: explain select * from tbl_ice_puffin POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl_ice_puffin POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. -Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) - Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 2 vectorized - File Output Operator [FS_8] - Select Operator [SEL_7] (rows=13 width=95) + Map 1 vectorized + File Output Operator [FS_4] + Select Operator [SEL_3] (rows=13 width=95) Output:["_col0","_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_6] - Select Operator [SEL_5] (rows=13 width=95) - Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=13 width=95) - default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] + TableScan [TS_0] (rows=13 width=95) + default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"] PREHOOK: query: desc formatted tbl_ice_puffin a PREHOOK: type: DESCTABLE @@ -215,6 +258,25 @@ num_falses bit_vector comment COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +PREHOOK: query: desc formatted tbl_ice_puffin b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@tbl_ice_puffin +POSTHOOK: query: desc formatted tbl_ice_puffin b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@tbl_ice_puffin +col_name b +data_type string +min +max +num_nulls 0 +distinct_count 6 +avg_col_len 3.4444444444444446 +max_col_len 5 +num_trues +num_falses +bit_vector +comment +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} PREHOOK: query: desc formatted tbl_ice_puffin c PREHOOK: type: DESCTABLE PREHOOK: Input: default@tbl_ice_puffin diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java index d5f44cbea2e1..722b2f701199 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java @@ -402,7 +402,7 @@ public Void call() throws Exception { } } - if (!updates.isEmpty()) { + if (!updates.isEmpty() && !table.hasNonNativePartitionSupport()) { db.alterPartitions(tableFullName, updates, environmentContext, true); }