diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java index 386bfd97748e..5ecd9b9a91e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java @@ -189,7 +189,9 @@ public static String[] extractColumnValues(FieldSchema column, boolean isColumnS } } else if (statsData.isSetDoubleStats()) { DoubleColumnStatsData doubleStats = statsData.getDoubleStats(); - values.addAll(Lists.newArrayList("" + doubleStats.getLowValue(), "" + doubleStats.getHighValue(), + String lowVal = doubleStats.isSetLowValue() ? "" + doubleStats.getLowValue() : ""; + String highVal = doubleStats.isSetHighValue() ? "" + doubleStats.getHighValue() : ""; + values.addAll(Lists.newArrayList(lowVal, highVal, "" + doubleStats.getNumNulls(), "" + doubleStats.getNumDVs(), "", "", "", "", convertToString(doubleStats.getBitVectors()))); if (histogramEnabled) { @@ -197,7 +199,9 @@ public static String[] extractColumnValues(FieldSchema column, boolean isColumnS } } else if (statsData.isSetLongStats()) { LongColumnStatsData longStats = statsData.getLongStats(); - values.addAll(Lists.newArrayList("" + longStats.getLowValue(), "" + longStats.getHighValue(), + String lowVal = longStats.isSetLowValue() ? "" + longStats.getLowValue() : ""; + String highVal = longStats.isSetHighValue() ? 
"" + longStats.getHighValue() : ""; + values.addAll(Lists.newArrayList(lowVal, highVal, "" + longStats.getNumNulls(), "" + longStats.getNumDVs(), "", "", "", "", convertToString(longStats.getBitVectors()))); if (histogramEnabled) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java index a7cd18253751..f5642aecff4d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/info/desc/DescTableOperation.java @@ -249,7 +249,7 @@ private void getColumnDataForPartitionKeyColumn(Table table, List c ColStatistics.Range r = cs.getRange(); StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, - r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(), + r == null || r.minValue == null ? null : r.minValue.toString(), r == null || r.maxValue == null ? 
null : r.maxValue.toString(), cs.getNumNulls(), cs.getCountDistint(), null, null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses()); ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index f8ff14d6c4f5..ac84ba0fb818 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -799,33 +799,13 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col if (colTypeLowerCase.equals(serdeConstants.TINYINT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME) || colTypeLowerCase.equals(serdeConstants.INT_TYPE_NAME)) { - cs.setCountDistint(csd.getLongStats().getNumDVs()); - cs.setNumNulls(csd.getLongStats().getNumNulls()); - cs.setAvgColLen(JavaDataModel.get().primitive1()); - cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue()); - cs.setBitVectors(csd.getLongStats().getBitVectors()); - cs.setHistogram(csd.getLongStats().getHistogram()); + populateColStatisticsFromLongStats(csd.getLongStats(), cs, JavaDataModel.get().primitive1()); } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) { - cs.setCountDistint(csd.getLongStats().getNumDVs()); - cs.setNumNulls(csd.getLongStats().getNumNulls()); - cs.setAvgColLen(JavaDataModel.get().primitive2()); - cs.setRange(csd.getLongStats().getLowValue(), csd.getLongStats().getHighValue()); - cs.setBitVectors(csd.getLongStats().getBitVectors()); - cs.setHistogram(csd.getLongStats().getHistogram()); + populateColStatisticsFromLongStats(csd.getLongStats(), cs, JavaDataModel.get().primitive2()); } else if (colTypeLowerCase.equals(serdeConstants.FLOAT_TYPE_NAME)) { - cs.setCountDistint(csd.getDoubleStats().getNumDVs()); - cs.setNumNulls(csd.getDoubleStats().getNumNulls()); - 
cs.setAvgColLen(JavaDataModel.get().primitive1()); - cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue()); - cs.setBitVectors(csd.getDoubleStats().getBitVectors()); - cs.setHistogram(csd.getDoubleStats().getHistogram()); + populateColStatisticsFromDoubleStats(csd.getDoubleStats(), cs, JavaDataModel.get().primitive1()); } else if (colTypeLowerCase.equals(serdeConstants.DOUBLE_TYPE_NAME)) { - cs.setCountDistint(csd.getDoubleStats().getNumDVs()); - cs.setNumNulls(csd.getDoubleStats().getNumNulls()); - cs.setAvgColLen(JavaDataModel.get().primitive2()); - cs.setRange(csd.getDoubleStats().getLowValue(), csd.getDoubleStats().getHighValue()); - cs.setBitVectors(csd.getDoubleStats().getBitVectors()); - cs.setHistogram(csd.getDoubleStats().getHistogram()); + populateColStatisticsFromDoubleStats(csd.getDoubleStats(), cs, JavaDataModel.get().primitive2()); } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME) || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) { @@ -894,6 +874,34 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col return cs; } + // Populate ColStatistics from LongColumnStatsData, checking isSet for optional i64 fields + private static void populateColStatisticsFromLongStats( + org.apache.hadoop.hive.metastore.api.LongColumnStatsData longStats, + ColStatistics cs, double avgColLen) { + cs.setCountDistint(longStats.getNumDVs()); + cs.setNumNulls(longStats.getNumNulls()); + cs.setAvgColLen(avgColLen); + Long lowVal = longStats.isSetLowValue() ? longStats.getLowValue() : null; + Long highVal = longStats.isSetHighValue() ? 
longStats.getHighValue() : null; + cs.setRange(lowVal, highVal); + cs.setBitVectors(longStats.getBitVectors()); + cs.setHistogram(longStats.getHistogram()); + } + + // Populate ColStatistics from DoubleColumnStatsData, checking isSet for optional double fields + private static void populateColStatisticsFromDoubleStats( + org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData doubleStats, + ColStatistics cs, double avgColLen) { + cs.setCountDistint(doubleStats.getNumDVs()); + cs.setNumNulls(doubleStats.getNumNulls()); + cs.setAvgColLen(avgColLen); + Double lowVal = doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null; + Double highVal = doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null; + cs.setRange(lowVal, highVal); + cs.setBitVectors(doubleStats.getBitVectors()); + cs.setHistogram(doubleStats.getHistogram()); + } + private static ColStatistics estimateColStats(long numRows, String colName, HiveConf conf, List schema) { ColumnInfo cinfo = getColumnInfoForColumn(colName, schema); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/ddl/TestShowUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/ddl/TestShowUtils.java new file mode 100644 index 000000000000..7a9446b57ce3 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/ddl/TestShowUtils.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.ddl; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.util.stream.Stream; + +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class TestShowUtils { + + @ParameterizedTest(name = "{0} - {1}") + @MethodSource("longStatsTestData") + public void testExtractColumnValues_LongStats(String typeName, String scenarioName, + Long lowValue, Long highValue, String expectedMin, String expectedMax) { + FieldSchema column = new FieldSchema("test_col", typeName, null); + + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + cso.setColName("test_col"); + cso.setColType(typeName); + + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.setNumDVs(100); + longStats.setNumNulls(10); + if (lowValue != null) { + longStats.setLowValue(lowValue); + } + if (highValue != null) { + longStats.setHighValue(highValue); + } + + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setLongStats(longStats); + cso.setStatsData(data); + 
+ String[] result = ShowUtils.extractColumnValues(column, true, cso, false); + + assertNotNull(result, "Result array should not be null"); + assertEquals("test_col", result[0], "Column name mismatch"); + assertEquals(typeName, result[1], "Column type mismatch"); + assertEquals(expectedMin, result[2], "Min value mismatch"); + assertEquals(expectedMax, result[3], "Max value mismatch"); + assertEquals("10", result[4], "NumNulls mismatch"); + assertEquals("100", result[5], "NumDVs mismatch"); + } + + @ParameterizedTest(name = "{0} - {1}") + @MethodSource("doubleStatsTestData") + public void testExtractColumnValues_DoubleStats(String typeName, String scenarioName, + Double lowValue, Double highValue, String expectedMin, String expectedMax) { + FieldSchema column = new FieldSchema("test_col", typeName, null); + + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + cso.setColName("test_col"); + cso.setColType(typeName); + + DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + doubleStats.setNumDVs(100); + doubleStats.setNumNulls(10); + if (lowValue != null) { + doubleStats.setLowValue(lowValue); + } + if (highValue != null) { + doubleStats.setHighValue(highValue); + } + + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setDoubleStats(doubleStats); + cso.setStatsData(data); + + String[] result = ShowUtils.extractColumnValues(column, true, cso, false); + + assertNotNull(result, "Result array should not be null"); + assertEquals("test_col", result[0], "Column name mismatch"); + assertEquals(typeName, result[1], "Column type mismatch"); + assertEquals(expectedMin, result[2], "Min value mismatch"); + assertEquals(expectedMax, result[3], "Max value mismatch"); + assertEquals("10", result[4], "NumNulls mismatch"); + assertEquals("100", result[5], "NumDVs mismatch"); + } + + static Stream longStatsTestData() { + return Stream.of( + // {typeName, scenarioName, lowValue, highValue, expectedMin, expectedMax} + 
Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "BothValuesSet", 1L, 1000L, "1", "1000"), + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "NoValuesSet", null, null, "", ""), + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "OnlyLowValueSet", 100L, null, "100", ""), + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "OnlyHighValueSet", null, 1000L, "", "1000"), + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "NegativeHighValueOnly", null, -5L, "", "-5"), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "BothValuesSet", 1L, 1000L, "1", "1000"), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "NoValuesSet", null, null, "", ""), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "OnlyLowValueSet", 100L, null, "100", ""), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "OnlyHighValueSet", null, 1000L, "", "1000"), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "NegativeHighValueOnly", null, -5L, "", "-5"), + Arguments.of(serdeConstants.INT_TYPE_NAME, "BothValuesSet", 1L, 1000L, "1", "1000"), + Arguments.of(serdeConstants.INT_TYPE_NAME, "NoValuesSet", null, null, "", ""), + Arguments.of(serdeConstants.INT_TYPE_NAME, "OnlyLowValueSet", 100L, null, "100", ""), + Arguments.of(serdeConstants.INT_TYPE_NAME, "OnlyHighValueSet", null, 1000L, "", "1000"), + Arguments.of(serdeConstants.INT_TYPE_NAME, "NegativeHighValueOnly", null, -5L, "", "-5"), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "BothValuesSet", 1L, 1000L, "1", "1000"), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "NoValuesSet", null, null, "", ""), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "OnlyLowValueSet", 100L, null, "100", ""), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "OnlyHighValueSet", null, 1000L, "", "1000"), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "NegativeHighValueOnly", null, -5L, "", "-5") + ); + } + + static Stream doubleStatsTestData() { + return Stream.of( + // {typeName, scenarioName, lowValue, highValue, expectedMin, expectedMax} + 
Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "BothValuesSet", 1.5, 1000.5, "1.5", "1000.5"), + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "NoValuesSet", null, null, "", ""), + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "OnlyLowValueSet", 100.5, null, "100.5", ""), + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "OnlyHighValueSet", null, 1000.5, "", "1000.5"), + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "NegativeHighValueOnly", null, -5.5, "", "-5.5"), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "BothValuesSet", 1.5, 1000.5, "1.5", "1000.5"), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "NoValuesSet", null, null, "", ""), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "OnlyLowValueSet", 100.5, null, "100.5", ""), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "OnlyHighValueSet", null, 1000.5, "", "1000.5"), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "NegativeHighValueOnly", null, -5.5, "", "-5.5") + ); + } + +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/ddl/table/info/desc/TestDescTableOperation.java b/ql/src/test/org/apache/hadoop/hive/ql/ddl/table/info/desc/TestDescTableOperation.java new file mode 100644 index 000000000000..bbbce58f2bbe --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/ddl/table/info/desc/TestDescTableOperation.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.ddl.table.info.desc; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.mockStatic; +import static org.mockito.Mockito.when; + +import java.util.stream.Stream; + +import org.apache.hadoop.hive.common.TableName; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.StatObjectConverter; +import org.apache.hadoop.hive.ql.ddl.DDLOperationContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.mockito.ArgumentCaptor; +import org.mockito.MockedStatic; + +/** + * Tests for DescTableOperation with minimal mocking to reach the partition-key column + * statistics path (getColumnDataForPartitionKeyColumn), where null Range values are handled. 
+ */ +public class TestDescTableOperation { + + @ParameterizedTest(name = "{0}") + @MethodSource("rangeTestCases") + public void testGetColumnDataForPartitionKeyColumnDifferentRanges( + String testName, + Number minValue, + Number maxValue, + String expectedMinValue, + String expectedMaxValue) throws Exception { + + // minimally possible mocking to pass (DDLOperationContext context, DescTableDesc desc) to the constructor + try (MockedStatic sessionState = mockStatic(SessionState.class); + MockedStatic statsUtils = mockStatic(StatsUtils.class); + MockedStatic statConverter = mockStatic(StatObjectConverter.class)) { + DDLOperationContext mockContext = mock(DDLOperationContext.class); + DescTableDesc mockDesc = mock(DescTableDesc.class); + Hive mockDb = mock(Hive.class); + Table mockTable = mock(Table.class); + HiveConf mockConf = new HiveConf(); + SessionState mockSessionState = mock(SessionState.class); + + sessionState.when(SessionState::get).thenReturn(mockSessionState); + when(mockSessionState.isHiveServerQuery()).thenReturn(false); + + TableName tableName = new TableName("hive", "testdb", "testtable"); + when(mockDesc.getTableName()).thenReturn(tableName); + when(mockDesc.getPartitionSpec()).thenReturn(null); + when(mockDesc.getResFile()).thenReturn("/tmp/test-result.txt"); + when(mockContext.getConf()).thenReturn(mockConf); + when(mockContext.getDb()).thenReturn(mockDb); + when(mockDb.getTable(eq("testdb"), eq("testtable"), any(), anyBoolean(), anyBoolean(), anyBoolean())) + .thenReturn(mockTable); + when(mockDesc.getColumnPath()).thenReturn("testdb.testtable.partition_col"); + when(mockDesc.isFormatted()).thenReturn(true); + when(mockTable.isPartitioned()).thenReturn(true); + when(mockTable.isPartitionKey("partition_col")).thenReturn(true); + + FieldSchema partitionCol = new FieldSchema("partition_col", "int", "partition column"); + when(mockTable.getPartColByName("partition_col")).thenReturn(partitionCol); + + // Create ColStatistics with the test's Range 
values + ColStatistics colStats = new ColStatistics("partition_col", "int"); + colStats.setRange(minValue, maxValue); + colStats.setNumNulls(0); + colStats.setCountDistint(100); + colStats.setAvgColLen(4); + + statsUtils.when(() -> StatsUtils.checkCanProvidePartitionStats(any())).thenReturn(true); + statsUtils.when(() -> StatsUtils.getColStatsForPartCol(any(), any(), any())).thenReturn(colStats); + + ArgumentCaptor minValueArgCaptor = ArgumentCaptor.forClass(Object.class); + ArgumentCaptor maxnValueArgCaptor = ArgumentCaptor.forClass(Object.class); + statConverter.when(() -> StatObjectConverter.fillColumnStatisticsData( + any(), any(), any(), any(), any(), any(), + minValueArgCaptor.capture(), maxnValueArgCaptor.capture(), + any(), any(), any(), any(), any(), any(), any(), any())) + .thenCallRealMethod(); + + DescTableOperation operation = new DescTableOperation(mockContext, mockDesc); + + // Execute - should no longer throw NullPointerException with null values of minValue or maxValue + assertDoesNotThrow(() -> operation.execute(), + "Should handle Range with null minValue and maxValue without NPE"); + + // Verify the String arguments (6 & 7) passed to fillColumnStatisticsData + assertEquals(expectedMinValue, minValueArgCaptor.getValue(), + "declow (arg 6) should be " + (expectedMinValue == null ? "null" : expectedMinValue)); + assertEquals(expectedMaxValue, maxnValueArgCaptor.getValue(), + "dechigh (arg 7) should be " + (expectedMaxValue == null ? 
"null" : expectedMaxValue)); + } + } + + static Stream rangeTestCases() { + return Stream.of( + Arguments.of("BothNull", null, null, null, null), + Arguments.of("MinNull", null, 100, null, "100"), + Arguments.of("MaxNull", 100, null, "100", null), + Arguments.of("NeitherNull", 100, 200, "100", "200") + ); + } + +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java index f975da045ad7..8e8192ed2075 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java @@ -18,19 +18,29 @@ package org.apache.hadoop.hive.ql.stats; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.util.Set; +import java.util.stream.Stream; import org.apache.commons.lang3.reflect.FieldUtils; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; import org.apache.hadoop.hive.serde.serdeConstants; -import org.junit.Test; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import 
com.google.common.collect.Sets; @@ -100,8 +110,138 @@ public void testPrimitiveSizeEstimations() throws Exception { } int maxVarLen = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAX_VARIABLE_LENGTH); long siz = StatsUtils.getSizeOfPrimitiveTypeArraysFromType(typeName, 3, maxVarLen); - assertNotEquals(field.toString(), 0, siz); + assertNotEquals(0, siz, field.toString()); } } -} \ No newline at end of file + @ParameterizedTest(name = "{0} - {1}") + @MethodSource("integerStatisticsTestData") + public void testGetColStatistics_IntegerTypes(String typeName, String scenarioName, + Long lowValue, Long highValue, Long expectedMin, Long expectedMax) { + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + cso.setColName("test_col"); + cso.setColType(typeName); + + LongColumnStatsData longStats = new LongColumnStatsData(); + longStats.setNumDVs(100); + longStats.setNumNulls(10); + if (lowValue != null) { + longStats.setLowValue(lowValue); + } + if (highValue != null) { + longStats.setHighValue(highValue); + } + + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setLongStats(longStats); + cso.setStatsData(data); + + ColStatistics cs = StatsUtils.getColStatistics(cso, "test_col"); + + assertNotNull(cs, "ColStatistics should not be null"); + assertEquals(100, cs.getCountDistint(), "NumDVs mismatch"); + assertEquals(10, cs.getNumNulls(), "NumNulls mismatch"); + + Range range = cs.getRange(); + assertNotNull(range, "Range should be created"); + + if (expectedMin == null) { + assertNull(range.minValue, "minValue should be null when lowValue is not set"); + } else { + assertEquals(expectedMin.longValue(), range.minValue.longValue(), "minValue mismatch"); + } + + if (expectedMax == null) { + assertNull(range.maxValue, "maxValue should be null when highValue is not set"); + } else { + assertEquals(expectedMax.longValue(), range.maxValue.longValue(), "maxValue mismatch"); + } + } + + @ParameterizedTest(name = "{0} - {1}") + 
@MethodSource("floatingPointStatisticsTestData") + public void testGetColStatistics_FloatingPointTypes(String typeName, String scenarioName, + Double lowValue, Double highValue, Double expectedMin, Double expectedMax) { + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + cso.setColName("test_col"); + cso.setColType(typeName); + + DoubleColumnStatsData doubleStats = new DoubleColumnStatsData(); + doubleStats.setNumDVs(100); + doubleStats.setNumNulls(10); + if (lowValue != null) { + doubleStats.setLowValue(lowValue); + } + if (highValue != null) { + doubleStats.setHighValue(highValue); + } + + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setDoubleStats(doubleStats); + cso.setStatsData(data); + + ColStatistics cs = StatsUtils.getColStatistics(cso, "test_col"); + + assertNotNull(cs, "ColStatistics should not be null"); + assertEquals(100, cs.getCountDistint(), "NumDVs mismatch"); + assertEquals(10, cs.getNumNulls(), "NumNulls mismatch"); + + Range range = cs.getRange(); + assertNotNull(range, "Range should be created"); + + if (expectedMin == null) { + assertNull(range.minValue, "minValue should be null when lowValue is not set"); + } else { + assertEquals(expectedMin, range.minValue.doubleValue(), 0.0001, "minValue mismatch"); + } + + if (expectedMax == null) { + assertNull(range.maxValue, "maxValue should be null when highValue is not set"); + } else { + assertEquals(expectedMax, range.maxValue.doubleValue(), 0.0001, "maxValue mismatch"); + } + } + + static Stream integerStatisticsTestData() { + return Stream.of( + // {typeName, scenarioName, lowValue, highValue, expectedMin, expectedMax} + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "BothValuesSet", 1L, 1000L, 1L, 1000L), + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "NoValuesSet", null, null, null, null), + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "OnlyLowValueSet", 100L, null, 100L, null), + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "OnlyHighValueSet", null, 1000L, null, 
1000L), + Arguments.of(serdeConstants.TINYINT_TYPE_NAME, "NegativeHighValueOnly", null, -5L, null, -5L), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "BothValuesSet", 1L, 1000L, 1L, 1000L), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "NoValuesSet", null, null, null, null), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "OnlyLowValueSet", 100L, null, 100L, null), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "OnlyHighValueSet", null, 1000L, null, 1000L), + Arguments.of(serdeConstants.SMALLINT_TYPE_NAME, "NegativeHighValueOnly", null, -5L, null, -5L), + Arguments.of(serdeConstants.INT_TYPE_NAME, "BothValuesSet", 1L, 1000L, 1L, 1000L), + Arguments.of(serdeConstants.INT_TYPE_NAME, "NoValuesSet", null, null, null, null), + Arguments.of(serdeConstants.INT_TYPE_NAME, "OnlyLowValueSet", 100L, null, 100L, null), + Arguments.of(serdeConstants.INT_TYPE_NAME, "OnlyHighValueSet", null, 1000L, null, 1000L), + Arguments.of(serdeConstants.INT_TYPE_NAME, "NegativeHighValueOnly", null, -5L, null, -5L), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "BothValuesSet", 1L, 1000L, 1L, 1000L), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "NoValuesSet", null, null, null, null), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "OnlyLowValueSet", 100L, null, 100L, null), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "OnlyHighValueSet", null, 1000L, null, 1000L), + Arguments.of(serdeConstants.BIGINT_TYPE_NAME, "NegativeHighValueOnly", null, -5L, null, -5L) + ); + } + + static Stream floatingPointStatisticsTestData() { + return Stream.of( + // {typeName, scenarioName, lowValue, highValue, expectedMin, expectedMax} + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "BothValuesSet", 1.5, 1000.5, 1.5, 1000.5), + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "NoValuesSet", null, null, null, null), + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "OnlyLowValueSet", 100.5, null, 100.5, null), + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "OnlyHighValueSet", null, 1000.5, null, 
1000.5), + Arguments.of(serdeConstants.FLOAT_TYPE_NAME, "NegativeHighValueOnly", null, -5.5, null, -5.5), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "BothValuesSet", 1.5, 1000.5, 1.5, 1000.5), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "NoValuesSet", null, null, null, null), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "OnlyLowValueSet", 100.5, null, 100.5, null), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "OnlyHighValueSet", null, 1000.5, null, 1000.5), + Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, "NegativeHighValueOnly", null, -5.5, null, -5.5) + ); + } + +} diff --git a/ql/src/test/queries/clientpositive/stats_unset_hilo.q b/ql/src/test/queries/clientpositive/stats_unset_hilo.q new file mode 100644 index 000000000000..8d460de82647 --- /dev/null +++ b/ql/src/test/queries/clientpositive/stats_unset_hilo.q @@ -0,0 +1,91 @@ +-- test tables prep +-- Both min and max stats UNSET +CREATE TABLE stats_both_unset ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double DOUBLE +); +ALTER TABLE stats_both_unset UPDATE STATISTICS SET('numRows'='10000'); +ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0'); +ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0'); +ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0'); +ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0'); +ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0'); +ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0'); + +-- Only min SET, max UNSET +CREATE TABLE stats_min_only ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double DOUBLE +); +ALTER TABLE stats_min_only UPDATE 
STATISTICS SET('numRows'='10000'); +ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0','lowValue'='-10'); +ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0','lowValue'='100'); +ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0','lowValue'='-1000'); +ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0','lowValue'='10000'); +ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0','lowValue'='-10.5'); +ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0','lowValue'='100.5'); + +-- Only max SET, min UNSET +CREATE TABLE stats_max_only ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double DOUBLE +); +ALTER TABLE stats_max_only UPDATE STATISTICS SET('numRows'='10000'); +ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0','highValue'='-5'); +ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0','highValue'='1000'); +ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0','highValue'='-500'); +ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0','highValue'='100000'); +ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0','highValue'='-25.5'); +ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0','highValue'='1000.5'); + +-- actual test groups +DESCRIBE FORMATTED stats_both_unset col_tinyint; +EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_tinyint > 50; +DESCRIBE FORMATTED stats_both_unset col_smallint; +EXPLAIN EXTENDED SELECT 
count(1) FROM stats_both_unset WHERE col_smallint < 500; +DESCRIBE FORMATTED stats_both_unset col_int; +EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_int BETWEEN 100 AND 500; +DESCRIBE FORMATTED stats_both_unset col_bigint; +EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_bigint = 999; +DESCRIBE FORMATTED stats_both_unset col_float; +EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_float > 50.0; +DESCRIBE FORMATTED stats_both_unset col_double; +EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_double < 500.0; + +DESCRIBE FORMATTED stats_min_only col_tinyint; +EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_tinyint > 50; +DESCRIBE FORMATTED stats_min_only col_smallint; +EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_smallint > 200; +DESCRIBE FORMATTED stats_min_only col_int; +EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_int > 2000; +DESCRIBE FORMATTED stats_min_only col_bigint; +EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_bigint > 20000; +DESCRIBE FORMATTED stats_min_only col_float; +EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_float > 50.0; +DESCRIBE FORMATTED stats_min_only col_double; +EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_double > 200.0; + +DESCRIBE FORMATTED stats_max_only col_tinyint; +EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_tinyint < 50; +DESCRIBE FORMATTED stats_max_only col_smallint; +EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_smallint < 500; +DESCRIBE FORMATTED stats_max_only col_int; +EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_int < 5000; +DESCRIBE FORMATTED stats_max_only col_bigint; +EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_bigint < 50000; +DESCRIBE FORMATTED stats_max_only col_float; +EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_float < 50.0; +DESCRIBE FORMATTED stats_max_only 
col_double; +EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_double < 500.0; diff --git a/ql/src/test/results/clientpositive/beeline/colstats_all_nulls.q.out b/ql/src/test/results/clientpositive/beeline/colstats_all_nulls.q.out index c14628f3c1b9..9918e1d77f06 100644 --- a/ql/src/test/results/clientpositive/beeline/colstats_all_nulls.q.out +++ b/ql/src/test/results/clientpositive/beeline/colstats_all_nulls.q.out @@ -45,8 +45,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls col_name a data_type bigint -min 0 -max 0 +min +max num_nulls 5 distinct_count 1 avg_col_len @@ -64,8 +64,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@all_nulls col_name b data_type double -min 0.0 -max 0.0 +min +max num_nulls 5 distinct_count 1 avg_col_len diff --git a/ql/src/test/results/clientpositive/llap/constraints_explain_ddl.q.out b/ql/src/test/results/clientpositive/llap/constraints_explain_ddl.q.out index 1564c22c5619..98c8a9056d22 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_explain_ddl.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_explain_ddl.q.out @@ -1192,17 +1192,17 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_year) IN (1985, 2004) (type: boolean) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: d_datekey (type: bigint), d_sellingseason (type: string) null sort order: zz - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE top n: 10 Select Operator expressions: d_datekey (type: bigint), d_sellingseason (type: string) outputColumnNames: d_datekey, d_sellingseason - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: d_datekey (type: bigint), d_sellingseason (type: string) minReductionHashAggr: 0.4 diff --git a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index 93b9b2ef49ad..1e537e875532 100644 --- a/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -601,17 +601,17 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_year) IN (1985, 2004) (type: boolean) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: d_datekey (type: bigint), d_sellingseason (type: string) null sort order: zz - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE top n: 10 Select Operator expressions: d_datekey (type: bigint), d_sellingseason (type: string) outputColumnNames: d_datekey, d_sellingseason - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: d_datekey (type: bigint), d_sellingseason (type: string) minReductionHashAggr: 0.4 diff --git a/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out index b10d02242e7e..082124a91106 100644 --- a/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/display_colstats_tbllvl.q.out @@ -476,8 +476,8 @@ POSTHOOK: 
type: DESCTABLE POSTHOOK: Input: default@empty_tab_n0 col_name a data_type int -min 0 -max 0 +min +max num_nulls 0 distinct_count 0 avg_col_len @@ -495,8 +495,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab_n0 col_name b data_type double -min 0.0 -max 0.0 +min +max num_nulls 0 distinct_count 0 avg_col_len diff --git a/ql/src/test/results/clientpositive/llap/fm-sketch.q.out b/ql/src/test/results/clientpositive/llap/fm-sketch.q.out index 40ca455cb609..00b752dd2078 100644 --- a/ql/src/test/results/clientpositive/llap/fm-sketch.q.out +++ b/ql/src/test/results/clientpositive/llap/fm-sketch.q.out @@ -105,8 +105,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@n_n0 col_name key data_type int -min 0 -max 0 +min +max num_nulls 500 distinct_count 1 avg_col_len diff --git a/ql/src/test/results/clientpositive/llap/hll.q.out b/ql/src/test/results/clientpositive/llap/hll.q.out index d8fd52993e87..ae694c653cc8 100644 --- a/ql/src/test/results/clientpositive/llap/hll.q.out +++ b/ql/src/test/results/clientpositive/llap/hll.q.out @@ -105,8 +105,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@n col_name key data_type int -min 0 -max 0 +min +max num_nulls 500 distinct_count 1 avg_col_len diff --git a/ql/src/test/results/clientpositive/llap/stats_unset_hilo.q.out b/ql/src/test/results/clientpositive/llap/stats_unset_hilo.q.out new file mode 100644 index 000000000000..8ea2e8fdc117 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/stats_unset_hilo.q.out @@ -0,0 +1,2916 @@ +PREHOOK: query: CREATE TABLE stats_both_unset ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double DOUBLE +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_both_unset +POSTHOOK: query: CREATE TABLE stats_both_unset ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double DOUBLE +) +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_both_unset +PREHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS SET('numRows'='10000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_both_unset +PREHOOK: Output: default@stats_both_unset +POSTHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS SET('numRows'='10000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_both_unset +POSTHOOK: Output: default@stats_both_unset +PREHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_both_unset +PREHOOK: Output: default@stats_both_unset +POSTHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_both_unset +POSTHOOK: Output: default@stats_both_unset +PREHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_both_unset +PREHOOK: Output: default@stats_both_unset +POSTHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_both_unset +POSTHOOK: Output: default@stats_both_unset +PREHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_both_unset +PREHOOK: Output: default@stats_both_unset +POSTHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_both_unset 
+POSTHOOK: Output: default@stats_both_unset +PREHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_both_unset +PREHOOK: Output: default@stats_both_unset +POSTHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_both_unset +POSTHOOK: Output: default@stats_both_unset +PREHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_both_unset +PREHOOK: Output: default@stats_both_unset +POSTHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_both_unset +POSTHOOK: Output: default@stats_both_unset +PREHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_both_unset +PREHOOK: Output: default@stats_both_unset +POSTHOOK: query: ALTER TABLE stats_both_unset UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_both_unset +POSTHOOK: Output: default@stats_both_unset +PREHOOK: query: CREATE TABLE stats_min_only ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double DOUBLE +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_min_only +POSTHOOK: query: CREATE TABLE stats_min_only ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double 
DOUBLE +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_min_only +PREHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS SET('numRows'='10000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_min_only +PREHOOK: Output: default@stats_min_only +POSTHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS SET('numRows'='10000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_min_only +POSTHOOK: Output: default@stats_min_only +PREHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0','lowValue'='-10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_min_only +PREHOOK: Output: default@stats_min_only +POSTHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0','lowValue'='-10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_min_only +POSTHOOK: Output: default@stats_min_only +PREHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0','lowValue'='100') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_min_only +PREHOOK: Output: default@stats_min_only +POSTHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0','lowValue'='100') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_min_only +POSTHOOK: Output: default@stats_min_only +PREHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0','lowValue'='-1000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_min_only +PREHOOK: Output: default@stats_min_only +POSTHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0','lowValue'='-1000') 
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_min_only +POSTHOOK: Output: default@stats_min_only +PREHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0','lowValue'='10000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_min_only +PREHOOK: Output: default@stats_min_only +POSTHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0','lowValue'='10000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_min_only +POSTHOOK: Output: default@stats_min_only +PREHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0','lowValue'='-10.5') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_min_only +PREHOOK: Output: default@stats_min_only +POSTHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0','lowValue'='-10.5') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_min_only +POSTHOOK: Output: default@stats_min_only +PREHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0','lowValue'='100.5') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_min_only +PREHOOK: Output: default@stats_min_only +POSTHOOK: query: ALTER TABLE stats_min_only UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0','lowValue'='100.5') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_min_only +POSTHOOK: Output: default@stats_min_only +PREHOOK: query: CREATE TABLE stats_max_only ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double DOUBLE +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@stats_max_only 
+POSTHOOK: query: CREATE TABLE stats_max_only ( + col_tinyint TINYINT, + col_smallint SMALLINT, + col_int INT, + col_bigint BIGINT, + col_float FLOAT, + col_double DOUBLE +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@stats_max_only +PREHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS SET('numRows'='10000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_max_only +PREHOOK: Output: default@stats_max_only +POSTHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS SET('numRows'='10000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_max_only +POSTHOOK: Output: default@stats_max_only +PREHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0','highValue'='-5') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_max_only +PREHOOK: Output: default@stats_max_only +POSTHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_tinyint SET('numDVs'='100','numNulls'='0','highValue'='-5') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_max_only +POSTHOOK: Output: default@stats_max_only +PREHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0','highValue'='1000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_max_only +PREHOOK: Output: default@stats_max_only +POSTHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_smallint SET('numDVs'='100','numNulls'='0','highValue'='1000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_max_only +POSTHOOK: Output: default@stats_max_only +PREHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0','highValue'='-500') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_max_only +PREHOOK: 
Output: default@stats_max_only +POSTHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_int SET('numDVs'='100','numNulls'='0','highValue'='-500') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_max_only +POSTHOOK: Output: default@stats_max_only +PREHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0','highValue'='100000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_max_only +PREHOOK: Output: default@stats_max_only +POSTHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_bigint SET('numDVs'='100','numNulls'='0','highValue'='100000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_max_only +POSTHOOK: Output: default@stats_max_only +PREHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0','highValue'='-25.5') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_max_only +PREHOOK: Output: default@stats_max_only +POSTHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_float SET('numDVs'='100','numNulls'='0','highValue'='-25.5') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_max_only +POSTHOOK: Output: default@stats_max_only +PREHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0','highValue'='1000.5') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@stats_max_only +PREHOOK: Output: default@stats_max_only +POSTHOOK: query: ALTER TABLE stats_max_only UPDATE STATISTICS FOR COLUMN col_double SET('numDVs'='100','numNulls'='0','highValue'='1000.5') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@stats_max_only +POSTHOOK: Output: default@stats_max_only +PREHOOK: query: DESCRIBE FORMATTED stats_both_unset col_tinyint +PREHOOK: type: DESCTABLE +PREHOOK: 
Input: default@stats_both_unset +POSTHOOK: query: DESCRIBE FORMATTED stats_both_unset col_tinyint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_both_unset +col_name col_tinyint +data_type tinyint +min +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_tinyint > 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_tinyint > 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_both_unset` +WHERE `col_tinyint` > 50 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_both_unset + filterExpr: (col_tinyint > 50Y) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_tinyint > 50Y) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_both_unset + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_both_unset + name: default.stats_both_unset + Truncated Path -> Alias: + /stats_both_unset [stats_both_unset] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_both_unset col_smallint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_both_unset +POSTHOOK: query: DESCRIBE FORMATTED stats_both_unset col_smallint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_both_unset +col_name col_smallint +data_type smallint +min +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_smallint < 500 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_smallint < 500 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT 
COUNT(*) AS `_c0` +FROM `default`.`stats_both_unset` +WHERE `col_smallint` < 500 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_both_unset + filterExpr: (col_smallint < 500S) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_smallint < 500S) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_both_unset + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_both_unset + name: default.stats_both_unset + Truncated Path -> Alias: + /stats_both_unset [stats_both_unset] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
+PREHOOK: query: DESCRIBE FORMATTED stats_both_unset col_int +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_both_unset +POSTHOOK: query: DESCRIBE FORMATTED stats_both_unset col_int +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_both_unset +col_name col_int +data_type int +min +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_int BETWEEN 100 AND 500 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_int BETWEEN 100 AND 500 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_both_unset` +WHERE `col_int` BETWEEN 100 AND 500 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_both_unset + filterExpr: col_int BETWEEN 100 AND 500 (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: col_int BETWEEN 100 AND 500 (type: boolean) + Statistics: Num rows: 1111 Data size: 4444 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1111 Data size: 4444 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + 
minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_both_unset + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_both_unset + name: default.stats_both_unset + Truncated Path -> Alias: + /stats_both_unset [stats_both_unset] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + 
Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_both_unset col_bigint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_both_unset +POSTHOOK: query: DESCRIBE FORMATTED stats_both_unset col_bigint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_both_unset +col_name col_bigint +data_type bigint +min +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_bigint = 999 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_bigint = 999 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_both_unset` +WHERE `col_bigint` = 999 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_both_unset + filterExpr: (col_bigint = 999L) (type: boolean) + Statistics: Num rows: 10000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_bigint = 999L) (type: boolean) + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_both_unset + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double 
+#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_both_unset + name: default.stats_both_unset + Truncated Path -> Alias: + /stats_both_unset [stats_both_unset] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: 
false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_both_unset col_float +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_both_unset +POSTHOOK: query: DESCRIBE FORMATTED stats_both_unset col_float +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_both_unset +col_name col_float +data_type float +min +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_float > 50.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_float > 50.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_both_unset` +WHERE `col_float` > 50 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_both_unset + filterExpr: (col_float > 50.0) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_float > 50.0) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_both_unset + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_both_unset + name: default.stats_both_unset + Truncated Path -> Alias: + /stats_both_unset [stats_both_unset] + Reducer 2 + Execution mode: vectorized, 
llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_both_unset col_double +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_both_unset +POSTHOOK: query: DESCRIBE FORMATTED stats_both_unset col_double +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_both_unset +col_name col_double +data_type double +min +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_both_unset WHERE col_double < 500.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT 
count(1) FROM stats_both_unset WHERE col_double < 500.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_both_unset +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_both_unset` +WHERE `col_double` < 500 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_both_unset + filterExpr: (col_double < 500.0D) (type: boolean) + Statistics: Num rows: 10000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_double < 500.0D) (type: boolean) + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_both_unset + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns 
col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_both_unset + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_both_unset + name: default.stats_both_unset + Truncated Path -> Alias: + /stats_both_unset [stats_both_unset] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_min_only col_tinyint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_min_only +POSTHOOK: query: DESCRIBE FORMATTED stats_min_only col_tinyint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_min_only +col_name col_tinyint +data_type tinyint +min -10 +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_tinyint > 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_tinyint > 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_min_only` +WHERE `col_tinyint` > 50 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_min_only + filterExpr: (col_tinyint > 50Y) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_tinyint > 50Y) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: 
COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_min_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_min_only + name: 
default.stats_min_only + Truncated Path -> Alias: + /stats_min_only [stats_min_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_min_only col_smallint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_min_only +POSTHOOK: query: DESCRIBE FORMATTED stats_min_only col_smallint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_min_only +col_name col_smallint +data_type smallint +min 100 +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_smallint > 200 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_smallint > 200 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_min_only` +WHERE `col_smallint` > 200 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_min_only + filterExpr: (col_smallint > 200S) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_smallint > 200S) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_min_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_min_only + name: default.stats_min_only + Truncated Path -> Alias: + /stats_min_only [stats_min_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_min_only col_int +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_min_only +POSTHOOK: query: DESCRIBE FORMATTED stats_min_only col_int +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_min_only +col_name col_int +data_type int +min -1000 +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_int > 2000 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_int > 2000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_min_only` +WHERE `col_int` > 2000 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_min_only + filterExpr: (col_int > 2000) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_int > 2000) (type: boolean) + Statistics: Num rows: 3333 Data size: 
13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_min_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_min_only 
+ name: default.stats_min_only + Truncated Path -> Alias: + /stats_min_only [stats_min_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_min_only col_bigint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_min_only +POSTHOOK: query: DESCRIBE FORMATTED stats_min_only col_bigint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_min_only +col_name col_bigint +data_type bigint +min 10000 +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_bigint > 20000 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_bigint > 20000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_min_only` +WHERE `col_bigint` > 20000 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_min_only + filterExpr: (col_bigint > 20000L) (type: boolean) + Statistics: Num rows: 10000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_bigint > 20000L) (type: boolean) + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_min_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_min_only + name: default.stats_min_only + Truncated Path -> Alias: + /stats_min_only [stats_min_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_min_only col_float +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_min_only +POSTHOOK: query: DESCRIBE FORMATTED stats_min_only col_float +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_min_only +col_name col_float +data_type float +min -10.5 +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_float > 50.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_float > 50.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_min_only` +WHERE `col_float` > 50 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_min_only + filterExpr: (col_float > 50.0) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_float > 50.0) (type: boolean) + Statistics: Num rows: 
3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_min_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.stats_min_only + name: default.stats_min_only + Truncated Path -> Alias: + /stats_min_only [stats_min_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_min_only col_double +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_min_only +POSTHOOK: query: DESCRIBE FORMATTED stats_min_only col_double +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_min_only +col_name col_double +data_type double +min 100.5 +max +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE 
col_double > 200.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_min_only WHERE col_double > 200.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_min_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_min_only` +WHERE `col_double` > 200 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_min_only + filterExpr: (col_double > 200.0D) (type: boolean) + Statistics: Num rows: 10000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_double > 200.0D) (type: boolean) + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_min_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_min_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_min_only + name: default.stats_min_only + Truncated Path -> Alias: + /stats_min_only [stats_min_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_max_only col_tinyint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_max_only +POSTHOOK: query: DESCRIBE FORMATTED stats_max_only col_tinyint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_max_only +col_name col_tinyint +data_type tinyint +min +max -5 +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_tinyint < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_tinyint < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_max_only` +WHERE `col_tinyint` < 50 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_max_only + filterExpr: (col_tinyint < 50Y) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter 
Operator + isSamplingPred: false + predicate: (col_tinyint < 50Y) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_max_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_max_only + name: default.stats_max_only + Truncated Path -> Alias: + /stats_max_only [stats_max_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_max_only col_smallint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_max_only +POSTHOOK: query: DESCRIBE FORMATTED stats_max_only col_smallint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_max_only +col_name col_smallint +data_type smallint +min +max 1000 +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE 
{\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_smallint < 500 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_smallint < 500 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_max_only` +WHERE `col_smallint` < 500 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_max_only + filterExpr: (col_smallint < 500S) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_smallint < 500S) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was 
here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_max_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_max_only + name: default.stats_max_only + Truncated Path -> Alias: + /stats_max_only [stats_max_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_max_only col_int +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_max_only +POSTHOOK: query: DESCRIBE FORMATTED stats_max_only col_int +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_max_only +col_name col_int +data_type int +min +max -500 +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_int < 5000 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_int < 5000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_max_only` +WHERE `col_int` < 5000 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_max_only + filterExpr: (col_int < 5000) 
(type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_int < 5000) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_max_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types 
tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_max_only + name: default.stats_max_only + Truncated Path -> Alias: + /stats_max_only [stats_max_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_max_only col_bigint +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_max_only +POSTHOOK: query: DESCRIBE FORMATTED stats_max_only col_bigint +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_max_only +col_name col_bigint +data_type bigint +min +max 100000 +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from 
deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_bigint < 50000 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_bigint < 50000 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_max_only` +WHERE `col_bigint` < 50000 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_max_only + filterExpr: (col_bigint < 50000L) (type: boolean) + Statistics: Num rows: 10000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_bigint < 50000L) (type: boolean) + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> 
Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_max_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_max_only + name: default.stats_max_only + Truncated Path -> Alias: + /stats_max_only [stats_max_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_max_only col_float +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_max_only +POSTHOOK: query: DESCRIBE FORMATTED stats_max_only col_float +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_max_only +col_name col_float +data_type float +min +max -25.5 +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_float < 50.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_float < 50.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_max_only` +WHERE `col_float` < 50 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + 
TableScan + alias: stats_max_only + filterExpr: (col_float < 50.0) (type: boolean) + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_float < 50.0) (type: boolean) + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 13332 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_max_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + 
columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_max_only + name: default.stats_max_only + Truncated Path -> Alias: + /stats_max_only [stats_max_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: DESCRIBE FORMATTED stats_max_only col_double +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@stats_max_only +POSTHOOK: query: DESCRIBE FORMATTED stats_max_only col_double +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@stats_max_only +col_name col_double +data_type double +min +max 1000.5 +num_nulls 0 +distinct_count 100 +avg_col_len +max_col_len +num_trues +num_falses 
+bit_vector +comment from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"col_bigint\":\"true\",\"col_double\":\"true\",\"col_float\":\"true\",\"col_int\":\"true\",\"col_smallint\":\"true\",\"col_tinyint\":\"true\"}} +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_double < 500.0 +PREHOOK: type: QUERY +PREHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM stats_max_only WHERE col_double < 500.0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@stats_max_only +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `_c0` +FROM `default`.`stats_max_only` +WHERE `col_double` < 500 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: stats_max_only + filterExpr: (col_double < 500.0D) (type: boolean) + Statistics: Num rows: 10000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (col_double < 500.0D) (type: boolean) + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 3333 Data size: 26664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + bucketingVersion: 2 + null sort order: + numBuckets: -1 + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized, llap + LLAP 
IO: all inputs + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: stats_max_only + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucketing_version 2 + column.name.delimiter , + columns col_tinyint,col_smallint,col_int,col_bigint,col_float,col_double + columns.comments + columns.types tinyint:smallint:int:bigint:float:double +#### A masked pattern was here #### + name default.stats_max_only + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.stats_max_only + name: default.stats_max_only + Truncated Path -> Alias: + /stats_max_only [stats_max_only] + Reducer 2 + Execution mode: vectorized, llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + bucketingVersion: 2 + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + bucketing_version -1 + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out index 9d5317b685d0..59a80dda385a 100644 --- a/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/llap/temp_table_display_colstats_tbllvl.q.out @@ -610,8 +610,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab col_name a data_type int -min 0 -max 0 +min +max num_nulls 0 distinct_count 0 avg_col_len @@ -629,8 +629,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@empty_tab col_name b data_type double -min 0.0 -max 0.0 +min +max num_nulls 0 distinct_count 0 avg_col_len