From fdb5e98b109c4eff03c201635666718a9443c6c3 Mon Sep 17 00:00:00 2001
From: Jibing Li
Date: Wed, 30 Aug 2023 19:58:42 +0800
Subject: [PATCH] Show column stats even when an error occurred.

---
 .../doris/statistics/ColumnStatistic.java     | 21 +++++++--
 .../hive/test_hive_statistic.groovy           | 47 +++++++++++++++++++
 2 files changed, 63 insertions(+), 5 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
index 85965e9513d0ac..80d33e7c85f7fb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java
@@ -21,6 +21,7 @@
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.PartitionInfo;
 import org.apache.doris.catalog.Type;
+import org.apache.doris.common.AnalysisException;
 import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
@@ -168,21 +169,31 @@ public static ColumnStatistic fromResultRow(ResultRow row) {
             String min = row.get(10);
             String max = row.get(11);
             if (min != null && !min.equalsIgnoreCase("NULL")) {
-                columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
-                columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
+                try {
+                    columnStatisticBuilder.setMinValue(StatisticsUtil.convertToDouble(col.getType(), min));
+                    columnStatisticBuilder.setMinExpr(StatisticsUtil.readableValue(col.getType(), min));
+                } catch (AnalysisException e) {
+                    LOG.warn("Failed to deserialize column {} min value {}.", col, min, e);
+                    columnStatisticBuilder.setMinValue(Double.MIN_VALUE);
+                }
             } else {
                 columnStatisticBuilder.setMinValue(Double.MIN_VALUE);
             }
             if (max != null && !max.equalsIgnoreCase("NULL")) {
-                columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
-                columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
+                try {
+                    columnStatisticBuilder.setMaxValue(StatisticsUtil.convertToDouble(col.getType(), max));
+                    columnStatisticBuilder.setMaxExpr(StatisticsUtil.readableValue(col.getType(), max));
+                } catch (AnalysisException e) {
+                    LOG.warn("Failed to deserialize column {} max value {}.", col, max, e);
+                    columnStatisticBuilder.setMaxValue(Double.MAX_VALUE);
+                }
             } else {
                 columnStatisticBuilder.setMaxValue(Double.MAX_VALUE);
             }
             columnStatisticBuilder.setUpdatedTime(row.get(13));
             return columnStatisticBuilder.build();
         } catch (Exception e) {
-            LOG.warn("Failed to deserialize column statistics, column not exists", e);
+            LOG.warn("Failed to deserialize column statistics.", e);
             return ColumnStatistic.UNKNOWN;
         }
     }
diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
index 2366267a27ea24..85c8326382cf6f 100644
--- a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
+++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy
@@ -243,6 +243,53 @@ suite("test_hive_statistic", "p2,external,hive,external_remote,external_remote_h
         sql """drop stats statistics"""
         result = sql """show column cached stats statistics"""
         assertTrue(result.size() == 0)
+
+        sql """use multi_catalog"""
+        sql """analyze table logs1_parquet (log_time) with sync"""
+        def ctlId
+        def dbId
+        def tblId
+        result = sql """show proc '/catalogs'"""
+
+        for (int i = 0; i < result.size(); i++) {
+            if (result[i][1] == catalog_name) {
+                ctlId = result[i][0]
+            }
+        }
+        result = sql """show proc '/catalogs/$ctlId'"""
+        for (int i = 0; i < result.size(); i++) {
+            if (result[i][1] == 'multi_catalog') {
+                dbId = result[i][0]
+            }
+        }
+        result = sql """show proc '/catalogs/$ctlId/$dbId'"""
+        for (int i = 0; i < result.size(); i++) {
+            if (result[i][1] == 'logs1_parquet') {
+                tblId = result[i][0]
+            }
+        }
+
+        result = sql """select * from internal.__internal_schema.column_statistics where id = '${tblId}--1-log_time'"""
+        assertTrue(result.size() == 1)
+        def id = result[0][0]
+        def catalog_id = result[0][1]
+        def db_id = result[0][2]
+        def tbl_id = result[0][3]
+        def idx_id = result[0][4]
+        def col_id = result[0][5]
+        def count = result[0][7]
+        def ndv = result[0][8]
+        def null_count = result[0][9]
+        def data_size_in_bytes = result[0][12]
+        def update_time = result[0][13]
+
+        sql """insert into internal.__internal_schema.column_statistics values ('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count, $ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time')"""
+
+        result = sql """show column stats logs1_parquet (log_time)"""
+        assertTrue(result.size() == 1)
+        assertTrue(result[0][6] == "N/A")
+        assertTrue(result[0][7] == "N/A")
+        sql """drop catalog ${catalog_name}"""
 
     }
 }
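
For reviewers, the following is a minimal, self-contained sketch of the fallback behaviour the ColumnStatistic.java hunk introduces: a malformed or unparsable min/max literal now degrades to an open bound and a warning instead of turning the whole column statistic into UNKNOWN. It is not the Doris code itself; parseMinOrFallback and the use of Double.parseDouble / NumberFormatException are hypothetical stand-ins for StatisticsUtil.convertToDouble and the AnalysisException path in the real patch.

```java
public class MinMaxFallbackSketch {

    // Stand-in for the min-value branch of ColumnStatistic.fromResultRow:
    // keep the rest of the statistic and fall back to an open lower bound
    // when the stored literal cannot be deserialized.
    static double parseMinOrFallback(String raw) {
        if (raw == null || raw.equalsIgnoreCase("NULL")) {
            return Double.MIN_VALUE;            // no stored min: open lower bound
        }
        try {
            return Double.parseDouble(raw);     // stand-in for StatisticsUtil.convertToDouble
        } catch (NumberFormatException e) {
            // Corrupted literal: warn and fall back instead of discarding all column stats.
            System.err.println("Failed to deserialize min value " + raw + ": " + e.getMessage());
            return Double.MIN_VALUE;
        }
    }

    public static void main(String[] args) {
        System.out.println(parseMinOrFallback("42.5"));          // 42.5
        System.out.println(parseMinOrFallback("not-a-number"));  // falls back to Double.MIN_VALUE
        System.out.println(parseMinOrFallback(null));            // Double.MIN_VALUE
    }
}
```

The Groovy regression test above exercises the same idea end to end: it overwrites a stored min/max with NULL/empty values and then asserts that `show column stats` still returns a row, with min and max rendered as "N/A".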