From ca529097d59d957523272c5ce1eb58d78e266fa5 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 17 Nov 2023 15:40:47 +0800 Subject: [PATCH] [fix](statistics)Fix alter column stats bug (#27093) Encode the min and max values with the Base64 encoder when injecting the column stats. --- .../statistics/StatisticsRepository.java | 10 +++-- .../hive/test_hive_statistic.groovy | 39 +++++++++++++++++++ 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 63953f5bfb5397..2b14d588308034 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -35,6 +35,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.nio.charset.StandardCharsets; +import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -269,8 +271,10 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt params.put("count", String.valueOf(columnStatistic.count)); params.put("ndv", String.valueOf(columnStatistic.ndv)); params.put("nullCount", String.valueOf(columnStatistic.numNulls)); - params.put("min", min == null ? "NULL" : min); - params.put("max", max == null ? "NULL" : max); + params.put("min", min == null ? "NULL" : + Base64.getEncoder().encodeToString(min.getBytes(StandardCharsets.UTF_8))); + params.put("max", max == null ? 
"NULL" : + Base64.getEncoder().encodeToString(max.getBytes(StandardCharsets.UTF_8))); params.put("dataSize", String.valueOf(columnStatistic.dataSize)); if (partitionIds.isEmpty()) { @@ -278,7 +282,7 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt params.put("partId", "NULL"); StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params); Env.getCurrentEnv().getStatisticsCache() - .updateColStatsCache(objects.table.getId(), -1, colName, builder.build()); + .updateColStatsCache(objects.table.getId(), -1, colName, columnStatistic); } else { // update partition granularity statistics for (Long partitionId : partitionIds) { diff --git a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy index ea777437ccd816..810713988347a6 100644 --- a/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy +++ b/regression-test/suites/external_table_p2/hive/test_hive_statistic.groovy @@ -293,6 +293,45 @@ suite("test_hive_statistic", "p2,external,hive,external_remote,external_remote_h assertEquals(result.size(), 1) assertEquals(result[0][6], "N/A") assertEquals(result[0][7], "N/A") + + sql """use tpch1_parquet;""" + sql """drop stats region""" + sql """alter table region modify column r_comment set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='330.0', 'min_value'='ges. thinly even pinto beans ca', 'max_value'='uickly special accounts cajole carefully blithely close requests. 
carefully final asymptotes haggle furiousl');""" + sql """alter table region modify column r_name set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST');""" + sql """alter table region modify column r_regionkey set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='20.0', 'min_value'='0', 'max_value'='4');""" + result = sql """show column stats region(r_regionkey)""" + assertEquals(result.size(), 1) + assertEquals(result[0][0], "r_regionkey") + assertEquals(result[0][1], "5.0") + assertEquals(result[0][2], "5.0") + assertEquals(result[0][3], "0.0") + assertEquals(result[0][4], "20.0") + assertEquals(result[0][5], "4.0") + assertEquals(result[0][6], "0") + assertEquals(result[0][7], "4") + + result = sql """show column stats region(r_comment)""" + assertEquals(result.size(), 1) + assertEquals(result[0][0], "r_comment") + assertEquals(result[0][1], "5.0") + assertEquals(result[0][2], "5.0") + assertEquals(result[0][3], "0.0") + assertEquals(result[0][4], "330.0") + assertEquals(result[0][5], "66.0") + assertEquals(result[0][6], "\'ges. thinly even pinto beans ca\'") + assertEquals(result[0][7], "\'uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl\'") + + result = sql """show column stats region(r_name)""" + assertEquals(result.size(), 1) + assertEquals(result[0][0], "r_name") + assertEquals(result[0][1], "5.0") + assertEquals(result[0][2], "5.0") + assertEquals(result[0][3], "0.0") + assertEquals(result[0][4], "34.0") + assertEquals(result[0][5], "6.8") + assertEquals(result[0][6], "\'AFRICA\'") + assertEquals(result[0][7], "\'MIDDLE EAST\'") + sql """drop catalog ${catalog_name}""" } }