diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 97cb10c520c7d8..04a763b688440a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -31,10 +31,8 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.commons.text.StringSubstitutor; -import java.nio.charset.StandardCharsets; import java.security.SecureRandom; import java.util.ArrayList; -import java.util.Base64; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -94,8 +92,8 @@ protected void doSample() throws Exception { // Get basic stats, including min and max. ResultRow basicStats = collectBasicStat(r); long rowCount = tbl.getRowCount(); - String min = Base64.getEncoder().encodeToString(basicStats.get(0).getBytes(StandardCharsets.UTF_8)); - String max = Base64.getEncoder().encodeToString(basicStats.get(1).getBytes(StandardCharsets.UTF_8)); + String min = StatisticsUtil.encodeValue(basicStats, 0); + String max = StatisticsUtil.encodeValue(basicStats, 1); boolean limitFlag = false; long rowsToSample = pair.second; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 2b14d588308034..12ca6b4aa1f938 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -35,8 +35,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.nio.charset.StandardCharsets; -import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -271,10 +269,8 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt params.put("count", String.valueOf(columnStatistic.count)); params.put("ndv", String.valueOf(columnStatistic.ndv)); params.put("nullCount", String.valueOf(columnStatistic.numNulls)); - params.put("min", min == null ? "NULL" : - Base64.getEncoder().encodeToString(min.getBytes(StandardCharsets.UTF_8))); - params.put("max", max == null ? "NULL" : - Base64.getEncoder().encodeToString(max.getBytes(StandardCharsets.UTF_8))); + params.put("min", StatisticsUtil.encodeString(min)); + params.put("max", StatisticsUtil.encodeString(max)); params.put("dataSize", String.valueOf(columnStatistic.dataSize)); if (partitionIds.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 8856d059a9a412..660cb874e9b885 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -90,12 +90,14 @@ import org.apache.logging.log4j.Logger; import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.time.LocalTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.Date; @@ -935,4 +937,19 @@ public static int getAnalyzeTimeout() { return StatisticConstants.ANALYZE_TIMEOUT_IN_SEC; } + public static String encodeValue(ResultRow row, int index) { + if (row == null || row.getValues().size() <= index) { + return "NULL"; + } + return encodeString(row.get(index)); + } + + public static String encodeString(String value) { + if (value == null) { + return "NULL"; + } else { + return Base64.getEncoder().encodeToString(value.getBytes(StandardCharsets.UTF_8)); + } + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 107a5f53822fe5..2c0854dcf2125b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -20,14 +20,19 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.qe.SessionVariable; +import org.apache.doris.statistics.ResultRow; +import com.google.common.collect.Lists; import mockit.Mock; import mockit.MockUp; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.nio.charset.StandardCharsets; import java.time.LocalTime; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Base64; public class StatisticsUtilTest { @Test @@ -111,4 +116,30 @@ protected SessionVariable findConfigFromGlobalSessionVar(String varName) throws now = "23:30:00"; Assertions.assertFalse(StatisticsUtil.inAnalyzeTime(LocalTime.parse(now, timeFormatter))); } + + + @Test + public void testEncodeValue() throws Exception { + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(null, 0)); + + ResultRow row = new ResultRow(null); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 0)); + + ArrayList values = Lists.newArrayList(); + values.add("a"); + row = new ResultRow(values); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 1)); + + values = Lists.newArrayList(); + values.add(null); + row = new ResultRow(values); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 0)); + + values.add("a"); + row = new ResultRow(values); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 0)); + Assertions.assertEquals(Base64.getEncoder() + .encodeToString("a".getBytes(StandardCharsets.UTF_8)), StatisticsUtil.encodeValue(row, 1)); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 2)); + } }