Skip to content

Commit

Permalink
[fix](statistics)Fix sample min max npe bug apache#27702 (apache#27707)
Browse files Browse the repository at this point in the history
backport apache#27702
  • Loading branch information
Jibing-Li authored and eldenmoon committed Dec 3, 2023
1 parent e6182e8 commit 25a4ed1
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.text.StringSubstitutor;

import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -94,8 +92,8 @@ protected void doSample() throws Exception {
// Get basic stats, including min and max.
ResultRow basicStats = collectBasicStat(r);
long rowCount = tbl.getRowCount();
String min = Base64.getEncoder().encodeToString(basicStats.get(0).getBytes(StandardCharsets.UTF_8));
String max = Base64.getEncoder().encodeToString(basicStats.get(1).getBytes(StandardCharsets.UTF_8));
String min = StatisticsUtil.encodeValue(basicStats, 0);
String max = StatisticsUtil.encodeValue(basicStats, 1);

boolean limitFlag = false;
long rowsToSample = pair.second;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.nio.charset.StandardCharsets;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
Expand Down Expand Up @@ -271,10 +269,8 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt
params.put("count", String.valueOf(columnStatistic.count));
params.put("ndv", String.valueOf(columnStatistic.ndv));
params.put("nullCount", String.valueOf(columnStatistic.numNulls));
params.put("min", min == null ? "NULL" :
Base64.getEncoder().encodeToString(min.getBytes(StandardCharsets.UTF_8)));
params.put("max", max == null ? "NULL" :
Base64.getEncoder().encodeToString(max.getBytes(StandardCharsets.UTF_8)));
params.put("min", StatisticsUtil.encodeString(min));
params.put("max", StatisticsUtil.encodeString(max));
params.put("dataSize", String.valueOf(columnStatistic.dataSize));

if (partitionIds.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,14 @@
import org.apache.logging.log4j.Logger;

import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
Expand Down Expand Up @@ -935,4 +937,19 @@ public static int getAnalyzeTimeout() {
return StatisticConstants.ANALYZE_TIMEOUT_IN_SEC;
}

/**
 * Encodes the value stored at {@code index} of {@code row} for use as a statistics min/max value.
 *
 * <p>Returns the literal string {@code "NULL"} when there is nothing to encode: the row is
 * {@code null}, the row exposes no value list, or {@code index} lies outside
 * {@code [0, size)}. Otherwise the value is passed to {@link #encodeString(String)}, which
 * also yields {@code "NULL"} for a {@code null} value.
 *
 * @param row   result row to read from; may be {@code null}
 * @param index zero-based position of the value inside the row
 * @return the Base64 form of the value, or {@code "NULL"} when no value is available
 */
public static String encodeValue(ResultRow row, int index) {
    // Guard every way the lookup could fail so callers never see an NPE or an
    // out-of-bounds error: missing row, missing value list, negative or too-large index.
    if (row == null || row.getValues() == null
            || index < 0 || row.getValues().size() <= index) {
        return "NULL";
    }
    return encodeString(row.get(index));
}

/**
 * Converts a string into its Base64 representation.
 *
 * <p>The input is turned into bytes using UTF-8 and then encoded with the basic Base64
 * encoder. A {@code null} input is not encoded; the literal string {@code "NULL"} is
 * returned instead.
 *
 * @param value text to encode; may be {@code null}
 * @return the Base64 text of {@code value}, or {@code "NULL"} when {@code value} is {@code null}
 */
public static String encodeString(String value) {
    // Null has no byte representation; report it with the "NULL" marker.
    if (value == null) {
        return "NULL";
    }
    byte[] utf8Bytes = value.getBytes(StandardCharsets.UTF_8);
    Base64.Encoder encoder = Base64.getEncoder();
    return encoder.encodeToString(utf8Bytes);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,19 @@
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.statistics.ResultRow;

import com.google.common.collect.Lists;
import mockit.Mock;
import mockit.MockUp;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.nio.charset.StandardCharsets;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Base64;

public class StatisticsUtilTest {
@Test
Expand Down Expand Up @@ -111,4 +116,30 @@ protected SessionVariable findConfigFromGlobalSessionVar(String varName) throws
now = "23:30:00";
Assertions.assertFalse(StatisticsUtil.inAnalyzeTime(LocalTime.parse(now, timeFormatter)));
}


@Test
public void testEncodeValue() throws Exception {
    // Expected outputs used throughout: the NULL marker and the Base64 form of "a".
    final String nullMarker = "NULL";
    final String encodedA = Base64.getEncoder()
            .encodeToString("a".getBytes(StandardCharsets.UTF_8));

    // No row at all.
    Assertions.assertEquals(nullMarker, StatisticsUtil.encodeValue(null, 0));

    // Row constructed from a null value list.
    ResultRow current = new ResultRow(null);
    Assertions.assertEquals(nullMarker, StatisticsUtil.encodeValue(current, 0));

    // Row with a single value, queried one position past the end.
    ArrayList<String> singleValue = Lists.newArrayList();
    singleValue.add("a");
    current = new ResultRow(singleValue);
    Assertions.assertEquals(nullMarker, StatisticsUtil.encodeValue(current, 1));

    // Row whose only value is null.
    ArrayList<String> nullThenA = Lists.newArrayList();
    nullThenA.add(null);
    current = new ResultRow(nullThenA);
    Assertions.assertEquals(nullMarker, StatisticsUtil.encodeValue(current, 0));

    // Same list extended to [null, "a"]: null cell, real cell, then out of range.
    nullThenA.add("a");
    current = new ResultRow(nullThenA);
    Assertions.assertEquals(nullMarker, StatisticsUtil.encodeValue(current, 0));
    Assertions.assertEquals(encodedA, StatisticsUtil.encodeValue(current, 1));
    Assertions.assertEquals(nullMarker, StatisticsUtil.encodeValue(current, 2));
}
}

0 comments on commit 25a4ed1

Please sign in to comment.