From f39d9c3c223e765583656d221866abb6b84b75d2 Mon Sep 17 00:00:00 2001 From: Jibing Li Date: Mon, 25 Mar 2024 13:16:17 +0800 Subject: [PATCH] Support identical column name in different index. --- .../doris/analysis/ShowColumnStatsStmt.java | 9 +- .../org/apache/doris/catalog/OlapTable.java | 38 ++--- .../java/org/apache/doris/catalog/Table.java | 11 +- .../org/apache/doris/catalog/TableIf.java | 7 +- .../doris/datasource/ExternalTable.java | 26 ++-- .../org/apache/doris/qe/ShowExecutor.java | 37 ++--- .../apache/doris/statistics/AnalysisInfo.java | 20 +-- .../doris/statistics/AnalysisInfoBuilder.java | 12 +- .../apache/doris/statistics/AnalysisJob.java | 4 +- .../doris/statistics/AnalysisManager.java | 143 ++++++------------ .../doris/statistics/OlapAnalysisTask.java | 9 +- .../statistics/StatisticsAutoCollector.java | 40 +++-- .../doris/statistics/StatisticsCollector.java | 2 +- .../statistics/StatisticsRepository.java | 3 +- .../doris/statistics/TableStatsMeta.java | 76 ++++------ .../doris/statistics/AnalysisManagerTest.java | 65 ++++---- .../statistics/AnalysisTaskExecutorTest.java | 11 +- .../apache/doris/statistics/AnalyzeTest.java | 11 +- .../doris/statistics/HistogramTaskTest.java | 1 - .../StatisticsAutoCollectorTest.java | 121 +++------------ .../doris/statistics/TableStatsMetaTest.java | 4 +- .../suites/statistics/analyze_stats.groovy | 4 +- .../statistics/test_analyze_mtmv.groovy | 40 ++--- .../suites/statistics/test_analyze_mv.groovy | 38 ++--- 24 files changed, 293 insertions(+), 439 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java index 37be76b20df09d..a4216f55661e16 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowColumnStatsStmt.java @@ -32,6 +32,7 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSet; import org.apache.doris.qe.ShowResultSetMetaData; +import org.apache.doris.statistics.AnalysisManager; import org.apache.doris.statistics.ColStatsMeta; import org.apache.doris.statistics.ColumnStatistic; @@ -138,14 +139,15 @@ public TableIf getTable() { public ShowResultSet constructResultSet(List, ColumnStatistic>> columnStatistics) { List> result = Lists.newArrayList(); + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); columnStatistics.forEach(p -> { if (p.second.isUnKnown) { return; } - List row = Lists.newArrayList(); - row.add(p.first.first); + // p data structure is Pair, ColumnStatistic> row.add(p.first.second); + row.add(p.first.first); row.add(String.valueOf(p.second.count)); row.add(String.valueOf(p.second.ndv)); row.add(String.valueOf(p.second.numNulls)); @@ -153,8 +155,7 @@ public ShowResultSet constructResultSet(List, ColumnSt row.add(String.valueOf(p.second.avgSizeByte)); row.add(String.valueOf(p.second.minExpr == null ? "N/A" : p.second.minExpr.toSql())); row.add(String.valueOf(p.second.maxExpr == null ? "N/A" : p.second.maxExpr.toSql())); - ColStatsMeta colStatsMeta = Env.getCurrentEnv().getAnalysisManager().findColStatsMeta(table.getId(), - p.first.first); + ColStatsMeta colStatsMeta = analysisManager.findColStatsMeta(table.getId(), p.first.first, p.first.second); row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.analysisMethod)); row.add(String.valueOf(colStatsMeta == null ? 
"N/A" : colStatsMeta.analysisType)); row.add(String.valueOf(colStatsMeta == null ? "N/A" : colStatsMeta.jobType)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index b6239f486cdeec..3de6cccb7355ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1279,11 +1279,11 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) { if (tblStats == null) { return true; } - if (!tblStats.analyzeColumns().containsAll(getBaseSchema() + if (!tblStats.analyzeColumns().containsAll(getColumnIndexPairs(getSchemaAllIndexes(false) .stream() .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) .map(Column::getName) - .collect(Collectors.toSet()))) { + .collect(Collectors.toSet())))) { return true; } long rowCount = getRowCount(); @@ -1296,34 +1296,20 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) { } @Override - public Map> findReAnalyzeNeededPartitions() { - TableStatsMeta tableStats = Env.getCurrentEnv().getAnalysisManager().findTableStatsStatus(getId()); - Set allPartitions = getPartitionNames().stream().map(this::getPartition) - .filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet()); - if (tableStats == null) { - Map> ret = Maps.newHashMap(); - for (Column col : getSchemaAllIndexes(false)) { - if (StatisticsUtil.isUnsupportedType(col.getType())) { + public List> getColumnIndexPairs(Set columns) { + List> ret = Lists.newArrayList(); + // Check the schema of all indexes for each given column name, + // If the column name exists in the index, add the pair to return list. + for (String column : columns) { + for (MaterializedIndexMeta meta : indexIdToMeta.values()) { + Column col = meta.getColumnByName(column); + if (col == null || StatisticsUtil.isUnsupportedType(col.getType())) { continue; } - ret.put(col.getName(), allPartitions); + ret.add(Pair.of(getIndexNameById(meta.getIndexId()), column)); } - return ret; } - Map> colToPart = new HashMap<>(); - for (Column col : getSchemaAllIndexes(false)) { - if (StatisticsUtil.isUnsupportedType(col.getType())) { - continue; - } - long lastUpdateTime = tableStats.findColumnLastUpdateTime(col.getName()); - Set partitions = getPartitionNames().stream() - .map(this::getPartition) - .filter(Partition::hasData) - .filter(partition -> partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName) - .collect(Collectors.toSet()); - colToPart.put(col.getName(), partitions); - } - return colToPart; + return ret; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java index 35f5b14efc5ab9..52655fa064943d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java @@ -24,6 +24,7 @@ import org.apache.doris.common.ErrorCode; import org.apache.doris.common.FeMetaVersion; import org.apache.doris.common.MetaNotFoundException; +import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.QueryableReentrantReadWriteLock; @@ -647,11 +648,6 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) { return true; } - @Override - public Map> findReAnalyzeNeededPartitions() { - return Collections.emptyMap(); - } - @Override public List getChunkSizes() { throw 
new NotImplementedException("getChunkSized not implemented"); @@ -661,4 +657,9 @@ public List getChunkSizes() { public long fetchRowCount() { return 0; } + + @Override + public List> getColumnIndexPairs(Set columns) { + return Lists.newArrayList(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 484dd3bb6ebfe3..f7c8b4b83252bf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -25,6 +25,7 @@ import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; +import org.apache.doris.common.Pair; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.persist.AlterConstraintLog; import org.apache.doris.statistics.AnalysisInfo; @@ -184,7 +185,11 @@ default long getRowCountForNereids() { boolean needReAnalyzeTable(TableStatsMeta tblStats); - Map> findReAnalyzeNeededPartitions(); + /** + * @param columns Set of column names. + * @return List of pairs. Each pair is . For external table, index name is table name. + */ + List> getColumnIndexPairs(Set columns); // Get all the chunk sizes of this table. Now, only HMS external table implemented this interface. // For HMS external table, the return result is a list of all the files' size. diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java index 7f82d0d38768bf..82390b916560a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java @@ -24,6 +24,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.constraint.Constraint; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.common.util.Util; @@ -36,7 +37,7 @@ import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.thrift.TTableDescriptor; -import com.google.common.collect.Sets; +import com.google.common.collect.Lists; import com.google.gson.annotations.SerializedName; import lombok.Getter; import org.apache.commons.lang3.NotImplementedException; @@ -46,7 +47,6 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; @@ -322,11 +322,12 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) { if (tblStats == null) { return true; } - if (!tblStats.analyzeColumns().containsAll(getBaseSchema() + if (!tblStats.analyzeColumns().containsAll(getColumnIndexPairs( + getBaseSchema() .stream() .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) .map(Column::getName) - .collect(Collectors.toSet()))) { + .collect(Collectors.toSet())))) { return true; } return System.currentTimeMillis() @@ -334,12 +335,17 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) { } @Override - public Map> findReAnalyzeNeededPartitions() { - HashSet partitions = Sets.newHashSet(); - // TODO: Find a way to collect external table partitions that need to be analyzed. 
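The getColumnIndexPairs contract introduced above can be pictured with plain JDK types. A toy, non-Doris sketch follows (hypothetical index layout; the real OlapTable implementation also filters out columns with unsupported types):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Toy model (not Doris code) of getColumnIndexPairs: every index whose
// schema contains a requested column contributes one <index name, column
// name> pair, so identical column names in different indexes stay distinct.
public class ColumnIndexPairsSketch {
    public static void main(String[] args) {
        Map<String, Set<String>> indexSchemas = new LinkedHashMap<>();
        indexSchemas.put("base", new LinkedHashSet<>(Arrays.asList("col1", "col2")));
        indexSchemas.put("rollup1", new LinkedHashSet<>(Arrays.asList("col1")));

        Set<String> wanted = new LinkedHashSet<>(Arrays.asList("col1", "col2"));
        List<String[]> pairs = new ArrayList<>();
        for (String column : wanted) {                        // same loop shape as OlapTable
            for (Map.Entry<String, Set<String>> index : indexSchemas.entrySet()) {
                if (index.getValue().contains(column)) {      // unsupported-type filter omitted
                    pairs.add(new String[] {index.getKey(), column});
                }
            }
        }
        pairs.forEach(p -> System.out.println(Arrays.toString(p)));
        // [base, col1]
        // [rollup1, col1]
        // [base, col2]
    }
}

For an external table the inner loop degenerates to a single "index" carrying the table name, which is exactly what the ExternalTable override below does.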
- partitions.add("Dummy Partition"); - return getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .collect(Collectors.toMap(Column::getName, k -> partitions)); + public List> getColumnIndexPairs(Set columns) { + List> ret = Lists.newArrayList(); + for (String column : columns) { + Column col = getColumn(column); + if (col == null || StatisticsUtil.isUnsupportedType(col.getType())) { + continue; + } + // External table put table name as index name. + ret.add(Pair.of(String.valueOf(name), column)); + } + return ret; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 61beeb57ce698f..afdc317e711770 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -2532,16 +2532,18 @@ private void handleShowColumnStats() throws AnalysisException { private void getStatsForAllColumns(List, ColumnStatistic>> columnStatistics, TableIf tableIf) throws AnalysisException { List resultRows = StatisticsRepository.queryColumnStatisticsForTable(tableIf.getId()); + // row[4] is index id, row[5] is column name. for (ResultRow row : resultRows) { - String indexName = "N/A"; + String indexName = tableIf.getName(); long indexId = Long.parseLong(row.get(4)); - if (indexId != -1) { - indexName = ((OlapTable) tableIf).getIndexNameById(indexId); - if (indexName == null) { - continue; - } + if (tableIf instanceof OlapTable) { + OlapTable olapTable = (OlapTable) tableIf; + indexName = olapTable.getIndexNameById(indexId == -1 ? olapTable.getBaseIndexId() : indexId); + } + if (indexName == null) { + continue; } - columnStatistics.add(Pair.of(Pair.of(row.get(5), indexName), ColumnStatistic.fromResultRow(row))); + columnStatistics.add(Pair.of(Pair.of(indexName, row.get(5)), ColumnStatistic.fromResultRow(row))); } } @@ -2558,28 +2560,29 @@ private void getStatsForSpecifiedColumns(List, ColumnS indexIds.add(-1L); } for (long indexId : indexIds) { - String indexName = "N/A"; - if (indexId != -1) { - indexName = ((OlapTable) tableIf).getIndexNameById(indexId); - if (indexName == null) { - continue; - } + String indexName = tableIf.getName(); + if (tableIf instanceof OlapTable) { + OlapTable olapTable = (OlapTable) tableIf; + indexName = olapTable.getIndexNameById(indexId == -1 ? olapTable.getBaseIndexId() : indexId); + } + if (indexName == null) { + continue; } // Show column statistics in columnStatisticsCache. 
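The index-name resolution rule used in getStatsForAllColumns and getStatsForSpecifiedColumns is small but easy to misread. A toy sketch with hypothetical ids (not Doris code), where -1 stands for the base index and a null result means the index has been dropped:

import java.util.HashMap;
import java.util.Map;

// Toy model (not Doris code) of how SHOW COLUMN STATS resolves the index
// name: index id -1 is mapped to the base index; non-OLAP (external) tables
// fall back to the table name.
public class IndexNameSketch {
    static String resolve(boolean isOlap, long indexId, long baseIndexId,
            Map<Long, String> indexIdToName, String tableName) {
        if (!isOlap) {
            return tableName;
        }
        return indexIdToName.get(indexId == -1 ? baseIndexId : indexId);
    }

    public static void main(String[] args) {
        Map<Long, String> names = new HashMap<>();
        names.put(10L, "t");   // base index
        names.put(11L, "mv1"); // materialized view index
        System.out.println(resolve(true, -1, 10L, names, "t"));    // t
        System.out.println(resolve(true, 11, 10L, names, "t"));    // mv1
        System.out.println(resolve(false, -1, 0L, names, "ext"));  // ext
    }
}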
if (showCache) { ColumnStatistic columnStatistic = Env.getCurrentEnv().getStatisticsCache().getColumnStatistics( tableIf.getDatabase().getCatalog().getId(), tableIf.getDatabase().getId(), tableIf.getId(), indexId, colName); - columnStatistics.add(Pair.of(Pair.of(colName, indexName), columnStatistic)); + columnStatistics.add(Pair.of(Pair.of(indexName, colName), columnStatistic)); } else if (partitionNames == null) { ColumnStatistic columnStatistic = StatisticsRepository.queryColumnStatisticsByName(tableIf.getId(), indexId, colName); - columnStatistics.add(Pair.of(Pair.of(colName, indexName), columnStatistic)); + columnStatistics.add(Pair.of(Pair.of(indexName, colName), columnStatistic)); } else { String finalIndexName = indexName; columnStatistics.addAll(StatisticsRepository.queryColumnStatisticsByPartitions(tableName, colName, partitionNames.getPartitionNames()) - .stream().map(s -> Pair.of(Pair.of(colName, finalIndexName), s)) + .stream().map(s -> Pair.of(Pair.of(finalIndexName, colName), s)) .collect(Collectors.toList())); } } @@ -2983,7 +2986,7 @@ private void handleShowAnalyzeTaskStatus() { if (table instanceof OlapTable && analysisInfo.indexId != -1) { row.add(((OlapTable) table).getIndexNameById(analysisInfo.indexId)); } else { - row.add("N/A"); + row.add(table.getName()); } row.add(analysisInfo.message); row.add(TimeUtils.DATETIME_FORMAT.format( diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index c707107e0e0fb6..c167db2228d8cc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -18,6 +18,7 @@ package org.apache.doris.statistics; import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.persist.gson.GsonUtils; @@ -35,7 +36,6 @@ import java.io.IOException; import java.text.ParseException; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.StringJoiner; @@ -95,8 +95,8 @@ public enum ScheduleType { @SerializedName("tblId") public final long tblId; - // TODO: Map here is wired, List is enough - public final Map> colToPartitions; + // Pair + public final List> jobColumns; public final Set partitionNames; @@ -200,7 +200,7 @@ public enum ScheduleType { public final boolean userInject; public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, long dbId, long tblId, - Map> colToPartitions, Set partitionNames, String colName, Long indexId, + List> jobColumns, Set partitionNames, String colName, Long indexId, JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType, int samplePercent, long sampleRows, int maxBucketNum, long periodTimeInMs, String message, long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType, @@ -213,7 +213,7 @@ public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, this.catalogId = catalogId; this.dbId = dbId; this.tblId = tblId; - this.colToPartitions = colToPartitions; + this.jobColumns = jobColumns; this.partitionNames = partitionNames; this.colName = colName; this.indexId = indexId; @@ -268,8 +268,8 @@ public String toString() { if (maxBucketNum > 0) { sj.add("MaxBucketNum: " + maxBucketNum); } - if (colToPartitions != null) { - sj.add("colToPartitions: " + 
getColToPartitionStr()); + if (jobColumns != null) { + sj.add("jobColumns: " + getJobColumns()); } if (lastExecTimeInMs > 0) { sj.add("LastExecTime: " + StatisticsUtil.getReadableTime(lastExecTimeInMs)); @@ -301,12 +301,12 @@ public void addTaskId(long taskId) { taskIds.add(taskId); } - public String getColToPartitionStr() { - if (colToPartitions == null || colToPartitions.isEmpty()) { + public String getJobColumns() { + if (jobColumns == null || jobColumns.isEmpty()) { return ""; } Gson gson = new Gson(); - return gson.toJson(colToPartitions); + return gson.toJson(jobColumns); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java index 22f3d22b3ce77c..00cf9f7b1bc560 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfoBuilder.java @@ -17,6 +17,7 @@ package org.apache.doris.statistics; +import org.apache.doris.common.Pair; import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import org.apache.doris.statistics.AnalysisInfo.AnalysisMode; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; @@ -26,7 +27,6 @@ import org.apache.logging.log4j.core.util.CronExpression; import java.util.List; -import java.util.Map; import java.util.Set; public class AnalysisInfoBuilder { @@ -36,7 +36,7 @@ public class AnalysisInfoBuilder { private long catalogId; private long dbId; private long tblId; - private Map> colToPartitions; + private List> jobColumns; private Set partitionNames; private String colName; private long indexId = -1L; @@ -75,7 +75,7 @@ public AnalysisInfoBuilder(AnalysisInfo info) { catalogId = info.catalogId; dbId = info.dbId; tblId = info.tblId; - colToPartitions = info.colToPartitions; + jobColumns = info.jobColumns; partitionNames = info.partitionNames; colName = info.colName; indexId = info.indexId; @@ -135,8 +135,8 @@ public AnalysisInfoBuilder setTblId(long tblId) { return this; } - public AnalysisInfoBuilder setColToPartitions(Map> colToPartitions) { - this.colToPartitions = colToPartitions; + public AnalysisInfoBuilder setJobColumns(List> jobColumns) { + this.jobColumns = jobColumns; return this; } @@ -276,7 +276,7 @@ public AnalysisInfoBuilder setUserInject(boolean userInject) { } public AnalysisInfo build() { - return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, colToPartitions, partitionNames, + return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, jobColumns, partitionNames, colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent, sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType, externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount, diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java index f52764bd6c9f4a..5fd5e43be53f2b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java @@ -180,13 +180,13 @@ public void cancel() { public void deregisterJob() { analysisManager.removeJob(jobInfo.jobId); for (BaseAnalysisTask task : queryingTask) { - task.info.colToPartitions.clear(); + task.info.jobColumns.clear(); if (task.info.partitionNames != null) { 
task.info.partitionNames.clear(); } } for (BaseAnalysisTask task : queryFinished) { - task.info.colToPartitions.clear(); + task.info.jobColumns.clear(); if (task.info.partitionNames != null) { task.info.partitionNames.clear(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 258c33305afc4d..66d6d38f381f2c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -38,6 +38,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; import org.apache.doris.common.FeMetaVersion; +import org.apache.doris.common.Pair; import org.apache.doris.common.ThreadPoolManager; import org.apache.doris.common.ThreadPoolManager.BlockedPolicy; import org.apache.doris.common.io.Text; @@ -82,7 +83,6 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -219,7 +219,7 @@ public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlExce @VisibleForTesting protected AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlException { AnalysisInfo jobInfo = buildAnalysisJobInfo(stmt); - if (jobInfo.colToPartitions.isEmpty()) { + if (jobInfo.jobColumns.isEmpty()) { // No statistics need to be collected or updated return null; } @@ -292,51 +292,6 @@ private void sendJobId(List analysisInfos, boolean proxy) { } } - /** - * Gets the partitions for which statistics are to be collected. First verify that - * there are partitions that have been deleted but have historical statistics(invalid statistics), - * if there are these partitions, we need to delete them to avoid errors in summary table level statistics. - * Then get the partitions for which statistics need to be collected based on collection mode (incremental/full). - *
- * note: - * If there is no invalid statistics, it does not need to collect/update - * statistics if the following conditions are met: - * - in full collection mode, the partitioned table does not have partitions - * - in incremental collection mode, partition statistics already exist - *
- * TODO Supports incremental collection of statistics from materialized views - */ - private Map> validateAndGetPartitions(TableIf table, Set columnNames, - Set partitionNames, AnalysisType analysisType) throws DdlException { - - Map> columnToPartitions = columnNames.stream() - .collect(Collectors.toMap( - columnName -> columnName, - columnName -> new HashSet<>(partitionNames == null ? Collections.emptySet() : partitionNames) - )); - - if (analysisType == AnalysisType.HISTOGRAM) { - // Collecting histograms does not need to support incremental collection, - // and will automatically cover historical statistics - return columnToPartitions; - } - - if (table instanceof HMSExternalTable) { - // TODO Currently, we do not support INCREMENTAL collection for external table. - // One reason is external table partition id couldn't convert to a Long value. - // Will solve this problem later. - return columnToPartitions; - } - - if (analysisType == AnalysisType.FUNDAMENTALS) { - Map> result = table.findReAnalyzeNeededPartitions(); - result.keySet().retainAll(columnNames); - return result; - } - - return columnToPartitions; - } - // Make sure colName of job has all the column as this AnalyzeStmt specified, no matter whether it will be analyzed // or not. @VisibleForTesting @@ -362,12 +317,6 @@ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlExceptio infoBuilder.setCatalogId(stmt.getCatalogId()); infoBuilder.setDBId(stmt.getDbId()); infoBuilder.setTblId(stmt.getTable().getId()); - // TODO: Refactor later, DON'T MODIFY IT RIGHT NOW - StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); - for (String colName : columnNames) { - stringJoiner.add(colName); - } - infoBuilder.setColName(stringJoiner.toString()); infoBuilder.setPartitionNames(partitionNames); infoBuilder.setPartitionOnly(partitionOnly); infoBuilder.setSamplingPartition(isSamplingPartition); @@ -391,20 +340,23 @@ public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlExceptio if (analysisType == AnalysisType.HISTOGRAM) { int numBuckets = stmt.getNumBuckets(); - int maxBucketNum = numBuckets > 0 ? numBuckets - : StatisticConstants.HISTOGRAM_MAX_BUCKET_NUM; + int maxBucketNum = numBuckets > 0 ? 
numBuckets : StatisticConstants.HISTOGRAM_MAX_BUCKET_NUM; infoBuilder.setMaxBucketNum(maxBucketNum); } long periodTimeInMs = stmt.getPeriodTimeInMs(); infoBuilder.setPeriodTimeInMs(periodTimeInMs); - - Map> colToPartitions = validateAndGetPartitions(table, columnNames, - partitionNames, analysisType); - infoBuilder.setColToPartitions(colToPartitions); + List> jobColumns = table.getColumnIndexPairs(columnNames); + infoBuilder.setJobColumns(jobColumns); + StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); + for (Pair pair : jobColumns) { + stringJoiner.add(pair.toString()); + } + infoBuilder.setColName(stringJoiner.toString()); infoBuilder.setTaskIds(Lists.newArrayList()); infoBuilder.setTblUpdateTime(table.getUpdateTime()); - infoBuilder.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0); + infoBuilder.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0 + && analysisMethod.equals(AnalysisMethod.SAMPLE)); return infoBuilder.build(); } @@ -420,35 +372,28 @@ public void recordAnalysisJob(AnalysisInfo jobInfo) { public void createTaskForEachColumns(AnalysisInfo jobInfo, Map analysisTasks, boolean isSync) throws DdlException { - Map> columnToPartitions = jobInfo.colToPartitions; + List> jobColumns = jobInfo.jobColumns; TableIf table = jobInfo.getTable(); - for (Entry> entry : columnToPartitions.entrySet()) { - String colName = entry.getKey(); - List indexIds = Lists.newArrayList(); - // Get index id this column belongs to for OlapTable. Set it to -1 for baseIndex id. - if (table instanceof OlapTable) { - indexIds = ((OlapTable) table).getMvColumnIndexIds(colName); - } else { - indexIds.add(-1L); - } + for (Pair pair : jobColumns) { AnalysisInfoBuilder colTaskInfoBuilder = new AnalysisInfoBuilder(jobInfo); - if (jobInfo.analysisType != AnalysisType.HISTOGRAM) { - colTaskInfoBuilder.setAnalysisType(AnalysisType.FUNDAMENTALS); - Map> colToParts = new HashMap<>(); - colToParts.put(colName, entry.getValue()); - colTaskInfoBuilder.setColToPartitions(colToParts); - } - for (long indexId : indexIds) { - long taskId = Env.getCurrentEnv().getNextId(); - AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(colName).setIndexId(indexId) - .setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build(); - analysisTasks.put(taskId, createTask(analysisInfo)); - jobInfo.addTaskId(taskId); - if (isSync) { - continue; + colTaskInfoBuilder.setAnalysisType(AnalysisType.FUNDAMENTALS); + long taskId = Env.getCurrentEnv().getNextId(); + long indexId = -1; + if (table instanceof OlapTable) { + OlapTable olapTable = (OlapTable) table; + indexId = olapTable.getIndexIdByName(pair.first); + if (indexId == olapTable.getBaseIndexId()) { + indexId = -1; } - replayCreateAnalysisTask(analysisInfo); } + AnalysisInfo analysisInfo = colTaskInfoBuilder.setColName(pair.second).setIndexId(indexId) + .setTaskId(taskId).setLastExecTimeInMs(System.currentTimeMillis()).build(); + analysisTasks.put(taskId, createTask(analysisInfo)); + jobInfo.addTaskId(taskId); + if (isSync) { + continue; + } + replayCreateAnalysisTask(analysisInfo); } } @@ -565,7 +510,9 @@ public void updateTableStats(AnalysisInfo jobInfo) { tableStats.update(jobInfo, tbl); logCreateTableStats(tableStats); } - jobInfo.colToPartitions.clear(); + if (jobInfo.jobColumns != null) { + jobInfo.jobColumns.clear(); + } if (jobInfo.partitionNames != null) { jobInfo.partitionNames.clear(); } @@ -712,7 +659,16 @@ public void invalidateLocalStats(long catalogId, long dbId, long tableId, indexIds.add(-1L); } for (long indexId : 
indexIds) { - tableStats.removeColumn(column); + String indexName = table.getName(); + if (table instanceof OlapTable) { + OlapTable olapTable = (OlapTable) table; + if (indexId == -1) { + indexName = olapTable.getIndexNameById(olapTable.getBaseIndexId()); + } else { + indexName = olapTable.getIndexNameById(indexId); + } + } + tableStats.removeColumn(indexName, column); statisticsCache.invalidate(tableId, indexId, column); } } @@ -1088,25 +1044,16 @@ public void registerSysJob(AnalysisInfo jobInfo, Map tas analysisJobIdToTaskMap.put(jobInfo.jobId, taskInfos); } - // Remove col stats status from TableStats if failed load some col stats after analyze corresponding column so that - // we could make sure it would be analyzed again soon if user or system submit job for that column again. - public void removeColStatsStatus(long tblId, String colName) { - TableStatsMeta tableStats = findTableStatsStatus(tblId); - if (tableStats != null) { - tableStats.removeColumn(colName); - } - } - public void removeTableStats(long tableId) { idToTblStats.remove(tableId); } - public ColStatsMeta findColStatsMeta(long tblId, String colName) { + public ColStatsMeta findColStatsMeta(long tblId, String indexName, String colName) { TableStatsMeta tableStats = findTableStatsStatus(tblId); if (tableStats == null) { return null; } - return tableStats.findColumnStatsMeta(colName); + return tableStats.findColumnStatsMeta(indexName, colName); } public AnalysisJob findJob(long id) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index d26de9d9de7141..0ec1c4561d3763 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -65,13 +65,8 @@ public OlapAnalysisTask(AnalysisInfo info) { } public void doExecute() throws Exception { - Set partitionNames = info.colToPartitions.get(info.colName); - if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod) - || partitionNames == null || partitionNames.isEmpty()) { - if (partitionNames == null) { - LOG.warn("Table {}.{}.{}, partitionNames for column {} is null. 
ColToPartitions:[{}]", - info.catalogId, info.dbId, info.tblId, info.colName, info.colToPartitions); - } + List> columnList = info.jobColumns; + if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod) || columnList == null || columnList.isEmpty()) { StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId, info.tblId, info.indexId, info.colName, null); job.appendBuf(this, Arrays.asList(new ColStatsData(statsId))); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index dbb7046467aec0..9ca971845b7e64 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; +import org.apache.doris.common.Pair; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.hive.HMSExternalTable; @@ -39,10 +40,9 @@ import java.time.LocalTime; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Set; +import java.util.StringJoiner; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -121,7 +121,7 @@ public void analyzeDb(DatabaseIf databaseIf) throws DdlException { analysisTaskExecutor.clear(); break; } - analysisInfo = getReAnalyzeRequiredPart(analysisInfo); + analysisInfo = getNeedAnalyzeColumns(analysisInfo); if (analysisInfo == null) { continue; } @@ -186,11 +186,7 @@ protected void createAnalyzeJobForTbl(DatabaseIf db, .setCatalogId(db.getCatalog().getId()) .setDBId(db.getId()) .setTblId(table.getId()) - .setColName( - table.getSchemaAllIndexes(false).stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName).collect(Collectors.joining(",")) - ) + .setColName(null) .setAnalysisType(AnalysisInfo.AnalysisType.FUNDAMENTALS) .setAnalysisMode(AnalysisInfo.AnalysisMode.INCREMENTAL) .setAnalysisMethod(analysisMethod) @@ -202,13 +198,14 @@ protected void createAnalyzeJobForTbl(DatabaseIf db, .setLastExecTimeInMs(System.currentTimeMillis()) .setJobType(JobType.SYSTEM) .setTblUpdateTime(table.getUpdateTime()) - .setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0) + .setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0 + && analysisMethod.equals(AnalysisMethod.SAMPLE)) .build(); analysisInfos.add(jobInfo); } @VisibleForTesting - protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { + protected AnalysisInfo getNeedAnalyzeColumns(AnalysisInfo jobInfo) { TableIf table = StatisticsUtil.findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); // Skip tables that are too wide. 
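The gating that getNeedAnalyzeColumns applies before building a job can be modeled with plain JDK types. A toy sketch (hypothetical thresholds; the real method returns null to skip and consults TableStatsMeta rather than booleans):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Toy model (not Doris code) of the auto-analyze gating: wide tables are
// skipped, stale tables re-analyze every supported column in every index,
// and tables that only loaded new partitions re-analyze just the partition
// columns.
public class AutoAnalyzeGateSketch {
    static List<String> columnsToAnalyze(int tableWidth, int widthThreshold,
            boolean needReAnalyze, boolean newPartitionLoaded,
            List<String> allColumns, List<String> partitionColumns) {
        if (tableWidth > widthThreshold) {
            return Collections.emptyList(); // too wide: skip entirely
        }
        if (needReAnalyze) {
            return allColumns;              // stale stats: redo everything
        }
        if (newPartitionLoaded) {
            return partitionColumns;        // only partition data changed
        }
        return Collections.emptyList();
    }

    public static void main(String[] args) {
        System.out.println(columnsToAnalyze(3, 100, false, true,
                Arrays.asList("c1", "c2", "p1"), Arrays.asList("p1"))); // [p1]
    }
}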
if (table.getBaseSchema().size() > StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) { @@ -218,26 +215,25 @@ protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { AnalysisManager analysisManager = Env.getServingEnv().getAnalysisManager(); TableStatsMeta tblStats = analysisManager.findTableStatsStatus(table.getId()); - Map> needRunPartitions = null; - String colNames = jobInfo.colName; + List> needRunColumns = null; if (table.needReAnalyzeTable(tblStats)) { - needRunPartitions = table.findReAnalyzeNeededPartitions(); + needRunColumns = table.getColumnIndexPairs(table.getSchemaAllIndexes(false) + .stream().map(Column::getName).collect(Collectors.toSet())); } else if (table instanceof OlapTable && tblStats.newPartitionLoaded.get()) { OlapTable olapTable = (OlapTable) table; - needRunPartitions = new HashMap<>(); - Set partitionColumnNames = olapTable.getPartitionInfo().getPartitionColumns().stream() - .map(Column::getName).collect(Collectors.toSet()); - colNames = partitionColumnNames.stream().collect(Collectors.joining(",")); Set partitionNames = olapTable.getAllPartitions().stream() .map(Partition::getName).collect(Collectors.toSet()); - for (String column : partitionColumnNames) { - needRunPartitions.put(column, partitionNames); - } + needRunColumns = olapTable.getColumnIndexPairs(partitionNames); } - if (needRunPartitions == null || needRunPartitions.isEmpty()) { + if (needRunColumns == null || needRunColumns.isEmpty()) { return null; } - return new AnalysisInfoBuilder(jobInfo).setColName(colNames).setColToPartitions(needRunPartitions).build(); + StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); + for (Pair pair : needRunColumns) { + stringJoiner.add(pair.toString()); + } + return new AnalysisInfoBuilder(jobInfo) + .setColName(stringJoiner.toString()).setJobColumns(needRunColumns).build(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java index 0985b9b2b9539e..ec187fe893af49 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java @@ -61,7 +61,7 @@ protected void runAfterCatalogReady() { @VisibleForTesting protected void createSystemAnalysisJob(AnalysisInfo jobInfo) throws DdlException { - if (jobInfo.colToPartitions.isEmpty()) { + if (jobInfo.jobColumns.isEmpty()) { // No statistics need to be collected or updated return; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 5ac9b7305c77fd..5caa5bd9751c1e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -29,6 +29,7 @@ import org.apache.doris.statistics.util.DBObjects; import org.apache.doris.statistics.util.StatisticsUtil; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.commons.text.StringSubstitutor; import org.apache.logging.log4j.LogManager; @@ -320,7 +321,7 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder() .setTblUpdateTime(System.currentTimeMillis()) .setColName("") - .setColToPartitions(Maps.newHashMap()) + .setJobColumns(Lists.newArrayList()) .setUserInject(true) 
.setJobType(AnalysisInfo.JobType.MANUAL) .build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 9231c6a2bc7cd1..3b9b1e2bead005 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -21,11 +21,11 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.statistics.AnalysisInfo.JobType; -import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.annotations.VisibleForTesting; import com.google.gson.annotations.SerializedName; @@ -33,8 +33,6 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; -import java.util.Arrays; -import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -64,7 +62,11 @@ public class TableStatsMeta implements Writable { public long updatedTime; @SerializedName("colNameToColStatsMeta") - private ConcurrentMap colNameToColStatsMeta = new ConcurrentHashMap<>(); + private ConcurrentMap deprecatedColNameToColStatsMeta = new ConcurrentHashMap<>(); + + @SerializedName("colToColStatsMeta") + // -> ColStatsMeta + private ConcurrentMap, ColStatsMeta> colToColStatsMeta = new ConcurrentHashMap<>(); @SerializedName("trigger") public JobType jobType; @@ -100,52 +102,34 @@ public static TableStatsMeta read(DataInput dataInput) throws IOException { String json = Text.readString(dataInput); TableStatsMeta tableStats = GsonUtils.GSON.fromJson(json, TableStatsMeta.class); // Might be null counterintuitively, for compatible - if (tableStats.colNameToColStatsMeta == null) { - tableStats.colNameToColStatsMeta = new ConcurrentHashMap<>(); + if (tableStats.colToColStatsMeta == null) { + tableStats.colToColStatsMeta = new ConcurrentHashMap<>(); } - return tableStats; - } - - public long findColumnLastUpdateTime(String colName) { - ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(colName); - if (colStatsMeta == null) { - return 0; + if (tableStats.deprecatedColNameToColStatsMeta != null) { + tableStats.convertDeprecatedColStatsToNewVersion(); } - return colStatsMeta.updatedTime; - } - - public ColStatsMeta findColumnStatsMeta(String colName) { - return colNameToColStatsMeta.get(colName); + return tableStats; } - public void removeColumn(String colName) { - colNameToColStatsMeta.remove(colName); + public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { + return colToColStatsMeta.get(Pair.of(indexName, colName)); } - public Set analyzeColumns() { - return colNameToColStatsMeta.keySet(); + public void removeColumn(String indexName, String colName) { + colToColStatsMeta.remove(Pair.of(indexName, colName)); } - public void reset() { - updatedTime = 0; - colNameToColStatsMeta.values().forEach(ColStatsMeta::clear); + public Set> analyzeColumns() { + return colToColStatsMeta.keySet(); } public void update(AnalysisInfo analyzedJob, TableIf tableIf) { updatedTime = analyzedJob.tblUpdateTime; userInjected = analyzedJob.userInject; - String colNameStr = analyzedJob.colName; - // colName field AnalyzeJob's format likes: "[col1, col2]", we need to remove 
brackets here - // TODO: Refactor this later - if (analyzedJob.colName.startsWith("[") && analyzedJob.colName.endsWith("]")) { - colNameStr = colNameStr.substring(1, colNameStr.length() - 1); - } - List cols = Arrays.stream(colNameStr.split(",")) - .map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); - for (String col : cols) { - ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(col); + for (Pair colPair : analyzedJob.jobColumns) { + ColStatsMeta colStatsMeta = colToColStatsMeta.get(colPair); if (colStatsMeta == null) { - colNameToColStatsMeta.put(col, new ColStatsMeta(updatedTime, + colToColStatsMeta.put(colPair, new ColStatsMeta(updatedTime, analyzedJob.analysisMethod, analyzedJob.analysisType, analyzedJob.jobType, 0)); } else { colStatsMeta.updatedTime = updatedTime; @@ -159,21 +143,27 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) { if (tableIf instanceof OlapTable) { rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount(); } - if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet() - .containsAll(tableIf.getBaseSchema().stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName).collect(Collectors.toSet()))) { + if (analyzedJob.emptyJob) { + return; + } + if (analyzedJob.jobColumns.containsAll( + tableIf.getColumnIndexPairs( + tableIf.getSchemaAllIndexes(false).stream().map(Column::getName).collect(Collectors.toSet())))) { updatedRows.set(0); newPartitionLoaded.set(false); } if (tableIf instanceof OlapTable) { PartitionInfo partitionInfo = ((OlapTable) tableIf).getPartitionInfo(); - if (partitionInfo != null && analyzedJob.colToPartitions.keySet() - .containsAll(partitionInfo.getPartitionColumns().stream() - .map(Column::getName).collect(Collectors.toSet()))) { + if (partitionInfo != null && analyzedJob.jobColumns + .containsAll(tableIf.getColumnIndexPairs(partitionInfo.getPartitionColumns().stream() + .map(Column::getName).collect(Collectors.toSet())))) { newPartitionLoaded.set(false); } } } } + + public void convertDeprecatedColStatsToNewVersion() { + deprecatedColNameToColStatsMeta = null; + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index f8a77fe06db754..674456b0b46891 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -27,6 +27,7 @@ import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; +import org.apache.doris.common.Pair; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.AnalysisInfo.ScheduleType; @@ -45,9 +46,9 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; // CHECKSTYLE OFF public class AnalysisManagerTest { @@ -109,7 +110,7 @@ public String toString() { // test build sync job @Test public void testBuildAndAssignJob1() throws Exception { - AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setColToPartitions(new HashMap<>()).build(); + AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()).build(); new MockUp() { @Mock @@ -167,12 +168,7 @@ public void updateTableStats(AnalysisInfo 
jobInfo) { AnalysisManager analysisManager = new AnalysisManager(); Assertions.assertNull(analysisManager.buildAndAssignJob(analyzeTblStmt)); - analysisInfo.colToPartitions.put("c1", new HashSet() { - { - add("p1"); - add("p2"); - } - }); + analysisInfo.jobColumns.add(Pair.of("index1", "c1")); analysisManager.buildAndAssignJob(analyzeTblStmt); new Expectations() { { @@ -191,7 +187,7 @@ public void updateTableStats(AnalysisInfo jobInfo) { // test build async job @Test public void testBuildAndAssignJob2(@Injectable OlapAnalysisTask analysisTask) throws Exception { - AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setColToPartitions(new HashMap<>()) + AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()) .setScheduleType(ScheduleType.PERIOD) .build(); new MockUp() { @@ -255,12 +251,7 @@ public void logCreateAnalysisJob(AnalysisInfo analysisJob) { } })); AnalysisManager analysisManager = new AnalysisManager(); - analysisInfo.colToPartitions.put("c1", new HashSet() { - { - add("p1"); - add("p2"); - } - }); + analysisInfo.jobColumns.add(Pair.of("index1", "c1")); analysisManager.buildAndAssignJob(analyzeTblStmt); new Expectations() { { @@ -274,15 +265,7 @@ public void logCreateAnalysisJob(AnalysisInfo analysisJob) { public void testReAnalyze() { new MockUp() { - int count = 0; - int[] rowCount = new int[]{100, 100, 200, 200, 1, 1}; - final Column c = new Column("col1", PrimitiveType.INT); - @Mock - public long getRowCount() { - return rowCount[count++]; - } - @Mock public List getBaseSchema() { return Lists.newArrayList(c); @@ -291,22 +274,52 @@ public List getBaseSchema() { @Mock public List getColumns() { return Lists.newArrayList(c); } + @Mock + public List> getColumnIndexPairs(Set columns) { + List> jobList = Lists.newArrayList(); + jobList.add(Pair.of("1", "1")); + jobList.add(Pair.of("2", "2")); + jobList.add(Pair.of("3", "3")); + return jobList; + } }; OlapTable olapTable = new OlapTable(); + List> jobList = Lists.newArrayList(); + jobList.add(Pair.of("1", "1")); + jobList.add(Pair.of("2", "2")); + TableStatsMeta stats0 = new TableStatsMeta( + 0, new AnalysisInfoBuilder().setJobColumns(jobList) + .setColName("col1").build(), olapTable); + Assertions.assertTrue(olapTable.needReAnalyzeTable(stats0)); + + new MockUp() { + int count = 0; + int[] rowCount = new int[]{100, 100, 200, 200, 1, 1}; + + @Mock + public long getRowCount() { + return rowCount[count++]; + } + @Mock + public List> getColumnIndexPairs(Set columns) { + List> jobList = Lists.newArrayList(); + return jobList; + } + }; TableStatsMeta stats1 = new TableStatsMeta( - 50, new AnalysisInfoBuilder().setColToPartitions(new HashMap<>()) + 50, new AnalysisInfoBuilder().setJobColumns(new ArrayList<>()) .setColName("col1").build(), olapTable); stats1.updatedRows.addAndGet(50); Assertions.assertTrue(olapTable.needReAnalyzeTable(stats1)); TableStatsMeta stats2 = new TableStatsMeta( 190, new AnalysisInfoBuilder() - .setColToPartitions(new HashMap<>()).setColName("col1").build(), olapTable); + .setJobColumns(new ArrayList<>()).setColName("col1").build(), olapTable); stats2.updatedRows.addAndGet(20); Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2)); TableStatsMeta stats3 = new TableStatsMeta(0, new AnalysisInfoBuilder() - .setColToPartitions(new HashMap<>()).setEmptyJob(true).setColName("col1").build(), olapTable); + .setJobColumns(new ArrayList<>()).setEmptyJob(true).setColName("col1").build(), olapTable); Assertions.assertTrue(olapTable.needReAnalyzeTable(stats3)); } diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java index b17ba3e68dbd7a..5698f0e9b20e63 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisTaskExecutorTest.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.InternalSchemaInitializer; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.common.Pair; import org.apache.doris.common.jmockit.Deencapsulation; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.qe.StmtExecutor; @@ -33,7 +34,7 @@ import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.utframe.TestWithFeService; -import com.google.common.collect.Maps; +import com.google.common.collect.Lists; import mockit.Mock; import mockit.MockUp; import mockit.Mocked; @@ -41,10 +42,8 @@ import org.junit.jupiter.api.Test; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicBoolean; @@ -158,8 +157,8 @@ public void syncLoadColStats(long tableId, long idxId, String colName) { }; AnalysisTaskExecutor analysisTaskExecutor = new AnalysisTaskExecutor(1); - HashMap> colToPartitions = Maps.newHashMap(); - colToPartitions.put("col1", Collections.singleton("t1")); + List> columns = Lists.newArrayList(); + columns.add(Pair.of("col1", "t1")); AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setJobId(0).setTaskId(0) .setCatalogId(0).setDBId(0).setTblId(0) .setColName("col1").setJobType(JobType.MANUAL) @@ -167,7 +166,7 @@ public void syncLoadColStats(long tableId, long idxId, String colName) { .setAnalysisMethod(AnalysisMethod.FULL) .setAnalysisType(AnalysisType.FUNDAMENTALS) .setState(AnalysisState.RUNNING) - .setColToPartitions(colToPartitions) + .setJobColumns(columns) .build(); OlapAnalysisTask task = new OlapAnalysisTask(analysisInfo); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java index 483cd3c03262c2..bf6ce32e155f42 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java @@ -23,6 +23,7 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.Pair; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.qe.AutoCloseConnectContext; import org.apache.doris.qe.ConnectContext; @@ -35,7 +36,7 @@ import org.apache.doris.statistics.util.StatisticsUtil; import org.apache.doris.utframe.TestWithFeService; -import com.google.common.collect.Maps; +import com.google.common.collect.Lists; import mockit.Expectations; import mockit.Mock; import mockit.MockUp; @@ -45,10 +46,8 @@ import java.util.ArrayList; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; public class AnalyzeTest extends TestWithFeService { @@ -160,8 +159,8 @@ public void execSQLs(List partitionAnalysisSQLs, Map par @Mock protected void runQuery(String sql) {} }; - HashMap> colToPartitions = Maps.newHashMap(); - colToPartitions.put("col1", 
Collections.singleton("t1")); + List> colList = Lists.newArrayList(); + colList.add(Pair.of("col1", "index1")); AnalysisInfo analysisJobInfo = new AnalysisInfoBuilder().setJobId(0).setTaskId(0) .setCatalogId(0) .setDBId(0) @@ -170,7 +169,7 @@ protected void runQuery(String sql) {} .setAnalysisMode(AnalysisMode.FULL) .setAnalysisMethod(AnalysisMethod.FULL) .setAnalysisType(AnalysisType.FUNDAMENTALS) - .setColToPartitions(colToPartitions) + .setJobColumns(colList) .setState(AnalysisState.RUNNING) .build(); new OlapAnalysisTask(analysisJobInfo).doExecute(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java index 4217fb5a0db397..09bf4dd94c6732 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HistogramTaskTest.java @@ -90,7 +90,6 @@ public void test1TaskCreation() throws Exception { for (Entry infoEntry : taskInfo.entrySet()) { BaseAnalysisTask task = infoEntry.getValue(); - Assertions.assertEquals(AnalysisType.HISTOGRAM, task.info.analysisType); Assertions.assertEquals("col1", task.info.colName); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 678e7580f8eefb..f7b75261cc54fa 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -31,16 +31,12 @@ import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.Pair; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.InternalCatalog; -import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; -import org.apache.doris.statistics.AnalysisInfo.AnalysisType; -import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.collect.Sets; import mockit.Expectations; import mockit.Injectable; import mockit.Mock; @@ -54,7 +50,6 @@ import java.util.Arrays; import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -144,97 +139,32 @@ public List getSchemaAllIndexes(boolean full) { StatisticsAutoCollector saa = new StatisticsAutoCollector(); List analysisInfoList = saa.constructAnalysisInfo(new Database(1, "anydb")); Assertions.assertEquals(1, analysisInfoList.size()); - Assertions.assertEquals("c1", analysisInfoList.get(0).colName.split(",")[0]); + Assertions.assertNull(analysisInfoList.get(0).colName); } @Test - public void testGetReAnalyzeRequiredPart0() { + public void testSkipWideTable() { TableIf tableIf = new OlapTable(); new MockUp() { - @Mock - protected Map> findReAnalyzeNeededPartitions() { - Set partitionNames = new HashSet<>(); - partitionNames.add("p1"); - partitionNames.add("p2"); - Map> map = new HashMap<>(); - map.put("col1", partitionNames); - return map; - } - - @Mock - public long getRowCount() { - return 100; - } - @Mock public List getBaseSchema() { return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT)); } - }; - - new MockUp() { - @Mock - 
             public TableIf findTable(long catalogName, long dbName, long tblName) {
-                return tableIf;
-            }
-        };
-        AnalysisInfo analysisInfo = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL)
-                .setColToPartitions(new HashMap<>()).setAnalysisType(
-                        AnalysisType.FUNDAMENTALS).setColName("col1").setJobType(JobType.SYSTEM).build();
-        new MockUp() {
-
-            int count = 0;
-
-            TableStatsMeta[] tableStatsArr =
-                    new TableStatsMeta[] {new TableStatsMeta(0, analysisInfo, tableIf),
-                            new TableStatsMeta(0, analysisInfo, tableIf), null};
-
-            {
-                tableStatsArr[0].updatedRows.addAndGet(100);
-                tableStatsArr[1].updatedRows.addAndGet(0);
-            }
-
-            @Mock
-            public TableStatsMeta findTableStatsStatus(long tblId) {
-                return tableStatsArr[count++];
-            }
-        };
-
-        new MockUp() {
-            @Mock
-            public AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, TableIf table,
-                    Set<String> needRunPartitions) {
-                return new AnalysisInfoBuilder().build();
-            }
-        };
-        StatisticsAutoCollector statisticsAutoCollector = new StatisticsAutoCollector();
-        AnalysisInfo analysisInfo2 = new AnalysisInfoBuilder()
-                .setCatalogId(0)
-                .setDBId(0)
-                .setTblId(0).build();
-        Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
-        // uncomment it when updatedRows gets ready
-        // Assertions.assertNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
-        Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
-    }
-
-    @Test
-    public void testSkipWideTable() {
-
-        TableIf tableIf = new OlapTable();
-        new MockUp() {
             @Mock
-            public List<Column> getBaseSchema() {
-                return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT));
+            public List<Pair<String, String>> getColumnIndexPairs(Set<String> columns) {
+                ArrayList<Pair<String, String>> list = Lists.newArrayList();
+                list.add(Pair.of("1", "1"));
+                return list;
             }
         };
         new MockUp() {
             int count = 0;
-            int [] thresholds = {1, 10};
+            int[] thresholds = {1, 10};
+
             @Mock
             public TableIf findTable(long catalogName, long dbName, long tblName) {
                 return tableIf;
@@ -246,19 +176,10 @@ public int getAutoAnalyzeTableWidthThreshold() {
             }
         };
 
-        new MockUp() {
-            @Mock
-            public Map<String, Set<String>> findReAnalyzeNeededPartitions() {
-                HashMap<String, Set<String>> ret = Maps.newHashMap();
-                ret.put("key1", Sets.newHashSet());
-                return ret;
-            }
-        };
-
         AnalysisInfo analysisInfo = new AnalysisInfoBuilder().build();
         StatisticsAutoCollector statisticsAutoCollector = new StatisticsAutoCollector();
-        Assertions.assertNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo));
-        Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo));
+        Assertions.assertNull(statisticsAutoCollector.getNeedAnalyzeColumns(analysisInfo));
+        Assertions.assertNotNull(statisticsAutoCollector.getNeedAnalyzeColumns(analysisInfo));
     }
 
     @Test
@@ -400,13 +321,9 @@ public TableIf findTable(long catalogId, long dbId, long tblId) {
         List<AnalysisInfo> jobInfos = new ArrayList<>();
         sac.createAnalyzeJobForTbl(db, jobInfos, t1);
         AnalysisInfo jobInfo = jobInfos.get(0);
-        Map<String, Set<String>> colToPartitions = new HashMap<>();
-        colToPartitions.put("test", new HashSet<String>() {
-            {
-                add("p1");
-            }
-        });
-        jobInfo = new AnalysisInfoBuilder(jobInfo).setColToPartitions(colToPartitions).build();
+        List<Pair<String, String>> columnNames = Lists.newArrayList();
+        columnNames.add(Pair.of("test", "t1"));
+        jobInfo = new AnalysisInfoBuilder(jobInfo).setJobColumns(columnNames).build();
         Map<Long, BaseAnalysisTask> analysisTasks = new HashMap<>();
         AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
         analysisManager.createTaskForEachColumns(jobInfo, analysisTasks, false);
@@ -472,13 +389,9 @@ public TableIf findTable(long catalogId, long dbId, long tblId) {
         List<AnalysisInfo> jobInfos = new ArrayList<>();
         sac.createAnalyzeJobForTbl(db, jobInfos, t1);
         AnalysisInfo jobInfo = jobInfos.get(0);
-        Map<String, Set<String>> colToPartitions = new HashMap<>();
-        colToPartitions.put("test", new HashSet<String>() {
-            {
-                add("p1");
-            }
-        });
-        jobInfo = new AnalysisInfoBuilder(jobInfo).setColToPartitions(colToPartitions).build();
+        List<Pair<String, String>> colNames = Lists.newArrayList();
+        colNames.add(Pair.of("test", "1"));
+        jobInfo = new AnalysisInfoBuilder(jobInfo).setJobColumns(colNames).build();
         Map<Long, BaseAnalysisTask> analysisTasks = new HashMap<>();
         AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager();
         analysisManager.createTaskForEachColumns(jobInfo, analysisTasks, false);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
index b5e73ba09da728..94eab9e00cc501 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java
@@ -25,7 +25,7 @@
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
-import java.util.HashMap;
+import java.util.ArrayList;
 
 class TableStatsMetaTest {
 
@@ -38,7 +38,7 @@ public long getRowCount() {
             }
         };
         TableStatsMeta tableStatsMeta = new TableStatsMeta();
-        AnalysisInfo jobInfo = new AnalysisInfoBuilder().setColToPartitions(new HashMap<>())
+        AnalysisInfo jobInfo = new AnalysisInfoBuilder().setJobColumns(new ArrayList<>())
                 .setColName("col1").build();
         tableStatsMeta.update(jobInfo, table);
         Assertions.assertEquals(4, tableStatsMeta.rowCount);
diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy
index 699e595df598cb..8cfec96623399a 100644
--- a/regression-test/suites/statistics/analyze_stats.groovy
+++ b/regression-test/suites/statistics/analyze_stats.groovy
@@ -1122,10 +1122,10 @@ PARTITION `p599` VALUES IN (599)
         System.out.println(actual_result)
         return expected_result.containsAll(actual_result) && actual_result.containsAll(expected_result)
     }
-    assert check_column(afterDropped, "[col2, col3]")
+    assert check_column(afterDropped, "[test_meta_management:col2, test_meta_management:col3]")
     sql """ANALYZE TABLE test_meta_management WITH SYNC"""
     afterDropped = sql """SHOW TABLE STATS test_meta_management"""
-    assert check_column(afterDropped, "[col1, col2, col3]")
+    assert check_column(afterDropped, "[test_meta_management:col1, test_meta_management:col2, test_meta_management:col3]")
 
     sql """ DROP TABLE IF EXISTS test_updated_rows """
     sql """
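Note (illustrative sketch, not part of the patch): the test changes above all follow the same shape. An analysis job no longer carries a column-to-partitions map; it carries (index name, column name) pairs, so the same column name can be tracked once per materialized index. The index and column names below are made up:

    // Build the column list for an analysis job: index name first, column name second.
    List<Pair<String, String>> jobColumns = Lists.newArrayList();
    jobColumns.add(Pair.of("baseIndex", "col1")); // "baseIndex" and "col1" are illustrative
    jobColumns.add(Pair.of("mv1", "col1"));       // the same column, registered again for rollup mv1
    jobInfo = new AnalysisInfoBuilder(jobInfo).setJobColumns(jobColumns).build();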
assertEquals("l_shipdate", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("3.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -171,7 +171,7 @@ suite("test_analyze_mtmv") { result_sample = sql """show column stats mv1(o_orderdate)""" assertEquals(1, result_sample.size()) assertEquals("o_orderdate", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("3.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -185,7 +185,7 @@ suite("test_analyze_mtmv") { result_sample = sql """show column cached stats mv1(o_orderdate)""" assertEquals(1, result_sample.size()) assertEquals("o_orderdate", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("3.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -199,7 +199,7 @@ suite("test_analyze_mtmv") { result_sample = sql """show column stats mv1(l_partkey)""" assertEquals(1, result_sample.size()) assertEquals("l_partkey", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("1.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -213,7 +213,7 @@ suite("test_analyze_mtmv") { result_sample = sql """show column cached stats mv1(l_partkey)""" assertEquals(1, result_sample.size()) assertEquals("l_partkey", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("1.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -227,7 +227,7 @@ suite("test_analyze_mtmv") { result_sample = sql """show column stats mv1(l_suppkey)""" assertEquals(1, result_sample.size()) assertEquals("l_suppkey", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("1.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -241,7 +241,7 @@ suite("test_analyze_mtmv") { result_sample = sql """show column cached stats mv1(l_suppkey)""" assertEquals(1, result_sample.size()) assertEquals("l_suppkey", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("1.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -255,7 +255,7 @@ suite("test_analyze_mtmv") { result_sample = sql """show column stats mv1(sum_total)""" assertEquals(1, result_sample.size()) assertEquals("sum_total", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("2.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -269,7 +269,7 @@ suite("test_analyze_mtmv") { result_sample = sql """show column cached stats mv1(sum_total)""" assertEquals(1, result_sample.size()) assertEquals("sum_total", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("2.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -298,7 +298,7 @@ 
suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("l_shipdate", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("3.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -319,7 +319,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("l_shipdate", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("3.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -340,7 +340,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("o_orderdate", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("3.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -361,7 +361,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("o_orderdate", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("3.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -382,7 +382,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("l_partkey", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("1.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -403,7 +403,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("l_partkey", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("1.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -424,7 +424,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("l_suppkey", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("1.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -445,7 +445,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("l_suppkey", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("1.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -466,7 +466,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("sum_total", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("2.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) @@ -487,7 +487,7 @@ suite("test_analyze_mtmv") { } assertEquals(1, result_sample.size()) assertEquals("sum_total", result_sample[0][0]) - assertEquals("N/A", result_sample[0][1]) + assertEquals("mv1", result_sample[0][1]) assertEquals("3.0", result_sample[0][2]) assertEquals("2.0", result_sample[0][3]) assertEquals("0.0", result_sample[0][4]) diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy 
diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy
index 635837e6c16400..3348623acaf7b2 100644
--- a/regression-test/suites/statistics/test_analyze_mv.groovy
+++ b/regression-test/suites/statistics/test_analyze_mv.groovy
@@ -145,7 +145,7 @@ suite("test_analyze_mv") {
     def result_sample = sql """show column stats mvTestDup(key1)"""
     assertEquals(1, result_sample.size())
     assertEquals("key1", result_sample[0][0])
-    assertEquals("N/A", result_sample[0][1])
+    assertEquals("mvTestDup", result_sample[0][1])
     assertEquals("6.0", result_sample[0][2])
     assertEquals("4.0", result_sample[0][3])
     assertEquals("1", result_sample[0][7])
@@ -157,7 +157,7 @@ suite("test_analyze_mv") {
     result_sample = sql """show column stats mvTestDup(value1)"""
     assertEquals(1, result_sample.size())
     assertEquals("value1", result_sample[0][0])
-    assertEquals("N/A", result_sample[0][1])
+    assertEquals("mvTestDup", result_sample[0][1])
     assertEquals("6.0", result_sample[0][2])
     assertEquals("4.0", result_sample[0][3])
     assertEquals("3", result_sample[0][7])
@@ -252,9 +252,9 @@ suite("test_analyze_mv") {
 
     result_sample = sql """show column stats mvTestAgg(key1)"""
     assertEquals(2, result_sample.size())
-    if (result_sample[0][1] == "N/A") {
+    if (result_sample[0][1] == "mvTestAgg") {
         assertEquals("key1", result_sample[0][0])
-        assertEquals("N/A", result_sample[0][1])
+        assertEquals("mvTestAgg", result_sample[0][1])
         assertEquals("5.0", result_sample[0][2])
         assertEquals("4.0", result_sample[0][3])
         assertEquals("1", result_sample[0][7])
@@ -267,7 +267,7 @@
         assertEquals("1001", result_sample[1][8])
     } else {
         assertEquals("key1", result_sample[1][0])
-        assertEquals("N/A", result_sample[1][1])
+        assertEquals("mvTestAgg", result_sample[1][1])
         assertEquals("5.0", result_sample[1][2])
         assertEquals("4.0", result_sample[1][3])
         assertEquals("1", result_sample[1][7])
@@ -282,9 +282,9 @@ suite("test_analyze_mv") {
 
     result_sample = sql """show column stats mvTestAgg(value1)"""
     assertEquals(2, result_sample.size())
-    if (result_sample[0][1] == "N/A") {
+    if (result_sample[0][1] == "mvTestAgg") {
         assertEquals("value1", result_sample[0][0])
-        assertEquals("N/A", result_sample[0][1])
+        assertEquals("mvTestAgg", result_sample[0][1])
         assertEquals("5.0", result_sample[0][2])
         assertEquals("5.0", result_sample[0][3])
         assertEquals("6", result_sample[0][7])
@@ -297,7 +297,7 @@
         assertEquals("3001", result_sample[1][8])
     } else {
         assertEquals("value1", result_sample[1][0])
-        assertEquals("N/A", result_sample[1][1])
+        assertEquals("mvTestAgg", result_sample[1][1])
         assertEquals("5.0", result_sample[1][2])
         assertEquals("5.0", result_sample[1][3])
         assertEquals("6", result_sample[1][7])
@@ -313,7 +313,7 @@ suite("test_analyze_mv") {
     result_sample = sql """show column stats mvTestAgg(key2)"""
     assertEquals(1, result_sample.size())
     assertEquals("key2", result_sample[0][0])
-    assertEquals("N/A", result_sample[0][1])
+    assertEquals("mvTestAgg", result_sample[0][1])
     assertEquals("5.0", result_sample[0][2])
     assertEquals("5.0", result_sample[0][3])
     assertEquals("2", result_sample[0][7])
@@ -323,7 +323,7 @@
     result_sample = sql """show column stats mvTestAgg(value2)"""
     assertEquals(1, result_sample.size())
     assertEquals("value2", result_sample[0][0])
-    assertEquals("N/A", result_sample[0][1])
+    assertEquals("mvTestAgg", result_sample[0][1])
     assertEquals("5.0", result_sample[0][2])
     assertEquals("5.0", result_sample[0][3])
     assertEquals("4", result_sample[0][7])
@@ -391,7 +391,7 @@ suite("test_analyze_mv") {
     result_sample = sql """show column stats mvTestUni(key1)"""
     assertEquals(1, result_sample.size())
     assertEquals("key1", result_sample[0][0])
-    assertEquals("N/A", result_sample[0][1])
+    assertEquals("mvTestUni", result_sample[0][1])
     assertEquals("5.0", result_sample[0][2])
     assertEquals("4.0", result_sample[0][3])
     assertEquals("1", result_sample[0][7])
@@ -444,7 +444,7 @@ suite("test_analyze_mv") {
     }
     assertEquals(1, result_sample.size())
     assertEquals("key1", result_sample[0][0])
-    assertEquals("N/A", result_sample[0][1])
+    assertEquals("mvTestDup", result_sample[0][1])
     assertEquals("6.0", result_sample[0][2])
     assertEquals("4.0", result_sample[0][3])
     assertEquals("1", result_sample[0][7])
@@ -462,7 +462,7 @@ suite("test_analyze_mv") {
     }
     assertEquals(1, result_sample.size())
     assertEquals("value1", result_sample[0][0])
-    assertEquals("N/A", result_sample[0][1])
+    assertEquals("mvTestDup", result_sample[0][1])
     assertEquals("6.0", result_sample[0][2])
     assertEquals("4.0", result_sample[0][3])
     assertEquals("3", result_sample[0][7])
@@ -558,11 +558,11 @@ suite("test_analyze_mv") {
         logger.info("col " + colName + " in index " + indexName + " found ? " + found)
         assertTrue(found)
     }
-    verifyTaskStatus(result_sample, "key1", "N/A")
-    verifyTaskStatus(result_sample, "key2", "N/A")
-    verifyTaskStatus(result_sample, "value1", "N/A")
-    verifyTaskStatus(result_sample, "value2", "N/A")
-    verifyTaskStatus(result_sample, "value3", "N/A")
+    verifyTaskStatus(result_sample, "key1", "mvTestDup")
+    verifyTaskStatus(result_sample, "key2", "mvTestDup")
+    verifyTaskStatus(result_sample, "value1", "mvTestDup")
+    verifyTaskStatus(result_sample, "value2", "mvTestDup")
+    verifyTaskStatus(result_sample, "value3", "mvTestDup")
     verifyTaskStatus(result_sample, "mv_key1", "mv1")
     verifyTaskStatus(result_sample, "mv_key1", "mv3")
     verifyTaskStatus(result_sample, "mv_key2", "mv2")
@@ -580,7 +580,7 @@ suite("test_analyze_mv") {
     def result = sql """show column cached stats mvTestDup(key1)"""
     assertEquals(1, result.size())
     assertEquals("key1", result[0][0])
-    assertEquals("N/A", result[0][1])
+    assertEquals("mvTestDup", result[0][1])
     assertEquals("50.0", result[0][2])
     assertEquals("1.0", result[0][3])
     assertEquals("1.0", result[0][4])