Skip to content

Commit

Permalink
[refactor](stats) refactor collection logic and opt some config #26163 (
Browse files Browse the repository at this point in the history
#26858)

picked from #26163
  • Loading branch information
Kikyou1997 authored Nov 13, 2023
1 parent be2dd64 commit 5ddabea
Show file tree
Hide file tree
Showing 30 changed files with 1,443 additions and 876 deletions.
34 changes: 2 additions & 32 deletions fe/fe-common/src/main/java/org/apache/doris/common/Config.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

import org.apache.doris.common.ExperimentalUtil.ExperimentalType;

import java.util.concurrent.TimeUnit;

public class Config extends ConfigBase {

@ConfField(description = {"用户自定义配置文件的路径,用于存放 fe_custom.conf。该文件中的配置会覆盖 fe.conf 中的配置",
Expand Down Expand Up @@ -1745,7 +1743,7 @@ public class Config extends ConfigBase {
* Used to determine how many statistics collection SQL could run simultaneously.
*/
@ConfField
public static int statistics_simultaneously_running_task_num = 10;
public static int statistics_simultaneously_running_task_num = 3;

/**
* if table has too many replicas, Fe occur oom when schema change.
Expand Down Expand Up @@ -2046,7 +2044,7 @@ public class Config extends ConfigBase {
* FE OOM.
*/
@ConfField
public static long stats_cache_size = 10_0000;
public static long stats_cache_size = 50_0000;

/**
* This configuration is used to enable the statistics of query information, which will record
Expand All @@ -2069,9 +2067,6 @@ public class Config extends ConfigBase {
"Whether to enable binlog feature"})
public static boolean enable_feature_binlog = false;

@ConfField
public static int analyze_task_timeout_in_hours = 12;

@ConfField(mutable = true, masterOnly = true, description = {
"是否禁止使用 WITH RESOURCE 语句创建 Catalog。",
"Whether to disable creating catalog with WITH RESOURCE statement."})
Expand Down Expand Up @@ -2126,9 +2121,6 @@ public class Config extends ConfigBase {
@ConfField
public static boolean forbid_running_alter_job = false;

@ConfField
public static int table_stats_health_threshold = 80;

@ConfField(description = {
"暂时性配置项,开启后会自动将所有的olap表修改为可light schema change",
"temporary config filed, will make all olap tables enable light schema change"
Expand All @@ -2154,28 +2146,6 @@ public class Config extends ConfigBase {
+ "but it will increase the memory overhead."})
public static int virtual_node_number = 2048;

@ConfField(description = {"控制对大表的自动ANALYZE的最小时间间隔,"
+ "在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes的表仅ANALYZE一次",
"This controls the minimum time interval for automatic ANALYZE on large tables. Within this interval, "
+ "tables larger than huge_table_lower_bound_size_in_bytes are analyzed only once."})
public static long huge_table_auto_analyze_interval_in_millis = TimeUnit.HOURS.toMillis(12);

@ConfField(description = {"定义大表的大小下界,在开启enable_auto_sample的情况下,"
+ "大小超过该值的表将会自动通过采样收集统计信息", "This defines the lower size bound for large tables. "
+ "When enable_auto_sample is enabled, tables larger than this value will automatically collect "
+ "statistics through sampling"})
public static long huge_table_lower_bound_size_in_bytes = 5L * 1024 * 1024 * 1024;

@ConfField(description = {"定义开启大表自动sample后,对大表的采样比例",
"This defines the number of sample rows for large tables when automatic sampling for "
+ "large tables is enabled"})
public static int huge_table_default_sample_rows = 4194304;

@ConfField(description = {"是否开启大表自动sample,开启后对于大小超过huge_table_lower_bound_size_in_bytes会自动通过采样收集"
+ "统计信息", "Whether to enable automatic sampling for large tables, which, when enabled, automatically "
+ "collects statistics through sampling for tables larger than 'huge_table_lower_bound_size_in_bytes'"})
public static boolean enable_auto_sample = false;

@ConfField(description = {
"控制统计信息的自动触发作业执行记录的持久化行数",
"Determine the persist number of automatic triggered analyze job execution status"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
import org.apache.doris.statistics.BaseAnalysisTask;
import org.apache.doris.statistics.HistogramTask;
import org.apache.doris.statistics.MVAnalysisTask;
import org.apache.doris.statistics.OlapAnalysisTask;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.statistics.util.StatisticsUtil;
Expand Down Expand Up @@ -1102,11 +1101,9 @@ public TTableDescriptor toThrift() {
public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) {
if (info.analysisType.equals(AnalysisType.HISTOGRAM)) {
return new HistogramTask(info);
}
if (info.analysisType.equals(AnalysisType.FUNDAMENTALS)) {
} else {
return new OlapAnalysisTask(info);
}
return new MVAnalysisTask(info);
}

public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
Expand All @@ -1126,7 +1123,7 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
}
long updateRows = tblStats.updatedRows.get();
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
return tblHealth < Config.table_stats_health_threshold;
return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -571,10 +571,15 @@ private Statistics computeFilter(Filter filter) {
}

private ColumnStatistic getColumnStatistic(TableIf table, String colName) {
ConnectContext connectContext = ConnectContext.get();
if (connectContext != null && connectContext.getSessionVariable().internalSession) {
return ColumnStatistic.UNKNOWN;
}
if (totalColumnStatisticMap.get(table.getName() + colName) != null) {
return totalColumnStatisticMap.get(table.getName() + colName);
} else if (isPlayNereidsDump) {
return ColumnStatistic.UNKNOWN;

} else {
long catalogId;
long dbId;
Expand Down
70 changes: 63 additions & 7 deletions fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
* System variable.
Expand Down Expand Up @@ -412,6 +413,19 @@ public class SessionVariable implements Serializable, Writable {

public static final String FASTER_FLOAT_CONVERT = "faster_float_convert";

public static final String ENABLE_DECIMAL256 = "enable_decimal256";

public static final String STATS_INSERT_MERGE_ITEM_COUNT = "stats_insert_merge_item_count";

public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = "huge_table_default_sample_rows";
public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = "huge_table_lower_bound_size_in_bytes";

public static final String HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS
= "huge_table_auto_analyze_interval_in_millis";

public static final String TABLE_STATS_HEALTH_THRESHOLD
= "table_stats_health_threshold";

public static final List<String> DEBUG_VARIABLES = ImmutableList.of(
SKIP_DELETE_PREDICATE,
SKIP_DELETE_BITMAP,
Expand Down Expand Up @@ -465,7 +479,7 @@ public class SessionVariable implements Serializable, Writable {
public int queryTimeoutS = 900;

// query timeout in second.
@VariableMgr.VarAttr(name = ANALYZE_TIMEOUT, needForward = true)
@VariableMgr.VarAttr(name = ANALYZE_TIMEOUT, flag = VariableMgr.GLOBAL, needForward = true)
public int analyzeTimeoutS = 43200;

// The global max_execution_time value provides the default for the session value for new connections.
Expand Down Expand Up @@ -1156,6 +1170,12 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) {
+ " use a skiplist to optimize the intersection."})
public int invertedIndexConjunctionOptThreshold = 1000;

@VariableMgr.VarAttr(name = FULL_AUTO_ANALYZE_END_TIME, needForward = true, checker = "checkAnalyzeTimeFormat",
description = {"该参数定义自动ANALYZE例程的结束时间",
"This parameter defines the end time for the automatic ANALYZE routine."},
flag = VariableMgr.GLOBAL)
public String fullAutoAnalyzeEndTime = "23:59:59";

@VariableMgr.VarAttr(name = ENABLE_UNIQUE_KEY_PARTIAL_UPDATE, needForward = true)
public boolean enableUniqueKeyPartialUpdate = false;

Expand All @@ -1177,12 +1197,6 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) {
flag = VariableMgr.GLOBAL)
public String fullAutoAnalyzeStartTime = "00:00:00";

@VariableMgr.VarAttr(name = FULL_AUTO_ANALYZE_END_TIME, needForward = true, checker = "checkAnalyzeTimeFormat",
description = {"该参数定义自动ANALYZE例程的结束时间",
"This parameter defines the end time for the automatic ANALYZE routine."},
flag = VariableMgr.GLOBAL)
public String fullAutoAnalyzeEndTime = "02:00:00";

@VariableMgr.VarAttr(name = FASTER_FLOAT_CONVERT,
description = {"是否启用更快的浮点数转换算法,注意会影响输出格式", "Set true to enable faster float pointer number convert"})
public boolean fasterFloatConvert = false;
Expand All @@ -1192,6 +1206,48 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) {
"the runtime filter id in IGNORE_RUNTIME_FILTER_IDS list will not be generated"})

public String ignoreRuntimeFilterIds = "";

@VariableMgr.VarAttr(name = STATS_INSERT_MERGE_ITEM_COUNT, flag = VariableMgr.GLOBAL, description = {
"控制统计信息相关INSERT攒批数量", "Controls the batch size for stats INSERT merging."
}
)
public int statsInsertMergeItemCount = 200;

@VariableMgr.VarAttr(name = HUGE_TABLE_DEFAULT_SAMPLE_ROWS, flag = VariableMgr.GLOBAL, description = {
"定义开启大表自动sample后,对大表的采样比例",
"This defines the number of sample rows for large tables when automatic sampling for "
+ "large tables is enabled"

})
public long hugeTableDefaultSampleRows = 4194304;


@VariableMgr.VarAttr(name = HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES, flag = VariableMgr.GLOBAL,
description = {
"大小超过该值的表将会自动通过采样收集统计信息",
"This defines the lower size bound for large tables. "
+ "When enable_auto_sample is enabled, tables "
+ "larger than this value will automatically collect "
+ "statistics through sampling"})
public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024;

@VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL,
description = {"控制对大表的自动ANALYZE的最小时间间隔,"
+ "在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes的表仅ANALYZE一次",
"This controls the minimum time interval for automatic ANALYZE on large tables. "
+ "Within this interval, "
+ "tables larger than huge_table_lower_bound_size_in_bytes are analyzed only once."})
public long hugeTableAutoAnalyzeIntervalInMillis = TimeUnit.HOURS.toMillis(12);

@VariableMgr.VarAttr(name = TABLE_STATS_HEALTH_THRESHOLD, flag = VariableMgr.GLOBAL,
description = {"取值在0-100之间,当自上次统计信息收集操作之后"
+ "数据更新量达到 (100 - table_stats_health_threshold)% ,认为该表的统计信息已过时",
"The value should be between 0 and 100. When the data update quantity "
+ "exceeds (100 - table_stats_health_threshold)% since the last "
+ "statistics collection operation, the statistics for this table are "
+ "considered outdated."})
public int tableStatsHealthThreshold = 60;

public static final String IGNORE_RUNTIME_FILTER_IDS = "ignore_runtime_filter_ids";

public Set<Integer> getIgnoredRuntimeFilterIds() {
Expand Down
Loading

0 comments on commit 5ddabea

Please sign in to comment.