diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 68767843507ad4..f871e8761a5e55 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -82,6 +82,30 @@ public abstract class BaseAnalysisTask { + "NOW() " + "FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}"; + protected static final String DUJ1_ANALYZE_STRING_TEMPLATE = "SELECT " + + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, " + + "${catalogId} AS `catalog_id`, " + + "${dbId} AS `db_id`, " + + "${tblId} AS `tbl_id`, " + + "${idxId} AS `idx_id`, " + + "'${colId}' AS `col_id`, " + + "NULL AS `part_id`, " + + "${rowCount} AS `row_count`, " + + "${ndvFunction} as `ndv`, " + + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * ${scaleFactor} as `null_count`, " + + "SUBSTRING(CAST(${min} AS STRING), 1, 1024) AS `min`, " + + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, " + + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, " + + "NOW() " + + "FROM ( " + + " SELECT t0.`colValue` as `column_key`, COUNT(1) as `count` " + + " FROM " + + " (SELECT SUBSTRING(CAST(`${colName}` AS STRING), 1, 1024) AS `colValue` " + + " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} " + + " ${sampleHints} ${limit}) as `t0` " + + " GROUP BY `t0`.`colValue` " + + ") as `t1` "; + protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT " + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, " + "${catalogId} AS `catalog_id`, " diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java index ef1b795bd13827..287941be526635 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java @@ -129,7 +129,11 @@ protected void getColumnStats() throws Exception { params.put("ndvFunction", "ROUND(NDV(`${colName}`) * ${scaleFactor})"); params.put("rowCount", "ROUND(count(1) * ${scaleFactor})"); } else { - sb.append(DUJ1_ANALYZE_TEMPLATE); + if (col.getType().isStringType()) { + sb.append(DUJ1_ANALYZE_STRING_TEMPLATE); + } else { + sb.append(DUJ1_ANALYZE_TEMPLATE); + } params.put("dataSizeFunction", getDataSizeFunction(col, true)); params.put("ndvFunction", getNdvFunction("ROUND(SUM(t1.count) * ${scaleFactor})")); params.put("rowCount", "ROUND(SUM(t1.count) * ${scaleFactor})"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index d26de9d9de7141..ee587fc6d09e57 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -166,7 +166,11 @@ protected void doSample() throws Exception { sql = stringSubstitutor.replace(LINEAR_ANALYZE_TEMPLATE); } else { params.put("dataSizeFunction", getDataSizeFunction(col, true)); - sql = stringSubstitutor.replace(DUJ1_ANALYZE_TEMPLATE); + if (col.getType().isStringType()) { + sql = stringSubstitutor.replace(DUJ1_ANALYZE_STRING_TEMPLATE); + } else { + sql = stringSubstitutor.replace(DUJ1_ANALYZE_TEMPLATE); + } } LOG.info("Sample for column [{}]. Total rows [{}], rows to sample [{}], scale factor [{}], " + "limited [{}], distribute column [{}], partition column [{}], key column [{}], " diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 5d9d57406a3a10..75506b1c85a014 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -159,11 +159,10 @@ public void runQuery(String sql) { + " IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, " + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`," + " SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, " - + "SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() " + + "SUM(t1.count) * 4 * 5.0 AS `data_size`, NOW() " + "FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) " - + "as `count` FROM (SELECT `${colName}` FROM " - + "`catalogName`.`${dbName}`.`${tblName}` " - + " limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); + + "as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}`" + + " limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } }; @@ -183,7 +182,7 @@ public Set getDistributionColumnNames() { }; OlapAnalysisTask olapAnalysisTask = new OlapAnalysisTask(); - olapAnalysisTask.col = new Column("test", PrimitiveType.STRING); + olapAnalysisTask.col = new Column("test", PrimitiveType.INT); olapAnalysisTask.tbl = tableIf; AnalysisInfoBuilder analysisInfoBuilder = new AnalysisInfoBuilder(); analysisInfoBuilder.setJobType(AnalysisInfo.JobType.MANUAL); @@ -322,7 +321,8 @@ public void runQuery(String sql) { + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, " + "SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, " + "SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() " - + "FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); + + "FROM ( SELECT t0.`colValue` as `column_key`, COUNT(1) as `count` FROM " + + "(SELECT SUBSTRING(CAST(`${colName}` AS STRING), 1, 1024) AS `colValue` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`colValue` ) as `t1` ", sql); return; } };