From 9949410e5a190e9604aa8cb505a56018311e87d9 Mon Sep 17 00:00:00 2001
From: AKIRA <33112463+Kikyou1997@users.noreply.github.com>
Date: Fri, 24 Nov 2023 00:54:48 +0900
Subject: [PATCH] [doc](stats) SQL manual for stats (#27461)
---
.../Manipulation/ANALYZE.md | 66 +++++++++++
.../Show-Statements/SHOW-ANALYZE.md | 109 +++++++++++++++++
.../Show-Statements/SHOW-COLUMN-STATS.md | 71 +++++++++++
.../Show-Statements/SHOW-TABLE-STATS.md | 74 ++++++++++++
.../Manipulation/ANALYZE.md | 66 +++++++++++
.../Show-Statements/SHOW-ANALYZE.md | 111 ++++++++++++++++++
.../Show-Statements/SHOW-COLUMN-STATS.md | 72 ++++++++++++
.../Show-Statements/SHOW-TABLE-STATS.md | 77 ++++++++++++
8 files changed, 646 insertions(+)
create mode 100644 docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/ANALYZE.md
create mode 100644 docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-ANALYZE.md
create mode 100644 docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-COLUMN-STATS.md
create mode 100644 docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATS.md
create mode 100644 docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/ANALYZE.md
create mode 100644 docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-ANALYZE.md
create mode 100644 docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-COLUMN-STATS.md
create mode 100644 docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATS.md
diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/ANALYZE.md b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/ANALYZE.md
new file mode 100644
index 00000000000000..2a02d4b5926141
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/ANALYZE.md
@@ -0,0 +1,66 @@
+---
+{
+ "title": "ANALYZE",
+ "language": "en"
+}
+---
+
+
+
+## ANALYZE
+
+### Name
+
+
+
+ANALYZE
+
+### Description
+
+This statement is used to collect statistical information for various columns.
+
+```sql
+ANALYZE < TABLE table_name | DATABASE db_name >
+ [ (column_name [, ...]) ]
+ [ [ WITH SYNC ] [ WITH SAMPLE PERCENT | ROWS ] ];
+```
+
+- `table_name`: The specified target table. It can be in the format `db_name.table_name`.
+- `column_name`: The specified target column. It must be an existing column in `table_name`. You can specify multiple column names separated by commas.
+- `sync`: Collect statistics synchronously. Returns after collection. If not specified, it executes asynchronously and returns a JOB ID.
+- `sample percent | rows`: Collect statistics with sampling. You can specify a sampling percentage or a number of sampling rows.
+
+### Example
+
+Collect statistical data for a table with a 10% sampling rate:
+
+```sql
+ANALYZE TABLE lineitem WITH SAMPLE PERCENT 10;
+```
+
+Collect statistical data for a table with a sample of 100,000 rows:
+
+```sql
+ANALYZE TABLE lineitem WITH SAMPLE ROWS 100000;
+```
+
+### Keywords
+
+ANALYZE
\ No newline at end of file
diff --git a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-ANALYZE.md b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-ANALYZE.md
new file mode 100644
index 00000000000000..73ddaaa80999e1
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-ANALYZE.md
@@ -0,0 +1,109 @@
+---
+{
+ "title": "SHOW-ANALYZE",
+ "language": "en"
+}
+---
+
+
+
+## SHOW-ANALYZE
+
+### Name
+
+SHOW ANALYZE
+
+### Description
+
+Use `SHOW ANALYZE` to view information about statistics collection jobs.
+
+Syntax:
+
+```SQL
+SHOW [AUTO] ANALYZE [ table_name | job_id ]
+ [ WHERE [ STATE = [ "PENDING" | "RUNNING" | "FINISHED" | "FAILED" ] ] ];
+```
+
+- AUTO: Show historical information for automatic collection jobs only. Note that, by default, the status of only the last 20,000 completed automatic collection jobs is retained.
+- table_name: Table name. When specified, only the statistics job information for that table is shown. It can be in the format `db_name.table_name`. When not specified, information for all statistics jobs is returned.
+- job_id: Job ID for statistics collection, returned when `ANALYZE` is executed asynchronously. When not specified, this command returns information for all statistics jobs.
+
+Output:
+
+| Column Name | Description |
+| :--------------------- | :--------------- |
+| `job_id` | Job ID |
+| `catalog_name` | Catalog Name |
+| `db_name` | Database Name |
+| `tbl_name` | Table Name |
+| `col_name` | Column Name List |
+| `job_type` | Job Type |
+| `analysis_type` | Analysis Type |
+| `message` | Job Information |
+| `last_exec_time_in_ms` | Last Execution Time |
+| `state` | Job Status |
+| `schedule_type` | Scheduling Method |
+
+Here's an example:
+
+```sql
+mysql> show analyze 245073\G;
+*************************** 1. row ***************************
+ job_id: 245073
+ catalog_name: internal
+ db_name: default_cluster:tpch
+ tbl_name: lineitem
+ col_name: [l_returnflag,l_receiptdate,l_tax,l_shipmode,l_suppkey,l_shipdate,l_commitdate,l_partkey,l_orderkey,l_quantity,l_linestatus,l_comment,l_extendedprice,l_linenumber,l_discount,l_shipinstruct]
+ job_type: MANUAL
+ analysis_type: FUNDAMENTALS
+ message:
+last_exec_time_in_ms: 2023-11-07 11:00:52
+ state: FINISHED
+ progress: 16 Finished | 0 Failed | 0 In Progress | 16 Total
+ schedule_type: ONCE
+```
+
+
+
+Each collection job can contain one or more tasks, with each task corresponding to the collection of a column. Users can use the following command to view the completion status of statistics collection for each column.
+
+Syntax:
+
+```sql
+SHOW ANALYZE TASK STATUS [job_id]
+```
+
+Here's an example:
+
+```
+mysql> show analyze task status 20038 ;
++---------+----------+---------+----------------------+----------+
+| task_id | col_name | message | last_exec_time_in_ms | state |
++---------+----------+---------+----------------------+----------+
+| 20039 | col4 | | 2023-06-01 17:22:15 | FINISHED |
+| 20040 | col2 | | 2023-06-01 17:22:15 | FINISHED |
+| 20041 | col3 | | 2023-06-01 17:22:15 | FINISHED |
+| 20042 | col1 | | 2023-06-01 17:22:15 | FINISHED |
++---------+----------+---------+----------------------+----------+
+```
+
+### Keywords
+
+SHOW, ANALYZE
\ No newline at end of file
diff --git a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-COLUMN-STATS.md b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-COLUMN-STATS.md
new file mode 100644
index 00000000000000..b64c5ad6f71e53
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-COLUMN-STATS.md
@@ -0,0 +1,71 @@
+---
+{
+ "title": "SHOW-COLUMN-STATS",
+ "language": "en"
+}
+---
+
+
+
+## SHOW-COLUMN-STATS
+
+### Name
+
+SHOW COLUMN STATS
+
+### Description
+
+Use `SHOW COLUMN STATS` to view various statistics data for columns.
+
+Syntax:
+
+```SQL
+SHOW COLUMN [cached] STATS table_name [ (column_name [, ...]) ];
+```
+
+Where:
+
+- cached: Show statistics information in the current FE memory cache.
+- table_name: The target table whose column statistics are shown. It can be in the format `db_name.table_name`.
+- column_name: Specifies the target column, which must be an existing column in `table_name`. You can specify multiple column names separated by commas.
+
+Here's an example:
+
+```sql
+mysql> show column stats lineitem(l_tax)\G;
+*************************** 1. row ***************************
+ column_name: l_tax
+ count: 6001215.0
+ ndv: 9.0
+ num_null: 0.0
+ data_size: 4.800972E7
+avg_size_byte: 8.0
+ min: 0.00
+ max: 0.08
+ method: FULL
+ type: FUNDAMENTALS
+ trigger: MANUAL
+ query_times: 0
+ updated_time: 2023-11-07 11:00:46
+```
+
+### Keywords
+
+SHOW, COLUMN, STATS
diff --git a/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATS.md b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATS.md
new file mode 100644
index 00000000000000..06f59b27ea5000
--- /dev/null
+++ b/docs/en/docs/sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATS.md
@@ -0,0 +1,74 @@
+---
+{
+ "title": "SHOW-TABLE-STATS",
+ "language": "en"
+}
+---
+
+
+
+## SHOW-TABLE-STATS
+
+### Name
+
+SHOW TABLE STATS
+
+### Description
+
+Use `SHOW TABLE STATS` to view an overview of statistics collection for a table.
+
+Syntax:
+
+```SQL
+SHOW TABLE STATS table_name;
+```
+
+Where:
+
+- table_name: The target table name. It can be in the format `db_name.table_name`.
+
+Output:
+
+| Column Name | Description |
+| :--------------------- | :--------------- |
+| `updated_rows` | Updated rows since the last ANALYZE |
+| `query_times` | Reserved column; future versions will use it to record the number of times the table has been queried |
+| `row_count` | Number of rows (does not reflect the exact number of rows at the time of command execution) |
+| `updated_time` | Last update time |
+| `columns` | Columns for which statistics information has been collected |
+
+Here's an example:
+
+```sql
+mysql> show table stats lineitem \G;
+*************************** 1. row ***************************
+updated_rows: 0
+ query_times: 0
+ row_count: 6001215
+updated_time: 2023-11-07
+ columns: [l_returnflag, l_receiptdate, l_tax, l_shipmode, l_suppkey, l_shipdate, l_commitdate, l_partkey, l_orderkey, l_quantity, l_linestatus, l_comment, l_extendedprice, l_linenumber, l_discount, l_shipinstruct]
+ trigger: MANUAL
+```
+
+
+
+### Keywords
+
+SHOW, TABLE, STATS
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/ANALYZE.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/ANALYZE.md
new file mode 100644
index 00000000000000..67fee1d78e2b9f
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Manipulation/ANALYZE.md
@@ -0,0 +1,66 @@
+---
+{
+ "title": "ANALYZE",
+ "language": "zh-CN"
+}
+---
+
+
+
+## ANALYZE
+
+### Name
+
+
+
+ANALYZE
+
+### Description
+
+该语句用于收集各列的统计信息。
+
+```sql
+ANALYZE < TABLE table_name | DATABASE db_name >
+ [ (column_name [, ...]) ]
+ [ [ WITH SYNC ] [ WITH SAMPLE PERCENT | ROWS ] ];
+```
+
+- table_name: 指定的目标表。可以是 `db_name.table_name` 形式。
+- column_name: 指定的目标列。必须是 `table_name` 中存在的列,多个列名称用逗号分隔。
+- sync:同步收集统计信息。收集完后返回。若不指定则异步执行并返回JOB ID。
+- sample percent | rows:抽样收集统计信息。可以指定抽样比例或者抽样行数。
+
+### Example
+
+对一张表按照10%的比例采样收集统计数据:
+
+```sql
+ANALYZE TABLE lineitem WITH SAMPLE PERCENT 10;
+```
+
+对一张表按采样10万行收集统计数据:
+
+```sql
+ANALYZE TABLE lineitem WITH SAMPLE ROWS 100000;
+```
+
+### Keywords
+
+ANALYZE
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-ANALYZE.md b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-ANALYZE.md
new file mode 100644
index 00000000000000..3feae39b2ad28a
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-ANALYZE.md
@@ -0,0 +1,111 @@
+---
+{
+ "title": "SHOW-ANALYZE",
+ "language": "zh-CN"
+}
+---
+
+
+
+## SHOW-ANALYZE
+
+### Name
+
+SHOW ANALYZE
+
+### Description
+
+通过 `SHOW ANALYZE` 来查看统计信息收集作业的信息。
+
+语法如下:
+
+```SQL
+SHOW [AUTO] ANALYZE [ table_name | job_id ]
+ [ WHERE [ STATE = [ "PENDING" | "RUNNING" | "FINISHED" | "FAILED" ] ] ];
+```
+
+- AUTO:仅仅展示自动收集历史作业信息。需要注意的是默认只保存过去20000个执行完毕的自动收集作业的状态。
+- table_name:表名,指定后可查看该表对应的统计作业信息。可以是 `db_name.table_name` 形式。不指定时返回所有统计作业信息。
+- job_id:统计信息作业 ID,执行 `ANALYZE` 异步收集时得到。不指定id时此命令返回所有统计作业信息。
+
+输出:
+
+| 列名 | 说明 |
+| :--------------------- | :----------- |
+| `job_id` | 统计作业 ID |
+| `catalog_name` | catalog 名称 |
+| `db_name` | 数据库名称 |
+| `tbl_name` | 表名称 |
+| `col_name` | 列名称列表 |
+| `job_type` | 作业类型 |
+| `analysis_type` | 统计类型 |
+| `message` | 作业信息 |
+| `last_exec_time_in_ms` | 上次执行时间 |
+| `state` | 作业状态 |
+| `schedule_type` | 调度方式 |
+
+下面是一个例子:
+
+```sql
+mysql> show analyze 245073\G;
+*************************** 1. row ***************************
+ job_id: 245073
+ catalog_name: internal
+ db_name: default_cluster:tpch
+ tbl_name: lineitem
+ col_name: [l_returnflag,l_receiptdate,l_tax,l_shipmode,l_suppkey,l_shipdate,l_commitdate,l_partkey,l_orderkey,l_quantity,l_linestatus,l_comment,l_extendedprice,l_linenumber,l_discount,l_shipinstruct]
+ job_type: MANUAL
+ analysis_type: FUNDAMENTALS
+ message:
+last_exec_time_in_ms: 2023-11-07 11:00:52
+ state: FINISHED
+ progress: 16 Finished | 0 Failed | 0 In Progress | 16 Total
+ schedule_type: ONCE
+```
+
+
+
+每个收集作业中可以包含一到多个任务,每个任务对应一列的收集。用户可通过如下命令查看具体每列的统计信息收集完成情况。
+
+语法:
+
+```sql
+SHOW ANALYZE TASK STATUS [job_id]
+```
+
+下面是一个例子:
+
+```
+mysql> show analyze task status 20038 ;
++---------+----------+---------+----------------------+----------+
+| task_id | col_name | message | last_exec_time_in_ms | state |
++---------+----------+---------+----------------------+----------+
+| 20039 | col4 | | 2023-06-01 17:22:15 | FINISHED |
+| 20040 | col2 | | 2023-06-01 17:22:15 | FINISHED |
+| 20041 | col3 | | 2023-06-01 17:22:15 | FINISHED |
+| 20042 | col1 | | 2023-06-01 17:22:15 | FINISHED |
++---------+----------+---------+----------------------+----------+
+
+
+```
+
+### Keywords
+
+SHOW, ANALYZE
\ No newline at end of file
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-COLUMN-STATS.md b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-COLUMN-STATS.md
new file mode 100644
index 00000000000000..15d91b8a91455d
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-COLUMN-STATS.md
@@ -0,0 +1,72 @@
+---
+{
+ "title": "SHOW-COLUMN-STATS",
+ "language": "zh-CN"
+}
+---
+
+
+
+## SHOW-COLUMN-STATS
+
+### Name
+
+SHOW COLUMN STATS
+
+### Description
+
+通过 `SHOW COLUMN STATS` 来查看列的各项统计数据。
+
+语法如下:
+
+```SQL
+SHOW COLUMN [cached] STATS table_name [ (column_name [, ...]) ];
+```
+
+其中:
+
+- cached: 展示当前FE内存缓存中的统计信息。
+- table_name: 收集统计信息的目标表。可以是 `db_name.table_name` 形式。
+- column_name: 指定的目标列,必须是 `table_name` 中存在的列,多个列名称用逗号分隔。
+
+下面是一个例子:
+
+```sql
+mysql> show column stats lineitem(l_tax)\G;
+*************************** 1. row ***************************
+ column_name: l_tax
+ count: 6001215.0
+ ndv: 9.0
+ num_null: 0.0
+ data_size: 4.800972E7
+avg_size_byte: 8.0
+ min: 0.00
+ max: 0.08
+ method: FULL
+ type: FUNDAMENTALS
+ trigger: MANUAL
+ query_times: 0
+ updated_time: 2023-11-07 11:00:46
+
+```
+
+### Keywords
+
+SHOW, COLUMN, STATS
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATS.md b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATS.md
new file mode 100644
index 00000000000000..97391ed92ebfab
--- /dev/null
+++ b/docs/zh-CN/docs/sql-manual/sql-reference/Show-Statements/SHOW-TABLE-STATS.md
@@ -0,0 +1,77 @@
+---
+{
+ "title": "SHOW-TABLE-STATS",
+ "language": "zh-CN"
+}
+---
+
+
+
+## SHOW-TABLE-STATS
+
+### Name
+
+SHOW TABLE STATS
+
+### Description
+
+通过 `SHOW TABLE STATS` 查看表的统计信息收集概况。
+
+语法如下:
+
+```SQL
+SHOW TABLE STATS table_name;
+```
+
+其中:
+
+- table_name: 目标表表名。可以是 `db_name.table_name` 形式。
+
+输出:
+
+| 列名 | 说明 |
+| :------------------ | :--------------------- |
+|`updated_rows`|自上次ANALYZE以来该表的更新行数|
+|`query_times`|保留列,后续版本用以记录该表查询次数|
+|`row_count`| 行数(不反映命令执行时的准确行数)|
+|`updated_time`| 上次更新时间|
+|`columns`| 收集过统计信息的列|
+|`trigger`|触发方式|
+
+下面是一个例子:
+
+```sql
+mysql> show table stats lineitem \G;
+*************************** 1. row ***************************
+updated_rows: 0
+ query_times: 0
+ row_count: 6001215
+updated_time: 2023-11-07
+ columns: [l_returnflag, l_receiptdate, l_tax, l_shipmode, l_suppkey, l_shipdate, l_commitdate, l_partkey, l_orderkey, l_quantity, l_linestatus, l_comment, l_extendedprice, l_linenumber, l_discount, l_shipinstruct]
+ trigger: MANUAL
+```
+
+
+
+
+
+### Keywords
+
+SHOW, TABLE, STATS