From 3a7ce058a10b859bca292a28ade988e49679433b Mon Sep 17 00:00:00 2001 From: Eric Harmeling Date: Wed, 26 Aug 2020 13:28:22 -0400 Subject: [PATCH] Multi-column statistics docs update; updated STATISTICS statement examples and notes --- _includes/v19.2/misc/delete-statistics.md | 4 +- _includes/v20.1/misc/delete-statistics.md | 4 +- _includes/v20.2/misc/delete-statistics.md | 4 +- v19.2/create-statistics.md | 99 ++++++++++----- v19.2/show-statistics.md | 24 ++-- v20.1/create-statistics.md | 99 ++++++++++----- v20.1/show-statistics.md | 34 +++--- v20.2/cost-based-optimizer.md | 8 +- v20.2/create-statistics.md | 142 +++++++++++++++++----- v20.2/show-statistics.md | 39 +++--- 10 files changed, 312 insertions(+), 145 deletions(-) diff --git a/_includes/v19.2/misc/delete-statistics.md b/_includes/v19.2/misc/delete-statistics.md index a568055e583..a850a1ed654 100644 --- a/_includes/v19.2/misc/delete-statistics.md +++ b/_includes/v19.2/misc/delete-statistics.md @@ -5,11 +5,11 @@ To delete statistics for all tables in all databases: > DELETE FROM system.table_statistics WHERE true; ~~~ -To delete a named set of statistics (e.g, one named "my_stats"), run a query like the following: +To delete a named set of statistics (e.g., one named "users_stats"), run a query like the following: {% include copy-clipboard.html %} ~~~ sql -> DELETE FROM system.table_statistics WHERE name = 'my_stats'; +> DELETE FROM system.table_statistics WHERE name = 'users_stats'; ~~~ After deleting statistics, restart the nodes in your cluster to clear the statistics caches. 
diff --git a/_includes/v20.1/misc/delete-statistics.md b/_includes/v20.1/misc/delete-statistics.md index a568055e583..a850a1ed654 100644 --- a/_includes/v20.1/misc/delete-statistics.md +++ b/_includes/v20.1/misc/delete-statistics.md @@ -5,11 +5,11 @@ To delete statistics for all tables in all databases: > DELETE FROM system.table_statistics WHERE true; ~~~ -To delete a named set of statistics (e.g, one named "my_stats"), run a query like the following: +To delete a named set of statistics (e.g., one named "users_stats"), run a query like the following: {% include copy-clipboard.html %} ~~~ sql -> DELETE FROM system.table_statistics WHERE name = 'my_stats'; +> DELETE FROM system.table_statistics WHERE name = 'users_stats'; ~~~ After deleting statistics, restart the nodes in your cluster to clear the statistics caches. diff --git a/_includes/v20.2/misc/delete-statistics.md b/_includes/v20.2/misc/delete-statistics.md index a568055e583..a850a1ed654 100644 --- a/_includes/v20.2/misc/delete-statistics.md +++ b/_includes/v20.2/misc/delete-statistics.md @@ -5,11 +5,11 @@ To delete statistics for all tables in all databases: > DELETE FROM system.table_statistics WHERE true; ~~~ -To delete a named set of statistics (e.g, one named "my_stats"), run a query like the following: +To delete a named set of statistics (e.g., one named "users_stats"), run a query like the following: {% include copy-clipboard.html %} ~~~ sql -> DELETE FROM system.table_statistics WHERE name = 'my_stats'; +> DELETE FROM system.table_statistics WHERE name = 'users_stats'; ~~~ After deleting statistics, restart the nodes in your cluster to clear the statistics caches. 
diff --git a/v19.2/create-statistics.md b/v19.2/create-statistics.md index 2ad19b0e6bf..0f6c40b560d 100644 --- a/v19.2/create-statistics.md +++ b/v19.2/create-statistics.md @@ -7,13 +7,8 @@ Use the `CREATE STATISTICS` [statement](sql-statements.html) to generate table s Once you [create a table](create-table.html) and load data into it (e.g., [`INSERT`](insert.html), [`IMPORT`](import.html)), table statistics can be generated. Table statistics help the cost-based optimizer determine the cardinality of the rows used in each query, which helps to predict more accurate costs. -`CREATE STATISTICS` automatically figures out which columns to get statistics on — specifically, it chooses: - -- Columns that are part of the primary key or an index (in other words, all indexed columns). -- Up to 100 non-indexed columns (unless you specify which columns to create statistics on, as shown in [this example](#create-statistics-on-a-specific-column)). - {{site.data.alerts.callout_info}} -[Automatic statistics is enabled by default](cost-based-optimizer.html#table-statistics); most users don't need to issue `CREATE STATISTICS` statements directly. +[By default, CockroachDB automatically generates statistics](cost-based-optimizer.html#table-statistics) on all indexed columns, and up to 100 non-indexed columns. As a result, most users don't need to issue `CREATE STATISTICS` statements directly. {{site.data.alerts.end}} ## Synopsis @@ -37,27 +32,69 @@ The user must have the `CREATE` [privilege](authorization.html#assign-privileges ## Examples -### Create statistics on a specific column +{% include {{page.version.version}}/sql/movr-statements.md %} + +### Create statistics on a single column {% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS students ON id FROM students_by_list; +> CREATE STATISTICS revenue_stats ON revenue FROM rides; ~~~ -{{site.data.alerts.callout_info}} -Multi-column statistics are not supported yet. 
-{{site.data.alerts.end}} +{% include copy-clipboard.html %} +~~~ sql +> SHOW STATISTICS FOR TABLE rides; +~~~ + +~~~ + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id ++-----------------+-----------------+----------------------------------+-----------+----------------+------------+--------------------+ + __auto__ | {city} | 2020-08-26 17:24:25.334218+00:00 | 500 | 9 | 0 | 584555775053725697 + __auto__ | {vehicle_city} | 2020-08-26 17:24:25.334218+00:00 | 500 | 9 | 0 | 584555775060344833 + __auto__ | {id} | 2020-08-26 17:24:25.334218+00:00 | 500 | 500 | 0 | NULL + __auto__ | {rider_id} | 2020-08-26 17:24:25.334218+00:00 | 500 | 50 | 0 | NULL + __auto__ | {vehicle_id} | 2020-08-26 17:24:25.334218+00:00 | 500 | 15 | 0 | NULL + __auto__ | {start_address} | 2020-08-26 17:24:25.334218+00:00 | 500 | 500 | 0 | NULL + __auto__ | {end_address} | 2020-08-26 17:24:25.334218+00:00 | 500 | 500 | 0 | NULL + __auto__ | {start_time} | 2020-08-26 17:24:25.334218+00:00 | 500 | 30 | 0 | NULL + __auto__ | {end_time} | 2020-08-26 17:24:25.334218+00:00 | 500 | 367 | 0 | NULL + __auto__ | {revenue} | 2020-08-26 17:24:25.334218+00:00 | 500 | 100 | 0 | NULL + revenue_stats | {revenue} | 2020-08-26 17:24:34.494008+00:00 | 500 | 100 | 0 | 584555805068886017 +(11 rows) +~~~ + +Note that statistics are automatically collected for all columns in the `rides` table, making the `revenue_stats` statistics a duplicate of the statistics automatically collected on the `revenue` column. ### Create statistics on a default set of columns -The `CREATE STATISTICS` statement shown below automatically figures out which columns to get statistics on — specifically, it chooses: +The `CREATE STATISTICS` statement shown below automatically figures out which columns to get statistics on. -- Columns that are part of the primary key or an index (in other words, all indexed columns). -- Up to 100 non-indexed columns. 
+{% include copy-clipboard.html %} +~~~ sql +> CREATE STATISTICS users_stats FROM users; +~~~ + +This statement creates statistics identical to the statistics that CockroachDB creates automatically. {% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS students FROM students_by_list; +> SHOW STATISTICS FOR TABLE users; +~~~ + +~~~ + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id ++-----------------+---------------+----------------------------------+-----------+----------------+------------+--------------------+ + __auto__ | {city} | 2020-08-26 17:24:25.305468+00:00 | 50 | 9 | 0 | 584555774958108673 + __auto__ | {id} | 2020-08-26 17:24:25.305468+00:00 | 50 | 50 | 0 | NULL + __auto__ | {name} | 2020-08-26 17:24:25.305468+00:00 | 50 | 49 | 0 | NULL + __auto__ | {address} | 2020-08-26 17:24:25.305468+00:00 | 50 | 50 | 0 | NULL + __auto__ | {credit_card} | 2020-08-26 17:24:25.305468+00:00 | 50 | 50 | 0 | NULL + users_stats | {city} | 2020-08-26 17:24:53.49405+00:00 | 50 | 9 | 0 | 584555867327430657 + users_stats | {id} | 2020-08-26 17:24:53.49405+00:00 | 50 | 50 | 0 | NULL + users_stats | {name} | 2020-08-26 17:24:53.49405+00:00 | 50 | 49 | 0 | NULL + users_stats | {address} | 2020-08-26 17:24:53.49405+00:00 | 50 | 50 | 0 | NULL + users_stats | {credit_card} | 2020-08-26 17:24:53.49405+00:00 | 50 | 50 | 0 | NULL +(10 rows) ~~~ ### Create statistics as of a given time @@ -66,7 +103,7 @@ To create statistics as of a given time (in this example, 1 minute ago to avoid {% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS employee_stats FROM employees AS OF SYSTEM TIME '-1m'; +> CREATE STATISTICS vehicle_stats_1 FROM vehicles AS OF SYSTEM TIME '-1m'; ~~~ For more information about how the `AS OF SYSTEM TIME` clause works, including supported time formats, see [`AS OF SYSTEM TIME`](as-of-system-time.html). 
@@ -89,11 +126,12 @@ To view statistics jobs, there are two options: ~~~ ~~~ - job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id - --------------------+--------------+------------------------------------------------------------------+-----------+-----------+-----------+----------------+----------------------------+----------------------------+----------------------------+----------------------------+--------------------+-------+---------------- - 441281249412743169 | CREATE STATS | CREATE STATISTICS salary_stats FROM employees.public.salaries | | root | succeeded | | 2019-04-08 15:52:30.040531 | 2019-04-08 15:52:30.046646 | 2019-04-08 15:52:32.757519 | 2019-04-08 15:52:32.757519 | 1 | | 1 - 441281163978637313 | CREATE STATS | CREATE STATISTICS employee_stats FROM employees.public.employees | | root | succeeded | | 2019-04-08 15:52:03.968099 | 2019-04-08 15:52:03.972557 | 2019-04-08 15:52:05.168809 | 2019-04-08 15:52:05.168809 | 1 | | 1 - (2 rows) + job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id + +--------------------+--------------+--------------------------------------------------------------------------------------------------+-----------+-----------+-----------+----------------+----------------------------------+----------------------------------+----------------------------------+----------------------------------+--------------------+-------+----------------+ + 584555805032710145 | CREATE STATS | CREATE STATISTICS revenue_stats ON revenue FROM movr.public.rides | | root | succeeded | NULL | 2020-08-26 17:24:34.485089+00:00 | 2020-08-26 17:24:34.487231+00:00 | 2020-08-26 17:24:34.49702+00:00 | 2020-08-26 17:24:34.496442+00:00 | 1 | | 1 + 584555867287060481 | CREATE STATS | CREATE STATISTICS users_stats FROM 
movr.public.users | | root | succeeded | NULL | 2020-08-26 17:24:53.483605+00:00 | 2020-08-26 17:24:53.486025+00:00 | 2020-08-26 17:24:53.505254+00:00 | 2020-08-26 17:24:53.504697+00:00 | 1 | | 1 + 584555915664261121 | CREATE STATS | CREATE STATISTICS vehicle_stats_1 FROM movr.public.vehicles WITH OPTIONS AS OF SYSTEM TIME '-1m' | | root | succeeded | NULL | 2020-08-26 17:25:08.247163+00:00 | 2020-08-26 17:25:08.252334+00:00 | 2020-08-26 17:25:08.27947+00:00 | 2020-08-26 17:25:08.278204+00:00 | 1 | | 1 + (3 rows) ~~~ 2. Use `SHOW AUTOMATIC JOBS` to see statistics jobs that were created by the [automatic statistics feature](cost-based-optimizer.html#table-statistics): @@ -104,16 +142,15 @@ To view statistics jobs, there are two options: ~~~ ~~~ - job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id - --------------------+-------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------+-----------+-----------+----------------+----------------------------+----------------------------+----------------------------+----------------------------+--------------------+-------+---------------- - 441280366254850049 | AUTO CREATE STATS | Table statistics refresh for employees.public.departments | CREATE STATISTICS __auto__ FROM [55] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:48:00.522119 | 2019-04-08 15:48:00.52663 | 2019-04-08 15:48:00.541608 | 2019-04-08 15:48:00.541608 | 1 | | 1 - 441280364809289729 | AUTO CREATE STATS | Table statistics refresh for employees.public.titles | CREATE STATISTICS __auto__ FROM [60] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:48:00.080971 | 2019-04-08 15:48:00.083117 | 2019-04-08 15:48:00.515766 | 2019-04-08 15:48:00.515767 | 1 | | 1 
- 441280356286201857 | AUTO CREATE STATS | Table statistics refresh for employees.public.salaries | CREATE STATISTICS __auto__ FROM [59] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:57.479929 | 2019-04-08 15:47:57.482235 | 2019-04-08 15:48:00.075025 | 2019-04-08 15:48:00.075025 | 1 | | 1 - 441280352161693697 | AUTO CREATE STATS | Table statistics refresh for employees.public.employees | CREATE STATISTICS __auto__ FROM [58] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:56.221223 | 2019-04-08 15:47:56.223664 | 2019-04-08 15:47:57.474159 | 2019-04-08 15:47:57.474159 | 1 | | 1 - 441280352070434817 | AUTO CREATE STATS | Table statistics refresh for employees.public.dept_manager | CREATE STATISTICS __auto__ FROM [57] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:56.193375 | 2019-04-08 15:47:56.195813 | 2019-04-08 15:47:56.215114 | 2019-04-08 15:47:56.215114 | 1 | | 1 - 441280350791401473 | AUTO CREATE STATS | Table statistics refresh for employees.public.dept_emp | CREATE STATISTICS __auto__ FROM [56] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:55.803052 | 2019-04-08 15:47:55.806071 | 2019-04-08 15:47:56.187153 | 2019-04-08 15:47:56.187154 | 1 | | 1 - 441279760786096129 | AUTO CREATE STATS | Table statistics refresh for test.public.kv | CREATE STATISTICS __auto__ FROM [53] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:44:55.747725 | 2019-04-08 15:44:55.754582 | 2019-04-08 15:44:55.775664 | 2019-04-08 15:44:55.775665 | 1 | | 1 - (7 rows) + job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id + 
+--------------------+-------------------+---------------------------------------------------------------------+-------------------------------------------------------------------------------------------+-----------+-----------+----------------+----------------------------------+----------------------------------+----------------------------------+----------------------------------+--------------------+-------+----------------+ + 584555774723129345 | AUTO CREATE STATS | Table statistics refresh for movr.public.promo_codes | CREATE STATISTICS __auto__ FROM [57] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:24:25.23534+00:00 | 2020-08-26 17:24:25.237261+00:00 | 2020-08-26 17:24:25.258822+00:00 | 2020-08-26 17:24:25.258199+00:00 | 1 | | 1 + 584555774808096769 | AUTO CREATE STATS | Table statistics refresh for movr.public.vehicles | CREATE STATISTICS __auto__ FROM [54] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:24:25.261267+00:00 | 2020-08-26 17:24:25.263309+00:00 | 2020-08-26 17:24:25.292766+00:00 | 2020-08-26 17:24:25.292114+00:00 | 1 | | 1 + 584555774921211905 | AUTO CREATE STATS | Table statistics refresh for movr.public.users | CREATE STATISTICS __auto__ FROM [53] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:24:25.295787+00:00 | 2020-08-26 17:24:25.297427+00:00 | 2020-08-26 17:24:25.31669+00:00 | 2020-08-26 17:24:25.315689+00:00 | 1 | | 1 + 584555775000444929 | AUTO CREATE STATS | Table statistics refresh for movr.public.rides | CREATE STATISTICS __auto__ FROM [55] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:24:25.31997+00:00 | 2020-08-26 17:24:25.322527+00:00 | 2020-08-26 17:24:25.35465+00:00 | 2020-08-26 17:24:25.353909+00:00 | 1 | | 1 + 584555775125815297 | AUTO CREATE STATS | Table statistics refresh for movr.public.user_promo_codes | CREATE STATISTICS __auto__ 
FROM [58] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:24:25.35823+00:00 | 2020-08-26 17:24:25.35987+00:00 | 2020-08-26 17:24:25.380727+00:00 | 2020-08-26 17:24:25.380128+00:00 | 1 | | 1 + 584555775206588417 | AUTO CREATE STATS | Table statistics refresh for movr.public.vehicle_location_histories | CREATE STATISTICS __auto__ FROM [56] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:24:25.382874+00:00 | 2020-08-26 17:24:25.384203+00:00 | 2020-08-26 17:24:25.405608+00:00 | 2020-08-26 17:24:25.404834+00:00 | 1 | | 1 + (6 rows) ~~~ ## See Also diff --git a/v19.2/show-statistics.md b/v19.2/show-statistics.md index 788c72c7030..1f0cb5ad979 100644 --- a/v19.2/show-statistics.md +++ b/v19.2/show-statistics.md @@ -5,6 +5,10 @@ toc: true --- The `SHOW STATISTICS` [statement](sql-statements.html) lists [table statistics](create-statistics.html) used by the [cost-based optimizer](cost-based-optimizer.html). +{{site.data.alerts.callout_info}} +[By default, CockroachDB automatically generates statistics](cost-based-optimizer.html#table-statistics) on all indexed columns, and up to 100 non-indexed columns. +{{site.data.alerts.end}} + ## Synopsis
@@ -23,27 +27,13 @@ Parameter | Description ## Examples -### List table statistics - -{% include copy-clipboard.html %} -~~~ sql -> CREATE STATISTICS students ON id FROM students_by_list; -~~~ +{% include {{page.version.version}}/sql/movr-statements.md %} -~~~ -CREATE STATISTICS -~~~ +### List table statistics {% include copy-clipboard.html %} ~~~ sql -> SHOW STATISTICS FOR TABLE students_by_list; -~~~ - -~~~ - statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id -+-----------------+--------------+----------------------------------+-----------+----------------+------------+--------------+ - students | {"id"} | 2018-10-26 15:06:34.320165+00:00 | 0 | 0 | 0 | NULL -(1 row) +> SHOW STATISTICS FOR TABLE rides; ~~~ ### Delete statistics diff --git a/v20.1/create-statistics.md b/v20.1/create-statistics.md index 2ad19b0e6bf..81233172e66 100644 --- a/v20.1/create-statistics.md +++ b/v20.1/create-statistics.md @@ -7,13 +7,8 @@ Use the `CREATE STATISTICS` [statement](sql-statements.html) to generate table s Once you [create a table](create-table.html) and load data into it (e.g., [`INSERT`](insert.html), [`IMPORT`](import.html)), table statistics can be generated. Table statistics help the cost-based optimizer determine the cardinality of the rows used in each query, which helps to predict more accurate costs. -`CREATE STATISTICS` automatically figures out which columns to get statistics on — specifically, it chooses: - -- Columns that are part of the primary key or an index (in other words, all indexed columns). -- Up to 100 non-indexed columns (unless you specify which columns to create statistics on, as shown in [this example](#create-statistics-on-a-specific-column)). - {{site.data.alerts.callout_info}} -[Automatic statistics is enabled by default](cost-based-optimizer.html#table-statistics); most users don't need to issue `CREATE STATISTICS` statements directly. 
+[By default, CockroachDB automatically generates statistics](cost-based-optimizer.html#table-statistics) on all indexed columns, and up to 100 non-indexed columns. As a result, most users don't need to issue `CREATE STATISTICS` statements directly. {{site.data.alerts.end}} ## Synopsis @@ -37,27 +32,69 @@ The user must have the `CREATE` [privilege](authorization.html#assign-privileges ## Examples -### Create statistics on a specific column +{% include {{page.version.version}}/sql/movr-statements.md %} + +### Create statistics on a single column {% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS students ON id FROM students_by_list; +> CREATE STATISTICS revenue_stats ON revenue FROM rides; ~~~ -{{site.data.alerts.callout_info}} -Multi-column statistics are not supported yet. -{{site.data.alerts.end}} +{% include copy-clipboard.html %} +~~~ sql +> SHOW STATISTICS FOR TABLE rides; +~~~ + +~~~ + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id +------------------+-----------------+----------------------------------+-----------+----------------+------------+--------------------- + __auto__ | {city} | 2020-08-26 17:17:13.852138+00:00 | 500 | 9 | 0 | 584554361172525057 + __auto__ | {vehicle_city} | 2020-08-26 17:17:13.852138+00:00 | 500 | 9 | 0 | 584554361179242497 + __auto__ | {id} | 2020-08-26 17:17:13.852138+00:00 | 500 | 500 | 0 | NULL + __auto__ | {rider_id} | 2020-08-26 17:17:13.852138+00:00 | 500 | 50 | 0 | NULL + __auto__ | {vehicle_id} | 2020-08-26 17:17:13.852138+00:00 | 500 | 15 | 0 | NULL + __auto__ | {start_address} | 2020-08-26 17:17:13.852138+00:00 | 500 | 500 | 0 | NULL + __auto__ | {end_address} | 2020-08-26 17:17:13.852138+00:00 | 500 | 500 | 0 | NULL + __auto__ | {start_time} | 2020-08-26 17:17:13.852138+00:00 | 500 | 30 | 0 | NULL + __auto__ | {end_time} | 2020-08-26 17:17:13.852138+00:00 | 500 | 367 | 0 | NULL + __auto__ | {revenue} | 2020-08-26 17:17:13.852138+00:00 | 500 | 100 | 0 | NULL + 
 revenue_stats | {revenue} | 2020-08-26 17:18:23.928606+00:00 | 500 | 100 | 0 | 584554590801035265 +(11 rows) +~~~ + +Note that statistics are automatically collected for all columns in the `rides` table, making the `revenue_stats` statistics a duplicate of the statistics automatically collected on the `revenue` column. ### Create statistics on a default set of columns -The `CREATE STATISTICS` statement shown below automatically figures out which columns to get statistics on — specifically, it chooses: +The `CREATE STATISTICS` statement shown below automatically figures out which columns to get statistics on. -- Columns that are part of the primary key or an index (in other words, all indexed columns). -- Up to 100 non-indexed columns. +{% include copy-clipboard.html %} +~~~ sql +> CREATE STATISTICS users_stats FROM users; +~~~ + +This statement creates statistics identical to the statistics that CockroachDB creates automatically. {% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS students FROM students_by_list; +> SHOW STATISTICS FOR TABLE users; +~~~ + +~~~ + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id +------------------+---------------+----------------------------------+-----------+----------------+------------+--------------------- + __auto__ | {city} | 2020-08-26 17:17:13.880576+00:00 | 50 | 9 | 0 | 584554361264930817 + __auto__ | {id} | 2020-08-26 17:17:13.880576+00:00 | 50 | 50 | 0 | NULL + __auto__ | {name} | 2020-08-26 17:17:13.880576+00:00 | 50 | 49 | 0 | NULL + __auto__ | {address} | 2020-08-26 17:17:13.880576+00:00 | 50 | 50 | 0 | NULL + __auto__ | {credit_card} | 2020-08-26 17:17:13.880576+00:00 | 50 | 50 | 0 | NULL + users_stats | {city} | 2020-08-26 17:18:55.87803+00:00 | 50 | 9 | 0 | 584554695490502657 + users_stats | {id} | 2020-08-26 17:18:55.87803+00:00 | 50 | 50 | 0 | NULL + users_stats | {name} | 2020-08-26 17:18:55.87803+00:00 | 50 | 49 | 0 | NULL + users_stats | {address} | 
2020-08-26 17:18:55.87803+00:00 | 50 | 50 | 0 | NULL + users_stats | {credit_card} | 2020-08-26 17:18:55.87803+00:00 | 50 | 50 | 0 | NULL +(10 rows) ~~~ ### Create statistics as of a given time @@ -66,7 +103,7 @@ To create statistics as of a given time (in this example, 1 minute ago to avoid {% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS employee_stats FROM employees AS OF SYSTEM TIME '-1m'; +> CREATE STATISTICS vehicle_stats_1 FROM vehicles AS OF SYSTEM TIME '-1m'; ~~~ For more information about how the `AS OF SYSTEM TIME` clause works, including supported time formats, see [`AS OF SYSTEM TIME`](as-of-system-time.html). @@ -89,11 +126,12 @@ To view statistics jobs, there are two options: ~~~ ~~~ - job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id - --------------------+--------------+------------------------------------------------------------------+-----------+-----------+-----------+----------------+----------------------------+----------------------------+----------------------------+----------------------------+--------------------+-------+---------------- - 441281249412743169 | CREATE STATS | CREATE STATISTICS salary_stats FROM employees.public.salaries | | root | succeeded | | 2019-04-08 15:52:30.040531 | 2019-04-08 15:52:30.046646 | 2019-04-08 15:52:32.757519 | 2019-04-08 15:52:32.757519 | 1 | | 1 - 441281163978637313 | CREATE STATS | CREATE STATISTICS employee_stats FROM employees.public.employees | | root | succeeded | | 2019-04-08 15:52:03.968099 | 2019-04-08 15:52:03.972557 | 2019-04-08 15:52:05.168809 | 2019-04-08 15:52:05.168809 | 1 | | 1 - (2 rows) + job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id + 
---------------------+--------------+--------------------------------------------------------------------------------------------------+-----------+-----------+-----------+----------------+----------------------------------+----------------------------------+----------------------------------+----------------------------------+--------------------+-------+----------------- + 584554590745821185 | CREATE STATS | CREATE STATISTICS revenue_stats ON revenue FROM movr.public.rides | | root | succeeded | NULL | 2020-08-26 17:18:23.914124+00:00 | 2020-08-26 17:18:23.918222+00:00 | 2020-08-26 17:18:23.932202+00:00 | 2020-08-26 17:18:23.931436+00:00 | 1 | | 1 + 584554695442432001 | CREATE STATS | CREATE STATISTICS users_stats FROM movr.public.users | | root | succeeded | NULL | 2020-08-26 17:18:55.864992+00:00 | 2020-08-26 17:18:55.867214+00:00 | 2020-08-26 17:18:55.888815+00:00 | 2020-08-26 17:18:55.888237+00:00 | 1 | | 1 + 584554752084606977 | CREATE STATS | CREATE STATISTICS vehicle_stats_1 FROM movr.public.vehicles WITH OPTIONS AS OF SYSTEM TIME '-1m' | | root | succeeded | NULL | 2020-08-26 17:19:13.150822+00:00 | 2020-08-26 17:19:13.152896+00:00 | 2020-08-26 17:19:13.176799+00:00 | 2020-08-26 17:19:13.176202+00:00 | 1 | | 1 + (3 rows) ~~~ 2. 
Use `SHOW AUTOMATIC JOBS` to see statistics jobs that were created by the [automatic statistics feature](cost-based-optimizer.html#table-statistics): @@ -104,16 +142,15 @@ To view statistics jobs, there are two options: ~~~ ~~~ - job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id - --------------------+-------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------+-----------+-----------+----------------+----------------------------+----------------------------+----------------------------+----------------------------+--------------------+-------+---------------- - 441280366254850049 | AUTO CREATE STATS | Table statistics refresh for employees.public.departments | CREATE STATISTICS __auto__ FROM [55] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:48:00.522119 | 2019-04-08 15:48:00.52663 | 2019-04-08 15:48:00.541608 | 2019-04-08 15:48:00.541608 | 1 | | 1 - 441280364809289729 | AUTO CREATE STATS | Table statistics refresh for employees.public.titles | CREATE STATISTICS __auto__ FROM [60] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:48:00.080971 | 2019-04-08 15:48:00.083117 | 2019-04-08 15:48:00.515766 | 2019-04-08 15:48:00.515767 | 1 | | 1 - 441280356286201857 | AUTO CREATE STATS | Table statistics refresh for employees.public.salaries | CREATE STATISTICS __auto__ FROM [59] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:57.479929 | 2019-04-08 15:47:57.482235 | 2019-04-08 15:48:00.075025 | 2019-04-08 15:48:00.075025 | 1 | | 1 - 441280352161693697 | AUTO CREATE STATS | Table statistics refresh for employees.public.employees | CREATE STATISTICS __auto__ FROM [58] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME 
'-30s' | root | succeeded | | 2019-04-08 15:47:56.221223 | 2019-04-08 15:47:56.223664 | 2019-04-08 15:47:57.474159 | 2019-04-08 15:47:57.474159 | 1 | | 1 - 441280352070434817 | AUTO CREATE STATS | Table statistics refresh for employees.public.dept_manager | CREATE STATISTICS __auto__ FROM [57] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:56.193375 | 2019-04-08 15:47:56.195813 | 2019-04-08 15:47:56.215114 | 2019-04-08 15:47:56.215114 | 1 | | 1 - 441280350791401473 | AUTO CREATE STATS | Table statistics refresh for employees.public.dept_emp | CREATE STATISTICS __auto__ FROM [56] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:55.803052 | 2019-04-08 15:47:55.806071 | 2019-04-08 15:47:56.187153 | 2019-04-08 15:47:56.187154 | 1 | | 1 - 441279760786096129 | AUTO CREATE STATS | Table statistics refresh for test.public.kv | CREATE STATISTICS __auto__ FROM [53] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:44:55.747725 | 2019-04-08 15:44:55.754582 | 2019-04-08 15:44:55.775664 | 2019-04-08 15:44:55.775665 | 1 | | 1 - (7 rows) + job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id + ---------------------+-------------------+---------------------------------------------------------------------+-------------------------------------------------------------------------------------------+-----------+-----------+----------------+----------------------------------+----------------------------------+----------------------------------+----------------------------------+--------------------+-------+----------------- + 584554360819712001 | AUTO CREATE STATS | Table statistics refresh for movr.public.vehicles | CREATE STATISTICS __auto__ FROM [54] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 
17:17:13.746242+00:00 | 2020-08-26 17:17:13.754166+00:00 | 2020-08-26 17:17:13.785217+00:00 | 2020-08-26 17:17:13.784454+00:00 | 1 | | 1 + 584554360958156801 | AUTO CREATE STATS | Table statistics refresh for movr.public.vehicle_location_histories | CREATE STATISTICS __auto__ FROM [56] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:17:13.788499+00:00 | 2020-08-26 17:17:13.79+00:00 | 2020-08-26 17:17:13.813299+00:00 | 2020-08-26 17:17:13.812742+00:00 | 1 | | 1 + 584554361050529793 | AUTO CREATE STATS | Table statistics refresh for movr.public.user_promo_codes | CREATE STATISTICS __auto__ FROM [58] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:17:13.816693+00:00 | 2020-08-26 17:17:13.818026+00:00 | 2020-08-26 17:17:13.833977+00:00 | 2020-08-26 17:17:13.833316+00:00 | 1 | | 1 + 584554361118425089 | AUTO CREATE STATS | Table statistics refresh for movr.public.rides | CREATE STATISTICS __auto__ FROM [55] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:17:13.837398+00:00 | 2020-08-26 17:17:13.83881+00:00 | 2020-08-26 17:17:13.870263+00:00 | 2020-08-26 17:17:13.868936+00:00 | 1 | | 1 + 584554361235341313 | AUTO CREATE STATS | Table statistics refresh for movr.public.users | CREATE STATISTICS __auto__ FROM [53] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:17:13.873091+00:00 | 2020-08-26 17:17:13.874499+00:00 | 2020-08-26 17:17:13.889308+00:00 | 2020-08-26 17:17:13.888625+00:00 | 1 | | 1 + 584554361296388097 | AUTO CREATE STATS | Table statistics refresh for movr.public.promo_codes | CREATE STATISTICS __auto__ FROM [57] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 17:17:13.89172+00:00 | 2020-08-26 17:17:13.893181+00:00 | 2020-08-26 17:17:13.912905+00:00 | 2020-08-26 17:17:13.912349+00:00 | 1 | | 1 + (6 rows) ~~~ ## See Also diff --git 
a/v20.1/show-statistics.md b/v20.1/show-statistics.md index 788c72c7030..3b5fe860e83 100644 --- a/v20.1/show-statistics.md +++ b/v20.1/show-statistics.md @@ -5,6 +5,10 @@ toc: true --- The `SHOW STATISTICS` [statement](sql-statements.html) lists [table statistics](create-statistics.html) used by the [cost-based optimizer](cost-based-optimizer.html). +{{site.data.alerts.callout_info}} +[By default, CockroachDB automatically generates statistics](cost-based-optimizer.html#table-statistics) on all indexed columns, and up to 100 non-indexed columns. +{{site.data.alerts.end}} + ## Synopsis
@@ -23,27 +27,29 @@ Parameter | Description ## Examples -### List table statistics +{% include {{page.version.version}}/sql/movr-statements.md %} -{% include copy-clipboard.html %} -~~~ sql -> CREATE STATISTICS students ON id FROM students_by_list; -~~~ - -~~~ -CREATE STATISTICS -~~~ +### List table statistics {% include copy-clipboard.html %} ~~~ sql -> SHOW STATISTICS FOR TABLE students_by_list; +> SHOW STATISTICS FOR TABLE rides; ~~~ ~~~ - statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id -+-----------------+--------------+----------------------------------+-----------+----------------+------------+--------------+ - students | {"id"} | 2018-10-26 15:06:34.320165+00:00 | 0 | 0 | 0 | NULL -(1 row) + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id +------------------+-----------------+----------------------------------+-----------+----------------+------------+--------------------- + __auto__ | {city} | 2020-08-26 17:17:13.852138+00:00 | 500 | 9 | 0 | 584554361172525057 + __auto__ | {vehicle_city} | 2020-08-26 17:17:13.852138+00:00 | 500 | 9 | 0 | 584554361179242497 + __auto__ | {id} | 2020-08-26 17:17:13.852138+00:00 | 500 | 500 | 0 | NULL + __auto__ | {rider_id} | 2020-08-26 17:17:13.852138+00:00 | 500 | 50 | 0 | NULL + __auto__ | {vehicle_id} | 2020-08-26 17:17:13.852138+00:00 | 500 | 15 | 0 | NULL + __auto__ | {start_address} | 2020-08-26 17:17:13.852138+00:00 | 500 | 500 | 0 | NULL + __auto__ | {end_address} | 2020-08-26 17:17:13.852138+00:00 | 500 | 500 | 0 | NULL + __auto__ | {start_time} | 2020-08-26 17:17:13.852138+00:00 | 500 | 30 | 0 | NULL + __auto__ | {end_time} | 2020-08-26 17:17:13.852138+00:00 | 500 | 367 | 0 | NULL + __auto__ | {revenue} | 2020-08-26 17:17:13.852138+00:00 | 500 | 100 | 0 | NULL +(10 rows) ~~~ ### Delete statistics diff --git a/v20.2/cost-based-optimizer.md b/v20.2/cost-based-optimizer.md index 618f9f8d05b..b6b7af889e2 100644 --- 
a/v20.2/cost-based-optimizer.md +++ b/v20.2/cost-based-optimizer.md @@ -22,11 +22,13 @@ The most important factor in determining the quality of a plan is cardinality (i The cost-based optimizer can often find more performant query plans if it has access to statistical data on the contents of your tables. This data needs to be generated from scratch for new tables, and regenerated periodically for existing tables. -By default, CockroachDB generates table statistics automatically when tables are [created](create-table.html), and as they are [updated](update.html). It does this [using a background job](create-statistics.html#view-statistics-jobs) that automatically determines which columns to get statistics on — specifically, it chooses: +By default, CockroachDB automatically generates table statistics when tables are [created](create-table.html), and as they are [updated](update.html). It does this [using a background job](create-statistics.html#view-statistics-jobs) that automatically determines which columns to get statistics on — specifically, it chooses: - Columns that are part of the primary key or an index (in other words, all indexed columns). - Up to 100 non-indexed columns. +New in v20.2: By default, CockroachDB also automatically collects [multi-column statistics](create-statistics.html#create-statistics-on-multiple-columns) on columns that prefix an index. + {{site.data.alerts.callout_info}} [Schema changes](online-schema-changes.html) trigger automatic statistics collection for the affected table(s). {{site.data.alerts.end}} @@ -80,6 +82,10 @@ For instructions showing how to manually generate statistics, see the examples i By default, the optimizer collects histograms for all index columns (specifically the first column in each index) during automatic statistics collection. 
If a single column statistic is explicitly requested using manual invocation of [`CREATE STATISTICS`](create-statistics.html), a histogram will be collected, regardless of whether or not the column is part of an index. +{{site.data.alerts.callout_info}} +CockroachDB does not support multi-column histograms yet. See the [tracking issue](https://github.com/cockroachdb/cockroach/issues/49698). +{{site.data.alerts.end}} + If you are an advanced user and need to disable histogram collection for troubleshooting or performance tuning reasons, change the [`sql.stats.histogram_collection.enabled` cluster setting](cluster-settings.html) by running [`SET CLUSTER SETTING`](set-cluster-setting.html) as follows: {% include copy-clipboard.html %} diff --git a/v20.2/create-statistics.md b/v20.2/create-statistics.md index 2ad19b0e6bf..6f144b3e2d0 100644 --- a/v20.2/create-statistics.md +++ b/v20.2/create-statistics.md @@ -7,13 +7,10 @@ Use the `CREATE STATISTICS` [statement](sql-statements.html) to generate table s Once you [create a table](create-table.html) and load data into it (e.g., [`INSERT`](insert.html), [`IMPORT`](import.html)), table statistics can be generated. Table statistics help the cost-based optimizer determine the cardinality of the rows used in each query, which helps to predict more accurate costs. -`CREATE STATISTICS` automatically figures out which columns to get statistics on — specifically, it chooses: - -- Columns that are part of the primary key or an index (in other words, all indexed columns). -- Up to 100 non-indexed columns (unless you specify which columns to create statistics on, as shown in [this example](#create-statistics-on-a-specific-column)). - {{site.data.alerts.callout_info}} -[Automatic statistics is enabled by default](cost-based-optimizer.html#table-statistics); most users don't need to issue `CREATE STATISTICS` statements directly. 
+[By default, CockroachDB automatically generates statistics](cost-based-optimizer.html#table-statistics) on all indexed columns, and up to 100 non-indexed columns. As a result, most users don't need to issue `CREATE STATISTICS` statements directly. + +New in v20.2: CockroachDB also automatically collects [multi-column statistics](#create-statistics-on-multiple-columns) on columns that prefix each index. {{site.data.alerts.end}} ## Synopsis @@ -37,27 +34,109 @@ The user must have the `CREATE` [privilege](authorization.html#assign-privileges ## Examples -### Create statistics on a specific column +{% include {{page.version.version}}/sql/movr-statements.md %} + +### Create statistics on a single column {% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS students ON id FROM students_by_list; +> CREATE STATISTICS revenue_stats ON revenue FROM rides; ~~~ -{{site.data.alerts.callout_info}} -Multi-column statistics are not supported yet. -{{site.data.alerts.end}} +{% include copy-clipboard.html %} +~~~ sql +> SHOW STATISTICS FOR TABLE rides; +~~~ + +~~~ + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id +------------------+---------------------------+----------------------------------+-----------+----------------+------------+--------------------- + __auto__ | {city} | 2020-08-26 16:55:24.725089+00:00 | 500 | 9 | 0 | 584550071425531905 + __auto__ | {id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071432740865 + __auto__ | {city,id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | NULL + __auto__ | {rider_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 50 | 0 | 584550071446732801 + __auto__ | {city,rider_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 50 | 0 | NULL + __auto__ | {vehicle_city} | 2020-08-26 16:55:24.725089+00:00 | 500 | 9 | 0 | 584550071461019649 + __auto__ | {vehicle_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 15 | 0 | 584550071467966465 + __auto__ | {vehicle_city,vehicle_id} | 
2020-08-26 16:55:24.725089+00:00 | 500 | 15 | 0 | NULL + __auto__ | {start_address} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071482122241 + __auto__ | {end_address} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071489167361 + __auto__ | {start_time} | 2020-08-26 16:55:24.725089+00:00 | 500 | 30 | 0 | 584550071496671233 + __auto__ | {end_time} | 2020-08-26 16:55:24.725089+00:00 | 500 | 367 | 0 | 584550071504437249 + __auto__ | {revenue} | 2020-08-26 16:55:24.725089+00:00 | 500 | 100 | 0 | 584550071512137729 + revenue_stats | {revenue} | 2020-08-26 16:55:33.986698+00:00 | 500 | 100 | 0 | 584550101775384577 +(14 rows) +~~~ + +Note that statistics are automatically collected for all columns in the `rides` table, making the `revenue_stats` statistics a duplicate of the statistics automatically collected on the `revenue` column. + +### Create statistics on multiple columns + +{% include copy-clipboard.html %} +~~~ sql +> CREATE STATISTICS city_revenue_stats ON city, revenue FROM rides; +~~~ + +{% include copy-clipboard.html %} +~~~ sql +> SHOW STATISTICS FOR TABLE rides; +~~~ + +~~~ + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id +---------------------+---------------------------+----------------------------------+-----------+----------------+------------+--------------------- + __auto__ | {city} | 2020-08-26 16:55:24.725089+00:00 | 500 | 9 | 0 | 584550071425531905 + __auto__ | {id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071432740865 + __auto__ | {city,id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | NULL + __auto__ | {rider_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 50 | 0 | 584550071446732801 + __auto__ | {city,rider_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 50 | 0 | NULL + __auto__ | {vehicle_city} | 2020-08-26 16:55:24.725089+00:00 | 500 | 9 | 0 | 584550071461019649 + __auto__ | {vehicle_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 15 | 0 | 
584550071467966465 + __auto__ | {vehicle_city,vehicle_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 15 | 0 | NULL + __auto__ | {start_address} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071482122241 + __auto__ | {end_address} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071489167361 + __auto__ | {start_time} | 2020-08-26 16:55:24.725089+00:00 | 500 | 30 | 0 | 584550071496671233 + __auto__ | {end_time} | 2020-08-26 16:55:24.725089+00:00 | 500 | 367 | 0 | 584550071504437249 + __auto__ | {revenue} | 2020-08-26 16:55:24.725089+00:00 | 500 | 100 | 0 | 584550071512137729 + revenue_stats | {revenue} | 2020-08-26 16:55:33.986698+00:00 | 500 | 100 | 0 | 584550101775384577 + city_revenue_stats | {city,revenue} | 2020-08-26 16:55:52.539795+00:00 | 500 | 372 | 0 | NULL +(15 rows) +~~~ + +New in v20.2: Multi-column statistics are automatically collected for all columns that prefix an index. In this example, `city` and `revenue` are not an index prefix, making the `city_revenue_stats` statistics unique for the table. ### Create statistics on a default set of columns -The `CREATE STATISTICS` statement shown below automatically figures out which columns to get statistics on — specifically, it chooses: +The `CREATE STATISTICS` statement shown below automatically figures out which columns to get statistics on. + +{% include copy-clipboard.html %} +~~~ sql +> CREATE STATISTICS users_stats FROM users; +~~~ -- Columns that are part of the primary key or an index (in other words, all indexed columns). -- Up to 100 non-indexed columns. +This statement creates statistics identical to the statistics that CockroachDB creates automatically. 
{% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS students FROM students_by_list; +> SHOW STATISTICS FOR TABLE users; +~~~ + +~~~ + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id +------------------+---------------+----------------------------------+-----------+----------------+------------+--------------------- + __auto__ | {city} | 2020-08-26 16:55:24.765331+00:00 | 50 | 9 | 0 | 584550071556964353 + __auto__ | {id} | 2020-08-26 16:55:24.765331+00:00 | 50 | 50 | 0 | 584550071563976705 + __auto__ | {city,id} | 2020-08-26 16:55:24.765331+00:00 | 50 | 50 | 0 | NULL + __auto__ | {name} | 2020-08-26 16:55:24.765331+00:00 | 50 | 49 | 0 | 584550071577477121 + __auto__ | {address} | 2020-08-26 16:55:24.765331+00:00 | 50 | 50 | 0 | 584550071583997953 + __auto__ | {credit_card} | 2020-08-26 16:55:24.765331+00:00 | 50 | 50 | 0 | 584550071591141377 + users_stats | {city} | 2020-08-26 16:56:12.802308+00:00 | 50 | 9 | 0 | 584550228973027329 + users_stats | {id} | 2020-08-26 16:56:12.802308+00:00 | 50 | 50 | 0 | 584550228985905153 + users_stats | {city,id} | 2020-08-26 16:56:12.802308+00:00 | 50 | 50 | 0 | NULL + users_stats | {name} | 2020-08-26 16:56:12.802308+00:00 | 50 | 49 | 0 | 584550229015625729 + users_stats | {address} | 2020-08-26 16:56:12.802308+00:00 | 50 | 50 | 0 | 584550229028765697 + users_stats | {credit_card} | 2020-08-26 16:56:12.802308+00:00 | 50 | 50 | 0 | 584550229043937281 +(12 rows) ~~~ ### Create statistics as of a given time @@ -66,7 +145,7 @@ To create statistics as of a given time (in this example, 1 minute ago to avoid {% include copy-clipboard.html %} ~~~ sql -> CREATE STATISTICS employee_stats FROM employees AS OF SYSTEM TIME '-1m'; +> CREATE STATISTICS vehicle_stats_1 FROM vehicles AS OF SYSTEM TIME '-1m'; ~~~ For more information about how the `AS OF SYSTEM TIME` clause works, including supported time formats, see [`AS OF SYSTEM TIME`](as-of-system-time.html). 
@@ -89,11 +168,13 @@ To view statistics jobs, there are two options: ~~~ ~~~ - job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id - --------------------+--------------+------------------------------------------------------------------+-----------+-----------+-----------+----------------+----------------------------+----------------------------+----------------------------+----------------------------+--------------------+-------+---------------- - 441281249412743169 | CREATE STATS | CREATE STATISTICS salary_stats FROM employees.public.salaries | | root | succeeded | | 2019-04-08 15:52:30.040531 | 2019-04-08 15:52:30.046646 | 2019-04-08 15:52:32.757519 | 2019-04-08 15:52:32.757519 | 1 | | 1 - 441281163978637313 | CREATE STATS | CREATE STATISTICS employee_stats FROM employees.public.employees | | root | succeeded | | 2019-04-08 15:52:03.968099 | 2019-04-08 15:52:03.972557 | 2019-04-08 15:52:05.168809 | 2019-04-08 15:52:05.168809 | 1 | | 1 - (2 rows) + job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id + ---------------------+--------------+--------------------------------------------------------------------------------------------------+-----------+-----------+-----------+----------------+----------------------------------+----------------------------------+----------------------------------+----------------------------------+--------------------+-------+----------------- + 584550101732950017 | CREATE STATS | CREATE STATISTICS revenue_stats ON revenue FROM movr.public.rides | | root | succeeded | NULL | 2020-08-26 16:55:33.976113+00:00 | 2020-08-26 16:55:33.979043+00:00 | 2020-08-26 16:55:33.990197+00:00 | 2020-08-26 16:55:33.989405+00:00 | 1 | | NULL + 584550162508382209 | CREATE STATS | CREATE STATISTICS city_revenue_stats ON city, 
revenue FROM movr.public.rides | | root | succeeded | NULL | 2020-08-26 16:55:52.523299+00:00 | 2020-08-26 16:55:52.527194+00:00 | 2020-08-26 16:55:52.544301+00:00 | 2020-08-26 16:55:52.543148+00:00 | 1 | | NULL + 584550228891500545 | CREATE STATS | CREATE STATISTICS users_stats FROM movr.public.users | | root | succeeded | NULL | 2020-08-26 16:56:12.781808+00:00 | 2020-08-26 16:56:12.789111+00:00 | 2020-08-26 16:56:12.830659+00:00 | 2020-08-26 16:56:12.82907+00:00 | 1 | | NULL + 584550307147874305 | CREATE STATS | CREATE STATISTICS vehicle_stats_1 FROM movr.public.vehicles WITH OPTIONS AS OF SYSTEM TIME '-1m' | | root | succeeded | NULL | 2020-08-26 16:56:36.663773+00:00 | 2020-08-26 16:56:36.668101+00:00 | 2020-08-26 16:56:36.705743+00:00 | 2020-08-26 16:56:36.704696+00:00 | 1 | | NULL + (5 rows) ~~~ 2. Use `SHOW AUTOMATIC JOBS` to see statistics jobs that were created by the [automatic statistics feature](cost-based-optimizer.html#table-statistics): @@ -104,16 +185,15 @@ To view statistics jobs, there are two options: ~~~ ~~~ - job_id | job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id - --------------------+-------------------+------------------------------------------------------------+-------------------------------------------------------------------------------------------+-----------+-----------+----------------+----------------------------+----------------------------+----------------------------+----------------------------+--------------------+-------+---------------- - 441280366254850049 | AUTO CREATE STATS | Table statistics refresh for employees.public.departments | CREATE STATISTICS __auto__ FROM [55] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:48:00.522119 | 2019-04-08 15:48:00.52663 | 2019-04-08 15:48:00.541608 | 2019-04-08 15:48:00.541608 | 1 | | 1 - 441280364809289729 | AUTO CREATE STATS | 
Table statistics refresh for employees.public.titles | CREATE STATISTICS __auto__ FROM [60] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:48:00.080971 | 2019-04-08 15:48:00.083117 | 2019-04-08 15:48:00.515766 | 2019-04-08 15:48:00.515767 | 1 | | 1 - 441280356286201857 | AUTO CREATE STATS | Table statistics refresh for employees.public.salaries | CREATE STATISTICS __auto__ FROM [59] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:57.479929 | 2019-04-08 15:47:57.482235 | 2019-04-08 15:48:00.075025 | 2019-04-08 15:48:00.075025 | 1 | | 1 - 441280352161693697 | AUTO CREATE STATS | Table statistics refresh for employees.public.employees | CREATE STATISTICS __auto__ FROM [58] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:56.221223 | 2019-04-08 15:47:56.223664 | 2019-04-08 15:47:57.474159 | 2019-04-08 15:47:57.474159 | 1 | | 1 - 441280352070434817 | AUTO CREATE STATS | Table statistics refresh for employees.public.dept_manager | CREATE STATISTICS __auto__ FROM [57] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:56.193375 | 2019-04-08 15:47:56.195813 | 2019-04-08 15:47:56.215114 | 2019-04-08 15:47:56.215114 | 1 | | 1 - 441280350791401473 | AUTO CREATE STATS | Table statistics refresh for employees.public.dept_emp | CREATE STATISTICS __auto__ FROM [56] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:47:55.803052 | 2019-04-08 15:47:55.806071 | 2019-04-08 15:47:56.187153 | 2019-04-08 15:47:56.187154 | 1 | | 1 - 441279760786096129 | AUTO CREATE STATS | Table statistics refresh for test.public.kv | CREATE STATISTICS __auto__ FROM [53] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | | 2019-04-08 15:44:55.747725 | 2019-04-08 15:44:55.754582 | 2019-04-08 15:44:55.775664 | 2019-04-08 15:44:55.775665 | 1 | | 1 - (7 rows) + job_id | 
job_type | description | statement | user_name | status | running_status | created | started | finished | modified | fraction_completed | error | coordinator_id + ---------------------+-------------------+---------------------------------------------------------------------+-------------------------------------------------------------------------------------------+-----------+-----------+----------------+----------------------------------+----------------------------------+----------------------------------+----------------------------------+--------------------+-------+----------------- + 584550071026876417 | AUTO CREATE STATS | Table statistics refresh for movr.public.user_promo_codes | CREATE STATISTICS __auto__ FROM [58] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 16:55:24.605364+00:00 | 2020-08-26 16:55:24.608296+00:00 | 2020-08-26 16:55:24.632626+00:00 | 2020-08-26 16:55:24.631635+00:00 | 1 | | NULL + 584550071124131841 | AUTO CREATE STATS | Table statistics refresh for movr.public.vehicle_location_histories | CREATE STATISTICS __auto__ FROM [56] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 16:55:24.635051+00:00 | 2020-08-26 16:55:24.636861+00:00 | 2020-08-26 16:55:24.672699+00:00 | 2020-08-26 16:55:24.671777+00:00 | 1 | | NULL + 584550071255498753 | AUTO CREATE STATS | Table statistics refresh for movr.public.promo_codes | CREATE STATISTICS __auto__ FROM [57] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 16:55:24.675136+00:00 | 2020-08-26 16:55:24.677263+00:00 | 2020-08-26 16:55:24.709619+00:00 | 2020-08-26 16:55:24.708881+00:00 | 1 | | NULL + 584550071376281601 | AUTO CREATE STATS | Table statistics refresh for movr.public.rides | CREATE STATISTICS __auto__ FROM [55] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 16:55:24.712003+00:00 | 2020-08-26 16:55:24.713674+00:00 | 
2020-08-26 16:55:24.754449+00:00 | 2020-08-26 16:55:24.753735+00:00 | 1 | | NULL + 584550071523082241 | AUTO CREATE STATS | Table statistics refresh for movr.public.users | CREATE STATISTICS __auto__ FROM [53] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 16:55:24.756802+00:00 | 2020-08-26 16:55:24.758638+00:00 | 2020-08-26 16:55:24.77889+00:00 | 2020-08-26 16:55:24.777899+00:00 | 1 | | NULL + 584550071604314113 | AUTO CREATE STATS | Table statistics refresh for movr.public.vehicles | CREATE STATISTICS __auto__ FROM [54] WITH OPTIONS THROTTLING 0.9 AS OF SYSTEM TIME '-30s' | root | succeeded | NULL | 2020-08-26 16:55:24.781594+00:00 | 2020-08-26 16:55:24.783519+00:00 | 2020-08-26 16:55:24.815104+00:00 | 2020-08-26 16:55:24.814103+00:00 | 1 | | NULL + (6 rows) ~~~ ## See Also diff --git a/v20.2/show-statistics.md b/v20.2/show-statistics.md index 788c72c7030..b4190e9509b 100644 --- a/v20.2/show-statistics.md +++ b/v20.2/show-statistics.md @@ -5,6 +5,12 @@ toc: true --- The `SHOW STATISTICS` [statement](sql-statements.html) lists [table statistics](create-statistics.html) used by the [cost-based optimizer](cost-based-optimizer.html). +{{site.data.alerts.callout_info}} +[By default, CockroachDB automatically generates statistics](cost-based-optimizer.html#table-statistics) on all indexed columns, and up to 100 non-indexed columns. + +New in v20.2: CockroachDB also automatically collects [multi-column statistics](create-statistics.html#create-statistics-on-multiple-columns) on the columns that prefix each index. +{{site.data.alerts.end}} + ## Synopsis
@@ -23,27 +29,32 @@ Parameter | Description ## Examples -### List table statistics - -{% include copy-clipboard.html %} -~~~ sql -> CREATE STATISTICS students ON id FROM students_by_list; -~~~ +{% include {{page.version.version}}/sql/movr-statements.md %} -~~~ -CREATE STATISTICS -~~~ +### List table statistics {% include copy-clipboard.html %} ~~~ sql -> SHOW STATISTICS FOR TABLE students_by_list; +> SHOW STATISTICS FOR TABLE rides; ~~~ ~~~ - statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id -+-----------------+--------------+----------------------------------+-----------+----------------+------------+--------------+ - students | {"id"} | 2018-10-26 15:06:34.320165+00:00 | 0 | 0 | 0 | NULL -(1 row) + statistics_name | column_names | created | row_count | distinct_count | null_count | histogram_id +------------------+---------------------------+----------------------------------+-----------+----------------+------------+--------------------- + __auto__ | {city} | 2020-08-26 16:55:24.725089+00:00 | 500 | 9 | 0 | 584550071425531905 + __auto__ | {id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071432740865 + __auto__ | {city,id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | NULL + __auto__ | {rider_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 50 | 0 | 584550071446732801 + __auto__ | {city,rider_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 50 | 0 | NULL + __auto__ | {vehicle_city} | 2020-08-26 16:55:24.725089+00:00 | 500 | 9 | 0 | 584550071461019649 + __auto__ | {vehicle_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 15 | 0 | 584550071467966465 + __auto__ | {vehicle_city,vehicle_id} | 2020-08-26 16:55:24.725089+00:00 | 500 | 15 | 0 | NULL + __auto__ | {start_address} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071482122241 + __auto__ | {end_address} | 2020-08-26 16:55:24.725089+00:00 | 500 | 500 | 0 | 584550071489167361 + __auto__ | {start_time} | 2020-08-26 16:55:24.725089+00:00 | 
500 | 30 | 0 | 584550071496671233 + __auto__ | {end_time} | 2020-08-26 16:55:24.725089+00:00 | 500 | 367 | 0 | 584550071504437249 + __auto__ | {revenue} | 2020-08-26 16:55:24.725089+00:00 | 500 | 100 | 0 | 584550071512137729 +(13 rows) ~~~ ### Delete statistics