diff --git a/.gitmodules b/.gitmodules
index 3955ade5ce..abba677814 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,12 +1,6 @@
-[submodule "modules/metrics"]
- path = modules/metrics
- url = https://github.com/tarantool/metrics.git
[submodule "modules/luatest"]
path = modules/luatest
url = https://github.com/tarantool/luatest
-[submodule "modules/grafana-dashboard"]
- path = modules/grafana-dashboard
- url = https://github.com/tarantool/grafana-dashboard
[submodule "modules/tntcxx"]
path = modules/tntcxx
url = https://github.com/tarantool/tntcxx.git
diff --git a/build_submodules.sh b/build_submodules.sh
index e19e7258a0..908af47df7 100755
--- a/build_submodules.sh
+++ b/build_submodules.sh
@@ -11,18 +11,6 @@ po_dest="${project_root}/locale/ru/LC_MESSAGES"
# Copy Building Tarantool Docs guide
cp README.rst doc/contributing/docs/_includes/README.rst
-
-# Monitoring
-monitoring_root="${project_root}/modules/metrics/doc/monitoring"
-monitoring_dest="${project_root}/doc/book"
-monitoring_grafana_root="${project_root}/modules/grafana-dashboard/doc/monitoring"
-
-# Copy monitoring docs to the right destination
-mkdir -p "${monitoring_dest}"
-cp -rfv "${monitoring_root}" "${monitoring_dest}/"
-cp -rfv "${monitoring_grafana_root}" "${monitoring_dest}/"
-
-
# Luatest
luatest_root="${project_root}/modules/luatest"
luatest_dest="${project_root}/doc/reference/reference_rock/luatest"
diff --git a/doc/book/admin/index.rst b/doc/book/admin/index.rst
index 0396a88f28..cbdac9af55 100644
--- a/doc/book/admin/index.rst
+++ b/doc/book/admin/index.rst
@@ -41,4 +41,4 @@ This chapter includes the following sections:
os_notes
bug_reports
troubleshoot
- ../monitoring/index
+ monitoring
diff --git a/doc/book/admin/monitoring.rst b/doc/book/admin/monitoring.rst
new file mode 100644
index 0000000000..24a696f3fc
--- /dev/null
+++ b/doc/book/admin/monitoring.rst
@@ -0,0 +1,17 @@
+.. _monitoring:
+
+Monitoring
+==========
+
+Monitoring is the process of capturing runtime information about the instances of a Tarantool cluster using metrics.
+Metrics can indicate various characteristics, such as memory usage, the number of records in spaces, replication status, and so on.
+Typically, metrics are monitored in real time, allowing for the identification of current issues or the prediction of potential ones.
+
+.. toctree::
+ :maxdepth: 1
+ :numbered: 0
+
+ monitoring/getting_started
+ monitoring/grafana_dashboard
+ monitoring/alerting
+ monitoring/metrics_reference
diff --git a/doc/book/admin/monitoring/alerting.rst b/doc/book/admin/monitoring/alerting.rst
new file mode 100644
index 0000000000..796f1d5692
--- /dev/null
+++ b/doc/book/admin/monitoring/alerting.rst
@@ -0,0 +1,420 @@
+.. _monitoring-alerting-page:
+
+===============================================================================
+Alerting
+===============================================================================
+
+You can set up alerts on metrics to get a notification when something goes
+wrong. This page uses `Prometheus alert rules `_
+as an example. You can get the full ``alerts.yml`` file from the
+`tarantool/grafana-dashboard GitHub repo `_.
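+
+To make Prometheus evaluate these rules, reference the rules file from the Prometheus
+configuration. The snippet below is a minimal sketch: the ``alerts.yml`` path and the
+Alertmanager address are assumptions, adjust them to your setup.
+
+.. code-block:: yaml
+
+    # Load alerting rules from the file next to prometheus.yml (path is an assumption).
+    rule_files:
+      - alerts.yml
+
+    # Send firing alerts to an Alertmanager instance (address is an assumption).
+    alerting:
+      alertmanagers:
+        - static_configs:
+            - targets:
+                - "alertmanager:9093"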
+
+.. _monitoring-alerting-tarantool:
+
+-------------------------------------------------------------------------------
+Tarantool metrics
+-------------------------------------------------------------------------------
+
+You can use internal Tarantool metrics to monitor detailed RAM consumption,
+replication state, and database engine status, track business logic issues (like
+HTTP 4xx and 5xx responses or a low request rate), and watch external module statistics
+(like ``CRUD`` errors). The evaluation timeouts, severity
+levels, and thresholds (especially the ones for business logic) are given here for
+the sake of example: you may want to increase or decrease them for your
+application. Also, don't forget to set sane rate time ranges based on your
+Prometheus configuration.
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Lua memory
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+Monitoring the ``tnt_info_memory_lua`` metric may help prevent memory overflow and detect bad Lua code practices.
+
+.. NOTE::
+
+ The Lua memory is limited to 2 GB per instance if Tarantool doesn't have the GC64 mode enabled for LuaJIT.
+
+.. code-block:: yaml
+
+ - alert: HighLuaMemoryWarning
+ expr: tnt_info_memory_lua >= (512 * 1024 * 1024)
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') Lua runtime warning"
+ description: "'{{ $labels.alias }}' instance of job '{{ $labels.job }}' uses too much Lua memory
+ and may hit threshold soon."
+
+ - alert: HighLuaMemoryAlert
+ expr: tnt_info_memory_lua >= (1024 * 1024 * 1024)
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') Lua runtime alert"
+ description: "'{{ $labels.alias }}' instance of job '{{ $labels.job }}' uses too much Lua memory
+ and likely to hit threshold soon."
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Memtx arena memory
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+By monitoring :ref:`slab allocation statistics ` you can see
+how much free RAM remains to store memtx tuples and indexes for an
+instance. If Tarantool hits the limit, the instance becomes unavailable
+for write operations, so this alert may help you see when it's time to increase
+your ``memtx_memory`` limit or to add a new storage to a vshard cluster.
+
+.. code-block:: yaml
+
+ - alert: LowMemtxArenaRemainingWarning
+ expr: (tnt_slab_quota_used_ratio >= 80) and (tnt_slab_arena_used_ratio >= 80)
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') low arena memory remaining"
+ description: "Low arena memory (tuples and indexes) remaining for '{{ $labels.alias }}' instance of job '{{ $labels.job }}'.
+ Consider increasing memtx_memory or number of storages in case of sharded data."
+
+ - alert: LowMemtxArenaRemaining
+ expr: (tnt_slab_quota_used_ratio >= 90) and (tnt_slab_arena_used_ratio >= 90)
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') low arena memory remaining"
+ description: "Low arena memory (tuples and indexes) remaining for '{{ $labels.alias }}' instance of job '{{ $labels.job }}'.
+ You are likely to hit limit soon.
+ It is strongly recommended to increase memtx_memory or number of storages in case of sharded data."
+
+ - alert: LowMemtxItemsRemainingWarning
+ expr: (tnt_slab_quota_used_ratio >= 80) and (tnt_slab_items_used_ratio >= 80)
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') low items memory remaining"
+ description: "Low items memory (tuples) remaining for '{{ $labels.alias }}' instance of job '{{ $labels.job }}'.
+ Consider increasing memtx_memory or number of storages in case of sharded data."
+
+ - alert: LowMemtxItemsRemaining
+ expr: (tnt_slab_quota_used_ratio >= 90) and (tnt_slab_items_used_ratio >= 90)
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') low items memory remaining"
+ description: "Low items memory (tuples) remaining for '{{ $labels.alias }}' instance of job '{{ $labels.job }}'.
+ You are likely to hit limit soon.
+ It is strongly recommended to increase memtx_memory or number of storages in case of sharded data."
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Vinyl engine status
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+You can monitor :ref:`vinyl regulator `
+performance to track possible scheduler or disk issues.
+
+.. code-block:: yaml
+
+ - alert: LowVinylRegulatorRateLimit
+ expr: tnt_vinyl_regulator_rate_limit < 100000
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') have low vinyl regulator rate limit"
+ description: "Instance '{{ $labels.alias }}' of job '{{ $labels.job }}' have low vinyl engine regulator rate limit.
+ This indicates issues with the disk or the scheduler."
+
+
+:ref:`Vinyl transactions ` errors are likely
+to lead to user request errors.
+
+.. code-block:: yaml
+
+ - alert: HighVinylTxConflictRate
+ expr: rate(tnt_vinyl_tx_conflict[5m]) / rate(tnt_vinyl_tx_commit[5m]) > 0.05
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') have high vinyl tx conflict rate"
+ description: "Instance '{{ $labels.alias }}' of job '{{ $labels.job }}' have
+ high vinyl transactions conflict rate. It indicates that vinyl is not healthy."
+
+:ref:`Vinyl scheduler ` failed tasks
+are a good signal of disk issues and may be the reason for increasing RAM
+consumption.
+
+.. code-block:: yaml
+
+ - alert: HighVinylSchedulerFailedTasksRate
+ expr: rate(tnt_vinyl_scheduler_tasks{status="failed"}[5m]) > 0.1
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') have high vinyl scheduler failed tasks rate"
+ description: "Instance '{{ $labels.alias }}' of job '{{ $labels.job }}' have
+ high vinyl scheduler failed tasks rate."
+
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Replication state
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+If ``tnt_replication_status`` equals ``0``, the instance's :ref:`replication `
+status is not ``"follows"``: replication is either not ready yet or
+has been stopped for some reason.
+
+.. code-block:: yaml
+
+ - alert: ReplicationNotRunning
+ expr: tnt_replication_status == 0
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') {{ $labels.stream }} (id {{ $labels.id }})
+ replication is not running"
+ description: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') {{ $labels.stream }} (id {{ $labels.id }})
+ replication is not running."
+
+Even if the async replication status is ``"follows"``, it could be considered malfunctioning
+if the lag is too high. High lag may also affect the work of the Tarantool garbage collector,
+see :ref:`box.info.gc() `.
+
+.. code-block:: yaml
+
+ - alert: HighReplicationLag
+ expr: tnt_replication_lag > 1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') have high replication lag (id {{ $labels.id }})"
+ description: "Instance '{{ $labels.alias }}' of job '{{ $labels.job }}' have high replication lag
+ (id {{ $labels.id }}), check up your network and cluster state."
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Event loop
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+A high :ref:`fiber ` event loop time leads to poor application
+performance, timeouts, and various warnings. The reason could be a high number
+of running fibers or fibers that spend too much time without any yields or
+sleeps.
+
+.. code-block:: yaml
+
+ - alert: HighEVLoopTime
+ expr: tnt_ev_loop_time > 0.1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') event loop has high cycle duration"
+ description: "Instance '{{ $labels.alias }}' of job '{{ $labels.job }}' event loop has high cycle duration.
+ Some high loaded fiber has too little yields. It may be the reason of 'Too long WAL write' warnings."
+
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Configuration status
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+:ref:`Configuration status ` displays
+the Tarantool 3 configuration apply state. Additional metrics display the count
+of apply warnings and errors.
+
+.. code-block:: yaml
+
+ - alert: ConfigWarningAlerts
+ expr: tnt_config_alerts{level="warn"} > 0
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') has configuration 'warn' alerts"
+ description: "Instance '{{ $labels.alias }}' of job '{{ $labels.job }}' has configuration 'warn' alerts.
+ Please, check config:info() for detailed info."
+
+ - alert: ConfigErrorAlerts
+ expr: tnt_config_alerts{level="error"} > 0
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') has configuration 'error' alerts"
+ description: "Instance '{{ $labels.alias }}' of job '{{ $labels.job }}' has configuration 'error' alerts.
+ Latest configuration has not been applied.
+ Please, check config:info() for detailed info."
+
+ - alert: ConfigStatusNotReady
+ expr: tnt_config_status{status="ready"} == 0
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') configuration is not ready"
+ description: "Instance '{{ $labels.alias }}' of job '{{ $labels.job }}' configuration is not ready.
+ Please, check config:info() for detailed info."
+
+
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+HTTP server statistics
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+The :ref:`metrics ` module allows you to monitor `tarantool/http `_
+handles, see :ref:`"Collecting HTTP request latency statistics" `.
+Here we use a ``summary`` collector with a default name and 0.99 quantile
+computation.
+
+Too many responses with error codes are usually a sign of API issues or
+application malfunction.
+
+.. code-block:: yaml
+
+ - alert: HighInstanceHTTPClientErrorRate
+ expr: sum by (job, instance, method, path, alias) (rate(http_server_request_latency_count{ job="tarantool", status=~"^4\\d{2}$" }[5m])) > 10
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') high rate of client error responses"
+ description: "Too many {{ $labels.method }} requests to {{ $labels.path }} path
+ on '{{ $labels.alias }}' instance of job '{{ $labels.job }}' get client error (4xx) responses."
+
+ - alert: HighHTTPClientErrorRate
+ expr: sum by (job, method, path) (rate(http_server_request_latency_count{ job="tarantool", status=~"^4\\d{2}$" }[5m])) > 20
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: "Job '{{ $labels.job }}' high rate of client error responses"
+ description: "Too many {{ $labels.method }} requests to {{ $labels.path }} path
+ on instances of job '{{ $labels.job }}' get client error (4xx) responses."
+
+ - alert: HighHTTPServerErrorRate
+ expr: sum by (job, instance, method, path, alias) (rate(http_server_request_latency_count{ job="tarantool", status=~"^5\\d{2}$" }[5m])) > 0
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') server error responses"
+ description: "Some {{ $labels.method }} requests to {{ $labels.path }} path
+ on '{{ $labels.alias }}' instance of job '{{ $labels.job }}' get server error (5xx) responses."
+
+Responding with high latency is a sign of insufficient performance. It may
+indicate application malfunction, or you may need to add more routers to
+your cluster.
+
+.. code-block:: yaml
+
+ - alert: HighHTTPLatency
+ expr: http_server_request_latency{ job="tarantool", quantile="0.99" } > 0.1
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') high HTTP latency"
+ description: "Some {{ $labels.method }} requests to {{ $labels.path }} path with {{ $labels.status }} response status
+ on '{{ $labels.alias }}' instance of job '{{ $labels.job }}' are processed too long."
+
+Getting fewer requests than you expect may indicate a balancer, external
+client, or network malfunction.
+
+.. code-block:: yaml
+
+ - alert: LowRouterHTTPRequestRate
+ expr: sum by (job, instance, alias) (rate(http_server_request_latency_count{ job="tarantool", alias=~"^.*router.*$" }[5m])) < 10
+ for: 5m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Router '{{ $labels.alias }}' ('{{ $labels.job }}') low activity"
+ description: "Router '{{ $labels.alias }}' instance of job '{{ $labels.job }}' gets too little requests.
+ Please, check up your balancer middleware."
+
+
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+CRUD module statistics
+"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+If your application uses the `CRUD `_ module,
+monitoring module statistics can help you track internal errors caused by
+invalid processing of input and internal parameters.
+
+.. code-block:: yaml
+
+ - alert: HighCRUDErrorRate
+ expr: rate(tnt_crud_stats_count{ job="tarantool", status="error" }[5m]) > 0.1
+ for: 1m
+ labels:
+ severity: critical
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') too many CRUD {{ $labels.operation }} errors."
+ description: "Too many {{ $labels.operation }} CRUD requests for '{{ $labels.name }}' space on
+ '{{ $labels.alias }}' instance of job '{{ $labels.job }}' get module error responses."
+
+Statistics can also be used to monitor request performance. Too high request latency
+leads to high latency of client responses. It may be caused by network
+or disk issues. Read requests with bad (with respect to space indexes and
+sharding schema) conditions may lead to full scans or map-reduce operations and
+can also be the reason for high latency.
+
+.. code-block:: yaml
+
+ - alert: HighCRUDLatency
+ expr: tnt_crud_stats{ job="tarantool", quantile="0.99" } > 0.1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') too high CRUD {{ $labels.operation }} latency."
+ description: "Some {{ $labels.operation }} {{ $labels.status }} CRUD requests for '{{ $labels.name }}' space on
+ '{{ $labels.alias }}' instance of job '{{ $labels.job }}' are processed too long."
+
+You can also directly monitor the map-reduce and scan rates.
+
+.. code-block:: yaml
+
+ - alert: HighCRUDMapReduceRate
+ expr: rate(tnt_crud_map_reduces{ job="tarantool" }[5m]) > 0.1
+ for: 1m
+ labels:
+ severity: warning
+ annotations:
+ summary: "Instance '{{ $labels.alias }}' ('{{ $labels.job }}') too many CRUD {{ $labels.operation }} map reduces."
+ description: "There are too many {{ $labels.operation }} CRUD map reduce requests for '{{ $labels.name }}' space on
+ '{{ $labels.alias }}' instance of job '{{ $labels.job }}'.
+ Check your request conditions or consider changing sharding schema."
+
+
+.. _monitoring-alerting-server:
+
+-------------------------------------------------------------------------------
+Server-side monitoring
+-------------------------------------------------------------------------------
+
+If there are no Tarantool metrics, you may miss critical conditions. Prometheus
+provides the ``up`` metric to monitor the health of its targets.
+
+.. code-block:: yaml
+
+ - alert: InstanceDown
+ expr: up == 0
+ for: 1m
+ labels:
+ severity: page
+ annotations:
+ summary: "Instance '{{ $labels.instance }}' ('{{ $labels.job }}') down"
+ description: "'{{ $labels.instance }}' of job '{{ $labels.job }}' has been down for more than a minute."
+
+Do not forget to monitor your server's CPU, disk, and RAM from the server side with
+your favorite tools. For example, in some high CPU consumption cases a Tarantool
+instance may stop sending metrics, so you can track such breakdowns only from
+the outside.
diff --git a/doc/book/admin/monitoring/getting_started.rst b/doc/book/admin/monitoring/getting_started.rst
new file mode 100644
index 0000000000..e36b1dbe3f
--- /dev/null
+++ b/doc/book/admin/monitoring/getting_started.rst
@@ -0,0 +1,111 @@
+.. _monitoring-getting_started:
+
+Getting started with monitoring
+===============================
+
+Example on GitHub: `sharded_cluster_crud_metrics `_
+
+Tarantool allows you to configure and expose its :ref:`metrics ` using a :ref:`YAML configuration `.
+You can also use the built-in :ref:`metrics ` module to create and collect custom metrics.
+
+
+
+
+.. _monitoring_configuring_metrics:
+
+Configuring metrics
+-------------------
+
+To configure metrics, use the :ref:`metrics ` section in a cluster configuration.
+The configuration below enables all metrics excluding :ref:`vinyl `-specific ones:
+
+.. literalinclude:: /code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/config.yaml
+ :start-at: metrics:
+ :end-at: instance_name
+ :language: yaml
+ :dedent:
+
+The ``metrics.labels`` option accepts the predefined :ref:`{{ instance_name }} ` variable.
+This adds an instance name as a :ref:`label ` to every observation.
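+
+For reference, such a ``metrics`` section might look as follows.
+This is a sketch based on the description above rather than the exact example file contents;
+the ``alias`` label name is an assumption.
+
+.. code-block:: yaml
+
+    metrics:
+      # Enable all default metrics except the vinyl-specific ones.
+      include: [ all ]
+      exclude: [ vinyl ]
+      # Add the instance name to every observation.
+      labels:
+        alias: '{{ instance_name }}'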
+
+Third-party Lua modules, like `crud `_ or `expirationd `_, offer their own metrics.
+You can enable these metrics by :ref:`configuring the corresponding role `.
+The example below shows how to enable statistics on called operations by providing the ``roles.crud-router`` role's configuration:
+
+.. literalinclude:: /code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/config.yaml
+ :language: yaml
+ :start-after: routers:
+ :end-at: stats_quantiles
+ :dedent:
+
+``expirationd`` metrics can be enabled as follows:
+
+.. code-block:: yaml
+
+ expirationd:
+ cfg:
+ metrics: true
+
+
+
+.. _monitoring_exposing_metrics:
+
+Exposing metrics
+----------------
+
+To expose metrics in different formats, you can use a third-party `metrics-export-role `__ role.
+In the following example, the metrics of ``storage-a-001`` are provided on two endpoints:
+
+- ``/metrics/prometheus``: exposes metrics in the Prometheus format.
+- ``/metrics/json``: exposes metrics in the JSON format.
+
+.. literalinclude:: /code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/config.yaml
+ :start-at: storage-a-001:
+ :end-at: format: json
+ :language: yaml
+ :dedent:
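+
+If the example file is not at hand, such a configuration might look as follows.
+This is a sketch of the ``roles.metrics-export`` format: the listen port is an assumption,
+while the instance name and endpoint paths are taken from this section.
+
+.. code-block:: yaml
+
+    storage-a-001:
+      roles: [ roles.metrics-export ]
+      roles_cfg:
+        roles.metrics-export:
+          http:
+            # The port is an assumption; use the one reserved for metrics on your instance.
+            - listen: 8081
+              endpoints:
+                - path: /metrics/prometheus
+                  format: prometheus
+                - path: /metrics/json
+                  format: json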
+
+Example on GitHub: `sharded_cluster_crud_metrics `_
+
+.. NOTE::
+
+ The ``metrics`` module provides a set of plugins that can be used to collect and expose metrics in different formats. Learn more in :ref:`metrics-api_reference_collecting_using_plugins`.
+
+
+
+.. _monitoring_create_metrics:
+
+Creating custom metrics
+-----------------------
+
+The ``metrics`` module allows you to create and collect custom metrics.
+The example below shows how to collect the number of data operations performed on the specified space by increasing a ``counter`` value inside the :ref:`on_replace() ` trigger function:
+
+.. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_replace_count.lua
+ :start-after: -- Collect a custom metric
+ :end-before: -- End
+ :language: lua
+ :dedent:
+
+Learn more in :ref:`metrics-api_reference_custom_metrics`.
+
+
+
+.. _monitoring_collecting_metrics:
+
+Collecting metrics
+------------------
+
+When metrics are configured and exposed, you can use the desired third-party tool to collect them.
+Below is an example of a Prometheus scrape configuration that collects metrics from multiple Tarantool instances:
+
+.. literalinclude:: /code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/prometheus/prometheus.yml
+ :language: yaml
+ :dedent:
+
+For more information on collecting and visualizing metrics, refer to :ref:`monitoring-grafana_dashboard-page`.
+
+.. NOTE::
+
+ |tcm_full_name| allows you to view metrics of connected clusters in real time.
+ Learn more in :ref:`tcm_cluster_metrics`.
diff --git a/doc/book/admin/monitoring/grafana_dashboard.rst b/doc/book/admin/monitoring/grafana_dashboard.rst
new file mode 100644
index 0000000000..fb596fbd88
--- /dev/null
+++ b/doc/book/admin/monitoring/grafana_dashboard.rst
@@ -0,0 +1,171 @@
+.. _monitoring-grafana_dashboard-page:
+
+Grafana dashboard
+=================
+
+After :ref:`enabling and configuring metrics `, you can visualize them using Tarantool Grafana dashboards.
+These dashboards are available as part of
+`Grafana official & community-built dashboards `_:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 50 50
+ :header-rows: 0
+
+ * - Tarantool 3
+ - `Prometheus `_, `InfluxDB `_
+
+ * - Tarantool Cartridge and Tarantool 1.10—2.x
+ - `Prometheus `_, `InfluxDB `_
+
+ * - Tarantool Data Grid 2
+ - `Prometheus `_, `InfluxDB `_
+
+The Tarantool Grafana dashboard is a ready-for-import template with basic memory,
+space operations, and HTTP load panels, based on the default `metrics `_
+package functionality.
+
+.. image:: images/Prometheus_dashboard_1.png
+ :width: 30%
+
+.. image:: images/Prometheus_dashboard_2.png
+ :width: 30%
+
+.. image:: images/Prometheus_dashboard_3.png
+ :width: 30%
+
+.. _monitoring-grafana_dashboard-monitoring_stack:
+
+
+Prepare a monitoring stack
+--------------------------
+
+Since Grafana dashboards are available for both Prometheus and InfluxDB data sources,
+you can use one of the following monitoring stacks:
+
+- `Telegraf `_
+ as a server agent for collecting metrics, `InfluxDB `_
+ as a time series database for storing metrics, and `Grafana `_
+ as a visualization platform.
+- `Prometheus `_ as both a server agent for collecting metrics
+ and a time series database for storing metrics, and `Grafana `_
+ as a visualization platform.
+
+For issues related to setting up Prometheus, Telegraf, InfluxDB, or Grafana instances, refer to the corresponding project's documentation.
+
+.. _monitoring-grafana_dashboard-collect_metrics:
+
+Collect metrics with server agents
+----------------------------------
+
+.. _monitoring-grafana_dashboard-collect_metrics_prometheus:
+
+Prometheus
+~~~~~~~~~~
+
+To collect metrics for Prometheus, first set up metrics output in the ``prometheus`` format.
+You can use the :ref:`roles.metrics-export ` configuration or set up the :ref:`Prometheus plugin ` manually.
+To start collecting metrics, `add a job `_
+to the Prometheus configuration with each Tarantool instance URI as a target and
+the metrics path as configured on the Tarantool instances:
+
+.. literalinclude:: /code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/prometheus/prometheus.yml
+ :language: yaml
+ :dedent:
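+
+If the example file is not at hand, such a scrape job might look as follows.
+The target addresses are assumptions and must match the metrics endpoints
+configured on your Tarantool instances:
+
+.. code-block:: yaml
+
+    scrape_configs:
+      - job_name: "tarantool"
+        static_configs:
+          # Instance addresses are assumptions; list every Tarantool instance here.
+          - targets:
+              - "example_project:8081"
+              - "example_project:8082"
+        metrics_path: "/metrics/prometheus"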
+
+.. _monitoring-grafana_dashboard-collect_metrics_influxdb:
+
+InfluxDB
+~~~~~~~~
+
+To collect metrics for InfluxDB, use the Telegraf agent.
+First, configure Tarantool metrics output in the ``json`` format
+with the :ref:`roles.metrics-export ` configuration or the corresponding :ref:`JSON plugin `.
+To start collecting metrics, add an `http input `_
+to the Telegraf configuration, including each Tarantool instance's metrics URL:
+
+.. code-block:: toml
+
+ [[inputs.http]]
+ urls = [
+ "http://example_project:8081/metrics/json",
+ "http://example_project:8082/metrics/json",
+ "http://example_project:8083/metrics/json",
+ "http://example_project:8084/metrics/json",
+ "http://example_project:8085/metrics/json"
+ ]
+ timeout = "30s"
+ tag_keys = [
+ "metric_name",
+ "label_pairs_alias",
+ "label_pairs_quantile",
+ "label_pairs_path",
+ "label_pairs_method",
+ "label_pairs_status",
+ "label_pairs_operation",
+ "label_pairs_level",
+ "label_pairs_id",
+ "label_pairs_engine",
+ "label_pairs_name",
+ "label_pairs_index_name",
+ "label_pairs_delta",
+ "label_pairs_stream",
+ "label_pairs_thread",
+ "label_pairs_kind"
+ ]
+ insecure_skip_verify = true
+ interval = "10s"
+ data_format = "json"
+ name_prefix = "tarantool_"
+ fieldpass = ["value"]
+
+Be sure to include each label key as a ``label_pairs_<label_name>`` tag key to extract it
+with the plugin.
+For example, if you use :code:`{ state = 'ready' }` labels somewhere in metric collectors, add the ``label_pairs_state`` tag key.
+
+
+
+.. _monitoring-grafana_dashboard-import:
+
+Import the dashboard
+--------------------
+
+Open the Grafana import menu.
+
+.. image:: images/grafana_import.png
+ :align: left
+
+To import a specific dashboard, choose one of the following options:
+
+- paste the dashboard id (``21474`` for Prometheus dashboard, ``21484`` for InfluxDB dashboard)
+- paste a link to the dashboard (https://grafana.com/grafana/dashboards/21474 for Prometheus dashboard, https://grafana.com/grafana/dashboards/21484 for InfluxDB dashboard)
+- paste the dashboard JSON file contents
+- upload the dashboard JSON file
+
+Set the dashboard name, folder, and UID (if needed).
+
+.. image:: images/grafana_import_setup.png
+ :align: left
+
+You can choose the data source and data source variables after import.
+
+.. image:: images/grafana_variables_setup.png
+ :align: left
+
+.. _monitoring-grafana_dashboard-troubleshooting:
+
+Troubleshooting
+---------------
+
+- If there is no data on the graphs, make sure that you have picked the data source and job/measurement correctly.
+
+- If there is no data on the graphs, make sure that you have the ``info`` group of Tarantool metrics enabled
+ (in particular, ``tnt_info_uptime``).
+
+- If some Prometheus graphs show no data because of ``parse error: missing unit character in duration``,
+ ensure that you use Grafana 7.2 or newer.
+
+- If some Prometheus graphs display ``parse error: bad duration syntax "1m0"`` or similar error, you need
+ to update your Prometheus version. See
+ `grafana/grafana#44542 `_ for more details.
diff --git a/doc/book/admin/monitoring/images/Prometheus_dashboard_1.png b/doc/book/admin/monitoring/images/Prometheus_dashboard_1.png
new file mode 100644
index 0000000000..3ea5fed7ce
Binary files /dev/null and b/doc/book/admin/monitoring/images/Prometheus_dashboard_1.png differ
diff --git a/doc/book/admin/monitoring/images/Prometheus_dashboard_2.png b/doc/book/admin/monitoring/images/Prometheus_dashboard_2.png
new file mode 100644
index 0000000000..5cf04310ce
Binary files /dev/null and b/doc/book/admin/monitoring/images/Prometheus_dashboard_2.png differ
diff --git a/doc/book/admin/monitoring/images/Prometheus_dashboard_3.png b/doc/book/admin/monitoring/images/Prometheus_dashboard_3.png
new file mode 100644
index 0000000000..c03c0e7f03
Binary files /dev/null and b/doc/book/admin/monitoring/images/Prometheus_dashboard_3.png differ
diff --git a/doc/book/admin/monitoring/images/grafana_import.png b/doc/book/admin/monitoring/images/grafana_import.png
new file mode 100644
index 0000000000..1260367d93
Binary files /dev/null and b/doc/book/admin/monitoring/images/grafana_import.png differ
diff --git a/doc/book/admin/monitoring/images/grafana_import_setup.png b/doc/book/admin/monitoring/images/grafana_import_setup.png
new file mode 100644
index 0000000000..23400abb14
Binary files /dev/null and b/doc/book/admin/monitoring/images/grafana_import_setup.png differ
diff --git a/doc/book/admin/monitoring/images/grafana_variables_setup.png b/doc/book/admin/monitoring/images/grafana_variables_setup.png
new file mode 100644
index 0000000000..4737b4ee69
Binary files /dev/null and b/doc/book/admin/monitoring/images/grafana_variables_setup.png differ
diff --git a/doc/book/admin/monitoring/metrics_reference.rst b/doc/book/admin/monitoring/metrics_reference.rst
new file mode 100644
index 0000000000..3a1db70d8f
--- /dev/null
+++ b/doc/book/admin/monitoring/metrics_reference.rst
@@ -0,0 +1,986 @@
+.. _metrics-reference:
+
+Metrics reference
+=================
+
+This page provides a detailed description of metrics from the ``metrics`` module.
+
+General metrics
+---------------
+
+General instance information:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_cfg_current_time``
+ - Instance system time in the Unix timestamp format
+ * - ``tnt_info_uptime``
+ - Time in seconds since the instance has started
+ * - ``tnt_read_only``
+ - Indicates if the instance is in read-only mode (``1`` if true, ``0`` if false)
+
+.. _metrics-reference-memory_general:
+
+Memory general
+--------------
+
+The following metrics provide a picture of memory usage by the Tarantool process.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_info_memory_cache``
+ - Number of bytes in the cache used to store
+ tuples with the vinyl storage engine.
+ * - ``tnt_info_memory_data``
+ - Number of bytes used to store user data (tuples)
+ with the memtx engine and with level 0 of the vinyl engine,
+ without regard for memory fragmentation.
+ * - ``tnt_info_memory_index``
+ - Number of bytes used for indexing user data.
+ Includes memtx and vinyl memory tree extents,
+ the vinyl page index, and the vinyl bloom filters.
+ * - ``tnt_info_memory_lua``
+ - Number of bytes used for the Lua runtime.
+ Monitoring this metric can prevent memory overflow.
+ * - ``tnt_info_memory_net``
+ - Number of bytes used for network input/output buffers.
+ * - ``tnt_info_memory_tx``
+ - Number of bytes in use by active transactions.
+ For the vinyl storage engine,
+ this is the total size of all allocated objects
+ (struct ``txv``, struct ``vy_tx``, struct ``vy_read_interval``)
+ and tuples pinned for those objects.
+
+.. _metrics-reference-memory_allocation:
+
+Memory allocation
+-----------------
+
+Provides a memory usage report for the slab allocator.
+The slab allocator is the main allocator used to store tuples.
+The following metrics help monitor the total memory usage and memory fragmentation.
+To learn more about use cases, refer to the
+:ref:`box.slab submodule documentation `.
+
+Available memory, bytes:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_slab_quota_size``
+ - Amount of memory available to store tuples and indexes.
+ Is equal to ``memtx_memory``.
+ * - ``tnt_slab_arena_size``
+ - Total memory available to store both tuples and indexes.
+ Includes allocated but currently free slabs.
+ * - ``tnt_slab_items_size``
+ - Total amount of memory available to store only tuples and not indexes.
+ Includes allocated but currently free slabs.
+
+Memory usage, bytes:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_slab_quota_used``
+ - The amount of memory that is already reserved by the slab allocator.
+ * - ``tnt_slab_arena_used``
+ - The effective memory used to store both tuples and indexes.
+ Disregards allocated but currently free slabs.
+ * - ``tnt_slab_items_used``
+ - The effective memory used to store only tuples and not indexes.
+ Disregards allocated but currently free slabs.
+
+Memory utilization, %:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_slab_quota_used_ratio``
+ - ``tnt_slab_quota_used / tnt_slab_quota_size``
+ * - ``tnt_slab_arena_used_ratio``
+ - ``tnt_slab_arena_used / tnt_slab_arena_size``
+ * - ``tnt_slab_items_used_ratio``
+ - ``tnt_slab_items_used / tnt_slab_items_size``
+
+.. _metrics-reference-spaces:
+
+Spaces
+------
+
+The following metrics provide specific information
+about each individual space in a Tarantool instance.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_space_len``
+ - Number of records in the space.
+ This metric always has 2 labels: ``{name="test", engine="memtx"}``,
+ where ``name`` is the name of the space and
+ ``engine`` is the engine of the space.
+ * - ``tnt_space_bsize``
+ - Total number of bytes in all tuples.
+ This metric always has 2 labels: ``{name="test", engine="memtx"}``,
+ where ``name`` is the name of the space
+ and ``engine`` is the engine of the space.
+ * - ``tnt_space_index_bsize``
+ - Total number of bytes taken by the index.
+ This metric always has 2 labels: ``{name="test", index_name="pk"}``,
+ where ``name`` is the name of the space and
+ ``index_name`` is the name of the index.
+ * - ``tnt_space_total_bsize``
+ - Total size of tuples and all indexes in the space.
+ This metric always has 2 labels: ``{name="test", engine="memtx"}``,
+ where ``name`` is the name of the space and
+ ``engine`` is the engine of the space.
+ * - ``tnt_vinyl_tuples``
+ - Total tuple count for vinyl.
+ This metric always has 2 labels: ``{name="test", engine="vinyl"}``,
+ where ``name`` is the name of the space and
+ ``engine`` is the engine of the space. For vinyl this metric is disabled
+ by default and can be enabled only with global variable setup:
+ ``rawset(_G, 'include_vinyl_count', true)``.
+
+.. _metrics-reference-network:
+
+Network
+-------
+
+Network activity stats.
+These metrics can be used to monitor network load, usage peaks, and traffic drops.
+
+Sent bytes:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_net_sent_total``
+ - Bytes sent from the instance over the network since the instance's start time
+
+Received bytes:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_net_received_total``
+ - Bytes received by the instance since start time
+
+Connections:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_net_connections_total``
+ - Number of incoming network connections since the instance's start time
+ * - ``tnt_net_connections_current``
+ - Number of active network connections
+
+Requests:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_net_requests_total``
+ - Number of network requests the instance has handled since its start time
+ * - ``tnt_net_requests_current``
+ - Number of pending network requests
+
+Requests in progress:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_net_requests_in_progress_total``
+ - Total count of requests processed by the tx thread
+ * - ``tnt_net_requests_in_progress_current``
+ - Count of requests currently being processed in the tx thread
+
+Requests placed in queues of streams:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_net_requests_in_stream_total``
+ - Total count of requests placed in queues of streams
+ for all time
+ * - ``tnt_net_requests_in_stream_current``
+ - Count of requests currently waiting in queues of streams
+
+Since Tarantool 2.10, each network metric has the ``thread`` label, showing per-thread network statistics.
+
+.. _metrics-reference-fibers:
+
+Fibers
+------
+
+Provides the statistics for :ref:`fibers `.
+If your application creates a lot of fibers,
+you can use the metrics below to monitor fiber count and memory usage.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_fiber_amount``
+ - Number of fibers
+ * - ``tnt_fiber_csw``
+ - Overall number of fiber context switches
+ * - ``tnt_fiber_memalloc``
+ - Amount of memory reserved for fibers
+ * - ``tnt_fiber_memused``
+ - Amount of memory used by fibers
+
+.. _metrics-reference-operations:
+
+Operations
+----------
+
+You can collect the number of iproto requests an instance has processed
+and aggregate them by request type.
+This may help you find out which operations your clients perform most often.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_stats_op_total``
+ - Total number of calls since server start
+
+To distinguish between request types, this metric has the ``operation`` label.
+For example, it can look as follows: ``{operation="select"}``.
+For the possible request types, check the table below.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``auth``
+ - Authentication requests
+ * - ``call``
+ - Requests to execute stored procedures
+ * - ``delete``
+ - Delete calls
+ * - ``error``
+ - Requests that resulted in an error
+ * - ``eval``
+ - Calls to evaluate Lua code
+ * - ``execute``
+ - Execute SQL calls
+ * - ``insert``
+ - Insert calls
+ * - ``prepare``
+ - SQL prepare calls
+ * - ``replace``
+ - Replace calls
+ * - ``select``
+ - Select calls
+ * - ``update``
+ - Update calls
+ * - ``upsert``
+ - Upsert calls
+
+.. _metrics-reference-replication:
+
+Replication
+-----------
+
+Provides the current replication status.
+Learn more about :ref:`replication in Tarantool `.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_info_lsn``
+ - LSN of the instance.
+ * - ``tnt_info_vclock``
+ - LSN number in vclock.
+ This metric always has the label ``{id="id"}``,
+ where ``id`` is the instance's number in the replica set.
+ * - ``tnt_replication_lsn``
+ - LSN of the Tarantool instance.
+ This metric always has labels ``{id="id", type="type"}``, where
+ ``id`` is the instance's number in the replica set,
+ ``type`` is ``master`` or ``replica``.
+ * - ``tnt_replication_lag``
+ - Replication lag value in seconds.
+ This metric always has labels ``{id="id", stream="stream"}``,
+ where ``id`` is the instance's number in the replica set,
+ ``stream`` is ``downstream`` or ``upstream``.
+ * - ``tnt_replication_status``
+ - This metric equals 1 when the replication status is "follow" and 0 otherwise.
+ This metric always has labels ``{id="id", stream="stream"}``,
+ where ``id`` is the instance's number in the replica set,
+ ``stream`` is ``downstream`` or ``upstream``.
+
+.. _metrics-reference-runtime:
+
+Runtime
+-------
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_runtime_lua``
+ - Lua garbage collector size in bytes
+ * - ``tnt_runtime_used``
+ - Number of bytes used for the Lua runtime
+ * - ``tnt_runtime_tuple``
+ - Number of bytes used for the tuples (except tuples owned by memtx and vinyl)
+
+
+
+.. _metrics-reference-luajit:
+
+LuaJIT metrics
+--------------
+
+LuaJIT metrics provide insight into the work of the Lua garbage collector.
+These metrics are available in Tarantool 2.6 and later.
+
+General JIT metrics:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``lj_jit_snap_restore_total``
+ - Overall number of snap restores
+ * - ``lj_jit_trace_num``
+ - Number of JIT traces
+ * - ``lj_jit_trace_abort_total``
+ - Overall number of abort traces
+ * - ``lj_jit_mcode_size``
+ - Total size of allocated machine code areas
+
+JIT strings:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``lj_strhash_hit_total``
+ - Number of strings being interned
+ * - ``lj_strhash_miss_total``
+ - Total number of string allocations
+
+GC steps:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``lj_gc_steps_atomic_total``
+ - Count of incremental GC steps (atomic state)
+ * - ``lj_gc_steps_sweepstring_total``
+ - Count of incremental GC steps (sweepstring state)
+ * - ``lj_gc_steps_finalize_total``
+ - Count of incremental GC steps (finalize state)
+ * - ``lj_gc_steps_sweep_total``
+ - Count of incremental GC steps (sweep state)
+ * - ``lj_gc_steps_propagate_total``
+ - Count of incremental GC steps (propagate state)
+ * - ``lj_gc_steps_pause_total``
+ - Count of incremental GC steps (pause state)
+
+Allocations:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``lj_gc_strnum``
+ - Number of allocated ``string`` objects
+ * - ``lj_gc_tabnum``
+ - Number of allocated ``table`` objects
+ * - ``lj_gc_cdatanum``
+ - Number of allocated ``cdata`` objects
+ * - ``lj_gc_udatanum``
+ - Number of allocated ``udata`` objects
+ * - ``lj_gc_freed_total``
+ - Total amount of freed memory
+ * - ``lj_gc_memory``
+ - Current allocated Lua memory
+ * - ``lj_gc_allocated_total``
+ - Total amount of allocated memory
+
+.. _metrics-reference-psutils:
+.. _metrics-api_reference-cpu_usage_metrics:
+
+CPU metrics
+-----------
+
+The following metrics provide CPU usage statistics.
+They are only available on Linux.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_cpu_number``
+ - Total number of processors configured by the operating system
+ * - ``tnt_cpu_time``
+ - Host CPU time
+ * - ``tnt_cpu_thread``
+ - Tarantool thread CPU time.
+ This metric always has the labels
+ ``{kind="user", thread_name="tarantool", thread_pid="pid", file_name="init.lua"}``,
+ where:
+
+ * ``kind`` can be either ``user`` or ``system``
+ * ``thread_name`` is ``tarantool``, ``wal``, ``iproto``, or ``coio``
+ * ``file_name`` is the entrypoint file name, for example, ``init.lua``.
+
+There are also two cross-platform metrics, which can be obtained with a ``getrusage()`` call.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_cpu_user_time``
+ - Tarantool CPU user time
+ * - ``tnt_cpu_system_time``
+ - Tarantool CPU system time
+
+.. _metrics-reference-vinyl:
+
+Vinyl
+-----
+
+Vinyl metrics provide :ref:`vinyl engine ` statistics.
+
+.. _metrics-reference-vinyl-disk:
+
+Disk
+~~~~
+
+The disk metrics are used to monitor overall data size on disk.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_vinyl_disk_data_size``
+ - Amount of data in bytes stored in the ``.run`` files
+ located in :ref:`vinyl_dir `
+ * - ``tnt_vinyl_disk_index_size``
+ - Amount of data in bytes stored in the ``.index`` files
+ located in :ref:`vinyl_dir `
+
+.. _metrics-reference-vinyl_regulator:
+
+Regulator
+~~~~~~~~~
+
+The vinyl regulator decides when to commence disk IO actions.
+It groups activities in batches so that they are more consistent and
+efficient.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_vinyl_regulator_dump_bandwidth``
+ - Estimated average dumping rate, bytes per second.
+ The rate value is initially 10485760 (10 megabytes per second).
+ It is recalculated depending on the actual rate.
+ Only significant dumps that are larger than 1 MB are used for estimating.
+ * - ``tnt_vinyl_regulator_write_rate``
+ - Actual average rate of performing write operations, bytes per second.
+ The rate is calculated as a 5-second moving average.
+ If the metric value is gradually going down,
+ this can indicate disk issues.
+ * - ``tnt_vinyl_regulator_rate_limit``
+ - Write rate limit, bytes per second.
+ The regulator imposes the limit on transactions
+ based on the observed dump/compaction performance.
+ If the metric value is down to approximately ``10^5``,
+ this indicates issues with the disk
+ or the :ref:`scheduler `.
+ * - ``tnt_vinyl_regulator_dump_watermark``
+ - Maximum amount of memory in bytes used
+ for in-memory storing of a vinyl LSM tree.
+ When this maximum is reached, a dump must occur.
+ For details, see :ref:`engines-algorithm_filling_lsm`.
+ The value is slightly smaller
+ than the amount of memory allocated for vinyl trees,
+ reflected in the :ref:`vinyl_memory ` parameter.
+ * - ``tnt_vinyl_regulator_blocked_writers``
+ - The number of fibers that are blocked waiting
+ for Vinyl level0 memory quota.
+
+.. _metrics-reference-transactional-activity:
+
+Transactional activity
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_vinyl_tx_commit``
+ - Counter of commits (successful transaction ends).
+ Includes implicit commits: for example, any insert operation causes a
+ commit unless it is within a
+ :doc:`/reference/reference_lua/box_txn_management/begin`\ --\ :doc:`/reference/reference_lua/box_txn_management/commit`
+ block.
+ * - ``tnt_vinyl_tx_rollback``
+ - Counter of rollbacks (unsuccessful transaction ends).
+ This is not merely a count of explicit
+ :doc:`/reference/reference_lua/box_txn_management/rollback`
+ requests -- it includes requests that ended with errors.
+ * - ``tnt_vinyl_tx_conflict``
+ - Counter of conflicts that caused transactions to roll back.
+ The ratio ``tnt_vinyl_tx_conflict / tnt_vinyl_tx_commit``
+ above 5% indicates that vinyl is not healthy.
+ At that moment, you'll probably see a lot of other problems with vinyl.
+ * - ``tnt_vinyl_tx_read_views``
+ - Current number of read views -- that is, transactions
+ that entered the read-only state to avoid conflict temporarily.
+ Usually the value is ``0``.
+ If it stays non-zero for a long time, it is indicative of a memory leak.
+
+
+.. _metrics-reference-memory:
+
+Memory
+~~~~~~
+
+The following metrics show the state of memory areas used by vinyl for caches and write buffers.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_vinyl_memory_tuple_cache``
+ - Amount of memory in bytes currently used to store tuples (data)
+ * - ``tnt_vinyl_memory_level0``
+ - "Level 0" (L0) memory area, bytes.
+ L0 is the area that vinyl can use for in-memory storage of an LSM tree.
+ By monitoring this metric, you can see when L0 is getting close to its
+ maximum (``tnt_vinyl_regulator_dump_watermark``),
+ at which time a dump will occur.
+ You can expect L0 = 0 immediately after the dump operation is completed.
+ * - ``tnt_vinyl_memory_page_index``
+ - Amount of memory in bytes currently used to store indexes.
+ If the metric value is close to :ref:`vinyl_memory `,
+ this indicates that :ref:`vinyl_page_size `
+ was chosen incorrectly.
+ * - ``tnt_vinyl_memory_bloom_filter``
+ - Amount of memory in bytes used by
+ :ref:`bloom filters `.
+ * - ``tnt_vinyl_memory_tuple``
+ - Total size of memory in bytes occupied by Vinyl tuples.
+ It includes cached tuples and tuples pinned by the Lua world.
+
+.. _metrics-reference-vinyl_scheduler:
+
+Scheduler
+~~~~~~~~~
+
+The vinyl scheduler invokes the :ref:`regulator ` and
+updates the related variables. This happens once per second.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_vinyl_scheduler_tasks``
+ - Number of scheduler dump/compaction tasks.
+ This metric always has the label ``{status="..."}``,
+ which has the following possible values:
+
+ * ``inprogress`` for currently running tasks
+ * ``completed`` for successfully completed tasks
+ * ``failed`` for tasks aborted due to errors.
+
+ * - ``tnt_vinyl_scheduler_dump_time``
+ - Total time in seconds spent by all worker threads performing dumps.
+ * - ``tnt_vinyl_scheduler_dump_total``
+ - Counter of dumps completed.
+
+.. _metrics-reference-memory_event_loop:
+
+Event loop metrics
+------------------
+
+Event loop tx thread information:
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_ev_loop_time``
+ - Event loop time (ms)
+ * - ``tnt_ev_loop_prolog_time``
+ - Event loop prolog time (ms)
+ * - ``tnt_ev_loop_epilog_time``
+ - Event loop epilog time (ms)
+
+
+.. _metrics-reference-synchro:
+
+Synchro
+-------
+
+Shows the current state of synchronous replication.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_synchro_queue_owner``
+ - Instance ID of the current synchronous replication master.
+
+ * - ``tnt_synchro_queue_term``
+ - Current queue term.
+
+ * - ``tnt_synchro_queue_len``
+ - How many transactions are collecting confirmations now.
+
+ * - ``tnt_synchro_queue_busy``
+ - Whether the queue is processing any system entry (CONFIRM/ROLLBACK/PROMOTE/DEMOTE).
+
+.. _metrics-reference-election:
+
+Election
+--------
+
+Shows the current state of a replica set node with regard to leader election.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_election_state``
+ - Election state (mode) of the node.
+ When election is enabled, the node is writable only in the leader state.
+ Possible values:
+
+ * 0 (``follower``): all the non-leader nodes are called followers
+ * 1 (``candidate``): the nodes that start a new election round are called candidates.
+ * 2 (``leader``): the node that collected a quorum of votes becomes the leader
+
+ * - ``tnt_election_vote``
+ - ID of a node the current node votes for.
+ If the value is 0, it means the node hasn’t voted in the current term yet.
+
+ * - ``tnt_election_leader``
+ - Leader node ID in the current term.
+ If the value is 0, it means the node doesn’t know which node is the leader in the current term.
+
+ * - ``tnt_election_term``
+ - Current election term.
+
+ * - ``tnt_election_leader_idle``
+ - Time in seconds since the last interaction with the known leader.
+
+Memtx
+-----
+
+Memtx MVCC memory statistics.
+The transaction manager consists of two parts:
+
+- the transactions themselves (TXN section)
+- MVCC
+
+.. _metrics-reference-memtx_txn:
+
+TXN
+~~~
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_memtx_tnx_statements``
+ - Transaction statements. For example, if a user starts a transaction and performs an action in it, such as ``space:replace{0, 1}``,
+ this operation turns into a ``statement`` for the current transaction under the hood.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``total``: the number of bytes allocated for the statements of all current transactions.
+ * ``average``: average bytes used by transactions for statements
+ (``txn.statements.total`` bytes / number of open transactions).
+ * ``max``: the maximum number of bytes used by one current transaction for statements.
+
+ * - ``tnt_memtx_tnx_user``
+ - The Tarantool C API has the function ``box_txn_alloc()``.
+ By using this function, a user can allocate memory for the current transaction.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``total``: memory allocated by the ``box_txn_alloc()`` function in all current transactions.
+ * ``average``: transaction average (total allocated bytes / number of all current transactions).
+ * ``max``: the maximum number of bytes allocated by the ``box_txn_alloc()`` function per transaction.
+
+ * - ``tnt_memtx_tnx_system``
+ - Memory used by internal structures: logs, savepoints.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``total``: memory allocated by internals in all current transactions.
+ * ``average``: average memory allocated by internals (total memory / number of all current transactions).
+ * ``max``: the maximum number of bytes allocated by internals per transaction.
+
+.. _metrics-reference-memtx_mvcc:
+
+MVCC
+~~~~
+
+MVCC is responsible for the isolation of transactions.
+It detects conflicts and makes sure that tuples that are no longer in the space but have been read
+by some transaction (or can be read) are not deleted.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_memtx_mvcc_trackers``
+ - Trackers that keep track of transaction reads.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``total``: total amount of memory (in bytes) allocated for trackers of all current transactions.
+ * ``average``: average for all current transactions (total memory bytes / number of transactions).
+ * ``max``: maximum trackers allocated per transaction (in bytes).
+
+ * - ``tnt_memtx_mvcc_conflicts``
+ - Allocated in case of transaction conflicts.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``total``: bytes allocated for conflicts in total.
+ * ``average``: average for all current transactions (total memory bytes / number of transactions).
+ * ``max``: maximum bytes allocated for conflicts per transaction.
+
+
+.. _metrics-reference-tuples:
+
+Tuples
+~~~~~~
+
+Saved tuples are divided into 3 categories: ``used``, ``read_view``, ``tracking``.
+
+Each category has two metrics:
+
+- ``retained`` tuples: they are no longer in the index, but MVCC does not allow them to be removed.
+- ``stories``: MVCC is based on the story mechanism, and almost every tuple has a story.
+  This is a separate metric because even the tuples that are in the index can have a story,
+  so ``stories`` and ``retained`` need to be measured separately.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_memtx_mvcc_tuples_used_stories``
+ - Tuples that are used by active read-write transactions.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``count``: number of ``used`` tuples / number of stories.
+ * ``total``: amount of bytes used by stories of ``used`` tuples.
+
+ * - ``tnt_memtx_mvcc_tuples_used_retained``
+ - Tuples that are used by active read-write transactions
+ but are no longer in the index; MVCC does not allow them to be removed.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``count``: number of retained ``used`` tuples / number of stories.
+ * ``total``: amount of bytes used by retained ``used`` tuples.
+
+ * - ``tnt_memtx_mvcc_tuples_read_view_stories``
+ - Tuples that are not used by active read-write transactions,
+ but are used by read-only transactions (i.e. in read view).
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``count``: the number of ``read_view`` tuples (the number of their stories).
+ * ``total``: the number of bytes used by the stories of ``read_view`` tuples.
+
+ * - ``tnt_memtx_mvcc_tuples_read_view_retained``
+ - Tuples that are not used by active read-write transactions,
+ but are used by read-only transactions (i.e. in read view).
+ These tuples are no longer in the index, but MVCC does not allow them to be removed.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``count``: the number of retained ``read_view`` tuples (the number of their stories).
+ * ``total``: the number of bytes used by retained ``read_view`` tuples.
+
+ * - ``tnt_memtx_mvcc_tuples_tracking_stories``
+ - Tuples that are not directly used by any transactions, but are used by MVCC to track reads.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``count``: the number of ``tracking`` tuples (the number of their tracking stories).
+ * ``total``: the number of bytes used by the stories of ``tracking`` tuples.
+
+ * - ``tnt_memtx_mvcc_tuples_tracking_retained``
+ - Tuples that are not directly used by any transactions, but are used by MVCC to track reads.
+ These tuples are no longer in the index, but MVCC does not allow them to be removed.
+ This metric always has the label ``{kind="..."}``,
+ which has the following possible values:
+
+ * ``count``: the number of retained ``tracking`` tuples (the number of their stories).
+ * ``total``: the number of bytes used by retained ``tracking`` tuples.
+
+
+.. _metrics-reference-read-view:
+
+Read view statistics
+~~~~~~~~~~~~~~~~~~~~
+
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_memtx_tuples_data_total``
+ - Total amount of memory (in bytes) allocated for data tuples.
+ This includes ``tnt_memtx_tuples_data_read_view`` and
+ ``tnt_memtx_tuples_data_garbage`` metric values plus tuples that
+ are actually stored in memtx spaces.
+
+ * - ``tnt_memtx_tuples_data_read_view``
+ - Memory (in bytes) held for read views.
+
+ * - ``tnt_memtx_tuples_data_garbage``
+ - Memory (in bytes) that is unused and scheduled to be freed
+ (freed lazily on memory allocation).
+
+ * - ``tnt_memtx_index_total``
+ - Total amount of memory (in bytes) allocated for indexing data.
+ This includes ``tnt_memtx_index_read_view`` metric value
+ plus memory used for indexing tuples
+ that are actually stored in memtx spaces.
+
+ * - ``tnt_memtx_index_read_view``
+ - Memory (in bytes) held for read views.
+
+
+.. _metrics-reference-tarantool-config:
+
+Tarantool configuration
+-----------------------
+
+**Since:** :doc:`3.0.0 `.
+
+.. container:: table
+
+ .. list-table::
+ :widths: 25 75
+ :header-rows: 0
+
+ * - ``tnt_config_alerts``
+ - Count of current instance :ref:`configuration apply alerts `.
+ ``{level="warn"}`` label covers warnings and
+ ``{level="error"}`` covers errors.
+
+ * - ``tnt_config_status``
+ - The status of current instance :ref:`configuration apply `.
+ ``status`` label contains possible status name.
+ Current status has metric value ``1``, inactive statuses have metric value ``0``.
+
+ .. code-block:: none
+
+ # HELP tnt_config_status Tarantool 3 configuration status
+ # TYPE tnt_config_status gauge
+ tnt_config_status{status="reload_in_progress",alias="router-001-a"} 0
+ tnt_config_status{status="uninitialized",alias="router-001-a"} 0
+ tnt_config_status{status="check_warnings",alias="router-001-a"} 0
+ tnt_config_status{status="ready",alias="router-001-a"} 1
+ tnt_config_status{status="check_errors",alias="router-001-a"} 0
+ tnt_config_status{status="startup_in_progress",alias="router-001-a"} 0
+
+ For example, this set of metrics means that the current configuration
+ status for ``router-001-a`` is ``ready``.
\ No newline at end of file
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/README.md b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/README.md
new file mode 100644
index 0000000000..fef01516ca
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/README.md
@@ -0,0 +1,11 @@
+# Collecting custom metrics
+
+A sample application showing how to collect custom [metrics](https://www.tarantool.io/doc/latest/book/monitoring/).
+
+## Running
+
+Start the application by executing the following command in the [config](../../../config) directory:
+
+```shell
+$ tt start metrics_collect_custom
+```
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/config.yaml b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/config.yaml
new file mode 100644
index 0000000000..54bf57667e
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/config.yaml
@@ -0,0 +1,20 @@
+metrics:
+ include: [ all ]
+ exclude: [ vinyl ]
+ labels:
+ alias: '{{ instance_name }}'
+roles:
+- examples.collect_custom_replace_count
+- examples.collect_custom_waste_size
+app:
+ file: 'load_data.lua'
+
+groups:
+ group001:
+ replicasets:
+ replicaset001:
+ instances:
+ instance001:
+ iproto:
+ listen:
+ - uri: '127.0.0.1:3301'
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_replace_count.lua b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_replace_count.lua
new file mode 100644
index 0000000000..a15d81a4bf
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_replace_count.lua
@@ -0,0 +1,22 @@
+local function apply()
+ -- Collect a custom metric at an arbitrary moment in time --
+ local metrics = require('metrics')
+ local bands_replace_count = metrics.counter('bands_replace_count', 'The number of data operations')
+ local trigger = require('trigger')
+ trigger.set(
+ 'box.space.bands.on_replace',
+ 'update_bands_replace_count_metric',
+ function(_, _, _, request_type)
+ bands_replace_count:inc(1, { request_type = request_type })
+ end
+ )
+ -- End --
+end
+
+return {
+ validate = function()
+ end,
+ apply = apply,
+ stop = function()
+ end,
+}
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_waste_size.lua b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_waste_size.lua
new file mode 100644
index 0000000000..520975b468
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_waste_size.lua
@@ -0,0 +1,17 @@
+local function apply()
+ -- Collect a custom metric when the data collected by metrics is requested --
+ local metrics = require('metrics')
+ local bands_waste_size = metrics.gauge('bands_waste_size', 'The size of memory wasted due to internal fragmentation')
+ metrics.register_callback(function()
+ bands_waste_size:set(box.space.bands:stat()['tuple']['memtx']['waste_size'])
+ end)
+ -- End --
+end
+
+return {
+ validate = function()
+ end,
+ apply = apply,
+ stop = function()
+ end,
+}
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/instances.yml b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/instances.yml
new file mode 100644
index 0000000000..aa60c2fc42
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/instances.yml
@@ -0,0 +1 @@
+instance001:
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/load_data.lua b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/load_data.lua
new file mode 100644
index 0000000000..cc721d61a3
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_custom/load_data.lua
@@ -0,0 +1,25 @@
+function create_space()
+ box.schema.space.create('bands')
+ box.space.bands:format({
+ { name = 'id', type = 'unsigned' },
+ { name = 'band_name', type = 'string' },
+ { name = 'year', type = 'unsigned' }
+ })
+ box.space.bands:create_index('primary', { parts = { 'id' } })
+end
+
+function load_data()
+ box.space.bands:insert { 1, 'Roxette', 1986 }
+ box.space.bands:insert { 2, 'Scorpions', 1965 }
+ box.space.bands:insert { 3, 'Ace of Base', 1987 }
+ box.space.bands:insert { 4, 'The Beatles', 1960 }
+ box.space.bands:insert { 5, 'Pink Floyd', 1965 }
+ box.space.bands:insert { 6, 'The Rolling Stones', 1962 }
+ box.space.bands:insert { 7, 'The Doors', 1965 }
+ box.space.bands:insert { 8, 'Nirvana', 1987 }
+ box.space.bands:insert { 9, 'Led Zeppelin', 1968 }
+ box.space.bands:insert { 10, 'Queen', 1970 }
+end
+
+create_space()
+load_data()
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/README.md b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/README.md
new file mode 100644
index 0000000000..95fbea43a3
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/README.md
@@ -0,0 +1,17 @@
+# Collecting HTTP metrics
+
+A sample application showing how to enable and configure [metrics](https://www.tarantool.io/doc/latest/book/monitoring/) in your application.
+
+## Running
+
+Before starting the application, install the `http` module by executing the `tt rocks install` command in the [config](../../../config) directory:
+
+```shell
+$ tt rocks install http
+```
+
+Then, start the application:
+
+```shell
+$ tt start metrics_collect_http
+```
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/collect_http_metrics.lua b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/collect_http_metrics.lua
new file mode 100644
index 0000000000..2e57785196
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/collect_http_metrics.lua
@@ -0,0 +1,34 @@
+local httpd
+
+local function apply()
+ if httpd then
+ httpd:stop()
+ end
+
+ -- Collect HTTP metrics for the '/metrics/hello' route --
+ httpd = require('http.server').new('127.0.0.1', 8080)
+ local metrics = require('metrics')
+ metrics.http_middleware.configure_default_collector('summary')
+ httpd:route({
+ method = 'GET',
+ path = '/metrics/hello'
+ }, metrics.http_middleware.v1(
+ function()
+ return { status = 200,
+ headers = { ['content-type'] = 'text/plain' },
+ body = 'Hello from http_middleware!' }
+ end))
+
+ httpd:start()
+end
+
+local function stop()
+ httpd:stop()
+end
+
+return {
+ validate = function()
+ end,
+ apply = apply,
+ stop = stop,
+}
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/config.yaml b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/config.yaml
new file mode 100644
index 0000000000..2697252750
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/config.yaml
@@ -0,0 +1,17 @@
+metrics:
+ include: [ all ]
+ exclude: [ vinyl ]
+ labels:
+ alias: '{{ instance_name }}'
+roles:
+- collect_http_metrics
+
+groups:
+ group001:
+ replicasets:
+ replicaset001:
+ instances:
+ instance001:
+ iproto:
+ listen:
+ - uri: '127.0.0.1:3301'
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/instances.yml b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/instances.yml
new file mode 100644
index 0000000000..aa60c2fc42
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_collect_http/instances.yml
@@ -0,0 +1 @@
+instance001:
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/README.md b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/README.md
new file mode 100644
index 0000000000..11c9d7705d
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/README.md
@@ -0,0 +1,29 @@
+# Exposing metrics using plugins
+
+A sample application showing how to use [metrics](https://www.tarantool.io/doc/latest/book/monitoring/) plugins for exposing metrics.
+
+## Running
+
+Before starting the application, install the `http` module by executing the `tt rocks install` command in the [config](../../../config) directory:
+
+```shell
+$ tt rocks install http
+```
+
+Then, start the application:
+
+```shell
+$ tt start metrics_plugins
+```
+
+To get Prometheus metrics, make the following request:
+
+```console
+$ curl -X GET --location "http://127.0.0.1:8080/metrics/prometheus"
+```
+
+To get metrics in the JSON format, make the following request:
+
+```console
+$ curl -X GET --location "http://127.0.0.1:8081/metrics/json"
+```
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/config.yaml b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/config.yaml
new file mode 100644
index 0000000000..fde2bf723f
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/config.yaml
@@ -0,0 +1,18 @@
+metrics:
+ include: [ all ]
+ exclude: [ vinyl ]
+ labels:
+ alias: '{{ instance_name }}'
+roles:
+- examples.expose_prometheus_metrics
+- examples.expose_json_metrics
+
+groups:
+ group001:
+ replicasets:
+ replicaset001:
+ instances:
+ instance001:
+ iproto:
+ listen:
+ - uri: '127.0.0.1:3301'
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_json_metrics.lua b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_json_metrics.lua
new file mode 100644
index 0000000000..411f32014f
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_json_metrics.lua
@@ -0,0 +1,32 @@
+local httpd
+
+local function apply()
+ if httpd then
+ httpd:stop()
+ end
+
+ -- Expose JSON metrics --
+ httpd = require('http.server').new('127.0.0.1', 8081)
+ httpd:route({
+ method = 'GET',
+ path = '/metrics/json'
+ }, function()
+ local json_plugin = require('metrics.plugins.json')
+ local json_metrics = json_plugin.export()
+ return { status = 200,
+ headers = { ['content-type'] = 'application/json' },
+ body = json_metrics }
+ end)
+ httpd:start()
+end
+
+local function stop()
+ httpd:stop()
+end
+
+return {
+ validate = function()
+ end,
+ apply = apply,
+ stop = stop,
+}
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_prometheus_metrics.lua b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_prometheus_metrics.lua
new file mode 100644
index 0000000000..c812df8598
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_prometheus_metrics.lua
@@ -0,0 +1,30 @@
+local httpd
+
+local function apply()
+ if httpd then
+ httpd:stop()
+ end
+
+ -- Expose Prometheus metrics --
+ httpd = require('http.server').new('127.0.0.1', 8080)
+ httpd:route({
+ method = 'GET',
+ path = '/metrics/prometheus'
+ }, function()
+ local prometheus_plugin = require('metrics.plugins.prometheus')
+ local prometheus_metrics = prometheus_plugin.collect_http()
+ return prometheus_metrics
+ end)
+ httpd:start()
+end
+
+local function stop()
+ httpd:stop()
+end
+
+return {
+ validate = function()
+ end,
+ apply = apply,
+ stop = stop,
+}
diff --git a/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/instances.yml b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/instances.yml
new file mode 100644
index 0000000000..aa60c2fc42
--- /dev/null
+++ b/doc/code_snippets/snippets/config/instances.enabled/metrics_plugins/instances.yml
@@ -0,0 +1 @@
+instance001:
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud/config.yaml b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud/config.yaml
index 4c505e1a29..dbccf140cb 100644
--- a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud/config.yaml
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud/config.yaml
@@ -59,14 +59,6 @@ groups:
client: '127.0.0.1:3305'
routers:
roles: [ roles.crud-router ]
- roles_cfg:
- roles.crud-router:
- stats: true
- stats_driver: metrics
- stats_quantiles: false
- stats_quantile_tolerated_error: 0.001
- stats_quantile_age_buckets_count: 5
- stats_quantile_max_age_time: 180
app:
module: router
sharding:
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/README.md b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/README.md
new file mode 100644
index 0000000000..1d3419d44f
--- /dev/null
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/README.md
@@ -0,0 +1,44 @@
+# Sharded cluster: Exposing metrics
+
+A sample application showing how to enable and expose [metrics](https://www.tarantool.io/doc/latest/book/monitoring/) through HTTP.
+
+## Running
+
+Before starting the application, install dependencies defined in the `*.rockspec` file:
+
+```console
+$ tt build sharded_cluster_crud_metrics
+```
+
+Then, start the application:
+
+```console
+$ tt start sharded_cluster_crud_metrics
+```
+
+To get Prometheus metrics, make the following request:
+
+```console
+$ curl -X GET --location "http://127.0.0.1:8081/metrics/prometheus"
+```
+
+To get metrics in the JSON format, make the following request:
+
+```console
+$ curl -X GET --location "http://127.0.0.1:8081/metrics/json"
+```
+
+
+## Running the Prometheus server
+
+To monitor the metrics of a running sample, you need to install Prometheus either locally or using Docker.
+To install and run Prometheus using Docker, follow the steps below:
+
+1. Open the [sharded_cluster_crud_metrics](../../../sharding/instances.enabled/sharded_cluster_crud_metrics) directory in the terminal.
+2. Replace `127.0.0.1` with `host.docker.internal` in the `prometheus/prometheus.yml` file.
+3. Then, run a server:
+ ```Bash
+ docker compose up
+ ```
+4. Open the [http://localhost:9090/graph](http://localhost:9090/graph) page to access the Prometheus expression browser.
+5. Enter the desired Tarantool metric, for example, `tnt_info_uptime` or `tnt_info_memory_data`, to see monitoring results.
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/config.yaml b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/config.yaml
new file mode 100644
index 0000000000..4e208542f2
--- /dev/null
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/config.yaml
@@ -0,0 +1,134 @@
+credentials:
+ users:
+ replicator:
+ password: 'topsecret'
+ roles: [ replication ]
+ storage:
+ password: 'secret'
+ roles: [ sharding ]
+
+iproto:
+ advertise:
+ peer:
+ login: replicator
+ sharding:
+ login: storage
+
+sharding:
+ bucket_count: 1000
+
+metrics:
+ include: [ all ]
+ exclude: [ vinyl ]
+ labels:
+ alias: '{{ instance_name }}'
+
+groups:
+ storages:
+ roles:
+ - roles.crud-storage
+ - roles.metrics-export
+ app:
+ module: storage
+ sharding:
+ roles: [ storage ]
+ replication:
+ failover: manual
+ replicasets:
+ storage-a:
+ leader: storage-a-001
+ instances:
+ storage-a-001:
+ roles_cfg:
+ roles.metrics-export:
+ http:
+ - listen: '127.0.0.1:8082'
+ endpoints:
+ - path: /metrics/prometheus/
+ format: prometheus
+ - path: /metrics/json
+ format: json
+ iproto:
+ listen:
+ - uri: '127.0.0.1:3302'
+ advertise:
+ client: '127.0.0.1:3302'
+ storage-a-002:
+ roles_cfg:
+ roles.metrics-export:
+ http:
+ - listen: '127.0.0.1:8083'
+ endpoints:
+ - path: /metrics/prometheus/
+ format: prometheus
+ - path: /metrics/json
+ format: json
+ iproto:
+ listen:
+ - uri: '127.0.0.1:3303'
+ advertise:
+ client: '127.0.0.1:3303'
+ storage-b:
+ leader: storage-b-001
+ instances:
+ storage-b-001:
+ roles_cfg:
+ roles.metrics-export:
+ http:
+ - listen: '127.0.0.1:8084'
+ endpoints:
+ - path: /metrics/prometheus/
+ format: prometheus
+ - path: /metrics/json
+ format: json
+ iproto:
+ listen:
+ - uri: '127.0.0.1:3304'
+ advertise:
+ client: '127.0.0.1:3304'
+ storage-b-002:
+ roles_cfg:
+ roles.metrics-export:
+ http:
+ - listen: '127.0.0.1:8085'
+ endpoints:
+ - path: /metrics/prometheus/
+ format: prometheus
+ - path: /metrics/json
+ format: json
+ iproto:
+ listen:
+ - uri: '127.0.0.1:3305'
+ advertise:
+ client: '127.0.0.1:3305'
+ routers:
+ roles:
+ - roles.crud-router
+ - roles.metrics-export
+ roles_cfg:
+ roles.crud-router:
+ stats: true
+ stats_driver: metrics
+ stats_quantiles: true
+ app:
+ module: router
+ sharding:
+ roles: [ router ]
+ replicasets:
+ router-a:
+ instances:
+ router-a-001:
+ roles_cfg:
+ roles.metrics-export:
+ http:
+ - listen: '127.0.0.1:8081'
+ endpoints:
+ - path: /metrics/prometheus/
+ format: prometheus
+ - path: /metrics/json
+ format: json
+ iproto:
+ listen:
+ - uri: '127.0.0.1:3301'
+ advertise:
+ client: '127.0.0.1:3301'
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/docker-compose.yml b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/docker-compose.yml
new file mode 100644
index 0000000000..fb0d5d974f
--- /dev/null
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/docker-compose.yml
@@ -0,0 +1,27 @@
+services:
+ prometheus:
+ image: prom/prometheus
+ container_name: prometheus
+ command:
+ - '--config.file=/etc/prometheus/prometheus.yml'
+ ports:
+ - 9090:9090
+ volumes:
+ - ./prometheus:/etc/prometheus
+ networks:
+ - monitoring_network
+ grafana:
+ image: grafana/grafana
+ container_name: grafana
+ ports:
+ - 3000:3000
+ restart: unless-stopped
+ environment:
+ - GF_SECURITY_ADMIN_USER=admin
+ - GF_SECURITY_ADMIN_PASSWORD=grafana
+ networks:
+ - monitoring_network
+networks:
+ monitoring_network:
+ name: monitoring_network
+ driver: bridge
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/instances.yaml b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/instances.yaml
new file mode 100644
index 0000000000..96d4e2111f
--- /dev/null
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/instances.yaml
@@ -0,0 +1,5 @@
+storage-a-001:
+storage-a-002:
+storage-b-001:
+storage-b-002:
+router-a-001:
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/prometheus/prometheus.yml b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/prometheus/prometheus.yml
new file mode 100644
index 0000000000..4fc09ed358
--- /dev/null
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/prometheus/prometheus.yml
@@ -0,0 +1,14 @@
+global:
+ scrape_interval: 5s
+ evaluation_interval: 5s
+
+scrape_configs:
+ - job_name: prometheus
+ static_configs:
+ - targets:
+ - 127.0.0.1:8081
+ - 127.0.0.1:8082
+ - 127.0.0.1:8083
+ - 127.0.0.1:8084
+ - 127.0.0.1:8085
+ metrics_path: "/metrics/prometheus"
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/router.lua b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/router.lua
new file mode 100644
index 0000000000..61ccb2c40b
--- /dev/null
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/router.lua
@@ -0,0 +1 @@
+local vshard = require('vshard')
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/sharded_cluster_crud_metrics-scm-1.rockspec b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/sharded_cluster_crud_metrics-scm-1.rockspec
new file mode 100644
index 0000000000..1245faba68
--- /dev/null
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/sharded_cluster_crud_metrics-scm-1.rockspec
@@ -0,0 +1,14 @@
+package = 'sharded_cluster_crud_metrics'
+version = 'scm-1'
+source = {
+ url = '/dev/null',
+}
+
+dependencies = {
+ 'vshard == 0.1.27',
+ 'crud == 1.5.2',
+ 'metrics-export-role == 0.1.0-1',
+}
+build = {
+ type = 'none';
+}
diff --git a/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/storage.lua b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/storage.lua
new file mode 100644
index 0000000000..f692015c03
--- /dev/null
+++ b/doc/code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/storage.lua
@@ -0,0 +1,17 @@
+box.watch('box.status', function()
+ if box.info.ro then
+ return
+ end
+
+ box.schema.create_space('bands', {
+ format = {
+ { name = 'id', type = 'unsigned' },
+ { name = 'bucket_id', type = 'unsigned' },
+ { name = 'band_name', type = 'string' },
+ { name = 'year', type = 'unsigned' }
+ },
+ if_not_exists = true
+ })
+ box.space.bands:create_index('id', { parts = { 'id' }, if_not_exists = true })
+ box.space.bands:create_index('bucket_id', { parts = { 'bucket_id' }, unique = false, if_not_exists = true })
+end)
diff --git a/doc/concepts/configuration.rst b/doc/concepts/configuration.rst
index 40b578d022..0fbb6c9d30 100644
--- a/doc/concepts/configuration.rst
+++ b/doc/concepts/configuration.rst
@@ -227,13 +227,13 @@ In this option, the role name is the key and the role configuration is the value
The example below shows how to enable statistics on called operations by providing the ``roles.crud-router`` role's configuration:
-.. literalinclude:: /code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud/config.yaml
+.. literalinclude:: /code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/config.yaml
:language: yaml
- :start-at: roles.crud-router
- :end-at: stats_quantile_max_age_time
+ :start-after: routers:
+ :end-at: stats_quantiles
:dedent:
-Example on GitHub: `sharded_cluster_crud `_
+Example on GitHub: `sharded_cluster_crud_metrics `_
diff --git a/doc/how-to/vshard_quick.rst b/doc/how-to/vshard_quick.rst
index 8b1265a1e7..cb2d3dcb46 100644
--- a/doc/how-to/vshard_quick.rst
+++ b/doc/how-to/vshard_quick.rst
@@ -206,7 +206,6 @@ Here is a schematic view of the cluster topology:
The main group-level options here are:
* ``roles``: This option enables the ``roles.crud-router`` :ref:`role ` provided by the CRUD module for a router instance.
- * ``roles_cfg``: This section enables and configures statistics on called operations for a router with the enabled ``roles.crud-router`` role.
* ``app``: The ``app.module`` option specifies that code specific to a router should be loaded from the ``router`` module. This is explained below in the :ref:`vshard-quick-start-router-code` section.
* ``sharding``: The :ref:`sharding.roles ` option specifies that an instance inside this group acts as a router.
* ``replicasets``: This section configures a replica set with one router instance.
@@ -444,18 +443,6 @@ Writing and selecting data
- null
...
-4. To get statistics on called operations, pass the space name to ``crud.stats()``:
-
- .. code-block:: tarantoolsession
-
- sharded_cluster_crud:router-a-001> crud.stats('bands')
- ---
- - get:
- ok:
- latency: 0.00069199999961711
- count: 1
- time: 0.00069199999961711
- latency_average: 0.00069199999961711
diff --git a/doc/reference/configuration/configuration_reference.rst b/doc/reference/configuration/configuration_reference.rst
index 8f84112aff..a733b12cf3 100644
--- a/doc/reference/configuration/configuration_reference.rst
+++ b/doc/reference/configuration/configuration_reference.rst
@@ -3039,6 +3039,73 @@ The ``memtx`` section is used to configure parameters related to the :ref:`memtx
| Default: box.NULL
| Environment variable: TT_MEMTX_SORT_THREADS
+
+
+
+.. _configuration_reference_metrics:
+
+metrics
+-------
+
+The ``metrics`` section defines configuration parameters for :ref:`metrics `.
+
+.. NOTE::
+
+ ``metrics`` can be defined in any :ref:`scope `.
+
+- :ref:`metrics.exclude `
+- :ref:`metrics.include `
+- :ref:`metrics.labels `
+
+
+.. _configuration_reference_metrics_exclude:
+
+.. confval:: metrics.exclude
+
+ An array containing the metrics to turn off.
+ The array can contain the same values as the ``exclude`` configuration parameter passed to :ref:`metrics.cfg() `.
+
+ **Example**
+
+ .. literalinclude:: /code_snippets/snippets/sharding/instances.enabled/sharded_cluster_crud_metrics/config.yaml
+ :start-at: metrics:
+ :end-at: instance_name
+ :language: yaml
+ :dedent:
+
+ |
+ | Type: array
+ | Default: ``[]``
+ | Environment variable: TT_METRICS_EXCLUDE
+
+
+.. _configuration_reference_metrics_include:
+
+.. confval:: metrics.include
+
+ An array containing the metrics to turn on.
+ The array can contain the same values as the ``include`` configuration parameter passed to :ref:`metrics.cfg() `.
+
+ |
+ | Type: array
+ | Default: ``[ all ]``
+ | Environment variable: TT_METRICS_INCLUDE
+
+
+.. _configuration_reference_metrics_labels:
+
+.. confval:: metrics.labels
+
+ Global :ref:`labels ` to be added to every observation.
+
+ |
+ | Type: map
+ | Default: ``{ alias = names.instance_name }``
+ | Environment variable: TT_METRICS_LABELS
+
+
+
+
.. _configuration_reference_process:
process
diff --git a/doc/reference/reference_lua/index.rst b/doc/reference/reference_lua/index.rst
index 72e7204179..896bb2e506 100644
--- a/doc/reference/reference_lua/index.rst
+++ b/doc/reference/reference_lua/index.rst
@@ -44,6 +44,7 @@ This reference covers Tarantool's built-in Lua modules.
key_def
log
merger
+ metrics
msgpack
net_box
osmodule
diff --git a/doc/reference/reference_lua/metrics.rst b/doc/reference/reference_lua/metrics.rst
new file mode 100644
index 0000000000..6f603718c4
--- /dev/null
+++ b/doc/reference/reference_lua/metrics.rst
@@ -0,0 +1,1104 @@
+.. _metrics-api_reference:
+
+Module metrics
+==============
+
+**Since:** `2.11.1 `__
+
+The ``metrics`` module provides the ability to collect and expose :ref:`Tarantool metrics `.
+
+.. NOTE::
+
+ If you use a Tarantool version below `2.11.1 `__,
+ it is necessary to install the latest version of `metrics `__ first.
+ For Tarantool 2.11.1 and above, you can also use the external ``metrics`` module.
+ In this case, the external ``metrics`` module takes priority over the built-in one.
+
+
+.. _metrics-api_reference_overview:
+
+Overview
+--------
+
+.. _metrics-api_reference-collectors:
+
+Collectors
+~~~~~~~~~~
+
+Tarantool provides the following metric collectors:
+
+.. contents::
+ :local:
+ :depth: 1
+
+A collector is a representation of one or more observations that change over time.
+
+
+.. _metrics-api_reference-counter:
+
+counter
+*******
+
+A counter is a cumulative metric that represents a single monotonically increasing value. Its value can only
+increase or be reset to zero on restart. For example, you can use a counter to represent the number of requests
+served, tasks completed, or errors.
+
+The design is based on the `Prometheus counter `__.
+
+
+.. _metrics-api_reference-gauge:
+
+gauge
+*****
+
+A gauge is a metric that denotes a single numerical value that can arbitrarily increase and decrease.
+
+The gauge type is typically used for measured values like temperature or current memory usage.
+It could also be used for values that can increase or decrease, such as the number of concurrent requests.
+
+The design is based on the `Prometheus gauge `__.
+
+
+
+
+.. _metrics-api_reference-histogram:
+
+histogram
+*********
+
+A histogram metric is used to collect and analyze
+statistical data about the distribution of values within the application.
+Unlike metrics that track the average value or quantity of events, a histogram provides detailed visibility into the distribution of values and can uncover hidden dependencies.
+
+The design is based on the `Prometheus histogram `__.
+
+
+
+.. _metrics-api_reference-summary:
+
+summary
+*******
+
+A summary metric is used to collect statistical data
+about the distribution of values within the application.
+
+Each summary provides several measurements:
+
+* total count of measurements
+* sum of measured values
+* values at specific quantiles
+
+Similar to histograms, the summary also operates with value ranges. However, unlike histograms,
+it uses quantiles (defined by a number between 0 and 1) for this purpose. In this case,
+it is not required to define fixed boundaries. For the summary type, the ranges depend
+on the measured values and the number of measurements.
+
+The design is based on the `Prometheus summary `__.
+
+
+
+.. _metrics-api_reference-labels:
+
+Labels
+~~~~~~
+
+A label is a piece of metainfo that you associate with a metric in the key-value format.
+For details, see `labels in Prometheus `_ and `tags in Graphite `_.
+
+Labels are used to differentiate between the characteristics of a thing being
+measured. For example, in a metric associated with the total number of HTTP
+requests, you can represent methods and statuses as label pairs:
+
+.. code-block:: lua
+
+ http_requests_total_counter:inc(1, { method = 'POST', status = '200' })
+
+The example above allows extracting the following time series:
+
+#. The total number of requests over time with ``method = "POST"`` (and any status).
+#. The total number of requests over time with ``status = 500`` (and any method).
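+
+A minimal sketch of how distinct label combinations produce separate observations
+(assuming the ``http_requests_total_counter`` collector created above):
+
+.. code-block:: lua
+
+    -- Each unique combination of label values is stored as a separate observation.
+    http_requests_total_counter:inc(1, { method = 'POST', status = '200' })
+    http_requests_total_counter:inc(1, { method = 'GET', status = '500' })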
+
+
+
+.. _metrics-api_reference_configuring:
+
+Configuring metrics
+-------------------
+
+To configure metrics, use :ref:`metrics.cfg() `.
+This function can be used to turn on or off the specified metrics or to configure labels applied to all collectors.
+Moreover, you can use the following shortcut functions to set up metrics or labels:
+
+- :ref:`metrics.enable_default_metrics() `
+- :ref:`metrics.set_global_labels() `
+
+.. NOTE::
+
+ Starting from version 3.0, metrics can be configured using a :ref:`configuration file ` in the :ref:`metrics ` section.
+
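+A minimal sketch of configuring metrics via ``metrics.cfg()``
+(the instance alias used as a global label is illustrative):
+
+.. code-block:: lua
+
+    local metrics = require('metrics')
+
+    -- Enable all default metrics except the vinyl ones
+    -- and add a global label to every observation.
+    metrics.cfg{
+        include = 'all',
+        exclude = { 'vinyl' },
+        labels = { alias = 'instance001' },
+    }
+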
+
+
+.. _metrics-api_reference_custom_metrics:
+
+Custom metrics
+--------------
+
+.. _metrics-api_reference_create_custom_metrics:
+
+Creating custom metrics
+~~~~~~~~~~~~~~~~~~~~~~~
+
+To create a custom metric, follow the steps below:
+
+1. **Create a metric**
+
+ To create a new metric, you need to call a function corresponding to the desired :ref:`collector type `. For example, call :ref:`metrics.counter() ` or :ref:`metrics.gauge() ` to create a new counter or gauge, respectively.
+ In the example below, a new counter is created:
+
+ .. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_replace_count.lua
+ :start-at: local metrics
+ :end-at: local bands_replace_count
+ :language: lua
+ :dedent:
+
+ This counter is intended to collect the number of data operations performed on the specified space.
+
+ In the next example, a gauge is created:
+
+ .. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_waste_size.lua
+ :start-at: local metrics
+ :end-at: local bands_waste_size
+ :language: lua
+ :dedent:
+
+2. **Observe a value**
+
+ You can observe a value in two ways:
+
+ - At the appropriate place, for example, in an API request handler or :ref:`trigger `.
+ In the example below, the counter value is increased each time a data operation is performed on the ``bands`` space.
+ To increase a counter value, :ref:`counter_obj:inc() ` is called.
+
+ .. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_replace_count.lua
+ :start-after: -- Collect a custom metric
+ :end-before: -- End
+ :language: lua
+ :dedent:
+
+ - At the time of requesting the data collected by metrics.
+ In this case, you need to collect the required metric inside :ref:`metrics.register_callback() `.
+ The example below shows how to use a gauge collector to measure the size of memory wasted due to internal fragmentation:
+
+ .. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_waste_size.lua
+ :start-after: -- Collect a custom metric
+ :end-before: -- End
+ :language: lua
+ :dedent:
+
+ To set a gauge value, :ref:`gauge_obj:set() ` is called.
+
+You can find the full example on GitHub: `metrics_collect_custom `_.
+
+
+
+
+.. _monitoring-getting_started-warning:
+
+Possible limitations
+~~~~~~~~~~~~~~~~~~~~
+
+The module lets you add your own metrics, but there are some subtleties when working with specific tools.
+
+When adding a custom metric, it's important to keep the number of label value combinations to a minimum.
+Otherwise, a combinatorial explosion of time series may occur in the database where metric values are stored.
+Examples of data labels:
+
+* `Labels `__ in Prometheus
+* `Tags `__ in InfluxDB
+
+For example, if your company uses InfluxDB for metric collection, you can potentially disrupt the entire
+monitoring setup, both for your application and for all other systems within the company. As a result,
+monitoring data is likely to be lost.
+
+Example:
+
+.. code-block:: lua
+
+ local some_metric = metrics.counter('some', 'Some metric')
+
+ -- THIS IS POSSIBLE
+ local function on_value_update(instance_alias)
+ some_metric:inc(1, { alias = instance_alias })
+ end
+
+ -- THIS IS NOT ALLOWED
+ local function on_value_update(customer_id)
+ some_metric:inc(1, { customer_id = customer_id })
+ end
+
+In the example, there are two versions of the function ``on_value_update``. The first version labels
+the data with the cluster instance's alias. Since there is a relatively small number of nodes, using
+their aliases as label values is feasible. The second version uses a record identifier as a label value.
+If there are many records, avoid labeling metrics this way.
+
+The same principle applies to URLs. Using the entire URL with parameters is not recommended.
+Use a URL template or the name of the command instead.
+
+In essence, when designing custom metrics and selecting labels or tags, it's crucial to opt for a minimal
+set of values that can uniquely identify the data without introducing unnecessary complexity or potential
+conflicts with existing metrics and systems.
+
+
+
+.. _metrics-api_reference-collecting_http_statistics:
+
+Collecting HTTP metrics
+-----------------------
+
+The ``metrics`` module provides middleware for monitoring HTTP latency statistics for endpoints that are created using the `http `_ module.
+The latency collector observes both latency information and the number of invocations.
+The metrics collected by HTTP middleware are separated by a set of :ref:`labels `:
+
+* a route (``path``)
+* a method (``method``)
+* an HTTP status code (``status``)
+
+For each route that you want to track, you must specify the middleware explicitly.
+The example below shows how to collect statistics for requests made to the ``/metrics/hello`` endpoint.
+
+.. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_collect_http/collect_http_metrics.lua
+ :start-after: Collect HTTP metrics
+ :end-at: httpd:start()
+ :language: lua
+ :dedent:
+
+.. NOTE::
+
+ The middleware does not cover 404 errors.
+
+
+.. _metrics-plugins-available:
+.. _metrics-api_reference_collecting_using_plugins:
+
+Collecting metrics using plugins
+--------------------------------
+
+The ``metrics`` module provides a set of plugins that let you collect metrics through a unified interface:
+
+- :ref:`metrics-prometheus-api_reference`
+- :ref:`metrics-json-api_reference`
+- :ref:`metrics-graphite-api_reference`
+
+
+For example, you can obtain an HTTP response object containing metrics in the Prometheus format by calling the ``metrics.plugins.prometheus.collect_http()`` function:
+
+.. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_prometheus_metrics.lua
+ :start-at: local prometheus_plugin
+ :end-at: local prometheus_metrics
+ :language: lua
+ :dedent:
+
+To expose the collected metrics, you can use the `http `_ module:
+
+.. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_prometheus_metrics.lua
+ :start-after: Expose Prometheus metrics
+ :end-at: httpd:start()
+ :language: lua
+ :dedent:
+
+Example on GitHub: `metrics_plugins `_
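+
+Similarly, a minimal sketch of exporting metrics in the JSON format with the ``metrics.plugins.json`` plugin
+(the ``json_metrics`` variable holds a JSON string with the current observations):
+
+.. code-block:: lua
+
+    -- Export all collected metrics as a JSON string.
+    local json_plugin = require('metrics.plugins.json')
+    local json_metrics = json_plugin.export()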
+
+
+
+
+.. _metrics-plugins-plugin-specific_api:
+.. _metrics-plugins-custom:
+
+Creating custom plugins
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Use the following API to create custom plugins:
+
+- :ref:`metrics.invoke_callbacks() `
+- :ref:`metrics.collectors() `
+- :ref:`collector_object `
+
+To create a plugin, you need to include the following in your main export function:
+
+.. code-block:: lua
+
+ -- Invoke all callbacks registered via `metrics.register_callback()`
+ metrics.invoke_callbacks()
+
+ -- Loop over collectors
+ for _, c in pairs(metrics.collectors()) do
+ ...
+
+ -- Loop over instant observations in the collector
+ for _, obs in pairs(c:collect()) do
+ -- Export observation `obs`
+ ...
+ end
+ end
+
+See the source code of built-in plugins in the `metrics GitHub repository `_.
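+
+A minimal sketch of a custom plain-text exporter built on top of this API
+(the function name and output format are illustrative, not part of the module):
+
+.. code-block:: lua
+
+    local metrics = require('metrics')
+
+    local function export_plain_text()
+        -- Invoke all callbacks registered via `metrics.register_callback()`.
+        metrics.invoke_callbacks()
+        local lines = {}
+        -- Loop over collectors and their instant observations.
+        for _, c in pairs(metrics.collectors()) do
+            for _, obs in pairs(c:collect()) do
+                table.insert(lines, string.format('%s %s', obs.metric_name, tostring(obs.value)))
+            end
+        end
+        return table.concat(lines, '\n')
+    end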
+
+
+
+
+
+.. _metrics-module-api-reference:
+
+API Reference
+-------------
+
+.. container:: table
+
+ .. rst-class:: left-align-column-1
+ .. rst-class:: left-align-column-2
+
+ .. list-table::
+ :widths: 50 50
+
+ * - **metrics API**
+ -
+
+ * - :ref:`metrics.cfg() `
+ - Entry point to set up the module
+
+ * - :ref:`metrics.collect() `
+ - Collect observations from each collector
+
+ * - :ref:`metrics.collectors() `
+ - List all collectors in the registry
+
+ * - :ref:`metrics.counter() `
+ - Register a new counter
+
+ * - :ref:`metrics.enable_default_metrics() `
+ - Same as ``metrics.cfg{ include = include, exclude = exclude }``
+
+ * - :ref:`metrics.gauge() `
+ - Register a new gauge
+
+ * - :ref:`metrics.histogram() `
+ - Register a new histogram
+
+ * - :ref:`metrics.invoke_callbacks() `
+ - Invoke all registered callbacks
+
+ * - :ref:`metrics.register_callback() `
+ - Register a function named ``callback``
+
+ * - :ref:`metrics.set_global_labels() `
+ - Same as ``metrics.cfg{ labels = label_pairs }``
+
+ * - :ref:`metrics.summary() `
+ - Register a new summary
+
+ * - :ref:`metrics.unregister_callback() `
+ - Unregister a function named ``callback``
+
+ * - **metrics.http_middleware API**
+ -
+
+ * - :ref:`metrics.http_middleware.build_default_collector() `
+ - Register and return a collector for the middleware
+
+ * - :ref:`metrics.http_middleware.configure_default_collector() `
+ - Register a collector for the middleware and set it as default
+
+ * - :ref:`metrics.http_middleware.get_default_collector() `
+ - Get the default collector
+
+ * - :ref:`metrics.http_middleware.set_default_collector() `
+ - Set the default collector
+
+ * - :ref:`metrics.http_middleware.v1() `
+ - Latency measuring wrap-up
+
+ * - **Related objects**
+ -
+
+ * - :ref:`collector_object `
+ - A collector object
+
+ * - :ref:`counter_obj `
+ - A counter object
+
+ * - :ref:`gauge_obj `
+ - A gauge object
+
+ * - :ref:`histogram_obj `
+ - A histogram object
+
+ * - :ref:`registry `
+ - A metrics registry
+
+ * - :ref:`summary_obj `
+ - A summary object
+
+
+
+.. _metrics-api_reference-functions:
+
+metrics API
+~~~~~~~~~~~
+
+.. module:: metrics
+
+.. _metrics_cfg:
+
+.. function:: cfg([config])
+
+ Entry point to set up the module.
+
+ :param table config: module configuration options:
+
+ * ``cfg.include`` (string/table, default ``all``): ``all`` to enable all
+ supported default metrics, ``none`` to disable all default metrics,
+ table with names of the default metrics to enable a specific set of metrics.
+ * ``cfg.exclude`` (table, default ``{}``): a table containing the names of
+ the default metrics that you want to disable. Has higher priority
+ than ``cfg.include``.
+ * ``cfg.labels`` (table, default ``{}``): a table containing label names as
+ string keys, label values as values. See also: :ref:`metrics-api_reference-labels`.
+
+ You can work with ``metrics.cfg`` as a table to read values, but you must call
+ ``metrics.cfg{}`` as a function to update them.
+
+ Supported default metric names (for ``cfg.include`` and ``cfg.exclude`` tables):
+
+ * ``all`` (metasection including all metrics)
+ * ``network``
+ * ``operations``
+ * ``system``
+ * ``replicas``
+ * ``info``
+ * ``slab``
+ * ``runtime``
+ * ``memory``
+ * ``spaces``
+ * ``fibers``
+ * ``cpu``
+ * ``vinyl``
+ * ``memtx``
+ * ``luajit``
+ * ``clock``
+ * ``event_loop``
+ * ``config``
+
+ See :ref:`metrics reference ` for details.
+ All metric collectors from the collection have ``metainfo.default = true``.
+
+ ``cfg.labels`` are the global labels to be added to every observation.
+
+ Global labels are applied only to metric collection. They have no effect
+ on how observations are stored.
+
+ Global labels can be changed on the fly.
+
+ ``label_pairs`` from observation objects have priority over global labels.
+ If you pass ``label_pairs`` to an observation method with the same key as
+ some global label, the method argument value will be used.
+
+ Note that both label names and values in ``label_pairs`` are treated as strings.
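+
+ **Example** (a minimal sketch; the label value is illustrative):
+
+ .. code-block:: lua
+
+    -- Update the configuration by calling metrics.cfg{} as a function.
+    metrics.cfg{ labels = { alias = 'instance001' } }
+
+    -- Read the current values from metrics.cfg as a table.
+    local current_labels = metrics.cfg.labels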
+
+
+
+
+.. _metrics_collect:
+
+.. function:: collect([opts])
+
+ Collect observations from each collector.
+
+ :param table opts: table of collect options:
+
+ * ``invoke_callbacks`` -- if ``true``, :ref:`invoke_callbacks() ` is triggered before actual collect.
+ * ``default_only`` -- if ``true``, observations contain only default metrics (``metainfo.default = true``).
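+
+ **Example** (a minimal sketch, assuming the returned table is a list of ``observation`` objects as described for :ref:`collector_object:collect() `):
+
+ .. code-block:: lua
+
+    local metrics = require('metrics')
+
+    -- Trigger registered callbacks and get the current observations.
+    local observations = metrics.collect{ invoke_callbacks = true }
+    for _, obs in pairs(observations) do
+        -- Each observation carries metric_name, label_pairs, value, and timestamp.
+    end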
+
+
+
+.. _metrics_collectors:
+
+.. function:: collectors()
+
+ List all collectors in the registry. Designed to be used in exporters.
+
+ :return: A list of created collectors (see :ref:`collector_object `).
+
+ See also: :ref:`metrics-plugins-custom`
+
+
+
+.. _metrics_counter:
+
+.. function:: counter(name [, help, metainfo])
+
+ Register a new counter.
+
+ :param string name: collector name. Must be unique.
+ :param string help: collector description.
+ :param table metainfo: collector metainfo.
+ :return: A counter object (see :ref:`counter_obj `).
+ :rtype: counter_obj
+
+ See also: :ref:`metrics-api_reference_create_custom_metrics`
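+
+ **Example** (a minimal sketch; the collector name and label are illustrative):
+
+ .. code-block:: lua
+
+    local metrics = require('metrics')
+
+    -- Register a counter and increment it for a given label combination.
+    local requests_total = metrics.counter('my_requests_total', 'Total number of handled requests')
+    requests_total:inc(1, { method = 'GET' })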
+
+
+
+
+.. _metrics_enable_default_metrics:
+
+.. function:: enable_default_metrics([include, exclude])
+
+ Same as ``metrics.cfg{include=include, exclude=exclude}``, but ``include={}`` is
+ treated as ``include='all'`` for backward compatibility.
+
+
+
+.. _metrics_gauge:
+
+.. function:: gauge(name [, help, metainfo])
+
+ Register a new gauge.
+
+ :param string name: collector name. Must be unique.
+ :param string help: collector description.
+ :param table metainfo: collector metainfo.
+
+ :return: A gauge object (see :ref:`gauge_obj `).
+
+ :rtype: gauge_obj
+
+ See also: :ref:`metrics-api_reference_create_custom_metrics`
+
+
+
+.. _metrics_histogram:
+
+.. function:: histogram(name [, help, buckets, metainfo])
+
+ Register a new histogram.
+
+ :param string name: collector name. Must be unique.
+ :param string help: collector description.
+ :param table buckets: histogram buckets (an array of sorted positive numbers).
+ The infinity bucket (``INF``) is appended automatically.
+ Default: ``{.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF}``.
+ :param table metainfo: collector metainfo.
+
+ :return: A histogram object (see :ref:`histogram_obj `).
+
+ :rtype: histogram_obj
+
+ See also: :ref:`metrics-api_reference_create_custom_metrics`
+
+ .. note::
+
+ A histogram is basically a set of collectors:
+
+ * ``name .. "_sum"`` -- a counter holding the sum of added observations.
+ * ``name .. "_count"`` -- a counter holding the number of added observations.
+ * ``name .. "_bucket"`` -- a counter holding all bucket sizes under the label
+ ``le`` (less or equal). To access a specific bucket -- ``x`` (where ``x`` is a number),
+ specify the value ``x`` for the label ``le``.
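+
+ **Example** (a minimal sketch; the collector name and bucket boundaries are illustrative):
+
+ .. code-block:: lua
+
+    local metrics = require('metrics')
+
+    -- Register a histogram with custom buckets and record an observation.
+    local request_latency = metrics.histogram('my_request_latency',
+        'Request latency in seconds', { 0.001, 0.01, 0.1, 1 })
+    request_latency:observe(0.042, { method = 'GET' })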
+
+
+
+
+
+
+.. _metrics_invoke_callbacks:
+
+.. function:: invoke_callbacks()
+
+ Invoke all registered callbacks. Has to be called before each :ref:`collect() `.
+ You can also use ``collect{invoke_callbacks = true}`` instead.
+ If you're using one of the default exporters,
+ ``invoke_callbacks()`` will be called by the exporter.
+
+ See also: :ref:`metrics-plugins-custom`
+
+
+.. _metrics_register_callback:
+
+.. function:: register_callback(callback)
+
+ Register a function named ``callback``, which will be called right before metric
+ collection on plugin export.
+
+ :param function callback: a function that takes no parameters.
+
+ This method is most often used for gauge metrics updates.
+
+ **Example:**
+
+ .. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_collect_custom/examples/collect_custom_waste_size.lua
+ :start-after: -- Collect a custom metric
+ :end-before: -- End
+ :language: lua
+ :dedent:
+
+ See also: :ref:`metrics-api_reference_custom_metrics`
+
+
+
+
+
+.. _metrics_set_global_labels:
+
+.. function:: set_global_labels(label_pairs)
+
+ Same as ``metrics.cfg{ labels = label_pairs }``.
+ Learn more in :ref:`metrics.cfg() `.
+
+
+
+
+
+.. _metrics_summary:
+
+.. function:: summary(name [, help, objectives, params, metainfo])
+
+ Register a new summary. Quantile computation is based on the
+ `"Effective computation of biased quantiles over data streams" `_
+ algorithm.
+
+ :param string name: collector name. Must be unique.
+ :param string help: collector description.
+ :param table objectives: a list of "targeted" φ-quantiles in the ``{quantile = error, ... }`` form.
+ Example: ``{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01}``.
+ The targeted φ-quantile is specified in the form of a φ-quantile and the tolerated
+ error. For example, ``{[0.5] = 0.1}`` means that the median (= 50th
+ percentile) is to be returned with a 10-percent error. Note that
+ percentiles and quantiles are the same concept, except that percentiles are
+ expressed as percentages. The φ-quantile must be in the interval ``[0, 1]``.
+ A lower tolerated error for a φ-quantile results in higher memory and CPU
+ usage during summary calculation.
+
+ :param table params: table of the summary parameters used to configure the sliding
+ time window. This window consists of several buckets to store observations.
+ New observations are added to each bucket. After a time period, the head bucket
+ (from which observations are collected) is reset, and the next bucket becomes the
+ new head. This way, each bucket stores observations for
+ ``max_age_time * age_buckets_count`` seconds before it is reset.
+ ``max_age_time`` sets the duration of each bucket's lifetime -- that is, how
+ many seconds the observations are kept before they are discarded.
+ ``age_buckets_count`` sets the number of buckets in the sliding time window.
+ This variable determines the number of buckets used to exclude observations
+ older than ``max_age_time`` from the summary. The value is
+ a trade-off between resources (memory and CPU for maintaining the bucket)
+ and how smooth the time window moves.
+ Default value: ``{max_age_time = math.huge, age_buckets_count = 1}``.
+
+ :param table metainfo: collector metainfo.
+
+ :return: A summary object (see :ref:`summary_obj `).
+
+ :rtype: summary_obj
+
+ See also: :ref:`metrics-api_reference_create_custom_metrics`
+
+ .. note::
+
+ A summary represents a set of collectors:
+
+ * ``name .. "_sum"`` -- a counter holding the sum of added observations.
+ * ``name .. "_count"`` -- a counter holding the number of added observations.
+ * ``name`` -- holds all the observed quantile values under the
+ label ``quantile``.
+ To access quantile ``x`` (where ``x`` is a number between 0 and 1),
+ specify the value ``x`` for the label ``quantile``.
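+
+ **Example** (a minimal sketch; the collector name, quantiles, and window parameters are illustrative):
+
+ .. code-block:: lua
+
+    local metrics = require('metrics')
+
+    -- Register a summary with targeted quantiles and a sliding time window,
+    -- then record an observation.
+    local latency_summary = metrics.summary('my_latency', 'Request latency in seconds',
+        { [0.5] = 0.01, [0.9] = 0.01, [0.99] = 0.01 },
+        { max_age_time = 60, age_buckets_count = 5 })
+    latency_summary:observe(0.042)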
+
+
+
+.. _metrics_unregister_callback:
+
+.. function:: unregister_callback(callback)
+
+ Unregister a function named ``callback`` that is called right before metric
+ collection on plugin export.
+
+ :param function callback: a function that takes no parameters.
+
+ **Example:**
+
+ .. code-block:: lua
+
+ local cpu_callback = function()
+ local cpu_metrics = require('metrics.psutils.cpu')
+ cpu_metrics.update()
+ end
+
+ metrics.register_callback(cpu_callback)
+
+ -- after a while, we don't need that callback function anymore
+
+ metrics.unregister_callback(cpu_callback)
+
+
+
+.. _metrics-http_middleware-api_reference-functions:
+
+metrics.http_middleware API
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. module:: metrics.http_middleware
+
+.. _metrics_http_middleware_build_default_collector:
+
+.. function:: build_default_collector(type_name, name [, help])
+
+ Register and return a collector for the middleware.
+
+ :param string type_name: collector type: ``histogram`` or ``summary``. The default is ``histogram``.
+ :param string name: collector name. The default is ``http_server_request_latency``.
+ :param string help: collector description. The default is ``HTTP Server Request Latency``.
+
+ :return: A collector object
+
+ **Possible errors:**
+
+ * A collector with the same type and name already exists in the registry.
+
+
+
+.. _metrics_http_middleware_configure_default_collector:
+
+.. function:: configure_default_collector(type_name, name, help)
+
+ Register a collector for the middleware and set it as default.
+
+ :param string type_name: collector type: ``histogram`` or ``summary``. The default is ``histogram``.
+ :param string name: collector name. The default is ``http_server_request_latency``.
+ :param string help: collector description. The default is ``HTTP Server Request Latency``.
+
+ **Possible errors:**
+
+ * A collector with the same type and name already exists in the registry.
+
+
+.. _metrics_http_middleware_get_default_collector:
+
+.. function:: get_default_collector()
+
+ Return the default collector.
+ If the default collector hasn't been set yet, register it
+ (with default :ref:`http_middleware.build_default_collector() ` parameters)
+ and set it as default.
+
+ :return: A collector object
+
+
+.. _metrics_http_middleware_set_default_collector:
+
+.. function:: set_default_collector(collector)
+
+ Set the default collector.
+
+ :param collector: middleware collector object
+
+
+
+.. _metrics_http_middleware_v1:
+
+.. function:: v1(handler, collector)
+
+ Latency measuring wrap-up for the HTTP ver. ``1.x.x`` handler. Returns a wrapped handler.
+
+ Learn more in :ref:`metrics-api_reference-collecting_http_statistics`.
+
+ :param function handler: handler function.
+ :param collector: middleware collector object.
+ If not set, the default collector is used
+ (like in :ref:`http_middleware.get_default_collector() `).
+
+ **Usage:**
+
+ .. code-block:: lua
+
+ httpd:route(route, http_middleware.v1(request_handler, collector))
+
+ See also: :ref:`metrics-api_reference-collecting_http_statistics`
+
+
+
+
+
+.. _metrics-module-api-reference-objects:
+
+Related objects
+~~~~~~~~~~~~~~~
+
+.. _metrics_collector_object:
+
+.. class:: collector_object
+
+ A collector object.
+
+ See also: :ref:`metrics-plugins-custom`
+
+ .. method:: collect()
+
+ Collect observations from this collector.
+ To collect observations from each collector, use :ref:`metrics.collectors() `.
+
+ For example, to iterate over observations from all collectors:
+
+ .. code-block:: lua
+
+ for _, c in pairs(metrics.collectors()) do
+ for _, obs in ipairs(c:collect()) do
+ ... -- handle observation
+ end
+ end
+
+ :return: An array of ``observation`` objects of the given collector.
+
+ .. code-block:: lua
+
+ {
+ label_pairs: table, -- `label_pairs` key-value table
+ timestamp: ctype, -- current system time (in microseconds)
+ value: number, -- current value
+ metric_name: string, -- collector
+ }
+
+ :rtype: table
+
+
+
+.. _metrics_counter_obj:
+
+.. class:: counter_obj
+
+ A counter object.
+
+ .. _metrics-api_reference-counter_inc:
+
+ .. method:: inc(num, label_pairs)
+
+ Increment the observation for ``label_pairs``.
+ If ``label_pairs`` doesn't exist, the method creates it.
+
+ See also: :ref:`metrics-api_reference-labels`
+
+ :param number num: increment value.
+ :param table label_pairs: table containing label names as keys,
+ label values as values. Note that both
+ label names and values in ``label_pairs``
+ are treated as strings.
+
+ .. _metrics-api_reference-counter_collect:
+
+ .. method:: collect()
+
+ :return: Array of ``observation`` objects for a given counter.
+
+ .. code-block:: lua
+
+ {
+ label_pairs: table, -- `label_pairs` key-value table
+ timestamp: ctype, -- current system time (in microseconds)
+ value: number, -- current value
+ metric_name: string, -- collector
+ }
+
+ :rtype: table
+
+ .. _metrics-api_reference-counter_remove:
+
+ .. method:: remove(label_pairs)
+
+ Remove the observation for :ref:`label_pairs `.
+
+ .. _metrics-api_reference-counter_reset:
+
+ .. method:: reset(label_pairs)
+
+ Set the observation for :ref:`label_pairs ` to 0.
+
+ :param table label_pairs: table containing label names as keys,
+ label values as values. Note that both
+ label names and values in ``label_pairs``
+ are treated as strings.
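+
+A typical counter workflow might look as follows. This is a minimal sketch; the counter
+name and labels below are illustrative:
+
+.. code-block:: lua
+
+    local metrics = require('metrics')
+
+    local http_requests = metrics.counter('http_requests_total', 'Total HTTP requests')
+
+    -- Create or increment the observation for this label set
+    http_requests:inc(1, { method = 'GET', status = '200' })
+
+    -- Reset the observation to 0, keeping the label set
+    http_requests:reset({ method = 'GET', status = '200' })
+
+    -- Drop the observation for this label set entirely
+    http_requests:remove({ method = 'GET', status = '200' })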
+
+
+
+.. _metrics_gauge_obj:
+
+.. class:: gauge_obj
+
+    A gauge object.
+
+ .. _metrics_gauge_obj_inc:
+
+ .. method:: inc(num, label_pairs)
+
+ Increment the observation for :ref:`label_pairs `.
+ If ``label_pairs`` doesn't exist, the method creates it.
+
+ .. _metrics_gauge_obj_dec:
+
+ .. method:: dec(num, label_pairs)
+
+ Decrement the observation for :ref:`label_pairs `.
+
+ .. _metrics_gauge_obj_set:
+
+ .. method:: set(num, label_pairs)
+
+ Set the observation for :ref:`label_pairs ` to ``num``.
+
+ .. _metrics_gauge_obj_collect:
+
+ .. method:: collect()
+
+        Get an array of ``observation`` objects for the given gauge.
+        For the description of ``observation``, see
+        :ref:`counter_obj:collect() <metrics-api_reference-counter_collect>`.
+
+ .. _metrics_gauge_obj_remove:
+
+ .. method:: remove(label_pairs)
+
+ Remove the observation for :ref:`label_pairs `.
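+
+A gauge can move in both directions, for example (a minimal sketch; the gauge name and
+labels below are illustrative):
+
+.. code-block:: lua
+
+    local metrics = require('metrics')
+
+    local queue_size = metrics.gauge('queue_size', 'Number of pending tasks')
+
+    queue_size:set(10, { queue = 'default' })   -- observation is 10
+    queue_size:inc(2, { queue = 'default' })    -- observation is 12
+    queue_size:dec(5, { queue = 'default' })    -- observation is 7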
+
+
+
+.. _metrics_histogram_obj:
+
+.. class:: histogram_obj
+
+    A histogram object.
+
+ .. _metrics_histogram_obj_observe:
+
+ .. method:: observe(num, label_pairs)
+
+        Record a new value in the histogram.
+        This increments the counters of all buckets whose ``le`` label is greater than
+        or equal to ``num`` and whose other labels match ``label_pairs``.
+
+ :param number num: value to put in the histogram.
+ :param table label_pairs: table containing label names as keys,
+ label values as values.
+ All internal counters that have these labels specified
+ observe new counter values.
+ Note that both label names and values in ``label_pairs``
+ are treated as strings.
+ See also: :ref:`metrics-api_reference-labels`.
+
+ .. _metrics_histogram_obj_collect:
+
+ .. method:: collect()
+
+        Return a concatenation of ``counter_obj:collect()`` results across all internal
+        counters of ``histogram_obj``. For the description of ``observation``,
+        see :ref:`counter_obj:collect() <metrics-api_reference-counter_collect>`.
+
+ .. _metrics_histogram_obj_remove:
+
+ .. method:: remove(label_pairs)
+
+ Works like the ``remove()`` function
+ of a :ref:`counter `.
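+
+For example, a latency histogram with explicit buckets might be filled like this.
+This is a minimal sketch; the bucket boundaries and labels below are illustrative:
+
+.. code-block:: lua
+
+    local metrics = require('metrics')
+
+    local latency = metrics.histogram(
+        'request_latency_seconds',
+        'Request latency, seconds',
+        { 0.001, 0.01, 0.1, 1 }
+    )
+
+    -- Increments the buckets with le = 0.1, le = 1, and le = +Inf
+    -- for the label set { endpoint = '/hello' }
+    latency:observe(0.042, { endpoint = '/hello' })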
+
+
+
+
+.. _metrics_registry:
+
+.. class:: registry
+
+ .. _metrics_registry_unregister:
+
+ .. method:: unregister(collector)
+
+ Remove a collector from the registry.
+
+ :param collector_obj collector: the collector to be removed.
+
+ **Example:**
+
+ .. code-block:: lua
+
+ local collector = metrics.gauge('some-gauge')
+
+ -- after a while, we don't need it anymore
+
+ metrics.registry:unregister(collector)
+
+ .. _metrics_registry_find:
+
+ .. method:: find(kind, name)
+
+ Find a collector in the registry.
+
+ :param string kind: collector kind (``counter``, ``gauge``, ``histogram``, or ``summary``).
+ :param string name: collector name.
+
+ :return: A collector object or ``nil``.
+
+ :rtype: collector_obj
+
+ **Example:**
+
+ .. code-block:: lua
+
+ local collector = metrics.gauge('some-gauge')
+
+ collector = metrics.registry:find('gauge', 'some-gauge')
+
+
+
+.. _metrics_summary_obj:
+
+.. class:: summary_obj
+
+    A summary object.
+
+ .. _metrics_summary_obj_observe:
+
+ .. method:: observe(num, label_pairs)
+
+ Record a new value in a summary.
+
+ :param number num: value to put in the data stream.
+ :param table label_pairs: a table containing label names as keys,
+ label values as values.
+ All internal counters that have these labels specified
+ observe new counter values.
+ You can't add the ``"quantile"`` label to a summary.
+ It is added automatically.
+ If ``max_age_time`` and ``age_buckets_count`` are set,
+ the observed value is added to each bucket.
+ Note that both label names and values in ``label_pairs``
+ are treated as strings.
+ See also: :ref:`metrics-api_reference-labels`.
+
+ .. _metrics_summary_obj_collect:
+
+ .. method:: collect()
+
+        Return a concatenation of ``counter_obj:collect()`` results across all internal
+        counters of ``summary_obj``. For the description of ``observation``,
+        see :ref:`counter_obj:collect() <metrics-api_reference-counter_collect>`.
+        If ``max_age_time`` and ``age_buckets_count`` are set, quantile observations
+        are collected only from the head bucket of the sliding time window,
+        not from every bucket. If no observations have been recorded,
+        the method returns ``NaN`` values.
+
+ .. _metrics_summary_obj_remove:
+
+ .. method:: remove(label_pairs)
+
+ Works like the ``remove()`` function
+ of a :ref:`counter `.
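+
+For example, a summary tracking request latency quantiles over a sliding time window
+could be created as follows. This is a minimal sketch; the objectives and window
+settings below are illustrative:
+
+.. code-block:: lua
+
+    local metrics = require('metrics')
+
+    local latency = metrics.summary(
+        'http_latency_seconds',
+        'HTTP request latency, seconds',
+        { [0.5] = 0.01, [0.99] = 0.01 },              -- quantiles and tolerated errors
+        { max_age_time = 60, age_buckets_count = 5 }  -- sliding time window
+    )
+
+    latency:observe(0.025, { path = '/api' })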
+
+
+
+
+
+.. toctree::
+ :hidden:
+
+ metrics/prometheus
+ metrics/graphite
+ metrics/json
diff --git a/doc/reference/reference_lua/metrics/graphite.rst b/doc/reference/reference_lua/metrics/graphite.rst
new file mode 100644
index 0000000000..44d1e49d3e
--- /dev/null
+++ b/doc/reference/reference_lua/metrics/graphite.rst
@@ -0,0 +1,31 @@
+.. _metrics-graphite-api_reference:
+
+metrics.plugins.graphite
+========================
+
+.. module:: metrics.plugins.graphite
+
+.. function:: init(options)
+
+ Send all metrics to a remote Graphite server.
+    Exported metric names are formatted as follows: ``<prefix>.<metric_name>``.
+
+ :param table options: possible options:
+
+ * ``prefix`` (string): metrics prefix (``'tarantool'`` by default)
+ * ``host`` (string): Graphite server host (``'127.0.0.1'`` by default)
+ * ``port`` (number): Graphite server port (``2003`` by default)
+ * ``send_interval`` (number): metrics collection interval in seconds
+ (``2`` by default)
+
+**Example**
+
+.. code-block:: lua
+
+ local graphite_plugin = require('metrics.plugins.graphite')
+ graphite_plugin.init {
+ prefix = 'tarantool',
+ host = '127.0.0.1',
+ port = 2003,
+ send_interval = 1,
+ }
diff --git a/doc/reference/reference_lua/metrics/json.rst b/doc/reference/reference_lua/metrics/json.rst
new file mode 100644
index 0000000000..1f9a9c534b
--- /dev/null
+++ b/doc/reference/reference_lua/metrics/json.rst
@@ -0,0 +1,33 @@
+.. _metrics-json-api_reference:
+
+metrics.plugins.json
+====================
+
+.. module:: metrics.plugins.json
+
+.. function:: export()
+
+ Export metrics in the JSON format.
+
+ :return: a string containing metrics in the JSON format
+
+ :rtype: string
+
+ .. IMPORTANT::
+
+        The values can also be ``+-math.huge`` and ``math.huge * 0``. In these cases:
+
+        * ``math.huge`` is serialized to ``"inf"``,
+        * ``-math.huge`` is serialized to ``"-inf"``,
+        * ``math.huge * 0`` is serialized to ``"nan"``.
+
+
+**Example**
+
+.. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_json_metrics.lua
+ :start-at: local json_plugin
+ :end-at: local json_metrics
+ :language: lua
+ :dedent:
+
+Example on GitHub: `metrics_plugins `_
diff --git a/doc/reference/reference_lua/metrics/prometheus.rst b/doc/reference/reference_lua/metrics/prometheus.rst
new file mode 100644
index 0000000000..d475a06779
--- /dev/null
+++ b/doc/reference/reference_lua/metrics/prometheus.rst
@@ -0,0 +1,28 @@
+.. _metrics-prometheus-api_reference:
+
+metrics.plugins.prometheus
+==========================
+
+.. module:: metrics.plugins.prometheus
+
+.. function:: collect_http()
+
+ Get an HTTP response object containing metrics in the Prometheus format.
+
+ :return: a table containing the following fields:
+
+ * ``status``: set to ``200``
+ * ``headers``: response headers
+ * ``body``: metrics in the Prometheus format
+
+ :rtype: table
+
+**Example**
+
+.. literalinclude:: /code_snippets/snippets/config/instances.enabled/metrics_plugins/examples/expose_prometheus_metrics.lua
+ :start-at: local prometheus_plugin
+ :end-at: local prometheus_metrics
+ :language: lua
+ :dedent:
+
+Example on GitHub: `metrics_plugins `_
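+
+If the ``http`` rock is used in the application, the response from ``collect_http()``
+can be served directly on a metrics endpoint. The following is a minimal sketch; the
+host, port, and path below are illustrative:
+
+.. code-block:: lua
+
+    local http_server = require('http.server')
+    local prometheus = require('metrics.plugins.prometheus')
+
+    local httpd = http_server.new('0.0.0.0', 8088)
+
+    -- Prometheus can scrape metrics from http://<host>:8088/metrics
+    httpd:route({ path = '/metrics' }, prometheus.collect_http)
+    httpd:start()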
diff --git a/doc/reference/reference_rock/index.rst b/doc/reference/reference_rock/index.rst
index 3e19e710b3..4d8d9fec95 100644
--- a/doc/reference/reference_rock/index.rst
+++ b/doc/reference/reference_rock/index.rst
@@ -10,7 +10,6 @@ This reference covers third-party Lua modules for Tarantool.
:maxdepth: 1
membership
- Module metrics <../../book/monitoring/index>
Module luatest
vshard/index
dbms
diff --git a/modules/grafana-dashboard b/modules/grafana-dashboard
deleted file mode 160000
index 30dd7fad88..0000000000
--- a/modules/grafana-dashboard
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 30dd7fad88b58e6494801c410305a6c7514bdfab
diff --git a/modules/metrics b/modules/metrics
deleted file mode 160000
index 7652824a1a..0000000000
--- a/modules/metrics
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 7652824a1a08c40fb98cd755bf32c8378a1b4362
diff --git a/pull_submodules.py b/pull_submodules.py
index 79a9f27ac4..3e2a761810 100755
--- a/pull_submodules.py
+++ b/pull_submodules.py
@@ -6,9 +6,7 @@
modules_dir = 'modules'
modules = {
- 'grafana-dashboard': 'INPUT_GRAFANA',
'luatest': 'INPUT_LUATEST',
- 'metrics': 'INPUT_METRICS',
'tntcxx': 'INPUT_CPP_DRIVER',
}
workdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'modules')