diff --git a/.github/workflows/skywalking.yaml b/.github/workflows/skywalking.yaml index 12d60d7d10d1..488588ac1492 100644 --- a/.github/workflows/skywalking.yaml +++ b/.github/workflows/skywalking.yaml @@ -611,6 +611,8 @@ jobs: - name: Zipkin BanyanDB config: test/e2e-v2/cases/zipkin/banyandb/e2e.yaml + - name: Nginx + config: test/e2e-v2/cases/nginx/e2e.yaml - name: APISIX metrics config: test/e2e-v2/cases/apisix/otel-collector/e2e.yaml - name: Exporter Kafka diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md index 5c360c2e14e4..ea7f4f15a0ef 100644 --- a/docs/en/changes/changes.md +++ b/docs/en/changes/changes.md @@ -39,6 +39,7 @@ * Fix `limit` doesn't work for `findEndpoint` API in ES storage. * Isolate MAL CounterWindow cache by metric name. * Fix JDBC Log query order. +* Support Nginx monitoring. #### UI diff --git a/docs/en/setup/backend/backend-nginx-monitoring.md b/docs/en/setup/backend/backend-nginx-monitoring.md new file mode 100644 index 000000000000..69a766fbdef7 --- /dev/null +++ b/docs/en/setup/backend/backend-nginx-monitoring.md @@ -0,0 +1,141 @@ +# Nginx monitoring +## Nginx performance from nginx-lua-prometheus +The [nginx-lua-prometheus](https://github.com/knyar/nginx-lua-prometheus) is a Lua library that can be used with Nginx to collect metrics +and expose them on a separate web page. +To use this library, you will need Nginx with [lua-nginx-module](https://github.com/openresty/lua-nginx-module) or directly [OpenResty](https://openresty.org/). + +SkyWalking leverages OpenTelemetry Collector to transfer the metrics to [OpenTelemetry receiver](opentelemetry-receiver.md) and into the [Meter System](./../../concepts-and-designs/meter.md). + +### Data flow +1. [nginx-lua-prometheus](https://github.com/knyar/nginx-lua-prometheus) collects metrics from Nginx and exposes them to an endpoint. +2. 
OpenTelemetry Collector fetches metrics from the endpoint exposed above via Prometheus Receiver and pushes metrics to SkyWalking OAP Server via OpenTelemetry gRPC exporter. +3. The SkyWalking OAP Server parses the expression with [MAL](../../concepts-and-designs/mal.md) to filter/calculate/aggregate and store the results. + +### Set up +1. Collect Nginx metrics and expose the following four metrics by [nginx-lua-prometheus](https://github.com/knyar/nginx-lua-prometheus). For details on metrics definition, refer to [here](../../../../test/e2e-v2/cases/nginx/nginx.conf). +- histogram: nginx_http_latency +- gauge: nginx_http_connections +- counter: nginx_http_size_bytes +- counter: nginx_http_requests_total + +2. Set up [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/getting-started/#docker). For details on Prometheus Receiver in OpenTelemetry Collector, refer to [here](../../../../test/e2e-v2/cases/nginx/otel-collector-config.yaml). +3. Config SkyWalking [OpenTelemetry receiver](opentelemetry-receiver.md). + +### Nginx Monitoring + +SkyWalking observes the status, payload, and latency of the Nginx server, which is cataloged as a `LAYER: Nginx` `Service` in the OAP and instances would be recognized as `LAYER: Nginx` `instance`. + +About `LAYER: Nginx` `endpoint`, it depends on how precisely you want to monitor Nginx. +We do not recommend exposing metrics for every request path, because it will cause an explosion of endpoint metrics data. 
+ +You can collect host metrics: +``` +http { + log_by_lua_block { + metric_bytes:inc(tonumber(ngx.var.request_length), {"request", ngx.var.host}) + metric_bytes:inc(tonumber(ngx.var.bytes_sent), {"response", ngx.var.host}) + metric_requests:inc(1, {ngx.var.status, ngx.var.host}) + metric_latency:observe(tonumber(ngx.var.request_time), {ngx.var.host}) + } +} +``` +or grouped urls and upstream metrics: +``` +upstream backend { + server ip:port; +} + +server { + + location /test { + default_type application/json; + return 200 '{"code": 200, "message": "success"}'; + + log_by_lua_block { + metric_bytes:inc(tonumber(ngx.var.request_length), {"request", "/test/**"}) + metric_bytes:inc(tonumber(ngx.var.bytes_sent), {"response", "/test/**"}) + metric_requests:inc(1, {ngx.var.status, "/test/**"}) + metric_latency:observe(tonumber(ngx.var.request_time), {"/test/**"}) + } + } + + location /test_upstream { + + proxy_pass http://backend; + + log_by_lua_block { + metric_bytes:inc(tonumber(ngx.var.request_length), {"request", "upstream/backend"}) + metric_bytes:inc(tonumber(ngx.var.bytes_sent), {"response", "upstream/backend"}) + metric_requests:inc(1, {ngx.var.status, "upstream/backend"}) + metric_latency:observe(tonumber(ngx.var.request_time), {"upstream/backend"}) + } + } +} +``` + +#### Nginx Service Supported Metrics +| Monitoring Panel | Unit | Metric Name | Catalog | Description | Data Source | +|-------------------------|------|-----------------------------------------------------------------------------------------------|---------|------------------------------------------------------|--------------------------------| +| HTTP Request Trend | | meter_nginx_service_http_requests | Service | The increment rate of HTTP requests | nginx-lua-prometheus | +| HTTP Latency | ms | meter_nginx_service_http_latency | Service | The increment rate of the latency of HTTP requests | nginx-lua-prometheus | +| HTTP Bandwidth | KB | meter_nginx_service_bandwidth | Service | The increment 
rate of the bandwidth of HTTP requests | nginx-lua-prometheus | +| HTTP Connections | | meter_nginx_service_http_connections | Service | The avg number of the connections | nginx-lua-prometheus | +| HTTP Status Trend | | meter_nginx_service_http_status | Service | The increment rate of the status of HTTP requests | nginx-lua-prometheus | +| HTTP Status 4xx Percent | % | meter_nginx_service_http_4xx_requests_increment / meter_nginx_service_http_requests_increment | Service | The percentage of 4xx status of HTTP requests | nginx-lua-prometheus | +| HTTP Status 5xx Percent | % | meter_nginx_service_http_5xx_requests_increment / meter_nginx_service_http_requests_increment | Service | The percentage of 5xx status of HTTP requests | nginx-lua-prometheus | + +#### Nginx Instance Supported Metrics +| Monitoring Panel | Unit | Metric Name | Catalog | Description | Data Source | +|---------------------------|------|-------------------------------------------------------------------------------------------------|----------|------------------------------------------------------|--------------------------------| +| HTTP Request Trend | | meter_nginx_instance_http_requests | Instance | The increment rate of HTTP requests | nginx-lua-prometheus | +| HTTP Latency | ms | meter_nginx_instance_http_latency | Instance | The increment rate of the latency of HTTP requests | nginx-lua-prometheus | +| HTTP Bandwidth | KB | meter_nginx_instance_bandwidth | Instance | The increment rate of the bandwidth of HTTP requests | nginx-lua-prometheus | +| HTTP Connections | | meter_nginx_instance_http_connections | Instance | The avg number of the connections | nginx-lua-prometheus | +| HTTP Status Trend | | meter_nginx_instance_http_status | Instance | The increment rate of the status of HTTP requests | nginx-lua-prometheus | +| HTTP Status 4xx Percent | % | meter_nginx_instance_http_4xx_requests_increment / meter_nginx_instance_http_requests_increment | Instance | The percentage of 4xx status of 
HTTP requests | nginx-lua-prometheus | +| HTTP Status 5xx Percent | % | meter_nginx_instance_http_5xx_requests_increment / meter_nginx_instance_http_requests_increment | Instance | The percentage of 5xx status of HTTP requests | nginx-lua-prometheus | + +#### Nginx Endpoint Supported Metrics +| Monitoring Panel | Unit | Metric Name | Catalog | Description | Data Source | +|-------------------------|------|-------------------------------------------------------------------------------------------------|----------|------------------------------------------------------|----------------------| +| HTTP Request Trend | | meter_nginx_endpoint_http_requests | Endpoint | The increment rate of HTTP requests | nginx-lua-prometheus | +| HTTP Latency | ms | meter_nginx_endpoint_http_latency | Endpoint | The increment rate of the latency of HTTP requests | nginx-lua-prometheus | +| HTTP Bandwidth | KB | meter_nginx_endpoint_bandwidth | Endpoint | The increment rate of the bandwidth of HTTP requests | nginx-lua-prometheus | +| HTTP Status Trend | | meter_nginx_endpoint_http_status | Endpoint | The increment rate of the status of HTTP requests | nginx-lua-prometheus | +| HTTP Status 4xx Percent | % | meter_nginx_endpoint_http_4xx_requests_increment / meter_nginx_endpoint_http_requests_increment | Endpoint | The percentage of 4xx status of HTTP requests | nginx-lua-prometheus | +| HTTP Status 5xx Percent | % | meter_nginx_endpoint_http_5xx_requests_increment / meter_nginx_endpoint_http_requests_increment | Endpoint | The percentage of 5xx status of HTTP requests | nginx-lua-prometheus | + +### Customizations +You can customize your own metrics/expression/dashboard panel. + +The metrics definition and expression rules are found in `/config/otel-rules/nginx-service.yaml, /config/otel-rules/nginx-instance.yaml, /config/otel-rules/nginx-endpoint.yaml`. + +The Nginx dashboard panel configurations are found in `/config/ui-initialized-templates/nginx`. 
+ +## Collect nginx access and error log +SkyWalking leverages [fluentbit](https://fluentbit.io/) or other log agents for collecting access log and error log of Nginx. + +### Data flow +1. fluentbit agent collects access log and error log from Nginx. +2. fluentbit agent sends data to SkyWalking OAP Server using native log APIs via HTTP. +3. The SkyWalking OAP Server parses the expression with [LAL](../../concepts-and-designs/lal.md) to parse/extract and store the results. + +### Set up +1. Install [fluentbit](https://docs.fluentbit.io/manual/installation/docker). +2. Config fluent bit with fluent-bit.conf, refer to [here](../../../../test/e2e-v2/cases/nginx/fluent-bit.conf). + +### Error Log Monitoring +Error Log monitoring provides monitoring of the error.log of the Nginx server. + +#### Supported Metrics +| Monitoring Panel | Metric Name | Catalog | Description | Data Source | +|--------------------------|--------------------------------------|----------|-------------------------------------------|-------------| +| Service Error Log Count | meter_nginx_service_error_log_count | Service | The count of log level of nginx error.log | fluent bit | +| Instance Error Log Count | meter_nginx_instance_error_log_count | Instance | The count of log level of nginx error.log | fluent bit | + +### Customizations +You can customize your own metrics/expression/dashboard panel. + +The log collection and analysis rules are found in `/config/lal/nginx.yaml`, `/config/log-mal-rules/nginx.yaml`. + +The Nginx dashboard panel configurations are found in `/config/ui-initialized-templates/nginx`. 
\ No newline at end of file diff --git a/docs/menu.yml b/docs/menu.yml index eed8f06f8217..a01ed1d4f66f 100644 --- a/docs/menu.yml +++ b/docs/menu.yml @@ -239,6 +239,8 @@ catalog: path: "/en/setup/service-agent/browser-agent" - name: "Gateway Monitoring" catalog: + - name: "Nginx Monitoring" + path: "/en/setup/backend/backend-nginx-monitoring" - name: "APISIX Monitoring" path: "/en/setup/backend/backend-apisix-monitoring" - name: "AWS API Gateway" diff --git a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/Layer.java b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/Layer.java index 46cf71f52a07..629292cdb1ad 100644 --- a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/Layer.java +++ b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/analysis/Layer.java @@ -205,7 +205,12 @@ public enum Layer { /** * A scalable, fault-tolerant, and low-latency storage service optimized for real-time workloads. */ - BOOKKEEPER(33, true); + BOOKKEEPER(33, true), + + /** + * Nginx is an HTTP and reverse proxy server, a mail proxy server, and a generic TCP/UDP proxy server. 
+ */ + NGINX(34, true); private final int value; /** diff --git a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/management/ui/template/UITemplateInitializer.java b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/management/ui/template/UITemplateInitializer.java index 686619c428e4..f8e5e7797e3c 100644 --- a/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/management/ui/template/UITemplateInitializer.java +++ b/oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/management/ui/template/UITemplateInitializer.java @@ -72,6 +72,7 @@ public class UITemplateInitializer { Layer.KAFKA.name(), Layer.PULSAR.name(), Layer.BOOKKEEPER.name(), + Layer.NGINX.name(), "custom" }; private final UITemplateManagementService uiTemplateManagementService; diff --git a/oap-server/server-starter/src/main/resources/application.yml b/oap-server/server-starter/src/main/resources/application.yml index 4dcf16e28ebd..5d93c0cfe737 100644 --- a/oap-server/server-starter/src/main/resources/application.yml +++ b/oap-server/server-starter/src/main/resources/application.yml @@ -245,8 +245,8 @@ agent-analyzer: log-analyzer: selector: ${SW_LOG_ANALYZER:default} default: - lalFiles: ${SW_LOG_LAL_FILES:envoy-als,mesh-dp,mysql-slowsql,pgsql-slowsql,redis-slowsql,k8s-service,default} - malFiles: ${SW_LOG_MAL_FILES:""} + lalFiles: ${SW_LOG_LAL_FILES:envoy-als,mesh-dp,mysql-slowsql,pgsql-slowsql,redis-slowsql,k8s-service,nginx,default} + malFiles: ${SW_LOG_MAL_FILES:"nginx"} event-analyzer: selector: ${SW_EVENT_ANALYZER:default} @@ -340,7 +340,7 @@ receiver-otel: selector: ${SW_OTEL_RECEIVER:default} default: enabledHandlers: ${SW_OTEL_RECEIVER_ENABLED_HANDLERS:"otlp-metrics,otlp-logs"} - enabledOtelMetricsRules: 
${SW_OTEL_RECEIVER_ENABLED_OTEL_METRICS_RULES:"apisix,k8s/*,istio-controlplane,vm,mysql/*,postgresql/*,oap,aws-eks/*,windows,aws-s3/*,aws-dynamodb/*,aws-gateway/*,redis/*,elasticsearch/*,rabbitmq/*,mongodb/*,kafka/*,pulsar/*,bookkeeper/*"} + enabledOtelMetricsRules: ${SW_OTEL_RECEIVER_ENABLED_OTEL_METRICS_RULES:"apisix,nginx/*,k8s/*,istio-controlplane,vm,mysql/*,postgresql/*,oap,aws-eks/*,windows,aws-s3/*,aws-dynamodb/*,aws-gateway/*,redis/*,elasticsearch/*,rabbitmq/*,mongodb/*,kafka/*,pulsar/*,bookkeeper/*"} receiver-zipkin: selector: ${SW_RECEIVER_ZIPKIN:-} diff --git a/oap-server/server-starter/src/main/resources/lal/nginx.yaml b/oap-server/server-starter/src/main/resources/lal/nginx.yaml new file mode 100644 index 000000000000..527f72b385c2 --- /dev/null +++ b/oap-server/server-starter/src/main/resources/lal/nginx.yaml @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +rules: + - name: nginx-access-log + layer: NGINX + dsl: | + filter { + if (tag("LOG_KIND") == "NGINX_ACCESS_LOG") { + sink { + } + } + } + - name: nginx-error-log + layer: NGINX + dsl: | + filter { + if (tag("LOG_KIND") == "NGINX_ERROR_LOG") { + text { + regexp $/(?