Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Metrics][Kubernetes] Update proxy, scheduler and controller manager fields and dashboards #4948

Merged
merged 4 commits into from
Jan 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/kubernetes/changelog.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# newer versions go on top
- version: "1.31.1"
changes:
- description: Update controller manager, proxy and scheduler metrics and dashboards
type: enhancement
link: https://github.com/elastic/integrations/pull/4948
- version: "1.31.0"
changes:
- description: Use data_stream.dataset as pre filters for dashboards and remove tags
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,11 @@
- name: kubernetes.controllermanager
type: group
fields:
- name: url
dimension: true
type: keyword
description: >
Request url

- name: verb
dimension: true
type: keyword
description: >
Request verb

description: |
HTTP verb
- name: code
dimension: true
type: keyword
Expand All @@ -27,7 +20,7 @@
dimension: true
type: keyword
description: |
Request host
HTTP host
- name: name
dimension: true
type: keyword
Expand All @@ -44,7 +37,7 @@
- name: cpu.sec
type: double
metric_type: counter
description: CPU seconds
description: Total user and system CPU time spent in seconds
- name: memory.resident.bytes
type: long
format: bytes
Expand All @@ -64,76 +57,102 @@
- name: fds.max.count
type: long
metric_type: gauge
description: Maximum number of open file descriptors
description: Limit for open file descriptors
- name: started.sec
type: double
metric_type: gauge
description: Seconds since the process started
description: Start time of the process since unix epoch in seconds
- name: client
type: group
fields:
- name: request.count
type: long
metric_type: counter
description: >
Number of requests as client

- name: request.duration.us.bucket.*
type: object
object_type: long
description: Response latency distribution, histogram buckets
description: Number of HTTP requests to API server, broken down by status code, method and host
- name: request.duration.us.sum
type: long
unit: micros
metric_type: counter
description: Request duration, sum in microseconds
description: Sum of requests latency in microseconds, broken down by verb and host
- name: request.duration.us.count
type: long
metric_type: counter
description: Request duration, number of operations
description: Number of request duration operations to API server, broken down by verb and host
- name: request.duration.us.bucket.*
type: object
object_type: long
description: Requests latency distribution in histogram buckets, broken down by verb and host
- name: request.size.bytes.sum
type: long
format: bytes
unit: byte
metric_type: counter
description: Requests size sum in bytes, broken down by verb and host
- name: request.size.bytes.count
type: long
metric_type: counter
description: Number of requests, broken down by verb and host
- name: request.size.bytes.bucket.*
type: object
object_type: long
description: Requests size distribution in histogram buckets, broken down by verb and host
- name: response.size.bytes.count
type: long
metric_type: counter
description: Number of responses, broken down by verb and host
- name: response.size.bytes.sum
type: long
format: bytes
unit: byte
metric_type: counter
description: Responses size sum in bytes, broken down by verb and host
- name: response.size.bytes.bucket.*
type: object
object_type: long
description: Responses size distribution in histogram buckets, broken down by verb and host
- name: workqueue
type: group
fields:
- name: longestrunning.sec
type: double
metric_type: gauge
description: Longest running processors
description: How many seconds has the longest running processor been running, broken down by workqueue name
- name: unfinished.sec
type: double
metric_type: gauge
description: Unfinished processors
description: How many seconds of work has been done that is in progress and hasn't been considered in the longest running processor, broken down by workqueue name
- name: adds.count
type: long
metric_type: counter
description: Workqueue add count
description: Workqueue add count, broken down by workqueue name
- name: depth.count
type: long
metric_type: gauge
description: Workqueue depth count
description: Workqueue current depth, broken down by workqueue name
- name: retries.count
type: long
metric_type: counter
description: Workqueue number of retries
description: Workqueue number of retries, broken down by workqueue name
- name: node.collector
type: group
fields:
- name: eviction.count
type: long
metric_type: counter
description: Number of node evictions
description: Number of node evictions, broken down by zone
- name: unhealthy.count
type: long
metric_type: gauge
description: Number of unhealthy nodes
description: Number of unhealthy nodes, broken down by zone
- name: count
type: long
metric_type: gauge
description: Number of nodes
description: Number of nodes, broken down by zone
- name: health.pct
type: long
metric_type: gauge
description: Percentage of healthy nodes
description: Percentage of healthy nodes, broken down by zone
- name: leader.is_master
type: boolean
description: |
Whether the node is master
Whether the controller manager instance is leader
104 changes: 60 additions & 44 deletions packages/kubernetes/data_stream/controllermanager/sample_event.json
Original file line number Diff line number Diff line change
@@ -1,41 +1,60 @@
{
"kubernetes": {
"controllermanager": {
"verb": "GET",
"client": {
"request": {
"duration": {
"us": {
"bucket": {
"1000": 10787,
"2000": 13002,
"4000": 13442,
"8000": 13533,
"16000": 13558,
"32000": 13568,
"64000": 13571,
"128000": 13571,
"256000": 13571,
"512000": 13571,
"+Inf": 13571
},
"count": 13571,
"sum": 12994981.660999978
}
"node": {
"collector": {
"eviction": {
"count": 0
},
"unhealthy": {
"count": 0
},
"count": 1,
"health": {
"pct": 100
}
}
},
"url": "https://172.18.0.2:6443/apis?timeout=32s"
"process": {
"memory": {
"virtual": {
"bytes": 792043520
},
"resident": {
"bytes": 91090944
}
},
"fds": {
"max": {
"count": 1048576
},
"open": {
"count": 41
}
},
"cpu": {
"sec": 23
},
"started": {
"sec": 1673362806.13
}
}
}
},
"orchestrator": {
"cluster": {
"name": "kind",
"url": "kind-control-plane:6443"
}
},
"agent": {
"name": "kind-control-plane",
"id": "d5aadb7a-c9ec-4563-b83a-1c4bb5f1471f",
"ephemeral_id": "7ae929a5-2943-43de-98e8-693dc0c381d7",
"id": "ee1d778a-e607-4c29-b152-f6e83e606966",
"type": "metricbeat",
"version": "8.4.0"
"ephemeral_id": "084bb5dd-df70-4127-9a52-47fae69de446",
"version": "8.7.0"
},
"@timestamp": "2022-07-27T08:44:46.219Z",
"@timestamp": "2023-01-10T15:13:11.909Z",
"ecs": {
"version": "8.0.0"
},
Expand All @@ -51,51 +70,48 @@
"host": {
"hostname": "kind-control-plane",
"os": {
"kernel": "5.10.47-linuxkit",
"kernel": "5.15.49-linuxkit",
"codename": "focal",
"name": "Ubuntu",
"type": "linux",
"family": "debian",
"version": "20.04.4 LTS (Focal Fossa)",
"version": "20.04.5 LTS (Focal Fossa)",
"platform": "ubuntu"
},
"containerized": true,
"containerized": false,
"ip": [
"10.244.0.1",
"10.244.0.1",
"10.244.0.1",
"10.244.0.1",
"10.244.0.1",
"172.23.0.2",
"172.20.0.2",
"172.18.0.2",
"fc00:f853:ccd:e793::2",
"fe80::42:acff:fe12:2"
],
"name": "kind-control-plane",
"id": "1c1d736687984c73b6a5f77c1464d4da",
"mac": [
"02:42:ac:12:00:02",
"02:42:ac:17:00:02",
"06:9c:33:01:a5:e7",
"06:f8:26:c9:76:70",
"0e:c0:30:20:74:c5",
"76:48:b8:c1:a7:ee",
"d6:f7:d3:28:f5:9c"
"02-42-AC-12-00-02",
"02-42-AC-14-00-02",
"6E-87-97-B3-C4-A1",
"7E-2B-73-DA-CF-B7",
"F2-54-31-F4-76-AB"
],
"architecture": "x86_64"
},
"elastic_agent": {
"id": "d5aadb7a-c9ec-4563-b83a-1c4bb5f1471f",
"version": "8.4.0",
"snapshot": false
"id": "ee1d778a-e607-4c29-b152-f6e83e606966",
"version": "8.7.0",
"snapshot": true
},
"metricset": {
"period": 10000,
"name": "controllermanager"
},
"event": {
"duration": 59137358,
"duration": 26710852,
"agent_id_status": "verified",
"ingested": "2022-07-27T08:44:46Z",
"ingested": "2023-01-10T15:13:12Z",
"module": "kubernetes",
"dataset": "kubernetes.controllermanager"
}
Expand Down
Loading