Skip to content

Commit

Permalink
Add missing attributes to self-scraping prometheus metrics
Browse files Browse the repository at this point in the history
Prometheus metrics reported by the gateway and k8s-cluster-receiver collector deployments are missing k8s- and host-related attributes. This commit ensures that all self-scraping Prometheus metrics carry the required attributes.
  • Loading branch information
dmitryax committed Jul 7, 2021
1 parent 4ee6938 commit 8fc7f99
Show file tree
Hide file tree
Showing 13 changed files with 244 additions and 63 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## Unreleased

### Removed

- BREAKING: Remove SAPM receiver from default config (#168)

### Fixed

- All missing attributes are added to prometheus metrics reported
by gateway and k8s-cluster-receiver collector deployments (#170)

## [0.28.1] - 2021-06-18

### Changed
Expand Down
31 changes: 31 additions & 0 deletions helm-charts/splunk-otel-collector/templates/config/_common.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,34 @@ zipkin:
endpoint: 0.0.0.0:9411
{{- end }}
{{- end }}

{{/*
Common config for resourcedetection processor.

Emits a single "resourcedetection" processor entry. The detector list always
contains "system" and "env"; one Kubernetes distro detector (gke, eks or aks)
is appended according to .Values.distro, followed by one cloud provider
detector (gce, ec2 or azure) according to .Values.provider. Any other value
for either setting adds no detector.

Because the template reads .Values.distro and .Values.provider, it must be
included with the chart root context (".").
*/}}
{{- define "splunk-otel-collector.resourceDetectionProcessor" -}}
# Resource detection processor picks attributes from host environment.
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor
resourcedetection:
detectors:
- system
# Note: Kubernetes distro detectors need to come first so they set the proper cloud.platform
# before it gets set later by the cloud provider detector.
- env
{{- if eq .Values.distro "gke" }}
- gke
{{- else if eq .Values.distro "eks" }}
- eks
{{- else if eq .Values.distro "aks" }}
- aks
{{- end }}
{{- if eq .Values.provider "gcp" }}
- gce
{{- else if eq .Values.provider "aws" }}
- ec2
{{- else if eq .Values.provider "azure" }}
- azure
{{- end }}
# Don't override existing resource attributes to maintain identification of data sources
override: false
timeout: 10s
{{- end }}
28 changes: 2 additions & 26 deletions helm-charts/splunk-otel-collector/templates/config/_otel-agent.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -114,36 +114,12 @@ processors:
{{- end }}
{{- end }}

# Resource detection processor picks attributes from host environment.
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor
resourcedetection:
detectors:
- system
# Note: Kubernetes distro detectors need to come first so they set the proper cloud.platform
# before it gets set later by the cloud provider detector.
- env
{{- if eq .Values.distro "gke" }}
- gke
{{- else if eq .Values.distro "eks" }}
- eks
{{- else if eq .Values.distro "aks" }}
- aks
{{- end }}
{{- if eq .Values.provider "gcp" }}
- gce
{{- else if eq .Values.provider "aws" }}
- ec2
{{- else if eq .Values.provider "azure" }}
- azure
{{- end }}
# Don't override existing resource attributes to maintain identification of data sources
override: false
timeout: 10s

{{- include "splunk-otel-collector.otelMemoryLimiterConfig" .Values.otelAgent | nindent 2 }}

batch:

{{- include "splunk-otel-collector.resourceDetectionProcessor" . | nindent 2 }}

resource:
# General resource attributes that apply to all telemetry passing through the agent.
attributes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ extensions:
receivers:
{{- include "splunk-otel-collector.otelTraceReceivers" . | nindent 2 }}
# Prometheus receiver scraping metrics from the pod itself
prometheus:
prometheus/collector:
config:
scrape_configs:
- job_name: 'otel-collector'
Expand Down Expand Up @@ -46,6 +46,24 @@ processors:

batch:

{{- include "splunk-otel-collector.resourceDetectionProcessor" . | nindent 2 }}

# Resource attributes specific to the collector itself.
resource/add_collector_k8s:
attributes:
- action: insert
key: k8s.node.name
value: "${K8S_NODE_NAME}"
- action: insert
key: k8s.pod.name
value: "${K8S_POD_NAME}"
- action: insert
key: k8s.pod.uid
value: "${K8S_POD_UID}"
- action: insert
key: k8s.namespace.name
value: "${K8S_NAMESPACE}"

resource/add_cluster_name:
attributes:
- action: upsert
Expand Down Expand Up @@ -100,7 +118,7 @@ service:

# default metrics pipeline
metrics:
receivers: [otlp, prometheus, signalfx]
receivers: [otlp, signalfx]
processors: [memory_limiter, batch, resource/add_cluster_name]
exporters: [signalfx]

Expand All @@ -117,4 +135,15 @@ service:
processors: [memory_limiter, batch]
exporters: [splunk_hec]
{{- end }}

# Pipeline for metrics collected about the collector pod itself.
metrics/collector:
receivers: [prometheus/collector]
processors:
- memory_limiter
- batch
- resource/add_cluster_name
- resource/add_collector_k8s
- resourcedetection
exporters: [signalfx]
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ extensions:

receivers:
# Prometheus receiver scraping metrics from the pod itself, both otel and fluentd
prometheus:
prometheus/k8s_cluster_receiver:
config:
scrape_configs:
- job_name: 'otel-k8s-cluster-receiver'
Expand All @@ -24,14 +24,23 @@ processors:

batch:

# k8s_tagger to enrich its own metrics
k8s_tagger:
filter:
node_from_env_var: K8S_NODE_NAME
labels:
key: component
op: equals
value: otel-k8s-cluster-receiver
{{- include "splunk-otel-collector.resourceDetectionProcessor" . | nindent 2 }}

# Resource attributes specific to the collector itself.
resource/add_collector_k8s:
attributes:
- action: insert
key: k8s.node.name
value: "${K8S_NODE_NAME}"
- action: insert
key: k8s.pod.name
value: "${K8S_POD_NAME}"
- action: insert
key: k8s.pod.uid
value: "${K8S_POD_UID}"
- action: insert
key: k8s.namespace.name
value: "${K8S_NAMESPACE}"

resource:
attributes:
Expand Down Expand Up @@ -71,7 +80,18 @@ service:
pipelines:
# k8s metrics pipeline
metrics:
receivers: [prometheus, k8s_cluster]
receivers: [k8s_cluster]
processors: [memory_limiter, batch, resource]
exporters: [signalfx]

# Pipeline for metrics collected about the collector pod itself.
# It needs its own name ("metrics/collector"): a second bare "metrics" key
# would duplicate the k8s metrics pipeline key above, and the last duplicate
# wins in most YAML parsers, silently dropping the k8s_cluster receiver.
metrics/collector:
  receivers: [prometheus/k8s_cluster_receiver]
  processors:
    - memory_limiter
    - batch
    - resource
    - resource/add_collector_k8s
    - resourcedetection
  exporters: [signalfx]
{{- end }}
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@ spec:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: K8S_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: K8S_POD_UID
valueFrom:
fieldRef:
fieldPath: metadata.uid
- name: K8S_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: SPLUNK_ACCESS_TOKEN
valueFrom:
secretKeyRef:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@ spec:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: K8S_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: K8S_POD_UID
valueFrom:
fieldRef:
fieldPath: metadata.uid
- name: K8S_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: SPLUNK_ACCESS_TOKEN
valueFrom:
secretKeyRef:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@ data:
health_check: null
processors:
batch: null
k8s_tagger:
filter:
labels:
key: component
op: equals
value: otel-k8s-cluster-receiver
node_from_env_var: K8S_NODE_NAME
memory_limiter:
ballast_size_mib: ${SPLUNK_BALLAST_SIZE_MIB}
check_interval: 5s
Expand All @@ -43,12 +36,32 @@ data:
- action: upsert
key: k8s.cluster.name
value: CHANGEME
resource/add_collector_k8s:
attributes:
- action: insert
key: k8s.node.name
value: ${K8S_NODE_NAME}
- action: insert
key: k8s.pod.name
value: ${K8S_POD_NAME}
- action: insert
key: k8s.pod.uid
value: ${K8S_POD_UID}
- action: insert
key: k8s.namespace.name
value: ${K8S_NAMESPACE}
resourcedetection:
detectors:
- system
- env
override: false
timeout: 10s
receivers:
k8s_cluster:
auth_type: serviceAccount
metadata_exporters:
- signalfx
prometheus:
prometheus/k8s_cluster_receiver:
config:
scrape_configs:
- job_name: otel-k8s-cluster-receiver
Expand All @@ -67,6 +80,7 @@ data:
- memory_limiter
- batch
- resource
- resource/add_collector_k8s
- resourcedetection
receivers:
- prometheus
- k8s_cluster
- prometheus/k8s_cluster_receiver
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ spec:
component: otel-k8s-cluster-receiver
release: default
annotations:
checksum/config: 837ec900698252db42858176e2092a19539e10d66c92420d27aece7d92ca8737
checksum/config: 4412b5226207ae7bef173eb82f79d273729d25ca9eecbbe0274a141a29b0c711
spec:
serviceAccountName: default-splunk-otel-collector
containers:
Expand All @@ -47,6 +47,18 @@ spec:
fieldRef:
apiVersion: v1
fieldPath: status.podIP
- name: K8S_POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: K8S_POD_UID
valueFrom:
fieldRef:
fieldPath: metadata.uid
- name: K8S_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: SPLUNK_ACCESS_TOKEN
valueFrom:
secretKeyRef:
Expand Down
34 changes: 32 additions & 2 deletions rendered/manifests/gateway-only/configmap-otel-collector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,26 @@ data:
- action: upsert
key: k8s.cluster.name
value: CHANGEME
resource/add_collector_k8s:
attributes:
- action: insert
key: k8s.node.name
value: ${K8S_NODE_NAME}
- action: insert
key: k8s.pod.name
value: ${K8S_POD_NAME}
- action: insert
key: k8s.pod.uid
value: ${K8S_POD_UID}
- action: insert
key: k8s.namespace.name
value: ${K8S_NAMESPACE}
resourcedetection:
detectors:
- system
- env
override: false
timeout: 10s
receivers:
jaeger:
protocols:
Expand All @@ -59,7 +79,7 @@ data:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:55681
prometheus:
prometheus/collector:
config:
scrape_configs:
- job_name: otel-collector
Expand Down Expand Up @@ -103,8 +123,18 @@ data:
- resource/add_cluster_name
receivers:
- otlp
- prometheus
- signalfx
metrics/collector:
exporters:
- signalfx
processors:
- memory_limiter
- batch
- resource/add_cluster_name
- resource/add_collector_k8s
- resourcedetection
receivers:
- prometheus/collector
traces:
exporters:
- sapm
Expand Down
Loading

0 comments on commit 8fc7f99

Please sign in to comment.