Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement metrics server override #342

Merged
merged 3 commits into from
Oct 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ spec:
provider:
description: Traffic managent provider
type: string
metricsServer:
description: Prometheus URL
type: string
progressDeadlineSeconds:
description: Deployment progress deadline
type: number
Expand Down
3 changes: 3 additions & 0 deletions charts/flagger/templates/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ spec:
provider:
description: Traffic managent provider
type: string
metricsServer:
description: Prometheus URL
type: string
progressDeadlineSeconds:
description: Deployment progress deadline
type: number
Expand Down
30 changes: 11 additions & 19 deletions charts/flagger/templates/prometheus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,38 +133,22 @@ data:
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kubernetes;https

# Scrape config for nodes
- job_name: 'kubernetes-nodes'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics

# scrape config for cAdvisor
- job_name: 'kubernetes-cadvisor'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
Expand All @@ -174,6 +158,14 @@ data:
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
# exclude high cardinality metrics
metric_relabel_configs:
- source_labels: [__name__]
regex: (container|machine)_(cpu|memory|network|fs)_(.+)
action: keep
- source_labels: [__name__]
regex: container_memory_failures_total
action: drop

# scrape config for pods
- job_name: kubernetes-pods
Expand Down
3 changes: 3 additions & 0 deletions kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ spec:
provider:
description: Traffic managent provider
type: string
metricsServer:
description: Prometheus URL
type: string
progressDeadlineSeconds:
description: Deployment progress deadline
type: number
Expand Down
36 changes: 13 additions & 23 deletions kustomize/base/prometheus/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@ global:
scrape_interval: 5s
scrape_configs:

# Scrape config for AppMesh Envoy sidecar
# scrape config for AppMesh Envoy sidecar
- job_name: 'appmesh-envoy'
metrics_path: /stats/prometheus
kubernetes_sd_configs:
- role: pod

relabel_configs:
- source_labels: [__meta_kubernetes_pod_container_name]
action: keep
Expand All @@ -25,8 +24,7 @@ scrape_configs:
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: kubernetes_pod_name

# Exclude high cardinality metrics
# exclude high cardinality metrics
metric_relabel_configs:
- source_labels: [ cluster_name ]
regex: '(outbound|inbound|prometheus_stats).*'
Expand Down Expand Up @@ -56,7 +54,7 @@ scrape_configs:
regex: 'envoy_cluster_(lb|retry|bind|internal|max|original).*'
action: drop

# Scrape config for API servers
# scrape config for API servers
- job_name: 'kubernetes-apiservers'
kubernetes_sd_configs:
- role: endpoints
Expand All @@ -66,35 +64,19 @@ scrape_configs:
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- source_labels: [__meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: kubernetes;https

# Scrape config for nodes
- job_name: 'kubernetes-nodes'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics

# scrape config for cAdvisor
- job_name: 'kubernetes-cadvisor'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
Expand All @@ -107,6 +89,14 @@ scrape_configs:
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
# exclude high cardinality metrics
metric_relabel_configs:
- source_labels: [__name__]
regex: (container|machine)_(cpu|memory|network|fs)_(.+)
action: keep
- source_labels: [__name__]
regex: container_memory_failures_total
action: drop

# scrape config for pods
- job_name: kubernetes-pods
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/flagger/v1alpha3/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ type CanarySpec struct {
// +optional
Provider string `json:"provider,omitempty"`

// if specified overwrites the -metrics-server flag for this particular canary
// +optional
MetricsServer string `json:"metricsServer,omitempty"`

// reference to target resource
TargetRef hpav1.CrossVersionObjectReference `json:"targetRef"`

Expand Down
25 changes: 20 additions & 5 deletions pkg/controller/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package controller

import (
"fmt"
"github.com/weaveworks/flagger/pkg/metrics"
"strings"
"time"

Expand Down Expand Up @@ -749,7 +750,21 @@ func (c *Controller) analyseCanary(r *flaggerv1.Canary) bool {
}

// create observer based on the mesh provider
observer := c.observerFactory.Observer(metricsProvider)
observerFactory := c.observerFactory
observer := observerFactory.Observer(metricsProvider)

// override the global metrics server if one is specified in the canary spec
metricsServer := c.observerFactory.Client.GetMetricsServer()
if r.Spec.MetricsServer != "" {
metricsServer = r.Spec.MetricsServer
var err error
observerFactory, err = metrics.NewFactory(metricsServer, metricsProvider, 5*time.Second)
if err != nil {
c.recordEventErrorf(r, "Error building Prometheus client for %s %v", r.Spec.MetricsServer, err)
return false
}
observer = observerFactory.Observer(metricsProvider)
}

// run metrics checks
for _, metric := range r.Spec.CanaryAnalysis.Metrics {
Expand All @@ -764,7 +779,7 @@ func (c *Controller) analyseCanary(r *flaggerv1.Canary) bool {
c.recordEventWarningf(r, "Halt advancement no values found for metric %s probably %s.%s is not receiving traffic",
metric.Name, r.Spec.TargetRef.Name, r.Namespace)
} else {
c.recordEventErrorf(r, "Metrics server %s query failed: %v", c.observerFactory.Client.GetMetricsServer(), err)
c.recordEventErrorf(r, "Metrics server %s query failed: %v", metricsServer, err)
}
return false
}
Expand All @@ -784,7 +799,7 @@ func (c *Controller) analyseCanary(r *flaggerv1.Canary) bool {
c.recordEventWarningf(r, "Halt advancement no values found for metric %s probably %s.%s is not receiving traffic",
metric.Name, r.Spec.TargetRef.Name, r.Namespace)
} else {
c.recordEventErrorf(r, "Metrics server %s query failed: %v", c.observerFactory.Client.GetMetricsServer(), err)
c.recordEventErrorf(r, "Metrics server %s query failed: %v", metricsServer, err)
}
return false
}
Expand All @@ -800,13 +815,13 @@ func (c *Controller) analyseCanary(r *flaggerv1.Canary) bool {

// custom checks
if metric.Query != "" {
val, err := c.observerFactory.Client.RunQuery(metric.Query)
val, err := observerFactory.Client.RunQuery(metric.Query)
if err != nil {
if strings.Contains(err.Error(), "no values found") {
c.recordEventWarningf(r, "Halt advancement no values found for metric %s probably %s.%s is not receiving traffic",
metric.Name, r.Spec.TargetRef.Name, r.Namespace)
} else {
c.recordEventErrorf(r, "Metrics server %s query failed for %s: %v", c.observerFactory.Client.GetMetricsServer(), metric.Name, err)
c.recordEventErrorf(r, "Metrics server %s query failed for %s: %v", metricsServer, metric.Name, err)
}
return false
}
Expand Down