From 14d4802f7b9cc0e4ecfcdc27ea0d702ee7c7ce69 Mon Sep 17 00:00:00 2001 From: Yuki Tsuboi Date: Mon, 29 Jun 2020 01:31:13 +0900 Subject: [PATCH] Consolidate Error count and Duration using Vec Signed-off-by: Yuki Tsuboi --- pkg/agent/metrics/prometheus.go | 74 +++++++++------------------------ pkg/agent/openflow/pipeline.go | 28 ++++++------- test/e2e/prometheus_test.go | 8 +--- 3 files changed, 36 insertions(+), 74 deletions(-) diff --git a/pkg/agent/metrics/prometheus.go b/pkg/agent/metrics/prometheus.go index 14a254091d5..3cc6ef0cf77 100644 --- a/pkg/agent/metrics/prometheus.go +++ b/pkg/agent/metrics/prometheus.go @@ -68,52 +68,22 @@ var ( StabilityLevel: metrics.STABLE, }, []string{"table_id"}) - OVSFlowAddErrorCount = metrics.NewCounter( + OVSFlowOpsErrorCount = metrics.NewCounterVec( &metrics.CounterOpts{ - Name: "antrea_agent_ovs_flow_add_error_count", - Help: "Number of OVS flow adding errors.", + Name: "antrea_agent_ovs_flow_ops_error_count", + Help: "Number of OVS flow operation errors.", StabilityLevel: metrics.STABLE, }, + []string{"operation"}, ) - OVSFlowModifyErrorCount = metrics.NewCounter( - &metrics.CounterOpts{ - Name: "antrea_agent_ovs_flow_modify_error_count", - Help: "Number of OVS flow modifying errors.", - StabilityLevel: metrics.STABLE, - }, - ) - - OVSFlowDeleteErrorCount = metrics.NewCounter( - &metrics.CounterOpts{ - Name: "antrea_agent_ovs_flow_delete_error_count", - Help: "Number of OVS flow deleting errors.", - StabilityLevel: metrics.STABLE, - }, - ) - - OVSFlowAddDuration = metrics.NewSummary( - &metrics.SummaryOpts{ - Name: "antrea_agent_ovs_flow_add_duration_milliseconds", - Help: "The duration of adding ovs flows", - StabilityLevel: metrics.STABLE, - }, - ) - - OVSFlowModifyDuration = metrics.NewSummary( - &metrics.SummaryOpts{ - Name: "antrea_agent_ovs_flow_modify_duration_milliseconds", - Help: "The duration of modifying ovs flows", - StabilityLevel: metrics.STABLE, - }, - ) - - OVSFlowDeleteDuration = metrics.NewSummary( + OVSFlowOpsDuration = metrics.NewSummaryVec( &metrics.SummaryOpts{ - Name: "antrea_agent_ovs_flow_delete_duration_milliseconds", - Help: "The duration of deleting ovs flows", + Name: "antrea_agent_ovs_flow_ops_duration_milliseconds", + Help: "The duration of OVS flow operation", StabilityLevel: metrics.STABLE, }, + []string{"operation"}, ) ) @@ -160,22 +130,18 @@ func InitializePrometheusMetrics() { if err := legacyregistry.Register(OVSFlowCount); err != nil { klog.Error("Failed to register antrea_agent_ovs_flow_count with Prometheus") } - if err := legacyregistry.Register(OVSFlowAddErrorCount); err != nil { - klog.Error("Failed to register antrea_agent_ovs_flow_add_error_count with Prometheus") - } - if err := legacyregistry.Register(OVSFlowModifyErrorCount); err != nil { - klog.Error("Failed to register antrea_agent_ovs_flow_modify_error_count with Prometheus") - } - if err := legacyregistry.Register(OVSFlowDeleteErrorCount); err != nil { - klog.Error("Failed to register antrea_agent_ovs_flow_delete_error_count with Prometheus") - } - if err := legacyregistry.Register(OVSFlowAddDuration); err != nil { - klog.Error("Failed to register antrea_agent_ovs_flow_add_duration_milliseconds with Prometheus") - } - if err := legacyregistry.Register(OVSFlowModifyDuration); err != nil { - klog.Error("Failed to register antrea_agent_ovs_flow_modify_duration_milliseconds with Prometheus") + + if err := legacyregistry.Register(OVSFlowOpsErrorCount); err != nil { + klog.Error("Failed to register antrea_agent_ovs_flow_ops_error_count with Prometheus") } - if err := legacyregistry.Register(OVSFlowDeleteDuration); err != nil { - klog.Error("Failed to register antrea_agent_ovs_flow_delete_duration_milliseconds with Prometheus") + OVSFlowOpsErrorCount.WithLabelValues("add") + OVSFlowOpsErrorCount.WithLabelValues("modify") + OVSFlowOpsErrorCount.WithLabelValues("delete") + + if err := legacyregistry.Register(OVSFlowOpsDuration); err != nil { + klog.Error("Failed to register antrea_agent_ovs_flow_ops_duration_milliseconds with Prometheus") } + OVSFlowOpsDuration.WithLabelValues("add") + OVSFlowOpsDuration.WithLabelValues("modify") + OVSFlowOpsDuration.WithLabelValues("delete") } diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 1179c9b73bc..32b2eb311c3 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -199,10 +199,10 @@ func (c *client) Add(flow binding.Flow) error { startTime := time.Now() defer func() { d := time.Since(startTime) - metrics.OVSFlowAddDuration.Observe(float64(d.Milliseconds())) + metrics.OVSFlowOpsDuration.WithLabelValues("add").Observe(float64(d.Milliseconds())) }() if err := c.bridge.AddFlowsInBundle([]binding.Flow{flow}, nil, nil); err != nil { - metrics.OVSFlowAddErrorCount.Inc() + metrics.OVSFlowOpsErrorCount.WithLabelValues("add").Inc() return err } return nil @@ -212,10 +212,10 @@ func (c *client) Modify(flow binding.Flow) error { startTime := time.Now() defer func() { d := time.Since(startTime) - metrics.OVSFlowModifyDuration.Observe(float64(d.Milliseconds())) + metrics.OVSFlowOpsDuration.WithLabelValues("modify").Observe(float64(d.Milliseconds())) }() if err := c.bridge.AddFlowsInBundle(nil, []binding.Flow{flow}, nil); err != nil { - metrics.OVSFlowModifyErrorCount.Inc() + metrics.OVSFlowOpsErrorCount.WithLabelValues("modify").Inc() return err } return nil @@ -225,10 +225,10 @@ func (c *client) Delete(flow binding.Flow) error { startTime := time.Now() defer func() { d := time.Since(startTime) - metrics.OVSFlowDeleteDuration.Observe(float64(d.Milliseconds())) + metrics.OVSFlowOpsDuration.WithLabelValues("delete").Observe(float64(d.Milliseconds())) }() if err := c.bridge.AddFlowsInBundle(nil, nil, []binding.Flow{flow}); err != nil { - metrics.OVSFlowDeleteErrorCount.Inc() + metrics.OVSFlowOpsErrorCount.WithLabelValues("delete").Inc() return err } return nil @@ -238,10 +238,10 @@ func (c *client) AddAll(flows []binding.Flow) error { startTime := time.Now() defer func() { d := time.Since(startTime) - metrics.OVSFlowAddDuration.Observe(float64(d.Milliseconds())) + metrics.OVSFlowOpsDuration.WithLabelValues("add").Observe(float64(d.Milliseconds())) }() if err := c.bridge.AddFlowsInBundle(flows, nil, nil); err != nil { - metrics.OVSFlowAddErrorCount.Inc() + metrics.OVSFlowOpsErrorCount.WithLabelValues("add").Inc() return err } return nil @@ -251,10 +251,10 @@ func (c *client) DeleteAll(flows []binding.Flow) error { startTime := time.Now() defer func() { d := time.Since(startTime) - metrics.OVSFlowDeleteDuration.Observe(float64(d.Milliseconds())) + metrics.OVSFlowOpsDuration.WithLabelValues("delete").Observe(float64(d.Milliseconds())) }() if err := c.bridge.AddFlowsInBundle(nil, nil, flows); err != nil { - metrics.OVSFlowDeleteErrorCount.Inc() + metrics.OVSFlowOpsErrorCount.WithLabelValues("delete").Inc() return err } return nil @@ -264,10 +264,10 @@ func (c *client) AddOFEntries(ofEntries []binding.OFEntry) error { startTime := time.Now() defer func() { d := time.Since(startTime) - metrics.OVSFlowAddDuration.Observe(float64(d.Milliseconds())) + metrics.OVSFlowOpsDuration.WithLabelValues("add").Observe(float64(d.Milliseconds())) }() if err := c.bridge.AddOFEntriesInBundle(ofEntries, nil, nil); err != nil { - metrics.OVSFlowAddErrorCount.Inc() + metrics.OVSFlowOpsErrorCount.WithLabelValues("add").Inc() return err } return nil @@ -277,10 +277,10 @@ func (c *client) DeleteOFEntries(ofEntries []binding.OFEntry) error { startTime := time.Now() defer func() { d := time.Since(startTime) - metrics.OVSFlowDeleteDuration.Observe(float64(d.Milliseconds())) + metrics.OVSFlowOpsDuration.WithLabelValues("delete").Observe(float64(d.Milliseconds())) }() if err := c.bridge.AddOFEntriesInBundle(nil, nil, ofEntries); err != nil { - metrics.OVSFlowDeleteErrorCount.Inc() + metrics.OVSFlowOpsErrorCount.WithLabelValues("delete").Inc() return err } return nil diff --git a/test/e2e/prometheus_test.go b/test/e2e/prometheus_test.go index c64b4d91fdb..37fe1969d67 100644 --- a/test/e2e/prometheus_test.go +++ b/test/e2e/prometheus_test.go @@ -40,12 +40,8 @@ var antreaAgentMetrics = []string{ "antrea_agent_local_pod_count", "antrea_agent_networkpolicy_count", "antrea_agent_ovs_flow_count", - "antrea_agent_ovs_flow_add_error_count", - "antrea_agent_ovs_flow_add_duration_milliseconds", - "antrea_agent_ovs_flow_modify_duration_milliseconds", - "antrea_agent_ovs_flow_modify_error_count", - "antrea_agent_ovs_flow_delete_duration_milliseconds", - "antrea_agent_ovs_flow_delete_error_count", + "antrea_agent_ovs_flow_ops_duration_milliseconds", + "antrea_agent_ovs_flow_ops_error_count", "antrea_agent_ovs_total_flow_count", "antrea_agent_runtime_info", }