Skip to content

Commit

Permalink
Add support for OVS flow operations metrics on node (antrea-io#866)
Browse files Browse the repository at this point in the history
- Number of OVS flow operations, partitioned by operation (add, modify and delete)
- Number of OVS flow operation errors, partitioned by operation (add, modify and delete)
- Latency of OVS flow operations, partitioned by operation (add, modify and delete)

Signed-off-by: Yuki Tsuboi <ytsuboi@vmware.com>
  • Loading branch information
Yuki Tsuboi committed Jul 3, 2020
1 parent 664bc0c commit 63c68f7
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 8 deletions.
45 changes: 45 additions & 0 deletions pkg/agent/metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,33 @@ var (
Help: "Flow count for each OVS flow table. The TableID is used as a label.",
StabilityLevel: metrics.STABLE,
}, []string{"table_id"})

OVSFlowOpsCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "antrea_agent_ovs_flow_ops_count",
Help: "Number of OVS flow operations, partitioned by operations(add, modify and delete).",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation"},
)

OVSFlowOpsErrorCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Name: "antrea_agent_ovs_flow_ops_error_count",
Help: "Number of OVS flow operation errors, partitioned by operations(add, modify and delete).",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation"},
)

OVSFlowOpsLateyncy = metrics.NewHistogramVec(
&metrics.HistogramOpts{
Name: "antrea_agent_ovs_flow_ops_latency_milliseconds",
Help: "Latency of OVS flow operations, partitioned by operations(add, modify and delete).",
StabilityLevel: metrics.ALPHA,
},
[]string{"operation"},
)
)

func InitializePrometheusMetrics() {
Expand Down Expand Up @@ -112,4 +139,22 @@ func InitializePrometheusMetrics() {
if err := legacyregistry.Register(OVSFlowCount); err != nil {
klog.Error("Failed to register antrea_agent_ovs_flow_count with Prometheus")
}

if err := legacyregistry.Register(OVSFlowOpsCount); err != nil {
klog.Error("Failed to register antrea_agent_ovs_flow_ops_count with Prometheus")
}
if err := legacyregistry.Register(OVSFlowOpsErrorCount); err != nil {
klog.Error("Failed to register antrea_agent_ovs_flow_ops_error_count with Prometheus")
}
if err := legacyregistry.Register(OVSFlowOpsLateyncy); err != nil {
klog.Error("Failed to register antrea_agent_ovs_flow_ops_latency_milliseconds with Prometheus")
}
// Pre-create the "add", "modify" and "delete" label values for the OpenFlow
// operation metrics; otherwise they would not be exported until the first observation.
opsArray := [3]string{"add", "modify", "delete"}
for _, ops := range opsArray {
OVSFlowOpsCount.WithLabelValues(ops)
OVSFlowOpsErrorCount.WithLabelValues(ops)
OVSFlowOpsLateyncy.WithLabelValues(ops)
}
}
86 changes: 79 additions & 7 deletions pkg/agent/openflow/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,12 @@ import (
"strconv"
"strings"
"sync"
"time"

"k8s.io/client-go/tools/cache"

"github.com/vmware-tanzu/antrea/pkg/agent/config"
"github.com/vmware-tanzu/antrea/pkg/agent/metrics"
"github.com/vmware-tanzu/antrea/pkg/agent/openflow/cookie"
"github.com/vmware-tanzu/antrea/pkg/agent/types"
"github.com/vmware-tanzu/antrea/pkg/features"
Expand Down Expand Up @@ -257,31 +259,101 @@ func (c *client) GetTunnelVirtualMAC() net.HardwareAddr {
}

// Add submits a single flow to the bridge through AddFlowsInBundle (as the
// only element of its first argument) and records the outcome under the
// "add" operation label.
//
// The elapsed time, truncated to whole milliseconds, is always observed in
// metrics.OVSFlowOpsLateyncy. On failure metrics.OVSFlowOpsErrorCount is
// incremented and the bridge error is returned unchanged; on success
// metrics.OVSFlowOpsCount is incremented and nil is returned.
func (c *client) Add(flow binding.Flow) error {
	startTime := time.Now()
	// Deferred so the latency is recorded on both the success and error paths.
	defer func() {
		d := time.Since(startTime)
		metrics.OVSFlowOpsLateyncy.WithLabelValues("add").Observe(float64(d.Milliseconds()))
	}()
	if err := c.bridge.AddFlowsInBundle([]binding.Flow{flow}, nil, nil); err != nil {
		metrics.OVSFlowOpsErrorCount.WithLabelValues("add").Inc()
		return err
	}
	metrics.OVSFlowOpsCount.WithLabelValues("add").Inc()
	return nil
}

// Modify submits a single flow to the bridge through AddFlowsInBundle (as the
// only element of its second argument) and records the outcome under the
// "modify" operation label.
//
// The elapsed time, truncated to whole milliseconds, is always observed in
// metrics.OVSFlowOpsLateyncy. On failure metrics.OVSFlowOpsErrorCount is
// incremented and the bridge error is returned unchanged; on success
// metrics.OVSFlowOpsCount is incremented and nil is returned.
func (c *client) Modify(flow binding.Flow) error {
	startTime := time.Now()
	// Deferred so the latency is recorded on both the success and error paths.
	defer func() {
		d := time.Since(startTime)
		metrics.OVSFlowOpsLateyncy.WithLabelValues("modify").Observe(float64(d.Milliseconds()))
	}()
	if err := c.bridge.AddFlowsInBundle(nil, []binding.Flow{flow}, nil); err != nil {
		metrics.OVSFlowOpsErrorCount.WithLabelValues("modify").Inc()
		return err
	}
	metrics.OVSFlowOpsCount.WithLabelValues("modify").Inc()
	return nil
}

// Delete submits a single flow to the bridge through AddFlowsInBundle (as the
// only element of its third argument) and records the outcome under the
// "delete" operation label.
//
// The elapsed time, truncated to whole milliseconds, is always observed in
// metrics.OVSFlowOpsLateyncy. On failure metrics.OVSFlowOpsErrorCount is
// incremented and the bridge error is returned unchanged; on success
// metrics.OVSFlowOpsCount is incremented and nil is returned.
func (c *client) Delete(flow binding.Flow) error {
	startTime := time.Now()
	// Deferred so the latency is recorded on both the success and error paths.
	defer func() {
		d := time.Since(startTime)
		metrics.OVSFlowOpsLateyncy.WithLabelValues("delete").Observe(float64(d.Milliseconds()))
	}()
	if err := c.bridge.AddFlowsInBundle(nil, nil, []binding.Flow{flow}); err != nil {
		metrics.OVSFlowOpsErrorCount.WithLabelValues("delete").Inc()
		return err
	}
	metrics.OVSFlowOpsCount.WithLabelValues("delete").Inc()
	return nil
}

// AddAll submits flows to the bridge in one AddFlowsInBundle call (as its
// first argument) and records the outcome under the "add" operation label.
//
// Metrics are recorded once per call, not once per flow: the elapsed time,
// truncated to whole milliseconds, is always observed in
// metrics.OVSFlowOpsLateyncy; on failure metrics.OVSFlowOpsErrorCount is
// incremented and the bridge error is returned unchanged; on success
// metrics.OVSFlowOpsCount is incremented and nil is returned.
func (c *client) AddAll(flows []binding.Flow) error {
	startTime := time.Now()
	// Deferred so the latency is recorded on both the success and error paths.
	defer func() {
		d := time.Since(startTime)
		metrics.OVSFlowOpsLateyncy.WithLabelValues("add").Observe(float64(d.Milliseconds()))
	}()
	if err := c.bridge.AddFlowsInBundle(flows, nil, nil); err != nil {
		metrics.OVSFlowOpsErrorCount.WithLabelValues("add").Inc()
		return err
	}
	metrics.OVSFlowOpsCount.WithLabelValues("add").Inc()
	return nil
}

// DeleteAll submits flows to the bridge in one AddFlowsInBundle call (as its
// third argument) and records the outcome under the "delete" operation label.
//
// Metrics are recorded once per call, not once per flow: the elapsed time,
// truncated to whole milliseconds, is always observed in
// metrics.OVSFlowOpsLateyncy; on failure metrics.OVSFlowOpsErrorCount is
// incremented and the bridge error is returned unchanged; on success
// metrics.OVSFlowOpsCount is incremented and nil is returned.
func (c *client) DeleteAll(flows []binding.Flow) error {
	startTime := time.Now()
	// Deferred so the latency is recorded on both the success and error paths.
	defer func() {
		d := time.Since(startTime)
		metrics.OVSFlowOpsLateyncy.WithLabelValues("delete").Observe(float64(d.Milliseconds()))
	}()
	if err := c.bridge.AddFlowsInBundle(nil, nil, flows); err != nil {
		metrics.OVSFlowOpsErrorCount.WithLabelValues("delete").Inc()
		return err
	}
	metrics.OVSFlowOpsCount.WithLabelValues("delete").Inc()
	return nil
}

// AddOFEntries submits ofEntries to the bridge in one AddOFEntriesInBundle
// call (as its first argument) and records the outcome under the "add"
// operation label.
//
// Metrics are recorded once per call, not once per entry: the elapsed time,
// truncated to whole milliseconds, is always observed in
// metrics.OVSFlowOpsLateyncy; on failure metrics.OVSFlowOpsErrorCount is
// incremented and the bridge error is returned unchanged; on success
// metrics.OVSFlowOpsCount is incremented and nil is returned.
func (c *client) AddOFEntries(ofEntries []binding.OFEntry) error {
	startTime := time.Now()
	// Deferred so the latency is recorded on both the success and error paths.
	defer func() {
		d := time.Since(startTime)
		metrics.OVSFlowOpsLateyncy.WithLabelValues("add").Observe(float64(d.Milliseconds()))
	}()
	if err := c.bridge.AddOFEntriesInBundle(ofEntries, nil, nil); err != nil {
		metrics.OVSFlowOpsErrorCount.WithLabelValues("add").Inc()
		return err
	}
	metrics.OVSFlowOpsCount.WithLabelValues("add").Inc()
	return nil
}

// DeleteOFEntries submits ofEntries to the bridge in one AddOFEntriesInBundle
// call (as its third argument) and records the outcome under the "delete"
// operation label.
//
// Metrics are recorded once per call, not once per entry: the elapsed time,
// truncated to whole milliseconds, is always observed in
// metrics.OVSFlowOpsLateyncy; on failure metrics.OVSFlowOpsErrorCount is
// incremented and the bridge error is returned unchanged; on success
// metrics.OVSFlowOpsCount is incremented and nil is returned.
func (c *client) DeleteOFEntries(ofEntries []binding.OFEntry) error {
	startTime := time.Now()
	// Deferred so the latency is recorded on both the success and error paths.
	defer func() {
		d := time.Since(startTime)
		metrics.OVSFlowOpsLateyncy.WithLabelValues("delete").Observe(float64(d.Milliseconds()))
	}()
	if err := c.bridge.AddOFEntriesInBundle(nil, nil, ofEntries); err != nil {
		metrics.OVSFlowOpsErrorCount.WithLabelValues("delete").Inc()
		return err
	}
	metrics.OVSFlowOpsCount.WithLabelValues("delete").Inc()
	return nil
}

// defaultFlows generates the default flows of all tables.
Expand Down
5 changes: 4 additions & 1 deletion test/e2e/prometheus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ var antreaAgentMetrics = []string{
"antrea_agent_ingress_networkpolicy_rule_count",
"antrea_agent_local_pod_count",
"antrea_agent_networkpolicy_count",
"antrea_agent_ovs_total_flow_count",
"antrea_agent_ovs_flow_count",
"antrea_agent_ovs_flow_ops_count",
"antrea_agent_ovs_flow_ops_error_count",
"antrea_agent_ovs_flow_ops_latency_milliseconds",
"antrea_agent_ovs_total_flow_count",
"antrea_agent_runtime_info",
}

Expand Down

0 comments on commit 63c68f7

Please sign in to comment.