Skip to content

Commit

Permalink
Enhance testing for prometheus metrics at agent (antrea-io#916)
Browse files Browse the repository at this point in the history
* Enhance testing for prometheus metrics at agent

Added testing for following metrics in integration tests:
antrea_agent_ovs_total_flow_count
antrea_agent_ovs_flow_count
antrea_agent_local_pod_count

Fixes#799

* Changes to Makefile to run integration test on MacOS

When running integration test on MacOS, hit an error because of an old
docker image for antrea/openvswitch:2.13.0. Pulling that docker image
explicitly which is used as base image for test container image.

* Addressed latest comments
  • Loading branch information
srikartati authored and GraysonWu committed Sep 18, 2020
1 parent 4590df4 commit d865452
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 5 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ docker-test-unit: $(DOCKER_CACHE)
.PHONY: docker-test-integration
docker-test-integration:
@echo "===> Building Antrea Integration Test Docker image <==="
@docker build -t antrea/test -f build/images/test/Dockerfile .
@docker build --pull -t antrea/test -f build/images/test/Dockerfile .
@docker run --privileged --rm \
-e "GOCACHE=/tmp/gocache" \
-e "GOPATH=/tmp/gopath" \
Expand Down
18 changes: 14 additions & 4 deletions pkg/agent/metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,6 @@ var (
func InitializePrometheusMetrics() {
klog.Info("Initializing prometheus metrics")

if err := legacyregistry.Register(PodCount); err != nil {
klog.Error("Failed to register antrea_agent_local_pod_count with Prometheus")
}

nodeName, err := env.GetNodeName()
if err != nil {
klog.Errorf("Failed to retrieve agent K8S node name: %v", err)
Expand All @@ -94,6 +90,18 @@ func InitializePrometheusMetrics() {
// and will not measure anything unless the collector is first registered.
gaugeHost.Set(1)

InitializePodMetrics()
InitializeNetworkPolicyMetrics()
InitializeOVSMetrics()
}

func InitializePodMetrics() {
if err := legacyregistry.Register(PodCount); err != nil {
klog.Error("Failed to register antrea_agent_local_pod_count with Prometheus")
}
}

func InitializeNetworkPolicyMetrics() {
if err := legacyregistry.Register(EgressNetworkPolicyRuleCount); err != nil {
klog.Error("Failed to register antrea_agent_egress_networkpolicy_rule_count with Prometheus")
}
Expand All @@ -105,7 +113,9 @@ func InitializePrometheusMetrics() {
if err := legacyregistry.Register(NetworkPolicyCount); err != nil {
klog.Error("Failed to register antrea_agent_networkpolicy_count with Prometheus")
}
}

func InitializeOVSMetrics() {
if err := legacyregistry.Register(OVSTotalFlowCount); err != nil {
klog.Error("Failed to register antrea_agent_ovs_total_flow_count with Prometheus")
}
Expand Down
43 changes: 43 additions & 0 deletions test/integration/agent/cniserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"io/ioutil"
"net"
"os"
"strings"
"testing"
"text/template"

Expand All @@ -39,13 +40,16 @@ import (
"github.com/stretchr/testify/require"
"github.com/vishvananda/netlink"
k8sFake "k8s.io/client-go/kubernetes/fake"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"

"github.com/vmware-tanzu/antrea/pkg/agent/cniserver"
"github.com/vmware-tanzu/antrea/pkg/agent/cniserver/ipam"
ipamtest "github.com/vmware-tanzu/antrea/pkg/agent/cniserver/ipam/testing"
cniservertest "github.com/vmware-tanzu/antrea/pkg/agent/cniserver/testing"
"github.com/vmware-tanzu/antrea/pkg/agent/config"
"github.com/vmware-tanzu/antrea/pkg/agent/interfacestore"
"github.com/vmware-tanzu/antrea/pkg/agent/metrics"
openflowtest "github.com/vmware-tanzu/antrea/pkg/agent/openflow/testing"
routetest "github.com/vmware-tanzu/antrea/pkg/agent/route/testing"
"github.com/vmware-tanzu/antrea/pkg/agent/util"
Expand Down Expand Up @@ -178,6 +182,7 @@ type testCase struct {
expGatewayCIDRs []string // Expected Gateway addresses in CIDR form
addresses []string
networkConfig string
validateMetrics bool
}

func (tc testCase) netConfJSON(dataDir string) string {
Expand Down Expand Up @@ -497,6 +502,17 @@ func (tester *cmdAddDelTester) cmdCheckTest(tc testCase, conf *Net, dataDir stri

// Find the veth peer in the container namespace and the default route.
tester.checkContainerNetworking(tc)

// If validateMetrics flag is set, check pod count metrics.
if tc.validateMetrics {
expectedStr := `
# HELP antrea_agent_local_pod_count [STABLE] Number of pods on local node which are managed by the Antrea Agent.
# TYPE antrea_agent_local_pod_count gauge
antrea_agent_local_pod_count 1
`
assert.Equal(tc.t, nil, testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedStr), "antrea_agent_local_pod_count"))
}

}

func (tester *cmdAddDelTester) cmdDelTest(tc testCase, dataDir string) {
Expand Down Expand Up @@ -532,6 +548,16 @@ func (tester *cmdAddDelTester) cmdDelTest(tc testCase, dataDir string) {
link, err = linkByName(tester.testNS, tester.vethName)
testRequire.NotNil(err)
testRequire.Nil(link)

// If validateMetrics flag is set, check Pod count metrics.
if tc.validateMetrics {
expectedStr := `
# HELP antrea_agent_local_pod_count [STABLE] Number of pods on local node which are managed by the Antrea Agent.
# TYPE antrea_agent_local_pod_count gauge
antrea_agent_local_pod_count 0
`
assert.Equal(tc.t, nil, testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedStr), "antrea_agent_local_pod_count"))
}
}

func newTester() *cmdAddDelTester {
Expand Down Expand Up @@ -660,9 +686,25 @@ func TestAntreaServerFunc(t *testing.T) {
addresses: []string{"10.1.2.100/24,10.1.2.1,4"},
Routes: []string{"10.0.0.0/8,10.1.2.1", "0.0.0.0/0,10.1.2.1"},
},
{
name: "Prometheus metrics test with ADD/DEL/CHECK for 0.4.0 config",
CNIVersion: "0.4.0",
// IPv4 only
ranges: []rangeInfo{{
subnet: "10.1.2.0/24",
}},
expGatewayCIDRs: []string{"10.1.2.1/24"},
addresses: []string{"10.1.2.100/24,10.1.2.1,4"},
Routes: []string{"10.0.0.0/8,10.1.2.1", "0.0.0.0/0,10.1.2.1"},
validateMetrics: true,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
if strings.Contains(tc.name, "Prometheus") {
// Initialize pod metrics
metrics.InitializePodMetrics()
}
setup()
defer teardown()
tc.t = t
Expand Down Expand Up @@ -711,6 +753,7 @@ func TestCNIServerChaining(t *testing.T) {
if _, inContainer := os.LookupEnv("INCONTAINER"); !inContainer {
t.Skipf("Skip test runs only in container")
}

testRequire := require.New(t)
controller := mock.NewController(t)
defer controller.Finish()
Expand Down
35 changes: 35 additions & 0 deletions test/integration/agent/openflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"

config1 "github.com/vmware-tanzu/antrea/pkg/agent/config"
"github.com/vmware-tanzu/antrea/pkg/agent/metrics"
ofClient "github.com/vmware-tanzu/antrea/pkg/agent/openflow"
"github.com/vmware-tanzu/antrea/pkg/agent/types"
"github.com/vmware-tanzu/antrea/pkg/apis/networking/v1beta1"
Expand Down Expand Up @@ -84,6 +87,9 @@ type testConfig struct {
}

func TestConnectivityFlows(t *testing.T) {
// Initialize ovs metrics (Prometheus) to test them
metrics.InitializeOVSMetrics()

c = ofClient.NewClient(br, bridgeMgmtAddr, true)
err := ofTestUtils.PrepareOVSBridge(br)
require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge: %v", err))
Expand Down Expand Up @@ -220,6 +226,7 @@ func testInitialize(t *testing.T, config *testConfig) {
for _, tableFlow := range prepareDefaultFlows() {
ofTestUtils.CheckFlowExists(t, ovsCtlClient, tableFlow.tableID, true, tableFlow.flows)
}
checkOVSFlowMetrics(t, c)
}

func testInstallTunnelFlows(t *testing.T, config *testConfig) {
Expand Down Expand Up @@ -295,6 +302,9 @@ func testUninstallPodFlows(t *testing.T, config *testConfig) {
}

func TestNetworkPolicyFlows(t *testing.T) {
// Initialize ovs metrics (Prometheus) to test them
metrics.InitializeOVSMetrics()

c = ofClient.NewClient(br, bridgeMgmtAddr, true)
err := ofTestUtils.PrepareOVSBridge(br)
require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge %s", br))
Expand Down Expand Up @@ -376,6 +386,8 @@ func TestNetworkPolicyFlows(t *testing.T) {
t.Errorf("Failed to install conjunctive match flow")
}
require.True(t, ofTestUtils.OfctlFlowMatch(flowList, ingressRuleTable, flow3), "Failed to install service flow")
checkOVSFlowMetrics(t, c)

_, err = c.UninstallPolicyRuleFlows(ruleID2)
require.Nil(t, err, "Failed to InstallPolicyRuleFlows")
checkDefaultDropFlows(t, ingressDefaultTable, priorityNormal, types.DstAddress, toIPList2, true)
Expand All @@ -384,6 +396,7 @@ func TestNetworkPolicyFlows(t *testing.T) {
require.Nil(t, err, "Failed to DeletePolicyRuleService")
checkConjunctionFlows(t, ingressRuleTable, ingressDefaultTable, contrackCommitTable, priorityNormal, ruleID, rule, assert.False)
checkDefaultDropFlows(t, ingressDefaultTable, priorityNormal, types.DstAddress, toIPList, false)
checkOVSFlowMetrics(t, c)
}

func checkDefaultDropFlows(t *testing.T, table uint8, priority int, addrType types.AddressType, addresses []types.Address, add bool) {
Expand Down Expand Up @@ -510,6 +523,28 @@ func checkConjunctionFlows(t *testing.T, ruleTable uint8, dropTable uint8, allow
}
}

func checkOVSFlowMetrics(t *testing.T, client ofClient.Client) {
expectedFlowCount := `
# HELP antrea_agent_ovs_flow_count [STABLE] Flow count for each OVS flow table. The TableID is used as a label.
# TYPE antrea_agent_ovs_flow_count gauge
`
tableStatus := client.GetFlowTableStatus()
totalFlowCount := 0
for _, table := range tableStatus {
expectedFlowCount = expectedFlowCount + fmt.Sprintf("antrea_agent_ovs_flow_count{table_id=\"%d\"} %d\n", table.ID, table.FlowCount)
totalFlowCount = totalFlowCount + int(table.FlowCount)
}
expectedTotalFlowCount := `
# HELP antrea_agent_ovs_total_flow_count [STABLE] Total flow count of all OVS flow tables.
# TYPE antrea_agent_ovs_total_flow_count gauge
`
expectedTotalFlowCount = expectedTotalFlowCount + fmt.Sprintf("antrea_agent_ovs_total_flow_count %d\n", totalFlowCount)

assert.Equal(t, nil, testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedTotalFlowCount), "antrea_agent_ovs_total_flow_count"))
assert.Equal(t, nil, testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedFlowCount), "antrea_agent_ovs_flow_count"))

}

func testInstallGatewayFlows(t *testing.T, config *testConfig) {
err := c.InstallGatewayFlows(config.localGateway.ip, config.localGateway.mac, config.localGateway.ofPort)
if err != nil {
Expand Down

0 comments on commit d865452

Please sign in to comment.