Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance testing for prometheus metrics at agent #916

Merged
merged 3 commits into from
Jul 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ docker-test-unit: $(DOCKER_CACHE)
.PHONY: docker-test-integration
docker-test-integration:
@echo "===> Building Antrea Integration Test Docker image <==="
@docker build -t antrea/test -f build/images/test/Dockerfile .
@docker build --pull -t antrea/test -f build/images/test/Dockerfile .
antoninbas marked this conversation as resolved.
Show resolved Hide resolved
@docker run --privileged --rm \
-e "GOCACHE=/tmp/gocache" \
-e "GOPATH=/tmp/gopath" \
Expand Down
18 changes: 14 additions & 4 deletions pkg/agent/metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,6 @@ var (
func InitializePrometheusMetrics() {
klog.Info("Initializing prometheus metrics")

if err := legacyregistry.Register(PodCount); err != nil {
klog.Error("Failed to register antrea_agent_local_pod_count with Prometheus")
}

nodeName, err := env.GetNodeName()
if err != nil {
klog.Errorf("Failed to retrieve agent K8S node name: %v", err)
Expand All @@ -94,6 +90,18 @@ func InitializePrometheusMetrics() {
// and will not measure anything unless the collector is first registered.
gaugeHost.Set(1)

InitializePodMetrics()
InitializeNetworkPolicyMetrics()
InitializeOVSMetrics()
}

func InitializePodMetrics() {
if err := legacyregistry.Register(PodCount); err != nil {
klog.Error("Failed to register antrea_agent_local_pod_count with Prometheus")
}
}

func InitializeNetworkPolicyMetrics() {
if err := legacyregistry.Register(EgressNetworkPolicyRuleCount); err != nil {
klog.Error("Failed to register antrea_agent_egress_networkpolicy_rule_count with Prometheus")
}
Expand All @@ -105,7 +113,9 @@ func InitializePrometheusMetrics() {
if err := legacyregistry.Register(NetworkPolicyCount); err != nil {
klog.Error("Failed to register antrea_agent_networkpolicy_count with Prometheus")
}
}

func InitializeOVSMetrics() {
if err := legacyregistry.Register(OVSTotalFlowCount); err != nil {
klog.Error("Failed to register antrea_agent_ovs_total_flow_count with Prometheus")
}
Expand Down
43 changes: 43 additions & 0 deletions test/integration/agent/cniserver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"io/ioutil"
"net"
"os"
"strings"
"testing"
"text/template"

Expand All @@ -39,13 +40,16 @@ import (
"github.com/stretchr/testify/require"
"github.com/vishvananda/netlink"
k8sFake "k8s.io/client-go/kubernetes/fake"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"

"github.com/vmware-tanzu/antrea/pkg/agent/cniserver"
"github.com/vmware-tanzu/antrea/pkg/agent/cniserver/ipam"
ipamtest "github.com/vmware-tanzu/antrea/pkg/agent/cniserver/ipam/testing"
cniservertest "github.com/vmware-tanzu/antrea/pkg/agent/cniserver/testing"
"github.com/vmware-tanzu/antrea/pkg/agent/config"
"github.com/vmware-tanzu/antrea/pkg/agent/interfacestore"
"github.com/vmware-tanzu/antrea/pkg/agent/metrics"
openflowtest "github.com/vmware-tanzu/antrea/pkg/agent/openflow/testing"
routetest "github.com/vmware-tanzu/antrea/pkg/agent/route/testing"
"github.com/vmware-tanzu/antrea/pkg/agent/util"
Expand Down Expand Up @@ -178,6 +182,7 @@ type testCase struct {
expGatewayCIDRs []string // Expected Gateway addresses in CIDR form
addresses []string
networkConfig string
validateMetrics bool
}

func (tc testCase) netConfJSON(dataDir string) string {
Expand Down Expand Up @@ -497,6 +502,17 @@ func (tester *cmdAddDelTester) cmdCheckTest(tc testCase, conf *Net, dataDir stri

// Find the veth peer in the container namespace and the default route.
tester.checkContainerNetworking(tc)

// If validateMetrics flag is set, check pod count metrics.
if tc.validateMetrics {
expectedStr := `
# HELP antrea_agent_local_pod_count [STABLE] Number of pods on local node which are managed by the Antrea Agent.
# TYPE antrea_agent_local_pod_count gauge
antrea_agent_local_pod_count 1
`
assert.Equal(tc.t, nil, testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedStr), "antrea_agent_local_pod_count"))
}

}

func (tester *cmdAddDelTester) cmdDelTest(tc testCase, dataDir string) {
Expand Down Expand Up @@ -532,6 +548,16 @@ func (tester *cmdAddDelTester) cmdDelTest(tc testCase, dataDir string) {
link, err = linkByName(tester.testNS, tester.vethName)
testRequire.NotNil(err)
testRequire.Nil(link)

// If validateMetrics flag is set, check Pod count metrics.
if tc.validateMetrics {
expectedStr := `
# HELP antrea_agent_local_pod_count [STABLE] Number of pods on local node which are managed by the Antrea Agent.
# TYPE antrea_agent_local_pod_count gauge
antrea_agent_local_pod_count 0
`
assert.Equal(tc.t, nil, testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedStr), "antrea_agent_local_pod_count"))
}
}

func newTester() *cmdAddDelTester {
Expand Down Expand Up @@ -660,9 +686,25 @@ func TestAntreaServerFunc(t *testing.T) {
addresses: []string{"10.1.2.100/24,10.1.2.1,4"},
Routes: []string{"10.0.0.0/8,10.1.2.1", "0.0.0.0/0,10.1.2.1"},
},
{
name: "Prometheus metrics test with ADD/DEL/CHECK for 0.4.0 config",
CNIVersion: "0.4.0",
// IPv4 only
ranges: []rangeInfo{{
subnet: "10.1.2.0/24",
}},
expGatewayCIDRs: []string{"10.1.2.1/24"},
addresses: []string{"10.1.2.100/24,10.1.2.1,4"},
Routes: []string{"10.0.0.0/8,10.1.2.1", "0.0.0.0/0,10.1.2.1"},
validateMetrics: true,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
if strings.Contains(tc.name, "Prometheus") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why don't we add a new validateMetrics boolean field to the testCase struct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just went with slightly hacky approach, since it is only one testcase. Added a boolean in testcase.

// Initialize pod metrics
srikartati marked this conversation as resolved.
Show resolved Hide resolved
metrics.InitializePodMetrics()
}
setup()
defer teardown()
tc.t = t
Expand Down Expand Up @@ -711,6 +753,7 @@ func TestCNIServerChaining(t *testing.T) {
if _, inContainer := os.LookupEnv("INCONTAINER"); !inContainer {
t.Skipf("Skip test runs only in container")
}

testRequire := require.New(t)
controller := mock.NewController(t)
defer controller.Finish()
Expand Down
35 changes: 35 additions & 0 deletions test/integration/agent/openflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/component-base/metrics/testutil"

config1 "github.com/vmware-tanzu/antrea/pkg/agent/config"
"github.com/vmware-tanzu/antrea/pkg/agent/metrics"
ofClient "github.com/vmware-tanzu/antrea/pkg/agent/openflow"
"github.com/vmware-tanzu/antrea/pkg/agent/types"
"github.com/vmware-tanzu/antrea/pkg/apis/networking/v1beta1"
Expand Down Expand Up @@ -84,6 +87,9 @@ type testConfig struct {
}

func TestConnectivityFlows(t *testing.T) {
// Initialize ovs metrics (Prometheus) to test them
metrics.InitializeOVSMetrics()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that we call metrics.InitializeOVSMetrics() in multiple tests. I assume there is no issue with calling the function multiple times?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I made sure by calling this multiple times in same test. If the metric is already registered, the registry ignores subsequent calls.


c = ofClient.NewClient(br, bridgeMgmtAddr, true)
err := ofTestUtils.PrepareOVSBridge(br)
require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge: %v", err))
Expand Down Expand Up @@ -220,6 +226,7 @@ func testInitialize(t *testing.T, config *testConfig) {
for _, tableFlow := range prepareDefaultFlows() {
ofTestUtils.CheckFlowExists(t, ovsCtlClient, tableFlow.tableID, true, tableFlow.flows)
}
checkOVSFlowMetrics(t, c)
}

func testInstallTunnelFlows(t *testing.T, config *testConfig) {
Expand Down Expand Up @@ -295,6 +302,9 @@ func testUninstallPodFlows(t *testing.T, config *testConfig) {
}

func TestNetworkPolicyFlows(t *testing.T) {
// Initialize ovs metrics (Prometheus) to test them
metrics.InitializeOVSMetrics()

c = ofClient.NewClient(br, bridgeMgmtAddr, true)
err := ofTestUtils.PrepareOVSBridge(br)
require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge %s", br))
Expand Down Expand Up @@ -376,6 +386,8 @@ func TestNetworkPolicyFlows(t *testing.T) {
t.Errorf("Failed to install conjunctive match flow")
}
require.True(t, ofTestUtils.OfctlFlowMatch(flowList, ingressRuleTable, flow3), "Failed to install service flow")
checkOVSFlowMetrics(t, c)

_, err = c.UninstallPolicyRuleFlows(ruleID2)
require.Nil(t, err, "Failed to InstallPolicyRuleFlows")
checkDefaultDropFlows(t, ingressDefaultTable, priorityNormal, types.DstAddress, toIPList2, true)
Expand All @@ -384,6 +396,7 @@ func TestNetworkPolicyFlows(t *testing.T) {
require.Nil(t, err, "Failed to DeletePolicyRuleService")
checkConjunctionFlows(t, ingressRuleTable, ingressDefaultTable, contrackCommitTable, priorityNormal, ruleID, rule, assert.False)
checkDefaultDropFlows(t, ingressDefaultTable, priorityNormal, types.DstAddress, toIPList, false)
checkOVSFlowMetrics(t, c)
}

func checkDefaultDropFlows(t *testing.T, table uint8, priority int, addrType types.AddressType, addresses []types.Address, add bool) {
Expand Down Expand Up @@ -510,6 +523,28 @@ func checkConjunctionFlows(t *testing.T, ruleTable uint8, dropTable uint8, allow
}
}

func checkOVSFlowMetrics(t *testing.T, client ofClient.Client) {
expectedFlowCount := `
# HELP antrea_agent_ovs_flow_count [STABLE] Flow count for each OVS flow table. The TableID is used as a label.
# TYPE antrea_agent_ovs_flow_count gauge
`
tableStatus := client.GetFlowTableStatus()
totalFlowCount := 0
for _, table := range tableStatus {
expectedFlowCount = expectedFlowCount + fmt.Sprintf("antrea_agent_ovs_flow_count{table_id=\"%d\"} %d\n", table.ID, table.FlowCount)
totalFlowCount = totalFlowCount + int(table.FlowCount)
}
expectedTotalFlowCount := `
# HELP antrea_agent_ovs_total_flow_count [STABLE] Total flow count of all OVS flow tables.
# TYPE antrea_agent_ovs_total_flow_count gauge
`
expectedTotalFlowCount = expectedTotalFlowCount + fmt.Sprintf("antrea_agent_ovs_total_flow_count %d\n", totalFlowCount)

assert.Equal(t, nil, testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedTotalFlowCount), "antrea_agent_ovs_total_flow_count"))
assert.Equal(t, nil, testutil.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(expectedFlowCount), "antrea_agent_ovs_flow_count"))

}

func testInstallGatewayFlows(t *testing.T, config *testConfig) {
err := c.InstallGatewayFlows(config.localGateway.ip, config.localGateway.mac, config.localGateway.ofPort)
if err != nil {
Expand Down