Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Catch GR test failing #213

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ KIND_EXPORT_LOGS ?=/tmp/kind_logs
.PHONY: kind-export-logs
kind-export-logs:
$(LOCALBIN)/kind export logs --name ${KIND_CLUSTER_NAME} ${KIND_EXPORT_LOGS}
for node in $(shell docker ps -q); do \
docker cp "$$node:/home" "$(KIND_EXPORT_LOGS)/home-$$node"; \
done

.PHONY: generate-all-in-one
generate-all-in-one: manifests kustomize ## Create manifests
Expand Down
5 changes: 5 additions & 0 deletions config/all-in-one/frr-k8s-prometheus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,8 @@ spec:
port: 7573
periodSeconds: 5
volumeMounts:
- mountPath: /tmp/cores
name: core-path
- mountPath: /var/run/frr
name: frr-sockets
- mountPath: /etc/frr
Expand Down Expand Up @@ -1286,6 +1288,9 @@ spec:
key: node-role.kubernetes.io/control-plane
operator: Exists
volumes:
- hostPath:
path: /home/core-dump
name: core-path
- emptyDir: {}
name: frr-sockets
- configMap:
Expand Down
5 changes: 5 additions & 0 deletions config/all-in-one/frr-k8s.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1178,6 +1178,8 @@ spec:
port: 7573
periodSeconds: 5
volumeMounts:
- mountPath: /tmp/cores
name: core-path
- mountPath: /var/run/frr
name: frr-sockets
- mountPath: /etc/frr
Expand Down Expand Up @@ -1255,6 +1257,9 @@ spec:
key: node-role.kubernetes.io/control-plane
operator: Exists
volumes:
- hostPath:
path: /home/core-dump
name: core-path
- emptyDir: {}
name: frr-sockets
- configMap:
Expand Down
5 changes: 5 additions & 0 deletions config/frr-k8s/frr-k8s.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ spec:
- name: TINI_SUBREAPER
value: "true"
volumeMounts:
- mountPath: /tmp/cores
name: core-path
- name: frr-sockets
mountPath: /var/run/frr
- name: frr-conf
Expand Down Expand Up @@ -169,6 +171,9 @@ spec:
key: node-role.kubernetes.io/control-plane
operator: Exists
volumes:
- name: core-path
hostPath:
path: /home/core-dump
- name: frr-sockets
emptyDir: {}
- name: frr-startup
Expand Down
173 changes: 0 additions & 173 deletions e2etests/go.work.sum

Large diffs are not rendered by default.

149 changes: 129 additions & 20 deletions e2etests/tests/graceful_restart.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
package tests

import (
"errors"
"fmt"
"time"

"github.com/onsi/ginkgo/v2"
"github.com/openshift-kni/k8sreporter"
"go.universe.tf/e2etest/pkg/frr/container"
frrcontainer "go.universe.tf/e2etest/pkg/frr/container"

"go.universe.tf/e2etest/pkg/executor"
"go.universe.tf/e2etest/pkg/frr"

frrk8sv1beta1 "github.com/metallb/frr-k8s/api/v1beta1"
"github.com/metallb/frrk8stests/pkg/config"
Expand All @@ -28,10 +33,11 @@ import (

var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func() {
var (
cs clientset.Interface
updater *config.Updater
reporter *k8sreporter.KubernetesReporter
nodes []corev1.Node
cs clientset.Interface
updater *config.Updater
reporter *k8sreporter.KubernetesReporter
nodes []corev1.Node
prefixesV4 = scaleUP(100)
)

cleanup := func(u *config.Updater) error {
Expand Down Expand Up @@ -63,17 +69,23 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func
})

ginkgo.AfterEach(func() {

seed := ginkgo.GinkgoRandomSeed()
testName := fmt.Sprintf("%s-%d", ginkgo.CurrentSpecReport().LeafNodeText, seed)
if ginkgo.CurrentSpecReport().Failed() {
testName := ginkgo.CurrentSpecReport().LeafNodeText
dump.K8sInfo(testName, reporter)
dump.BGPInfo(testName, infra.FRRContainers, cs)
testName += "-failed"
}
ginkgo.By(testName)
dump.K8sInfo(testName, reporter)
dump.BGPInfo(testName, infra.FRRContainers, cs)
})

ginkgo.Context("When restarting the frrk8s deamon pods", func() {

ginkgo.DescribeTable("external BGP peer maintains routes", func(ipFam ipfamily.Family, prefix string) {
ginkgo.DescribeTable("external BGP peer maintains routes", func(ipFam ipfamily.Family, prefix []string) {
frrs := config.ContainersForVRF(infra.FRRContainers, "")
// cnt, err := config.ContainerByName(infra.FRRContainers, "ebgp-multi-hop")
// frrs := []*frrcontainer.FRR{cnt}
for _, c := range frrs {
err := container.PairWithNodes(cs, c, ipFam)
Expect(err).NotTo(HaveOccurred(), "set frr config in infra containers failed")
Expand All @@ -88,48 +100,145 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func
Namespace: k8s.FRRK8sNamespace,
},
Spec: frrk8sv1beta1.FRRConfigurationSpec{
// NodeSelector: metav1.LabelSelector{
// MatchLabels: map[string]string{
// "kubernetes.io/hostname": nodes[0].GetLabels()["kubernetes.io/hostname"],
// },
// },
BGP: frrk8sv1beta1.BGPConfig{
Routers: []frrk8sv1beta1.Router{
{
ASN: infra.FRRK8sASN,
Neighbors: config.NeighborsFromPeers(peersConfig.PeersV4, peersConfig.PeersV6),
Prefixes: []string{prefix},
Prefixes: prefix,
},
},
},
},
}
ginkgo.By("Before GR test")

err := updater.Update(peersConfig.Secrets, frrConfigCR)
Expect(err).NotTo(HaveOccurred(), "apply the CR in k8s api failed")

check := func() error {
Eventually(func() error {
for _, p := range peersConfig.Peers() {
err := routes.CheckNeighborHasPrefix(p.FRR, p.FRR.RouterConfig.VRF, prefix, nodes)
ValidateFRRPeeredWithNodes(nodes, &p.FRR, ipFam)
neighbors, err := frr.NeighborsInfo(p.FRR)
Expect(err).NotTo(HaveOccurred())
for _, n := range neighbors {
Expect(n.GRInfo.RemoteGrMode).Should(Equal("Restart"))
}

err = routes.CheckNeighborHasPrefix(p.FRR, p.FRR.RouterConfig.VRF, prefix[0], nodes)
if err != nil {
return fmt.Errorf("Neigh %s does not have prefix %s: %w", p.FRR.Name, prefix, err)
return fmt.Errorf("Neigh %s does not have prefixes: %w", p.FRR.Name, err)
}
}
return nil
}

Eventually(check, time.Minute, time.Second).ShouldNot(HaveOccurred(),
"route should exist before we restart frr-k8s")
}, time.Minute, time.Second).ShouldNot(HaveOccurred(), "route should exist before we restart frr-k8s")

ginkgo.By("Start GR test")
c := make(chan struct{})
go func() { // go restart frr-k8s while Consistently check that route exists
defer ginkgo.GinkgoRecover()
err := k8s.RestartFRRK8sPods(cs)
Expect(err).NotTo(HaveOccurred(), "frr-k8s pods failed to restart")
for _, p := range peersConfig.Peers() {
ValidateFRRPeeredWithNodes(nodes, &p.FRR, ipFam)
}
ginkgo.By("FRRK8s pod restarted and BGP established")
close(c)
}()

check := func() error {
var returnError error

for _, p := range peersConfig.Peers() {
err := checkRoutes(p.FRR, prefix)
if err != nil {
returnError = errors.Join(returnError, fmt.Errorf("Neigh %s : %w", p.FRR.Name, err))
for i := 0; i < 20; i++ {
if err := checkRoutes(p.FRR, prefix); err != nil {
ginkgo.By(fmt.Sprintf("%d Neigh %s does NOT have prefix %v", i, p.FRR.Name, err))
} else {
ginkgo.By(fmt.Sprintf("%d Neigh %s does have prefix", i, p.FRR.Name))
}
time.Sleep(time.Second)
}
}
}
return returnError
}

// 2*time.Minute is important because that is the Graceful Restart timer.
Consistently(check, 2*time.Minute, time.Second).ShouldNot(HaveOccurred())
Consistently(check, 30*time.Second, time.Second).ShouldNot(HaveOccurred())
Eventually(c, time.Minute, time.Second).Should(BeClosed(), "restart FRRK8s pods are not yet ready")
},
ginkgo.Entry("IPV4", ipfamily.IPv4, "192.168.2.0/24"),
ginkgo.Entry("IPV6", ipfamily.IPv6, "fc00:f853:ccd:e799::/64"),
ginkgo.Entry("IPV4", ipfamily.IPv4, prefixesV4),
// ginkgo.Entry("IPV6", ipfamily.IPv6, []string{"2001:db8:5555::5/128"}),
)
})
})

// checkRoutes reports whether every prefix in want shows up among the routes
// that frr.Routes returns for the container cnt.
//
// Only the first value returned by frr.Routes is inspected (presumably the
// IPv4 routes; confirm against the frr package). A failure of frr.Routes
// itself is deliberately treated as success, so that transient docker exec
// errors do not fail the caller.
//
// It returns an error when want is empty, when no routes were returned at all
// (in which case the raw BGP route table is also printed through IPRoutes),
// or when one or more wanted prefixes are absent.
func checkRoutes(cnt frrcontainer.FRR, want []string) error {
	missing := sliceToMap(want)
	v4, _, err := frr.Routes(cnt)

	switch {
	case err != nil:
		// ignore the docker exec errors
		return nil
	case len(missing) == 0:
		return fmt.Errorf("nil map m")
	case len(v4) == 0:
		// Best-effort debug dump; its own error is intentionally discarded.
		_ = IPRoutes(cnt)
		return fmt.Errorf("nil map v4")
	}

	// Strike out every wanted prefix that is present; whatever remains is missing.
	for _, route := range v4 {
		delete(missing, route.Destination.String())
	}

	if len(missing) == 0 {
		return nil
	}
	return fmt.Errorf("%d routes %+v not found ", len(missing), getKeys(missing))
}

// scaleUP builds size IPv4 host prefixes of the form "5.5.5.<i>/32", with i
// running from 0 to size-1, in ascending order.
//
// It panics when size is greater than 255. A zero or negative size yields an
// empty, non-nil slice.
func scaleUP(size int) []string {
	if size > 255 {
		panic("255 is max")
	}
	if size <= 0 {
		// make would panic on a negative capacity; keep returning an empty slice.
		return []string{}
	}

	// The final length is known up front, so allocate the backing array once.
	ret := make([]string, 0, size)
	for i := 0; i < size; i++ {
		ret = append(ret, fmt.Sprintf("5.5.5.%d/32", i))
	}

	return ret
}

// sliceToMap turns a list of strings into a set: each distinct element of
// slice becomes a key mapped to true. Duplicates collapse into one key, and a
// nil or empty input produces an empty, non-nil map.
func sliceToMap(slice []string) map[string]bool {
	set := map[string]bool{}
	for idx := 0; idx < len(slice); idx++ {
		set[slice[idx]] = true
	}
	return set
}
// getKeys collects the keys of m into a new slice whose length equals the
// number of entries in m. The order follows Go's map iteration and is
// therefore unspecified. An empty or nil map yields an empty, non-nil slice.
func getKeys(m map[string]bool) []string {
	out := make([]string, len(m))
	next := 0
	for name := range m {
		out[next] = name
		next++
	}
	return out
}
// IPRoutes runs the vtysh command "show ip route bgp" through exec and prints
// the output to standard output, prefixed with "res". It is a debugging aid:
// the output is not parsed or returned.
//
// When the command fails, the returned error joins the underlying error with
// a "Failed to query routes" marker; otherwise it returns nil.
func IPRoutes(exec executor.Executor) error {
	out, execErr := exec.Exec("vtysh", "-c", "show ip route bgp")
	if execErr == nil {
		fmt.Println("res", out)
		return nil
	}
	return errors.Join(execErr, errors.New("Failed to query routes"))
}
Loading
Loading