Skip to content
This repository has been archived by the owner on Jun 20, 2024. It is now read-only.

Terminate NPC application if any of the watches panic #3792

Closed
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 65 additions & 5 deletions prog/weave-npc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
networkingv1 "k8s.io/api/networking/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
Expand All @@ -32,6 +31,7 @@ var (
nodeName string
maxList int
bridgePortName string
stopChan chan struct{}
naemono marked this conversation as resolved.
Show resolved Hide resolved
)

func handleError(err error) { common.CheckFatal(err) }
Expand Down Expand Up @@ -259,9 +259,19 @@ func root(cmd *cobra.Command, args []string) {
nsController := makeController(client.Core().RESTClient(), "namespaces", &coreapi.Namespace{},
cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
defer func() {
if r := recover(); r != nil {
naemono marked this conversation as resolved.
Show resolved Hide resolved
stopChan <- struct{}{}
}
}()
handleError(npc.AddNamespace(obj.(*coreapi.Namespace)))
},
DeleteFunc: func(obj interface{}) {
defer func() {
if r := recover(); r != nil {
stopChan <- struct{}{}
}
}()
switch obj := obj.(type) {
case *coreapi.Namespace:
handleError(npc.DeleteNamespace(obj))
Expand All @@ -273,15 +283,30 @@ func root(cmd *cobra.Command, args []string) {
}
},
UpdateFunc: func(old, new interface{}) {
defer func() {
if r := recover(); r != nil {
stopChan <- struct{}{}
}
}()
handleError(npc.UpdateNamespace(old.(*coreapi.Namespace), new.(*coreapi.Namespace)))
}})

podController := makeController(client.Core().RESTClient(), "pods", &coreapi.Pod{},
cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
defer func() {
if r := recover(); r != nil {
stopChan <- struct{}{}
}
}()
handleError(npc.AddPod(obj.(*coreapi.Pod)))
},
DeleteFunc: func(obj interface{}) {
defer func() {
if r := recover(); r != nil {
stopChan <- struct{}{}
}
}()
switch obj := obj.(type) {
case *coreapi.Pod:
handleError(npc.DeletePod(obj))
Expand All @@ -293,14 +318,29 @@ func root(cmd *cobra.Command, args []string) {
}
},
UpdateFunc: func(old, new interface{}) {
defer func() {
if r := recover(); r != nil {
stopChan <- struct{}{}
}
}()
handleError(npc.UpdatePod(old.(*coreapi.Pod), new.(*coreapi.Pod)))
}})

npHandlers := cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
defer func() {
if r := recover(); r != nil {
stopChan <- struct{}{}
}
}()
handleError(npc.AddNetworkPolicy(obj))
},
DeleteFunc: func(obj interface{}) {
defer func() {
if r := recover(); r != nil {
stopChan <- struct{}{}
}
}()
switch obj := obj.(type) {
case cache.DeletedFinalStateUnknown:
// We know this object has gone away, but its final state is no longer
Expand All @@ -312,16 +352,36 @@ func root(cmd *cobra.Command, args []string) {
}
},
UpdateFunc: func(old, new interface{}) {
defer func() {
if r := recover(); r != nil {
stopChan <- struct{}{}
}
}()
handleError(npc.UpdateNetworkPolicy(old, new))
},
}
npController = makeController(client.NetworkingV1().RESTClient(), "networkpolicies", &networkingv1.NetworkPolicy{}, npHandlers)

go nsController.Run(wait.NeverStop)
go podController.Run(wait.NeverStop)
go npController.Run(wait.NeverStop)

signals := make(chan os.Signal, 1)
stopChan = make(chan struct{})

go func() {
nsController.Run(stopChan)
signals <- syscall.SIGINT
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the intention here? Looks like you are faking a signal arriving from the OS?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am, since that's what weave is waiting on to completely stop the application fully

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about os.Exit(1) ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, but if the weave team wants waitgroups in place, I'll likely have to use a channel of some sort, and this was already setup to use... I'm not against this, but I'm curious as to how the weave team feels.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On line 222 we know the program has panic'd; it seems best to exit quickly and simply at that point rather than adding channels, fake signals, etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've updated this to simply os.Exit when one of the goroutines panics.

}()
go func() {
podController.Run(stopChan)
signals <- syscall.SIGINT
}()
go func() {
npController.Run(stopChan)
signals <- syscall.SIGINT
}()
go func() {
<-stopChan
close(stopChan)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it dangerous to close this while there are still goroutines that could send to it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well, the application is ending anyways, and I was considering waitgroups, but I'm not sure the harm here, as the application is completely shutting down at this point regardless. I'm certainly not against adding a waitgroup, and waiting until all the others stop completely.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this has been removed.

}()

signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
common.Log.Fatalf("Exiting: %v", <-signals)
}
Expand Down