Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix network plugin detection #1061

Merged
merged 9 commits into from
Feb 15, 2021
3 changes: 3 additions & 0 deletions controllers/submariner/submariner_networkdiscovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ func (r *SubmarinerReconciler) getClusterNetwork(submariner *submopv1a1.Submarin
}

clusterNetwork, err := network.Discover(r.dynClient, r.clientSet, r.submClient, submariner.Namespace)
if err != nil {
log.Error(err, "Error trying to discover network")
}

if clusterNetwork != nil {
r.clusterNetwork = clusterNetwork
Expand Down
8 changes: 4 additions & 4 deletions pkg/discovery/network/canal.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,14 @@ func discoverCanalFlannelNetwork(clientSet kubernetes.Interface) (*ClusterNetwor
PodCIDRs: []string{*podCIDR},
}

// Try to networkPluginsDiscovery the service CIDRs using the generic functions
genNetwork, err := discoverGenericNetwork(clientSet)
// Try to detect the service CIDRs using the generic functions
clusterIPRange, err := findClusterIPRange(clientSet)
if err != nil {
return nil, err
}

if genNetwork != nil {
clusterNetwork.ServiceCIDRs = genNetwork.ServiceCIDRs
if clusterIPRange != "" {
clusterNetwork.ServiceCIDRs = []string{clusterIPRange}
}

return clusterNetwork, nil
Expand Down
12 changes: 6 additions & 6 deletions pkg/discovery/network/canal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,25 +20,25 @@ import (
v1 "k8s.io/api/core/v1"
v1meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
)

var _ = Describe("discoverCanalFlannelNetwork", func() {
When("There are no generic k8s pods to look at", func() {
It("Should return still return the pod CIDR", func() {
It("Should return the ClusterNetwork structure with the pod CIDR and the service CIDR", func() {
clusterNet := testDiscoverCanalFlannelWith(&canalFlannelCfgMap)
Expect(clusterNet).NotTo(BeNil())
Expect(clusterNet.NetworkPlugin).To(Equal("canal-flannel"))
Expect(clusterNet.PodCIDRs).To(Equal([]string{testCannalFlannelPodCIDR}))
Expect(clusterNet.ServiceCIDRs).To(Equal([]string{testServiceCIDRFromService}))
})
})

When("There is a kubeapi pod at least ", func() {
When("There is a kube-api pod", func() {

It("Should return the ClusterNetwork structure with ServiceCIDRs too", func() {
It("Should return the ClusterNetwork structure with the pod CIDR and the service CIDR", func() {
clusterNet := testDiscoverWith(
&canalFlannelCfgMap,
fakePod("kube-apiserver", []string{"kube-apiserver", "--service-cluster-ip-range=" + testServiceCIDR}, []v1.EnvVar{}),
Expand All @@ -53,14 +53,14 @@ var _ = Describe("discoverCanalFlannelNetwork", func() {
})

func testDiscoverCanalFlannelWith(objects ...runtime.Object) *ClusterNetwork {
clientSet := fake.NewSimpleClientset(objects...)
clientSet := newTestClient(objects...)
clusterNet, err := discoverCanalFlannelNetwork(clientSet)
Expect(err).NotTo(HaveOccurred())
return clusterNet
}

func testDiscoverWith(objects ...runtime.Object) *ClusterNetwork {
clientSet := fake.NewSimpleClientset(objects...)
clientSet := newTestClient(objects...)
clusterNet, err := Discover(nil, clientSet, nil, "")
Expect(err).NotTo(HaveOccurred())
return clusterNet
Expand Down
129 changes: 117 additions & 12 deletions pkg/discovery/network/generic.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ limitations under the License.
package network

import (
"fmt"
"os"
"regexp"

"github.com/pkg/errors"
v1 "k8s.io/api/core/v1"
v1meta "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/client-go/kubernetes"
)

Expand All @@ -25,27 +33,16 @@ func discoverGenericNetwork(clientSet kubernetes.Interface) (*ClusterNetwork, er
NetworkPlugin: "generic",
}

podIPRange, err := findPodIPRangeKubeController(clientSet)
podIPRange, err := findPodIPRange(clientSet)
tpantelis marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err
}

if podIPRange == "" {
podIPRange, err = findPodIPRangeKubeProxy(clientSet)
if err != nil {
return nil, err
}
}

if podIPRange != "" {
clusterNetwork.PodCIDRs = []string{podIPRange}
}

// on some self-hosted platforms, the platform itself will provide the kube-apiserver, thus
// our discovery method of looking for the kube-apiserver pod is useless, and we won't be
// able to return such detail
clusterIPRange, err := findClusterIPRange(clientSet)

if err != nil {
return nil, err
}
Expand All @@ -62,13 +59,121 @@ func discoverGenericNetwork(clientSet kubernetes.Interface) (*ClusterNetwork, er
}

func findClusterIPRange(clientSet kubernetes.Interface) (string, error) {
clusterIPRange, err := findClusterIPRangeFromApiserver(clientSet)
if err != nil || clusterIPRange != "" {
return clusterIPRange, err
}

clusterIPRange, err = findClusterIPRangeFromServiceCreation(clientSet)
if err != nil || clusterIPRange != "" {
return clusterIPRange, err
}

return "", nil
}

func findClusterIPRangeFromApiserver(clientSet kubernetes.Interface) (string, error) {
return findPodCommandParameter(clientSet, "component=kube-apiserver", "--service-cluster-ip-range")
}

func findClusterIPRangeFromServiceCreation(clientSet kubernetes.Interface) (string, error) {
// find service cidr based on https://stackoverflow.com/questions/44190607/how-do-you-find-the-cluster-service-cidr-of-a-kubernetes-cluster
tpantelis marked this conversation as resolved.
Show resolved Hide resolved
invalidSvcSpec := &v1.Service{
ObjectMeta: v1meta.ObjectMeta{
Name: "invalid-svc",
},
Spec: v1.ServiceSpec{
ClusterIP: "1.1.1.1",
Ports: []v1.ServicePort{
{
Port: 443,
TargetPort: intstr.IntOrString{
IntVal: 443,
},
},
},
},
}

ns := os.Getenv("WATCH_NAMESPACE")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is WATCH_NAMESPACE and how is it set?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is set here as an environment variable of the operator pod. I think that this is used by operator-sdk's GetWatchNamespace.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK - could you add a comment explaining this? eg "WATCH_NAMESPACE will be present if running in the operator pod".

Another approach is to pass in the NS so we don't rely on the presence of an env var. The operator code implicitly knows its own namespace. I think this would make it clearer and although it would mean passing a param down the chain.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed and squashed.

// WATCH_NAMESPACE env should be set to operator's namespace, if running in operator
if ns == "" {
// otherwise, it should be called from subctl command, so use "default" namespace
ns = "default"
}

// create service to the namespace
_, err := clientSet.CoreV1().Services(ns).Create(invalidSvcSpec)

// creating invalid service didn't fail as expected
if err == nil {
return "", fmt.Errorf("creating invalid service(%v) didn't fail", invalidSvcSpec)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer this the way you had it, ie not return error here or on L128. This makes it consistent with the other discovery methods and, on join, will result in the user being prompted. But, as I noted earlier, I think it's worth logging a message as the API server didn't behave as expected.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The callers of discoverGenericNetwork are as follows:

[Operator]
Reconcile
  discoverNetwork
    getClusterNetwork
      Discover
        discoverGenericNetwork

[Command line]
getNetworkDetails
  Discover
    discoverGenericNetwork
  if err != nil {
    status.QueueWarningMessage(fmt.Sprintf("Error trying to discover network details: %s", err))
  }

So, as for command line case, the error is already been passed to status.QueueWarningMessage.

And, other functions called from discoverGenericNetwork, like findClusterIPRangeFromApiserver actually returns error and not catching it with status.QueueWarningMessage.

Therefore, the code should be already as you expected.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point was about whether it should return an error in this case, ie should we treat it like we didn't find the data similar to the other discovery methods. So for subclt join, this would at least allow it to prompt the user as a last resort. After all, this is just another attempt to find the data. This seems reasonable but it would be beneficial to print a warning. @mangelajo WDYT?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Waiting for @mangelajo 's comment. But I'm now inclined to return nil and log error here, only for this case.

Meanwhile, I would like to confirm that it is safe to call status.QueueWarningMessage outside CLI's context, or from operator. It looks like a package under pkg/internal/cli and I'm not familiar with its implementation details.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately status.QueueWarningMessage wouldn't be appropriate in the operator. The operator uses a different logging API. We could pass in a function to log warnings.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added logging code in controllers/submariner/submariner_networkdiscovery.go for operator to log it (See the first commit). Also, re-organized and squashed commits to review easier.

}

return parseServiceCIDRFrom(err.Error())
}

func parseServiceCIDRFrom(msg string) (string, error) {
// expected msg is below:
// "The Service \"invalid-svc\" is invalid: spec.clusterIPs: Invalid value: []string{\"1.1.1.1\"}:
// failed to allocated ip:1.1.1.1 with error:provided IP is not in the valid range.
// The range of valid IPs is 10.45.0.0/16"
// expected matched string is below:
// 10.45.0.0/16
re := regexp.MustCompile(".*valid IPs is (.*)$")

match := re.FindStringSubmatch(msg)
if match == nil {
return "", fmt.Errorf("parsing (%s) failed. it doesn't match with %v", msg, re)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't return an error above if no error was returned from service creation so we should be consistent here. I think that's the correct behavior as it means we didn't find the desired data. As above I think we should log a message:

status.QueueWarningMessage(fmt.Sprintf"Could not determine the service IP range via service creation - the expected error was not returned. The actual error was %q", msg))

}

// returns first matching string
return match[1], nil
}

func findPodIPRange(clientSet kubernetes.Interface) (string, error) {
podIPRange, err := findPodIPRangeKubeController(clientSet)
if err != nil || podIPRange != "" {
return podIPRange, err
}

podIPRange, err = findPodIPRangeKubeProxy(clientSet)
if err != nil || podIPRange != "" {
return podIPRange, err
}

podIPRange, err = findPodIPRangeFromNodeSpec(clientSet)
if err != nil || podIPRange != "" {
return podIPRange, err
}

return "", nil
}

func findPodIPRangeKubeController(clientSet kubernetes.Interface) (string, error) {
return findPodCommandParameter(clientSet, "component=kube-controller-manager", "--cluster-cidr")
}

func findPodIPRangeKubeProxy(clientSet kubernetes.Interface) (string, error) {
return findPodCommandParameter(clientSet, "component=kube-proxy", "--cluster-cidr")
}

func findPodIPRangeFromNodeSpec(clientSet kubernetes.Interface) (string, error) {
nodes, err := clientSet.CoreV1().Nodes().List(v1meta.ListOptions{})

if err != nil {
return "", errors.WithMessagef(err, "error listing nodes")
}

return parseToPodCidr(nodes.Items)
}

func parseToPodCidr(nodes []v1.Node) (string, error) {
for _, node := range nodes {
if node.Spec.PodCIDR != "" {
return node.Spec.PodCIDR, nil
}
}

return "", nil
}
Loading