support external control plane
The use case is managed Kubernetes services (similar to
AWS EKS or Google GKE), where the Kubernetes control plane
components (apiserver, etcd, kube-scheduler, etc.) are hosted
by the cloud service provider in a provider-managed cluster
and only the workload components (worker nodes) are visible
to the end user. In such a case, the end user can still deploy
the sriov operator on the worker nodes, which reach the
externally hosted Kubernetes apiserver.

Signed-off-by: Zenghui Shi <zshi@redhat.com>
zshi-redhat committed Sep 8, 2022
1 parent e55927f commit 711302e
Showing 8 changed files with 566 additions and 9 deletions.
4 changes: 4 additions & 0 deletions bindata/manifests/operator-webhook/server.yaml
@@ -28,6 +28,7 @@ spec:
nodeSelector:
beta.kubernetes.io/os: linux
affinity:
{{ if not .ExternalControlPlane }}
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
@@ -37,7 +38,9 @@
- matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: Exists
{{ end }}
tolerations:
{{ if not .ExternalControlPlane }}
- key: "node-role.kubernetes.io/master"
operator: Exists
effect: NoSchedule
@@ -47,6 +50,7 @@ spec:
- key: "node.kubernetes.io/not-ready"
operator: Exists
effect: NoSchedule
{{ end }}
{{- if .ImagePullSecrets }}
imagePullSecrets:
{{- range .ImagePullSecrets }}
6 changes: 5 additions & 1 deletion bindata/manifests/webhook/server.yaml
@@ -8,7 +8,7 @@ metadata:
namespace: {{.Namespace}}
annotations:
kubernetes.io/description: |
This daemon set launches the network resource injector component on master nodes.
This daemon set launches the network resource injector component on master or worker nodes.
release.openshift.io/version: "{{.ReleaseVersion}}"
spec:
selector:
@@ -31,6 +31,7 @@ spec:
nodeSelector:
beta.kubernetes.io/os: linux
affinity:
{{ if not .ExternalControlPlane }}
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
@@ -40,7 +41,9 @@
- matchExpressions:
- key: node-role.kubernetes.io/control-plane
operator: Exists
{{ end }}
tolerations:
{{ if not .ExternalControlPlane }}
- key: "node-role.kubernetes.io/master"
operator: Exists
effect: NoSchedule
@@ -50,6 +53,7 @@ spec:
- key: "node.kubernetes.io/not-ready"
operator: Exists
effect: NoSchedule
{{ end }}
{{- if .ImagePullSecrets }}
imagePullSecrets:
{{- range .ImagePullSecrets }}
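To make the effect of the new {{ if not .ExternalControlPlane }} guard concrete, here is a minimal, self-contained Go sketch (not the operator's own render code) that renders a stripped-down copy of the scheduling block from the two webhook templates above with the flag off and on; the template text and the ExternalControlPlane key are taken from the diffs, everything else is illustrative.

package main

import (
	"os"
	"text/template"
)

// Stripped-down copy of the block guarded in the operator-webhook and
// network resource injector server.yaml templates above.
const scheduling = `affinity:
{{ if not .ExternalControlPlane }}  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
      - matchExpressions:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
{{ end }}tolerations:
{{ if not .ExternalControlPlane }}- key: "node-role.kubernetes.io/master"
  operator: Exists
  effect: NoSchedule
{{ end }}`

func main() {
	tmpl := template.Must(template.New("scheduling").Parse(scheduling))
	for _, external := range []bool{false, true} {
		// With ExternalControlPlane=true the control-plane node affinity and the
		// master toleration are dropped, so the webhook pods land on worker nodes.
		if err := tmpl.Execute(os.Stdout, map[string]interface{}{"ExternalControlPlane": external}); err != nil {
			panic(err)
		}
	}
}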
6 changes: 6 additions & 0 deletions controllers/sriovoperatorconfig_controller.go
@@ -229,6 +229,12 @@ func (r *SriovOperatorConfigReconciler) syncWebhookObjs(dc *sriovnetworkv1.Sriov
data.Data["CaBundle"] = os.Getenv("WEBHOOK_CA_BUNDLE")
data.Data["DevMode"] = os.Getenv("DEV_MODE")
data.Data["ImagePullSecrets"] = GetImagePullSecrets()
external, err := utils.IsExternalControlPlaneCluster(r.Client)
if err != nil {
logger.Error(err, "Fail to get control plane topology")
return err
}
data.Data["ExternalControlPlane"] = external
objs, err := render.RenderDir(path, &data)
if err != nil {
logger.Error(err, "Fail to render webhook manifests")
14 changes: 14 additions & 0 deletions controllers/suite_test.go
@@ -27,6 +27,7 @@ import (
. "github.com/onsi/ginkgo"
"github.com/onsi/ginkgo/config"
. "github.com/onsi/gomega"
openshiftconfigv1 "github.com/openshift/api/config/v1"
mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"

corev1 "k8s.io/api/core/v1"
@@ -85,6 +86,8 @@ var _ = BeforeSuite(func(done Done) {
Expect(err).NotTo(HaveOccurred())
err = mcfgv1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())
err = openshiftconfigv1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())

//+kubebuilder:scaffold:scheme

@@ -165,6 +168,17 @@ var _ = BeforeSuite(func(done Done) {
}
Expect(k8sClient.Create(context.TODO(), config)).Should(Succeed())

infra := &openshiftconfigv1.Infrastructure{
ObjectMeta: metav1.ObjectMeta{
Name: "cluster",
},
Spec: openshiftconfigv1.InfrastructureSpec{},
Status: openshiftconfigv1.InfrastructureStatus{
ControlPlaneTopology: openshiftconfigv1.HighlyAvailableTopologyMode,
},
}
Expect(k8sClient.Create(context.TODO(), infra)).Should(Succeed())

poolConfig := &sriovnetworkv1.SriovNetworkPoolConfig{}
poolConfig.SetNamespace(testNamespace)
poolConfig.SetName(constants.DefaultConfigName)
4 changes: 4 additions & 0 deletions deploy/operator.yaml
@@ -79,6 +79,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: RELEASE_VERSION
value: 4.3.0
- name: SRIOV_CNI_BIN_PATH
4 changes: 4 additions & 0 deletions deployment/sriov-network-operator/templates/operator.yaml
@@ -81,6 +81,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: RELEASE_VERSION
value: {{ .Release.AppVersion }}
- name: SRIOV_CNI_BIN_PATH
76 changes: 68 additions & 8 deletions pkg/utils/cluster.go
@@ -15,39 +15,99 @@ import (

const (
// default Infrastructure resource name for Openshift
infraResourceName = "cluster"
infraResourceName = "cluster"
workerRoleName = "worker"
masterRoleName = "master"
workerNodeLabelKey = "node-role.kubernetes.io/worker"
masterNodeLabelKey = "node-role.kubernetes.io/master"
controlPlaneNodeLabelKey = "node-role.kubernetes.io/control-plane"
)

func getNodeRole(node corev1.Node) string {
for k := range node.Labels {
if k == workerNodeLabelKey {
return workerRoleName
} else if k == masterNodeLabelKey || k == controlPlaneNodeLabelKey {
return masterRoleName
}
}
return ""
}

func IsSingleNodeCluster(c client.Client) (bool, error) {
if os.Getenv("CLUSTER_TYPE") == ClusterTypeOpenshift {
return openshiftSingleNodeClusterStatus(c)
topo, err := openshiftControlPlaneTopologyStatus(c)
if err != nil {
return false, err
}
if topo == configv1.SingleReplicaTopologyMode {
return true, nil
}
return false, nil
}
return k8sSingleNodeClusterStatus(c)
}

// IsExternalControlPlaneCluster detects the control plane location of the cluster.
// On OpenShift, the control plane topology is configured in the configv1.Infrastructure resource.
// On Kubernetes, it is determined by which node the sriov operator is scheduled on: if the
// operator pod is scheduled on a worker node, the control plane is considered external.
func IsExternalControlPlaneCluster(c client.Client) (bool, error) {
if os.Getenv("CLUSTER_TYPE") == ClusterTypeOpenshift {
topo, err := openshiftControlPlaneTopologyStatus(c)
if err != nil {
return false, err
}
if topo == "External" {
return true, nil
}
} else if os.Getenv("CLUSTER_TYPE") == ClusterTypeKubernetes {
role, err := operatorNodeRole(c)
if err != nil {
return false, err
}
if role == workerRoleName {
return true, nil
}
}
return false, nil
}

func k8sSingleNodeClusterStatus(c client.Client) (bool, error) {
nodeList := &corev1.NodeList{}
err := c.List(context.TODO(), nodeList)
if err != nil {
glog.Errorf("IsSingleNodeCluster(): Failed to list nodes: %v", err)
glog.Errorf("k8sSingleNodeClusterStatus(): Failed to list nodes: %v", err)
return false, err
}

if len(nodeList.Items) == 1 {
glog.Infof("IsSingleNodeCluster(): one node found in the cluster")
glog.Infof("k8sSingleNodeClusterStatus(): one node found in the cluster")
return true, nil
}
return false, nil
}

func openshiftSingleNodeClusterStatus(c client.Client) (bool, error) {
// operatorNodeRole returns the role of the node that the operator is scheduled on
func operatorNodeRole(c client.Client) (string, error) {
node := corev1.Node{}
err := c.Get(context.TODO(), types.NamespacedName{Name: os.Getenv("NODE_NAME")}, &node)
if err != nil {
glog.Errorf("k8sIsExternalTopologyMode(): Failed to get node: %v", err)
return "", err
}

return getNodeRole(node), nil
}

func openshiftControlPlaneTopologyStatus(c client.Client) (configv1.TopologyMode, error) {
infra := &configv1.Infrastructure{}
err := c.Get(context.TODO(), types.NamespacedName{Name: infraResourceName}, infra)
if err != nil {
return false, err
return "", fmt.Errorf("openshiftControlPlaneTopologyStatus(): Failed to get Infrastructure (name: %s): %v", infraResourceName, err)
}
if infra == nil {
return false, fmt.Errorf("getting resource Infrastructure (name: %s) succeeded but object was nil", infraResourceName)
return "", fmt.Errorf("openshiftControlPlaneTopologyStatus(): getting resource Infrastructure (name: %s) succeeded but object was nil", infraResourceName)
}
return infra.Status.ControlPlaneTopology == configv1.SingleReplicaTopologyMode, nil
return infra.Status.ControlPlaneTopology, nil
}
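For completeness, a minimal test sketch for the new helper; it is not part of this commit. It assumes it lives in the same utils package (so it can use the unexported workerNodeLabelKey constant and the exported ClusterTypeKubernetes value) and that the vendored controller-runtime version provides fake.NewClientBuilder; the node name worker-0 is made up.

package utils

import (
	"os"
	"testing"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes/scheme"
	"sigs.k8s.io/controller-runtime/pkg/client/fake"
)

// TestIsExternalControlPlaneCluster exercises the plain-Kubernetes path: the
// operator pod runs on a worker node, so the control plane is reported external.
func TestIsExternalControlPlaneCluster(t *testing.T) {
	os.Setenv("CLUSTER_TYPE", ClusterTypeKubernetes)
	os.Setenv("NODE_NAME", "worker-0")

	node := &corev1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name:   "worker-0",
			Labels: map[string]string{workerNodeLabelKey: ""},
		},
	}
	c := fake.NewClientBuilder().WithScheme(scheme.Scheme).WithObjects(node).Build()

	external, err := IsExternalControlPlaneCluster(c)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !external {
		t.Fatal("expected an external control plane to be detected")
	}
}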