Skip to content

Commit

Permalink
add pod webhook and pod reconciler for new exclusive placement strategy
Browse files Browse the repository at this point in the history
  • Loading branch information
danielvegamyhre committed Nov 19, 2023
1 parent 2466090 commit 3c08157
Show file tree
Hide file tree
Showing 18 changed files with 936 additions and 334 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ COPY main.go main.go
COPY api/ api/
COPY pkg/controllers/ pkg/controllers/
COPY pkg/util/ pkg/util/
COPY pkg/webhooks pkg/webhooks

# Build
# GOARCH has no default value, so that the binary is built according to the host where the command
Expand Down
4 changes: 2 additions & 2 deletions api/jobset/v1alpha2/jobset_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

"sigs.k8s.io/jobset/pkg/util/collections"
"sigs.k8s.io/jobset/pkg/util/names"
shared "sigs.k8s.io/jobset/pkg/util/shared"

batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -109,7 +109,7 @@ func (js *JobSet) ValidateCreate() (admission.Warnings, error) {
}
// Check that the generated job names for this replicated job will be DNS 1035 compliant.
// Use the largest job index as it will have the longest name.
testJobName := names.GenJobName(js.Name, rjob.Name, int(rjob.Replicas-1))
testJobName := shared.GenJobName(js.Name, rjob.Name, int(rjob.Replicas-1))
for _, errMessage := range validation.IsDNS1035Label(testJobName) {
allErrs = append(allErrs, fmt.Errorf(errMessage))
}
Expand Down
7 changes: 2 additions & 5 deletions config/components/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,11 +92,8 @@ spec:
# TODO(user): Configure the resources accordingly based on the project requirements.
# More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
resources:
limits:
cpu: 500m
memory: 128Mi
requests:
cpu: 10m
memory: 64Mi
cpu: 2
memory: 512Mi
serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
20 changes: 20 additions & 0 deletions config/components/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,26 @@ rules:
- get
- patch
- update
- apiGroups:
- ""
resources:
- nodes
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
Expand Down
38 changes: 38 additions & 0 deletions config/components/webhook/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,25 @@ webhooks:
resources:
- jobsets
sideEffects: None
- admissionReviewVersions:
- v1
clientConfig:
service:
name: webhook-service
namespace: system
path: /mutate--v1-pod
failurePolicy: Fail
name: mpod.kb.io
rules:
- apiGroups:
- ""
apiVersions:
- v1
operations:
- CREATE
resources:
- pods
sideEffects: None
---
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
Expand All @@ -50,3 +69,22 @@ webhooks:
resources:
- jobsets
sideEffects: None
- admissionReviewVersions:
- v1
clientConfig:
service:
name: webhook-service
namespace: system
path: /validate--v1-pod
failurePolicy: Fail
name: vpod.kb.io
rules:
- apiGroups:
- ""
apiVersions:
- v1
operations:
- CREATE
resources:
- pods
sideEffects: None
26 changes: 26 additions & 0 deletions examples/simple/exclusive-placement.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Example JobSet that sets the alpha exclusive-topology annotation.
# It declares a single replicated job named "workers" with 3 replicas; each
# replica's Job template uses parallelism 3 / completions 3 and runs one
# busybox container executing `sleep 1000s`.
apiVersion: jobset.x-k8s.io/v1alpha2
kind: JobSet
metadata:
name: exclusive-placement
annotations:
alpha.jobset.sigs.k8s.io/exclusive-topology: cloud.google.com/gke-nodepool # 1:1 job replica to node pool assignment
spec:
failurePolicy:
maxRestarts: 3
replicatedJobs:
- name: workers
replicas: 3
template:
spec:
parallelism: 3
completions: 3
backoffLimit: 10
template:
spec:
containers:
- name: sleep
image: busybox
command:
- sleep
args:
- 1000s
34 changes: 30 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
jobset "sigs.k8s.io/jobset/api/jobset/v1alpha2"
"sigs.k8s.io/jobset/pkg/controllers"
"sigs.k8s.io/jobset/pkg/util/cert"
"sigs.k8s.io/jobset/pkg/webhooks"
//+kubebuilder:scaffold:imports
)

Expand Down Expand Up @@ -68,7 +69,11 @@ func main() {

ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
kubeConfig := ctrl.GetConfigOrDie()
kubeConfig.QPS = 500
kubeConfig.Burst = 500

mgr, err := ctrl.NewManager(kubeConfig, ctrl.Options{
Scheme: scheme,
Metrics: server.Options{
BindAddress: metricsAddr,
Expand Down Expand Up @@ -104,8 +109,12 @@ func main() {
}

ctx := ctrl.SetupSignalHandler()
if err := controllers.SetupIndexes(ctx, mgr.GetFieldIndexer()); err != nil {
setupLog.Error(err, "unable to setup indexes")
if err := controllers.SetupJobSetIndexes(ctx, mgr.GetFieldIndexer()); err != nil {
setupLog.Error(err, "unable to setup jobset reconciler indexes")
os.Exit(1)
}
if err := controllers.SetupPodReconcilerIndexes(ctx, mgr.GetFieldIndexer()); err != nil {
setupLog.Error(err, "unable to setup pod reconciler indexes")
os.Exit(1)
}

Expand All @@ -130,13 +139,30 @@ func setupControllers(mgr ctrl.Manager, certsReady chan struct{}) {
<-certsReady
setupLog.Info("certs ready")

// Set up JobSet controller.
jobSetController := controllers.NewJobSetReconciler(mgr.GetClient(), mgr.GetScheme(), mgr.GetEventRecorderFor("jobset"))
if err := jobSetController.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "JobSet")
os.Exit(1)
}

// Set up pod reconciler.
podController := controllers.NewPodReconciler(mgr.GetClient(), mgr.GetScheme(), mgr.GetEventRecorderFor("pod"))
if err := podController.SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Pod")
os.Exit(1)
}

// Set up JobSet validating/defaulting webhook.
if err := (&jobset.JobSet{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "JobSet")
setupLog.Error(err, "unable to create validation/defaulting webhook", "webhook", "JobSet")
os.Exit(1)
}

// Set up pod mutating and validating admission webhooks.
podWebhook := webhooks.NewPodWebhook(mgr)
if err := podWebhook.SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create mutating webhook", "webhook", "Pod")
os.Exit(1)
}
//+kubebuilder:scaffold:builder
Expand Down
Loading

0 comments on commit 3c08157

Please sign in to comment.