Skip to content

Commit

Permalink
upgrade mpi-operator manifests to v1 (kubeflow#1128)
Browse files (browse the repository at this point in the history)
Signed-off-by: Abhilash Pallerlamudi <stp.abhi@gmail.com>
  • Loading branch information
stpabhi authored Apr 22, 2020
1 parent a29315e commit 520be69
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 94 deletions.
37 changes: 29 additions & 8 deletions mpi-job/mpi-operator/base/cluster-role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,22 @@ rules:
- pods
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods/exec
verbs:
- create
- apiGroups:
- ""
resources:
- endpoints
verbs:
- create
- get
- update
- apiGroups:
- ""
resources:
Expand All @@ -43,30 +53,31 @@ rules:
- list
- watch
- apiGroups:
- apps
- policy
resources:
- statefulsets
- poddisruptionbudgets
verbs:
- create
- list
- update
- watch
- apiGroups:
- batch
- apps
resources:
- jobs
- statefulsets
verbs:
- create
- list
- update
- watch
- apiGroups:
- policy
- batch
resources:
- poddisruptionbudgets
- jobs
verbs:
- create
- list
- update
- watch
- apiGroups:
- apiextensions.k8s.io
Expand All @@ -79,8 +90,18 @@ rules:
- kubeflow.org
resources:
- mpijobs
- mpijobs/finalizers
- mpijobs/status
verbs:
- "*"
- apiGroups:
- scheduling.incubator.k8s.io
- scheduling.sigs.dev
resources:
- queues
- podgroups
verbs:
- '*'
- "*"

---

Expand All @@ -93,7 +114,7 @@ metadata:
aggregationRule:
clusterRoleSelectors:
- matchLabels:
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true"
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-mpijobs-admin: "true"
rules: []

---
Expand Down
207 changes: 129 additions & 78 deletions mpi-job/mpi-operator/base/crd.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
apiVersion: apiextensions.k8s.io/v1beta1
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: mpijobs.kubeflow.org
spec:
group: kubeflow.org
version: v1alpha1
scope: Namespaced
names:
plural: mpijobs
Expand All @@ -13,87 +12,139 @@ spec:
shortNames:
- mj
- mpij
validation:
openAPIV3Schema:
properties:
spec:
title: The MPIJob spec
description: Only one of gpus, processingUnits, or replicas should be specified
oneOf:
- properties:
gpus:
title: Total number of GPUs
description: Valid values are 1, 2, 4, or any multiple of 8
oneOf:
- type: integer
versions:
- name: v1alpha1
served: false
storage: false
schema:
openAPIV3Schema:
properties:
spec:
title: The MPIJob spec
description: Only one of gpus, processingUnits, or replicas should be specified
oneOf:
- properties:
gpus:
title: Total number of GPUs
description: Valid values are 1, 2, 4, or any multiple of 8
oneOf:
- type: integer
enum:
- 1
- 2
- 4
- type: integer
multipleOf: 8
minimum: 8
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
gpusPerNode:
title: The maximum number of GPUs available per node
description: Defaults to the number of GPUs per worker
type: integer
minimum: 1
required:
- gpus
- properties:
processingUnits:
title: Total number of processing units
description: Valid values are 1, 2, 4, or any multiple of 8
oneOf:
- type: integer
enum:
- 1
- 2
- 4
- type: integer
multipleOf: 8
minimum: 8
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingUnitsPerNode:
title: The maximum number of processing units available per node
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingResourceType:
title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'
description: Defaults to 'nvidia.com/gpu'
type: string
enum:
- 1
- 2
- 4
- type: integer
multipleOf: 8
minimum: 8
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
gpusPerNode:
title: The maximum number of GPUs available per node
description: Defaults to the number of GPUs per worker
type: integer
minimum: 1
required:
- gpus
- properties:
processingUnits:
title: Total number of processing units
description: Valid values are 1, 2, 4, or any multiple of 8
oneOf:
- type: integer
- nvidia.com/gpu
- cpu
required:
- processingUnits
- properties:
replicas:
title: Total number of replicas
description: The processing resource limit should be specified for each replica
type: integer
minimum: 1
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingResourceType:
title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'
description: Defaults to 'nvidia.com/gpu'
type: string
enum:
- 1
- 2
- 4
- type: integer
multipleOf: 8
minimum: 8
- nvidia.com/gpu
- cpu
required:
- replicas
- name: v1alpha2
served: true
storage: false
schema:
openAPIV3Schema:
properties:
spec:
properties:
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingUnitsPerNode:
title: The maximum number of processing units available per node
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingResourceType:
title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'
description: Defaults to 'nvidia.com/gpu'
type: string
enum:
- nvidia.com/gpu
- cpu
required:
- processingUnits
- properties:
replicas:
title: Total number of replicas
description: The processing resource limit should be specified for each replica
type: integer
minimum: 1
mpiReplicaSpecs:
properties:
Launcher:
properties:
replicas:
type: integer
minimum: 1
maximum: 1
Worker:
properties:
replicas:
type: integer
minimum: 1
- name: v1
served: true
storage: true
schema:
openAPIV3Schema:
properties:
spec:
properties:
slotsPerWorker:
title: The number of slots per worker used in hostfile
description: Defaults to the number of processing units per worker
type: integer
minimum: 1
processingResourceType:
title: The processing resource type, e.g. 'nvidia.com/gpu' or 'cpu'
description: Defaults to 'nvidia.com/gpu'
type: string
enum:
- nvidia.com/gpu
- cpu
required:
- replicas
mpiReplicaSpecs:
properties:
Launcher:
properties:
replicas:
type: integer
minimum: 1
maximum: 1
Worker:
properties:
replicas:
type: integer
minimum: 1
7 changes: 4 additions & 3 deletions mpi-job/mpi-operator/base/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ spec:
spec:
containers:
- args:
- --gpus-per-node
- "8"
- -alsologtostderr
- --lock-namespace
- $(lock-namespace)
- --kubectl-delivery-image
- $(kubectl-delivery-image)
image: mpioperator/mpi-operator:0.1.0
image: mpioperator/mpi-operator:latest
imagePullPolicy: Always
name: mpi-operator
serviceAccountName: mpi-operator
12 changes: 10 additions & 2 deletions mpi-job/mpi-operator/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@ commonLabels:
images:
- name: mpioperator/mpi-operator
newName: mpioperator/mpi-operator
newTag: 0.1.0
newTag: latest
configMapGenerator:
- name: mpi-operator-config
env: params.env
envs:
- params.env
generatorOptions:
disableNameSuffixHash: true
vars:
Expand All @@ -26,3 +27,10 @@ vars:
apiVersion: v1
fieldref:
fieldpath: data.kubectl-delivery-image
- name: lock-namespace
objref:
kind: ConfigMap
name: mpi-operator-config
apiVersion: v1
fieldref:
fieldpath: data.lock-namespace
1 change: 1 addition & 0 deletions mpi-job/mpi-operator/base/params.env
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
kubectl-delivery-image=mpioperator/kubectl-delivery:latest
lock-namespace=kubeflow
4 changes: 2 additions & 2 deletions mpi-job/mpi-operator/overlays/application/application.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ spec:
app.kubernetes.io/managed-by: kfctl
app.kubernetes.io/component: mpijob
app.kubernetes.io/part-of: kubeflow
app.kubernetes.io/version: v0.7
app.kubernetes.io/version: v1.0
componentKinds:
- group: apps
kind: Deployment
Expand All @@ -20,7 +20,7 @@ spec:
kind: MPIJob
descriptor:
type: "mpi-operator"
version: "v1alpha1"
version: "v1"
description: "Mpi-operator allows users to create and manage the \"MPIJob\" custom resource."
maintainers:
- name: Rong Ou
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ commonLabels:
app.kubernetes.io/managed-by: kfctl
app.kubernetes.io/component: mpijob
app.kubernetes.io/part-of: kubeflow
app.kubernetes.io/version: v0.7
app.kubernetes.io/version: v1.0

0 comments on commit 520be69

Please sign in to comment.