Skip to content

Commit

Permalink
CARRY: Add RHOAI manifests
Browse files Browse the repository at this point in the history
Added the manifests to allow for deployment into a RHOAI.
  • Loading branch information
z103cb committed Mar 29, 2024
1 parent c8dd66f commit 62b198e
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 0 deletions.
18 changes: 18 additions & 0 deletions manifests/rhoai/binding_admin_roles.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Binds the kubeflow-training-admin ClusterRole, cluster-wide, to the
# administrator groups listed under `subjects`.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-rolebinding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeflow-training-admin
subjects:
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: rhods-admins
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: odh-admins
  - apiGroup: rbac.authorization.k8s.io
    kind: Group
    name: dedicated-admins
107 changes: 107 additions & 0 deletions manifests/rhoai/kubeflow-training-roles.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# This file has been copied from ../overlays/kubeflow.
# Kubeflow-specific aggregation labels that are not used in this deployment
# have been commented out for documentation purposes.

# Aggregated admin role for training jobs. It declares no rules of its own:
# they are collected from every ClusterRole carrying the dedicated
# aggregate-to-kubeflow-training-admin label (kubeflow-training-edit below).
#
# The selector deliberately does NOT use
# rbac.authorization.k8s.io/aggregate-to-admin. That label marks the roles
# that feed the built-in `admin` ClusterRole, and this role carries it too,
# so selecting on it would make this role aggregate every admin-level rule in
# the cluster (and match itself) instead of only the training-job rules.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-training-admin
  labels:
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-admin: "true"
    # Contribute the aggregated training rules to the built-in `admin` role.
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
aggregationRule:
  clusterRoleSelectors:
    - matchLabels:
        rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true"
rules: []

---
# Read/write access to the training job custom resources, plus the PVC and
# event access listed below. Aggregated into kubeflow-training-admin and into
# the built-in `edit` and `admin` roles through the labels.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-training-edit
  labels:
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
    # Selected by the aggregationRule of kubeflow-training-admin.
    rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
rules:
  # Full lifecycle management of the training job resources.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs
      - tfjobs
      - pytorchjobs
      - mxjobs
      - xgboostjobs
      - paddlejobs
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
  # The status subresource is read-only for this role.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs/status
      - tfjobs/status
      - pytorchjobs/status
      - mxjobs/status
      - xgboostjobs/status
      - paddlejobs/status
    verbs:
      - get
  # PersistentVolumeClaims can be created and deleted but not modified.
  - apiGroups:
      - ""
    resources:
      - persistentvolumeclaims
    verbs:
      - create
      - delete
      - get
      - list
      - watch
  # Read-only access to events.
  - apiGroups:
      - ""
    resources:
      - events
    verbs:
      - get
      - list
      - watch

---
# Read-only access to the training job custom resources and their status.
# Aggregated into the built-in `view` role through the label below.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeflow-training-view
  labels:
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
rules:
  # Training job resources: get, list and watch only.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs
      - tfjobs
      - pytorchjobs
      - mxjobs
      - xgboostjobs
      - paddlejobs
    verbs:
      - get
      - list
      - watch
  # Status subresources: get only.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs/status
      - tfjobs/status
      - pytorchjobs/status
      - mxjobs/status
      - xgboostjobs/status
      - paddlejobs/status
    verbs:
      - get
47 changes: 47 additions & 0 deletions manifests/rhoai/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# RHOAI configuration for the Kubeflow Training Operator (KFTO).

# Namespace applied to all resources.
namespace: opendatahub

# Prefix prepended to the name of every resource, e.g. the
# ClusterRoleBinding "admin-rolebinding" becomes "kfto-admin-rolebinding".
namePrefix: kfto-

# Generates the rhoai-config ConfigMap from the key=value pairs in params.env.
configMapGenerator:
  - name: rhoai-config
    envs:
      - params.env

# Extra transformer configuration; params.yaml declares where $(var)
# references are expanded.
configurations:
  - params.yaml

# $(image) resolves to the odh-training-operator-controller-image entry of
# the rhoai-config ConfigMap.
vars:
  - name: image
    objref:
      apiVersion: v1
      kind: ConfigMap
      name: rhoai-config
    fieldref:
      fieldpath: data.odh-training-operator-controller-image

# Labels added to all resources and selectors.
commonLabels:
  app.kubernetes.io/name: training-operator
  app.kubernetes.io/component: controller

resources:
  - ../base
  - kubeflow-training-roles.yaml
  - monitor.yaml
  - binding_admin_roles.yaml
  # - webhook_network_policy.yaml
  # - batch-user-rolebinding.yaml

# Patches for the training-operator Deployment: the first sets the controller
# image and log-level argument, the second declares the metrics container port.
patches:
  - path: manager_config_patch.yaml
  - path: manager_metrics_patch.yaml
12 changes: 12 additions & 0 deletions manifests/rhoai/manager_config_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Patch for the training-operator Deployment: takes the container image from
# the $(image) kustomize variable and sets the zap log-level argument.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: training-operator
spec:
  template:
    spec:
      containers:
        - name: training-operator
          image: $(image)
          args:
            - "--zap-log-level=2"
14 changes: 14 additions & 0 deletions manifests/rhoai/manager_metrics_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Patch for the training-operator Deployment: declares a named "metrics"
# container port (TCP 8080) on the training-operator container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: training-operator
  # namespace: opendatahub
spec:
  template:
    spec:
      containers:
        - name: training-operator
          ports:
            - name: metrics
              containerPort: 8080
              protocol: TCP
13 changes: 13 additions & 0 deletions manifests/rhoai/monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Prometheus PodMonitor: scrapes the "metrics" port of pods carrying the
# training-operator controller labels.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: controller-manager-metrics-monitor
  namespace: opendatahub
spec:
  podMetricsEndpoints:
    - port: metrics
  selector:
    matchLabels:
      app.kubernetes.io/name: training-operator
      app.kubernetes.io/component: controller
1 change: 1 addition & 0 deletions manifests/rhoai/params.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
odh-training-operator-controller-image=docker.io/kubeflow/training-operator:v1-855e096
3 changes: 3 additions & 0 deletions manifests/rhoai/params.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Allows $(var) references to be expanded in the image field of Deployment
# containers.
varReference:
  - kind: Deployment
    path: spec/template/spec/containers[]/image

0 comments on commit 62b198e

Please sign in to comment.