Skip to content

Commit

Permalink
CARRY: Add RHOAI manifests (opendatahub-io#3)
Browse files Browse the repository at this point in the history
  • Loading branch information
z103cb authored Apr 2, 2024
1 parent c8dd66f commit fa7b886
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 0 deletions.
73 changes: 73 additions & 0 deletions manifests/rhoai/kubeflow-training-roles.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This file has been copied from ../overlays/kubeflow.
# The original labels have been commented out for documentation purposes.
# ClusterRole granting read/write access to the Kubeflow training job
# resources and read access to their status subresources.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: training-edit
  labels:
    # Upstream Kubeflow aggregation labels, kept commented out for reference:
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true"
    # Kubernetes aggregation labels: fold these rules into the built-in
    # "edit" and "admin" ClusterRoles.
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
rules:
  # Manage the training job custom resources.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs
      - tfjobs
      - pytorchjobs
      - mxjobs
      - xgboostjobs
      - paddlejobs
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
  # Status subresources are read-only for this role.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs/status
      - tfjobs/status
      - pytorchjobs/status
      - mxjobs/status
      - xgboostjobs/status
      - paddlejobs/status
    verbs:
      - get
---
# ClusterRole granting read-only access to the Kubeflow training job
# resources and their status subresources.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: training-view
  labels:
    # Upstream Kubeflow aggregation label, kept commented out for reference:
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
    # Kubernetes aggregation label: fold these rules into the built-in
    # "view" ClusterRole.
    rbac.authorization.k8s.io/aggregate-to-view: "true"
rules:
  # Read the training job custom resources.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs
      - tfjobs
      - pytorchjobs
      - mxjobs
      - xgboostjobs
      - paddlejobs
    verbs:
      - get
      - list
      - watch
  # Read the status subresources.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs/status
      - tfjobs/status
      - pytorchjobs/status
      - mxjobs/status
      - xgboostjobs/status
      - paddlejobs/status
    verbs:
      - get
45 changes: 45 additions & 0 deletions manifests/rhoai/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# RHOAI overlay for the Kubeflow Training Operator (KFTO).

# Namespace applied to every resource in this overlay.
namespace: opendatahub

# Prefix prepended to the name of every resource, e.g. a Deployment named
# "training-operator" becomes "kubeflow-training-operator".
namePrefix: kubeflow-

# Generate the "rhoai-config" ConfigMap from the key=value pairs in params.env.
configMapGenerator:
  - name: rhoai-config
    envs:
      - params.env

# Additional transformer configuration; params.yaml declares the fields in
# which $(var) references are substituted.
configurations:
  - params.yaml

# Expose the controller image stored in the rhoai-config ConfigMap as $(image).
vars:
  - name: image
    objref:
      kind: ConfigMap
      name: rhoai-config
      apiVersion: v1
    fieldref:
      fieldpath: data.odh-training-operator-controller-image

# Labels added to all resources and selectors.
commonLabels:
  app.kubernetes.io/name: training-operator
  app.kubernetes.io/component: controller

resources:
  - ../base
  - kubeflow-training-roles.yaml
  - monitor.yaml

patches:
  # Set the controller image and the log-level argument.
  - path: manager_config_patch.yaml
  # Name the metrics container port on the controller.
  - path: manager_metrics_patch.yaml
  # Remove the Service defined in the base.
  - path: manager_delete_metrics_service_patch.yaml
12 changes: 12 additions & 0 deletions manifests/rhoai/manager_config_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Patch for the training-operator Deployment: sets the controller image to
# the $(image) kustomize variable and passes the zap log-level argument.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: training-operator
spec:
  template:
    spec:
      containers:
        - name: training-operator
          image: $(image)
          args:
            - "--zap-log-level=2"
6 changes: 6 additions & 0 deletions manifests/rhoai/manager_delete_metrics_service_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Remove the training-operator Service that the base manifests create.
apiVersion: v1
kind: Service
metadata:
  name: training-operator
$patch: delete
12 changes: 12 additions & 0 deletions manifests/rhoai/manager_metrics_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Patch for the training-operator Deployment: declares container port 8080
# under the name "metrics" on the training-operator container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: training-operator
spec:
  template:
    spec:
      containers:
        - name: training-operator
          ports:
            - name: metrics
              containerPort: 8080
12 changes: 12 additions & 0 deletions manifests/rhoai/monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Prometheus PodMonitor: scrapes the port named "metrics" on pods carrying
# the training-operator controller labels.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: training-operator-metrics-monitor
spec:
  podMetricsEndpoints:
    - port: metrics
  selector:
    matchLabels:
      app.kubernetes.io/component: controller
      app.kubernetes.io/name: training-operator
1 change: 1 addition & 0 deletions manifests/rhoai/params.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
odh-training-operator-controller-image=docker.io/kubeflow/training-operator:v1-855e096
3 changes: 3 additions & 0 deletions manifests/rhoai/params.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Kustomize var-reference configuration: allow $(var) substitution in the
# image field of each container in a Deployment's pod template.
varReference:
  - kind: Deployment
    path: spec/template/spec/containers[]/image

0 comments on commit fa7b886

Please sign in to comment.