Skip to content

Commit

Permalink
Merge branch 'kubeflow:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
tzabbi authored Jul 30, 2024
2 parents 58bd148 + f226d30 commit 66c0440
Show file tree
Hide file tree
Showing 15 changed files with 134 additions and 76 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/kserve_m2m_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
- name: Create kubeflow namespace
run: kustomize build common/kubeflow-namespace/base | kubectl apply -f -

- name: Install Istio with ext auth
- name: Install Istio with external authentication
run: ./tests/gh-actions/install_istio_with_ext_auth.sh

- name: Install cert-manager
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/linting_bash_python_yaml_files.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Proper linting on Bash, Python, and YAML files

on: [push, pull_request]
on: [pull_request]

jobs:
format_python_files:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/model_registry_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
- name: Create kubeflow namespace
run: kustomize build common/kubeflow-namespace/base | kubectl apply -f -

- name: Install Istio with ext auth
# Call the install script by its exact path (no trailing glob), matching the
# other workflows that run the same script.
- name: Install Istio with external authentication
  run: ./tests/gh-actions/install_istio_with_ext_auth.sh

- name: Install cert-manager
Expand Down Expand Up @@ -61,7 +61,7 @@ jobs:
'http://localhost:8081/api/model_registry/v1alpha3/registered_models?pageSize=100&orderBy=ID&sortOrder=DESC' \
-H 'accept: application/json'
# for these steps below ensure same steps as kserve (ie: Istio with ext auth, cert0manager, knative) so to achieve same setup
# The steps below must mirror the KServe workflow (i.e. Istio with external authentication, cert-manager, Knative) so that both tests run against the same setup.
- name: Port forward Istio gateway
run: |
INGRESS_GATEWAY_SERVICE=$(kubectl get svc --namespace istio-system --selector="app=istio-ingressgateway" --output jsonpath='{.items[0].metadata.name}')
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/notebook_controller_m2m_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- name: Create kubeflow namespace
run: kustomize build common/kubeflow-namespace/base | kubectl apply -f -

- name: Install Istio with ext auth
- name: Install Istio with external authentication
run: ./tests/gh-actions/install_istio_with_ext_auth.sh

- name: Install kubeflow-istio-resources
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pipeline_run_from_notebook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
- name: Install kustomize
run: ./tests/gh-actions/install_kustomize.sh

- name: Install Istio with ext auth
- name: Install Istio with external authentication
run: ./tests/gh-actions/install_istio_with_ext_auth.sh

- name: Install cert-manager
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pipeline_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
- name: Install kubectl
run: ./tests/gh-actions/install_kubectl.sh

- name: Install Istio with ext auth
- name: Install Istio with external authentication
run: ./tests/gh-actions/install_istio_with_ext_auth.sh

- name: Install cert-manager
Expand Down
43 changes: 0 additions & 43 deletions .github/workflows/train_operator_test.yaml

This file was deleted.

57 changes: 57 additions & 0 deletions .github/workflows/training_operator_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# CI workflow: creates a KinD cluster, installs the Kubeflow prerequisites,
# deploys the Training Operator manifests and runs a sample PyTorchJob.
name: Build & Apply Training Operator manifests in KinD
on:
  pull_request:
    # Run only when a file this workflow consumes is modified. The list covers
    # the helper scripts executed by the steps below and the PyTorchJob
    # manifest that the final step applies.
    paths:
      - .github/workflows/training_operator_test.yaml
      - apps/training-operator/upstream/**
      - tests/gh-actions/kind-cluster.yaml
      - tests/gh-actions/install_kind.sh
      - tests/gh-actions/install_kustomize.sh
      - tests/gh-actions/install_kubectl.sh
      - tests/gh-actions/install_istio_with_ext_auth.sh
      - tests/gh-actions/install_cert_manager.sh
      - tests/gh-actions/install_multi_tenancy.sh
      - tests/gh-actions/install_training_operator.sh
      - common/istio*/**
      - tests/gh-actions/kf-objects/training_operator_job.yaml

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Cluster and tooling setup.
      - name: Install KinD
        run: ./tests/gh-actions/install_kind.sh

      - name: Create KinD Cluster
        run: kind create cluster --config tests/gh-actions/kind-cluster.yaml

      - name: Install kustomize
        run: ./tests/gh-actions/install_kustomize.sh

      - name: Install kubectl
        run: ./tests/gh-actions/install_kubectl.sh

      # Platform components installed before the Training Operator.
      - name: Install Istio with external authentication
        run: ./tests/gh-actions/install_istio_with_ext_auth.sh

      - name: Install cert-manager
        run: ./tests/gh-actions/install_cert_manager.sh

      - name: Create kubeflow namespace
        run: kustomize build common/kubeflow-namespace/base | kubectl apply -f -

      - name: Install KF Multi Tenancy
        run: ./tests/gh-actions/install_multi_tenancy.sh

      - name: Install kubeflow-istio-resources
        run: kustomize build common/istio-1-22/kubeflow-istio-resources/base | kubectl apply -f -

      - name: Create KF Profile
        run: kustomize build common/user-namespace/base | kubectl apply -f -

      - name: Install training operator
        run: ./tests/gh-actions/install_training_operator.sh

      # Submit the sample job in the kubeflow-user-example-com namespace and
      # wait up to 600s for it to report the Succeeded condition.
      - name: Create a PyTorchJob
        run: |
          kubectl create -f tests/gh-actions/kf-objects/training_operator_job.yaml -n kubeflow-user-example-com
          kubectl wait --for=condition=Succeeded PyTorchJob pytorch-simple -n kubeflow-user-example-com --timeout 600s
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,19 @@ If you absolutely need to expose Kubeflow over HTTP, you can disable the `Secure

---

### Change default user name

For security reasons, we don't want to use the default username and email for the default Kubeflow user when installing in security-sensitive environments. Instead, you should define your own username and email before deploying. To define them for the default user:

1. Edit `common/dex/overlays/oauth2-proxy/config-map.yaml` and fill in the relevant fields with your email and preferred username:

```yaml
...
staticPasswords:
- email: <REPLACE_WITH_YOUR_EMAIL>
username: <REPLACE_WITH_PREFERRED_USERNAME>
```
### Change default user password
For security reasons, we don't want to use the default password for the default Kubeflow user when installing in security-sensitive environments. Instead, you should define your own password and apply it either **before creating the cluster** or **after creating the cluster**.
Expand Down
8 changes: 4 additions & 4 deletions common/networkpolicies/base/training-operator-webhook.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ spec:
# https://www.elastic.co/guide/en/cloud-on-k8s/1.1/k8s-webhook-network-policies.html
# The kubernetes api server must reach the webhook
ingress:
- ports:
- protocol: TCP
port: 9443
- ports:
- protocol: TCP
port: 9443
policyTypes:
- Ingress
- Ingress
3 changes: 3 additions & 0 deletions tests/gh-actions/install_multi_tenancy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ kubectl -n kubeflow wait --for=condition=Ready pods -l kustomize.component=profi

echo "Installing Multitenancy Kubeflow Roles"
kustomize build common/kubeflow-roles/base | kubectl apply -f -

echo "Installing Multitenancy Network policies"
kustomize build common/networkpolicies/base | kubectl apply -f -
9 changes: 9 additions & 0 deletions tests/gh-actions/install_training_operator.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Builds the Training Operator "kubeflow" overlay with kustomize, applies it
# with kubectl, then waits for the cluster's pods to become Ready.
set -euo pipefail

echo "Installing training operator ..."

# The overlay path is given relative to the upstream directory.
cd apps/training-operator/upstream

kustomize build overlays/kubeflow | kubectl apply -f -

# Wait up to 600s for every pod in every namespace to be Ready; pods whose
# phase is already Succeeded are excluded by the field selector.
kubectl wait pods \
  --all \
  --all-namespaces \
  --for=condition=Ready \
  --field-selector=status.phase!=Succeeded \
  --timeout=600s

# Switch back to the directory the script was started from.
cd -
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ spec:
spec:
containers:
- name: test
image: kubeflownotebookswg/jupyter-scipy:v1.9.0-rc.1
image: kubeflownotebookswg/jupyter-scipy:v1.9.0
imagePullPolicy: IfNotPresent
resources:
limits:
Expand Down
21 changes: 0 additions & 21 deletions tests/gh-actions/kf-objects/tfjob.yaml

This file was deleted.

40 changes: 40 additions & 0 deletions tests/gh-actions/kf-objects/training_operator_job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Sample PyTorchJob with one Master and one Worker replica, adapted from
# https://github.com/kubeflow/training-operator/blob/master/examples/pytorch/simple.yaml
# Istio sidecar injection is disabled on both replicas, as stated in
# https://www.kubeflow.org/docs/components/training/user-guides/pytorch/
apiVersion: kubeflow.org/v1
kind: PyTorchJob
metadata:
  name: pytorch-simple
spec:
  pytorchReplicaSpecs:
    Master:
      replicas: 1
      restartPolicy: OnFailure
      template:
        metadata:
          annotations:
            # Must stay a string: annotation values are strings, not booleans.
            sidecar.istio.io/inject: 'false'
        spec:
          containers:
            - name: pytorch
              image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
              imagePullPolicy: Always
              command:
                - python3
                - /opt/pytorch-mnist/mnist.py
                - '--epochs=1'
    Worker:
      replicas: 1
      restartPolicy: OnFailure
      template:
        metadata:
          annotations:
            # Must stay a string: annotation values are strings, not booleans.
            sidecar.istio.io/inject: 'false'
        spec:
          containers:
            - name: pytorch
              image: docker.io/kubeflowkatib/pytorch-mnist:v1beta1-45c5727
              imagePullPolicy: Always
              command:
                - python3
                - /opt/pytorch-mnist/mnist.py
                - '--epochs=1'

0 comments on commit 66c0440

Please sign in to comment.