Skip to content

Commit

Permalink
Extend Katib GH action to also create an experiment and assert it suc…
Browse files Browse the repository at this point in the history
…ceeds (kubeflow#2249)

* Extend Katib GH action to also create an experiment and assert it succeeds. (kubeflow#2248)

* Rearrange folders for KF objects in tests.
  • Loading branch information
NickLoukas authored and kevin85421 committed Feb 28, 2023
1 parent edb8328 commit a477b6f
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/katib_kind_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,9 @@ jobs:
kubectl create ns kubeflow
kustomize build installs/katib-with-kubeflow | kubectl apply -f -
kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 180s
# Smoke-test Katib end to end: submit the test Experiment and fail the job
# unless the Experiment and all of its Trials reach the Succeeded condition.
#
# The manifest lives under tests/gh-actions/kf-objects/, so that is the path
# applied here.
#
# The Experiment is waited on first because it exists as soon as `apply`
# returns. The applied manifest defines no Trial objects, so they only appear
# later, and `kubectl wait --all` exits with an error when nothing matches
# the selector yet.
- name: Create katib experiment
  run: |
    kubectl apply -f tests/gh-actions/kf-objects/katib_test.yaml
    kubectl wait --for=condition=Succeeded experiments.kubeflow.org -n kubeflow-user --all --timeout 300s
    kubectl wait --for=condition=Succeeded trials.kubeflow.org -n kubeflow-user --all --timeout 300s
78 changes: 78 additions & 0 deletions tests/gh-actions/kf-objects/katib_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
---
# Katib Experiment applied by the GitHub Actions Katib test.
# It runs a grid search over the parameters below and maximizes
# Validation-accuracy, with the trial budget capped at a single trial.
apiVersion: kubeflow.org/v1beta1
kind: Experiment
metadata:
  name: grid-example
  namespace: kubeflow-user
spec:
  # Trial budget: one trial in parallel, one trial in total, and a
  # failed-trial limit of one.
  parallelTrialCount: 1
  maxTrialCount: 1
  maxFailedTrialCount: 1
  resumePolicy: LongRunning

  # Search strategy and optimization target.
  algorithm:
    algorithmName: grid
  objective:
    type: maximize
    goal: 0.80
    objectiveMetricName: Validation-accuracy
    additionalMetricNames:
      - Train-accuracy
    metricStrategies:
      - name: Validation-accuracy
        value: max
      - name: Train-accuracy
        value: max

  # Metrics are collected with the StdOut collector.
  metricsCollectorSpec:
    collector:
      kind: StdOut

  # Search space. Bounds are quoted so they stay strings instead of being
  # parsed as YAML numbers.
  parameters:
    - name: lr
      parameterType: double
      feasibleSpace:
        min: '0.001'
        max: '0.01'
        step: '0.001'
    - name: num-layers
      parameterType: int
      feasibleSpace:
        min: '2'
        max: '3'
    - name: optimizer
      parameterType: categorical
      feasibleSpace:
        list:
          - adam

  trialTemplate:
    primaryContainerName: training-container
    # Condition expressions are quoted because they contain `#` and `"`.
    successCondition: 'status.conditions.#(type=="Complete")#|#(status=="True")#'
    failureCondition: 'status.conditions.#(type=="Failed")#|#(status=="True")#'
    # Maps each ${trialParameters.<name>} placeholder used in trialSpec to a
    # search-space parameter via `reference`.
    trialParameters:
      - name: learningRate
        reference: lr
        description: Learning rate for the training model
      - name: numberLayers
        reference: num-layers
        description: Number of training model layers
      - name: optimizer
        reference: optimizer
        description: Training model optimizer (sdg, adam or ftrl)
    # Each trial is a batch/v1 Job running the MXNet MNIST training script.
    trialSpec:
      apiVersion: batch/v1
      kind: Job
      spec:
        template:
          metadata:
            labels:
              # Quoted: the label value must be the string 'false', not a
              # YAML boolean.
              sidecar.istio.io/inject: 'false'
          spec:
            restartPolicy: Never
            containers:
              - name: training-container
                image: docker.io/kubeflowkatib/mxnet-mnist:latest
                command:
                  - python3
                  - /opt/mxnet-mnist/mnist.py
                  - '--batch-size=64'
                  - '--lr=${trialParameters.learningRate}'
                  - '--num-layers=${trialParameters.numberLayers}'
                  - '--optimizer=${trialParameters.optimizer}'

0 comments on commit a477b6f

Please sign in to comment.