Extend Katib GH action to also create an experiment and assert it succeeds (kubeflow#2249)

* Extend Katib GH action to also create an experiment and assert it succeeds. (kubeflow#2248)
* Rearrange folders for KF objects in tests.
juliusvonkohout · Feb 28, 2023 · a477b6f · a477b6f
1 parent edb8328
commit a477b6f
Show file tree

Hide file tree

Showing 2 changed files with 84 additions and 0 deletions.
diff --git a/.github/workflows/katib_kind_test.yaml b/.github/workflows/katib_kind_test.yaml
@@ -32,3 +32,9 @@ jobs:
         kubectl create ns kubeflow
         kustomize build installs/katib-with-kubeflow | kubectl apply -f -
         kubectl wait --for=condition=Ready pods --all --all-namespaces --timeout 180s
+
+    - name: Create katib experiment  # apply the test Experiment, then assert that it and its trials succeed
+      run: |  # Experiment is awaited first: trials are created later by Katib and 'kubectl wait' errors if none exist
+        kubectl apply -f tests/gh-actions/kf-objects/katib_test.yaml  # NOTE(review): assumes ns kubeflow-user exists - confirm
+        kubectl wait --for=condition=Succeeded experiments.kubeflow.org -n kubeflow-user --all --timeout 300s
+        kubectl wait --for=condition=Succeeded trials.kubeflow.org -n kubeflow-user --all --timeout 300s
diff --git a/tests/gh-actions/kf-objects/katib_test.yaml b/tests/gh-actions/kf-objects/katib_test.yaml
@@ -0,0 +1,78 @@
+---
+apiVersion: kubeflow.org/v1beta1
+kind: Experiment  # Katib Experiment that the CI workflow applies and waits on
+metadata:
+  name: grid-example
+  namespace: kubeflow-user
+spec:
+  parameters:  # search space; each entry is referenced from trialTemplate.trialParameters
+    - name: lr
+      parameterType: double
+      feasibleSpace:
+        max: '0.01'
+        min: '0.001'
+        step: '0.001'
+    - name: num-layers
+      parameterType: int
+      feasibleSpace:
+        max: '3'
+        min: '2'
+    - name: optimizer
+      parameterType: categorical
+      feasibleSpace:
+        list:
+          - adam
+  objective:  # maximize Validation-accuracy, with 0.80 as the goal value
+    type: maximize
+    goal: 0.80
+    objectiveMetricName: Validation-accuracy
+    additionalMetricNames:
+      - Train-accuracy
+    metricStrategies:
+      - name: Validation-accuracy
+        value: max
+      - name: Train-accuracy
+        value: max
+  algorithm:
+    algorithmName: grid
+  trialTemplate:
+    trialSpec:  # batch/v1 Job template; the ${trialParameters.*} placeholders are declared under trialParameters
+      apiVersion: batch/v1
+      kind: Job
+      spec:
+        template:
+          metadata:
+            labels:
+              sidecar.istio.io/inject: 'false'  # opt the trial pod out of Istio sidecar injection
+          spec:
+            containers:
+              - command:
+                  - python3
+                  - /opt/mxnet-mnist/mnist.py
+                  - '--batch-size=64'
+                  - '--lr=${trialParameters.learningRate}'
+                  - '--num-layers=${trialParameters.numberLayers}'
+                  - '--optimizer=${trialParameters.optimizer}'
+                image: docker.io/kubeflowkatib/mxnet-mnist:latest  # NOTE(review): floating tag; consider pinning
+                name: training-container
+            restartPolicy: Never
+    trialParameters:  # maps each placeholder name to a search-space parameter via 'reference'
+      - name: learningRate
+        description: Learning rate for the training model
+        reference: lr
+      - name: numberLayers
+        description: Number of training model layers
+        reference: num-layers
+      - name: optimizer
+        description: Training model optimizer (sgd, adam or ftrl)
+        reference: optimizer
+    primaryContainerName: training-container  # matches the container name in trialSpec
+    successCondition: 'status.conditions.#(type=="Complete")#|#(status=="True")#'  # quoted: contains '#' and '"'
+    failureCondition: 'status.conditions.#(type=="Failed")#|#(status=="True")#'  # quoted: contains '#' and '"'
+  parallelTrialCount: 1
+  maxTrialCount: 1  # only one trial is run in total
+  maxFailedTrialCount: 1
+  metricsCollectorSpec:
+    collector:
+      kind: StdOut  # metrics are collected from the training container's standard output
+  resumePolicy: LongRunning