Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable test for v1alpha2 #465

Merged
merged 6 commits into from
Apr 26, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ ADD . /go/src/github.com/kubeflow/katib
WORKDIR /go/src/github.com/kubeflow/katib/cmd/katib-controller
# Build
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o studyjobcontroller ./v1alpha1
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o katib-controller.v1alpha2 ./v1alpha2
# Copy the controller-manager into a thin image
FROM alpine:3.7
WORKDIR /app
COPY --from=build-env /go/src/github.com/kubeflow/katib/cmd/katib-controller/studyjobcontroller .
COPY --from=build-env /go/src/github.com/kubeflow/katib/cmd/katib-controller/katib-controller.v1alpha2 .
hougangliu marked this conversation as resolved.
Show resolved Hide resolved
ENTRYPOINT ["./studyjobcontroller"]
14 changes: 14 additions & 0 deletions cmd/katib-controller/v1alpha2/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Stage 1: compile the v1alpha2 controller binary with the Go toolchain.
FROM golang:alpine AS build-env

# Copy the repository into GOPATH so the kubeflow/katib import paths resolve.
# COPY is used rather than ADD because only a plain local copy is needed
# (no archive extraction or URL fetching).
COPY . /go/src/github.com/kubeflow/katib

WORKDIR /go/src/github.com/kubeflow/katib/cmd/katib-controller
# Build the ./v1alpha2 package for linux/amd64 with cgo disabled.
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o katib-controller ./v1alpha2

# Stage 2: copy only the compiled binary into a thin runtime image.
FROM alpine:3.7
WORKDIR /app
COPY --from=build-env /go/src/github.com/kubeflow/katib/cmd/katib-controller/katib-controller .
ENTRYPOINT ["./katib-controller"]
59 changes: 59 additions & 0 deletions examples/v1alpha2/random-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
apiVersion: "kubeflow.org/v1alpha2"
kind: Experiment
metadata:
namespace: kubeflow
name: random-experiment
spec:
parallelTrialCount: 3
maxTrialCount: 12
maxFailedTrialCount: 3
objective:
type: maximize
goal: 0.99
objectiveMetricsName: Validation-accuracy
additionalMetricsNames:
- accuracy
algorithm:
algorithmName: random
trialTemplate:
goTemplate:
rawTemplate: |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.WorkerID}}
namespace: kubeflow
spec:
template:
spec:
containers:
- name: {{.WorkerID}}
image: katib/mxnet-mnist-example
command:
- "python"
- "/mxnet/example/image-classification/train_mnist.py"
- "--batch-size=64"
{{- with .HyperParameters}}
{{- range .}}
- "{{.Name}}={{.Value}}"
{{- end}}
{{- end}}
restartPolicy: Never
parameters:
- name: --lr
parametertype: double
hougangliu marked this conversation as resolved.
Show resolved Hide resolved
feasibleSpace:
min: "0.01"
max: "0.03"
- name: --num-layers
parametertype: int
hougangliu marked this conversation as resolved.
Show resolved Hide resolved
feasibleSpace:
min: "2"
max: "5"
- name: --optimizer
parametertype: categorical
hougangliu marked this conversation as resolved.
Show resolved Hide resolved
feasibleSpace:
list:
- sgd
- adam
- ftrl
4 changes: 4 additions & 0 deletions manifests/v1alpha2/0-namespace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Namespace referenced by the other v1alpha2 Katib manifests.
apiVersion: v1
kind: Namespace
metadata:
  name: kubeflow
25 changes: 25 additions & 0 deletions manifests/v1alpha2/katib-controller/crds.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Registers the namespaced Experiment kind under kubeflow.org/v1alpha2.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  # CRD names must take the form <plural>.<group>.
  name: experiments.kubeflow.org
spec:
  group: kubeflow.org
  version: v1alpha2
  scope: Namespaced
  names:
    kind: Experiment
    singular: experiment
    plural: experiments
hougangliu marked this conversation as resolved.
Show resolved Hide resolved
---
# Registers the namespaced Trial kind under kubeflow.org/v1alpha2.
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  # CRD names must take the form <plural>.<group>.
  name: trials.kubeflow.org
spec:
  group: kubeflow.org
  version: v1alpha2
  scope: Namespaced
  names:
    kind: Trial
    singular: trial
    plural: trials
31 changes: 31 additions & 0 deletions manifests/v1alpha2/katib-controller/katib-controller.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Runs one replica of the katib-controller image in the kubeflow namespace
# under the katib-controller service account.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: katib-controller
  namespace: kubeflow
  labels:
    app: katib-controller
spec:
  replicas: 1
  selector:
    matchLabels:
      app: katib-controller
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: katib-controller
    spec:
      serviceAccountName: katib-controller
      containers:
        - name: katib-controller
          image: katib/katib-controller
          imagePullPolicy: Always
          ports:
            - containerPort: 443
              name: webhook
              protocol: TCP
          env:
            # Populated from the pod's own namespace via the downward API.
            - name: KATIB_CORE_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
76 changes: 76 additions & 0 deletions manifests/v1alpha2/katib-controller/rbac.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Cluster-wide permissions for the katib-controller service account.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: katib-controller
rules:
  # Core-group objects: full access.
  - apiGroups:
      - ""
    resources:
      - configmaps
      - serviceaccounts
      - services
    verbs:
      - "*"
  # Pods, including their log and status subresources: full access.
  - apiGroups:
      - ""
    resources:
      - pods
      - pods/log
      - pods/status
    verbs:
      - "*"
  - apiGroups:
      - batch
    resources:
      - jobs
      - cronjobs
    verbs:
      - "*"
  # CRDs: create and get only.
  - apiGroups:
      - apiextensions.k8s.io
    resources:
      - customresourcedefinitions
    verbs:
      - create
      - get
  - apiGroups:
      - admissionregistration.k8s.io
    resources:
      - validatingwebhookconfigurations
    verbs:
      - "*"
  # Katib v1alpha2 resources and their status subresources.
  - apiGroups:
      - kubeflow.org
    resources:
      - experiments
      - experiments/status
      - trials
      - trials/status
    verbs:
      - "*"
  - apiGroups:
      - kubeflow.org
    resources:
      - tfjobs
      - pytorchjobs
    verbs:
      - "*"
---
# Identity referenced by serviceAccountName in the controller Deployment.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: katib-controller
  namespace: kubeflow
---
# Binds the ClusterRole above to the kubeflow/katib-controller service account.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: katib-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: katib-controller
subjects:
  - kind: ServiceAccount
    name: katib-controller
    namespace: kubeflow
12 changes: 12 additions & 0 deletions manifests/v1alpha2/katib-controller/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Exposes port 443 of the pods labelled app=katib-controller.
apiVersion: v1
kind: Service
metadata:
  name: katib-controller
  namespace: kubeflow
spec:
  ports:
    - port: 443
      protocol: TCP
      targetPort: 443
  selector:
    app: katib-controller
20 changes: 20 additions & 0 deletions manifests/v1alpha2/katib-controller/trialTemplateConfigmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: trial-template
namespace: kubeflow
data:
defaultTrialTemplate.yaml : |-
apiVersion: batch/v1
kind: Job
metadata:
name: {{.WorkerID}}
hougangliu marked this conversation as resolved.
Show resolved Hide resolved
namespace: {{.NameSpace}}
spec:
template:
spec:
containers:
- name: {{.WorkerID}}
hougangliu marked this conversation as resolved.
Show resolved Hide resolved
image: alpine
restartPolicy: Never

24 changes: 19 additions & 5 deletions prow_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,31 @@
# see kubeflow/testing/py/run_e2e_workflow.py
workflows:
- app_dir: kubeflow/katib/test/workflows
component: workflows
name: e2e
component: workflows-v1alpha1
name: e2e-v1alpha1
job_types:
- presubmit
params:
registry: "gcr.io/kubeflow-ci"
# The postsubmit run publishes the docker images to gcr.io/kubeflow-images-public
- app_dir: kubeflow/katib/test/workflows
component: workflows
name: e2e-release
component: workflows-v1alpha1
name: e2e-v1alpha1-release
job_types:
- postsubmit
params:
registry: "gcr.io/kubeflow-images-public"
registry: "gcr.io/kubeflow-images-public"
- app_dir: kubeflow/katib/test/workflows
component: workflows-v1alpha2
name: e2e-v1alpha2
job_types:
- presubmit
params:
registry: "gcr.io/kubeflow-ci"
- app_dir: kubeflow/katib/test/workflows
component: workflows-v1alpha2
name: e2e-v1alpha2-release
job_types:
- postsubmit
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a postsubmit job for v1alpha2 if the same image is built?

Copy link
Member Author

@hougangliu hougangliu Apr 25, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future, I am not sure whether every image will contain both versions (at the very least, the metricsCollectors may be dropped). Let's keep it as it is for now and update it later if needed.

params:
registry: "gcr.io/kubeflow-images-public"
2 changes: 1 addition & 1 deletion scripts/v1alpha1/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ cd ${SCRIPT_ROOT}

echo "Building core image..."
docker build -t ${PREFIX}/vizier-core -f ${CMD_PREFIX}/manager/Dockerfile .
docker build -t ${PREFIX}/studyjob-controller -f ${CMD_PREFIX}/katib-controller/Dockerfile .
docker build -t ${PREFIX}/studyjob-controller -f ${CMD_PREFIX}/katib-controller/v1alpha1/Dockerfile .
docker build -t ${PREFIX}/metrics-collector -f ${CMD_PREFIX}/metricscollector/Dockerfile .
docker build -t ${PREFIX}/tfevent-metrics-collector -f ${CMD_PREFIX}/tfevent-metricscollector/Dockerfile .

Expand Down
29 changes: 29 additions & 0 deletions scripts/v1alpha2/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/bin/bash

# Copyright 2018 The Kubeflow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -o errexit
set -o nounset
set -o pipefail

# Image name prefix and the directory that holds the component Dockerfiles.
PREFIX="katib"
CMD_PREFIX="cmd"

# Repository root, resolved relative to this script (two directories up).
# Expansions are quoted so a checkout path containing spaces or glob
# characters is not word-split.
SCRIPT_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."

# The build context (".") must be the repository root.
cd "${SCRIPT_ROOT}"

echo "Building core image..."
docker build -t "${PREFIX}/katib-controller" -f "${CMD_PREFIX}/katib-controller/v1alpha2/Dockerfile" .
31 changes: 31 additions & 0 deletions scripts/v1alpha2/deploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash

# Copyright 2018 The Kubeflow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline; xtrace echoes each command as it runs.
set -o errexit
set -o nounset
set -o pipefail
set -o xtrace

# Repository root, resolved relative to this script (two directories up).
# Expansions are quoted so a checkout path containing spaces or glob
# characters is not word-split.
SCRIPT_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."

cd "${SCRIPT_ROOT}"
# The namespace is applied first because the namespaced manifests below
# target it; the controller Deployment is applied last.
kubectl apply -f manifests/v1alpha2/0-namespace.yaml
kubectl apply -f manifests/v1alpha2/katib-controller/crds.yaml
kubectl apply -f manifests/v1alpha2/katib-controller/rbac.yaml
kubectl apply -f manifests/v1alpha2/katib-controller/service.yaml
kubectl apply -f manifests/v1alpha2/katib-controller/trialTemplateConfigmap.yaml
kubectl apply -f manifests/v1alpha2/katib-controller/katib-controller.yaml
# Return to the caller's directory, suppressing the path that `cd -` prints.
cd - > /dev/null
2 changes: 1 addition & 1 deletion test/scripts/v1alpha1/build-studyjobctr.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ cp -r pkg ${GO_DIR}/pkg
cp -r vendor ${GO_DIR}/vendor

cd ${GO_DIR}
cp cmd/katib-controller/Dockerfile .
cp cmd/katib-controller/v1alpha1/Dockerfile .
gcloud builds submit . --tag=${REGISTRY}/${REPO_NAME}/studyjob-controller:${VERSION} --project=${PROJECT}
gcloud container images add-tag --quiet ${REGISTRY}/${REPO_NAME}/studyjob-controller:${VERSION} ${REGISTRY}/${REPO_NAME}/studyjob-controller:latest --verbosity=info

Expand Down
Loading