Skip to content

Commit

Permalink
Unify code structure of training job api (#1300)
Browse files Browse the repository at this point in the history
* Add docs, register in job apis

- Clean up register.go and use reference in groupversion_info.go (generated by kubebuilder) instead
- Add doc.go and register.go for framework missing them
- Remove global tag “+k8s:deepcopy-gen=package,register” and this should be taken care of by controller-gen.

Signed-off-by: Jiaxin Shan <seedjeffwan@gmail.com>

* Generate defaults and openapi for all frameworks

1. Update codegen scripts to generate defaulters and openapi spec for all frameworks
2. Create defaults.go for framework which miss it.

Signed-off-by: Jiaxin Shan <seedjeffwan@gmail.com>

* Add Kind in constants.go

Signed-off-by: Jiaxin Shan <seedjeffwan@gmail.com>

* Update update-codegen to generate openapi-gen

Signed-off-by: Jiaxin Shan <seedjeffwan@gmail.com>

* Run goimports to format the files

* Add Plural and Singular to apis constants.go

Signed-off-by: Jiaxin Shan <seedjeffwan@gmail.com>
  • Loading branch information
Jeffwan authored Jul 14, 2021
1 parent 7604da5 commit ae7ffd6
Show file tree
Hide file tree
Showing 48 changed files with 42,481 additions and 158 deletions.
6 changes: 3 additions & 3 deletions cmd/tf-operator.v1/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ import (

"github.com/kubeflow/common/pkg/util/signals"
"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
v1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
"github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/scheme"
tfjobinformers "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions"
"github.com/kubeflow/tf-operator/pkg/common"
controller "github.com/kubeflow/tf-operator/pkg/controller.v1/tensorflow"
"github.com/kubeflow/tf-operator/pkg/version"
)
Expand Down Expand Up @@ -75,7 +75,7 @@ func Run(opt *options.ServerOption) error {
version.PrintVersionAndExit(apiVersion)
}

namespace := os.Getenv(v1.EnvKubeflowNamespace)
namespace := os.Getenv(common.EnvKubeflowNamespace)
if len(namespace) == 0 {
log.Infof("EnvKubeflowNamespace not set, use default namespace %s",
metav1.NamespaceDefault)
Expand Down Expand Up @@ -232,7 +232,7 @@ func createClientSets(config *restclientset.Config) (
func checkCRDExists(clientset apiextensionclientset.Interface, namespace string) bool {
crd, err := clientset.ApiextensionsV1beta1().
CustomResourceDefinitions().
Get(context.TODO(), v1.TFCRD, metav1.GetOptions{})
Get(context.TODO(), "tfjobs.kubeflow.org", metav1.GetOptions{})

if err != nil {
log.Error(err)
Expand Down
75 changes: 72 additions & 3 deletions hack/update-codegen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
# This shell is used to auto generate some useful tools for k8s, such as lister,
# informer, deepcopy, defaulter and so on.

# Ignore shellcheck for IDEs
# shellcheck disable=SC2116
# shellcheck disable=SC2046
# shellcheck disable=SC2006

set -o errexit
set -o nounset
set -o pipefail
Expand All @@ -35,6 +40,17 @@ fi

echo ">> Using ${CODEGEN_PKG}"

# Grab openapi-gen version from go.mod
OPENAPI_VERSION=$(grep 'k8s.io/kube-openapi' go.sum | awk '{print $2}' | sed 's/\/go.mod//g' | head -1)
OPENAPI_PKG=$(echo `go env GOPATH`"/pkg/mod/k8s.io/kube-openapi@${OPENAPI_VERSION}")

if [[ ! -d ${OPENAPI_PKG} ]]; then
echo "${OPENAPI_PKG} is missing. Running 'go mod download'."
go mod download
fi

echo ">> Using ${OPENAPI_PKG}"

# code-generator does work with go.mod but makes assumptions about
# the project living in `$GOPATH/src`. To work around this and support
# any location; create a temporary directory, use this as an output
Expand All @@ -60,21 +76,74 @@ cd ${SCRIPT_ROOT}
${CODEGEN_PKG}/generate-groups.sh "all" \
github.com/kubeflow/tf-operator/pkg/client github.com/kubeflow/tf-operator/pkg/apis \
tensorflow:v1 \
--output-base "${TEMP_DIR}" \
--output-base "${TEMP_DIR}" \
--go-header-file hack/boilerplate/boilerplate.go.txt

# Notice: The code in code-generator does not generate defaulter by default.
# We need to build binary from vendor cmd folder.
#echo "Building defaulter-gen"
#go build -o defaulter-gen ${CODEGEN_PKG}/cmd/defaulter-gen

# ${GOPATH}/bin/defaulter-gen is automatically built from ${CODEGEN_PKG}/generate-groups.sh
echo "Generating defaulters for tensorflow/v1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 -O zz_generated.defaults --output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 --go-header-file hack/boilerplate/boilerplate.go.txt "$@"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \
-O zz_generated.defaults \
--output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating defaulters for pytorch/v1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \
-O zz_generated.defaults \
--output-package github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating defaulters for mxnet/v1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \
-O zz_generated.defaults \
--output-package github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating defaulters for xgboost/v1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \
-O zz_generated.defaults \
--output-package github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

cd - > /dev/null

# Notice: The code in kube-openapi does not generate defaulter by default.
# We need to build binary from pkg cmd folder.
echo "Building openapi-gen"
go build -o openapi-gen ${OPENAPI_PKG}/cmd/openapi-gen

echo "Generating OpenAPI specification for tensorflow/v1"
${GOPATH}/bin/openapi-gen --report-filename=hack/violation_exception.list --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version --output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 --go-header-file hack/boilerplate/boilerplate.go.txt "$@"
./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version \
--report-filename=hack/violation_exception.list \
--output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating OpenAPI specification for pytorch/v1"
./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version \
--report-filename=hack/violation_exception.list \
--output-package github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating OpenAPI specification for mxnet/v1"
./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version \
--report-filename=hack/violation_exception.list \
--output-package github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating OpenAPI specification for xgboost/v1"
./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version \
--report-filename=hack/violation_exception.list \
--output-package github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

cd - > /dev/null

# Copy everything back.
cp -a "${TEMP_DIR}/${ROOT_PKG}/." "${SCRIPT_ROOT}/"

# Clean up binaries we build for update codegen
rm ./openapi-gen
8 changes: 4 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,19 @@ func main() {
// TODO: We need a general manager. all rest reconciler addsToManager
// Based on the user configuration, we start different controllers
if err = xgboostcontroller.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "XGBoostJob")
setupLog.Error(err, "unable to create controller", "controller", xgboostv1.Kind)
os.Exit(1)
}
if err = pytorchcontroller.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PyTorchJob")
setupLog.Error(err, "unable to create controller", "controller", pytorchv1.Kind)
os.Exit(1)
}
if err = tensorflowcontroller.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "TFJob")
setupLog.Error(err, "unable to create controller", "controller", tensorflowv1.Kind)
os.Exit(1)
}
if err = mxnetcontroller.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "MXJob")
setupLog.Error(err, "unable to create controller", "controller", mxnetv1.Kind)
os.Exit(1)
}
//+kubebuilder:scaffold:builder
Expand Down
9 changes: 6 additions & 3 deletions pkg/apis/mxnet/v1/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ package v1
import commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"

const (
// EnvKubeflowNamespace is ENV for kubeflow namespace specified by user.
EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE"

// DefaultPortName is name of the port used to communicate between scheduler and
// servers & workers.
DefaultPortName = "mxjob-port"
Expand All @@ -15,4 +12,10 @@ const (
DefaultPort = 9091
// DefaultRestartPolicy is default RestartPolicy for MXReplicaSpec.
DefaultRestartPolicy = commonv1.RestartPolicyNever
// Kind is the kind name.
Kind = "MXJob"
// Plural is the Plural for mxJob.
Plural = "mxjobs"
// Singular is the singular for mxJob.
Singular = "mxjob"
)
107 changes: 107 additions & 0 deletions pkg/apis/mxnet/v1/defaults.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1

import (
"strings"

commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// Int32 is a helper routine that allocates a new int32 value
// to store v and returns a pointer to it.
func Int32(v int32) *int32 {
return &v
}

func addDefaultingFuncs(scheme *runtime.Scheme) error {
return RegisterDefaults(scheme)
}

// setDefaultPort sets the default ports for mxnet container.
func setDefaultPort(spec *v1.PodSpec) {
index := 0
for i, container := range spec.Containers {
if container.Name == DefaultContainerName {
index = i
break
}
}

hasMXJobPort := false
for _, port := range spec.Containers[index].Ports {
if port.Name == DefaultPortName {
hasMXJobPort = true
break
}
}
if !hasMXJobPort {
spec.Containers[index].Ports = append(spec.Containers[index].Ports, v1.ContainerPort{
Name: DefaultPortName,
ContainerPort: DefaultPort,
})
}
}

func setDefaultReplicas(spec *commonv1.ReplicaSpec) {
if spec.Replicas == nil {
spec.Replicas = Int32(1)
}
if spec.RestartPolicy == "" {
spec.RestartPolicy = DefaultRestartPolicy
}
}

// setTypeNamesToCamelCase sets the name of all replica types from any case to correct case.
func setTypeNamesToCamelCase(mxJob *MXJob) {
setTypeNameToCamelCase(mxJob, MXReplicaTypeScheduler)
setTypeNameToCamelCase(mxJob, MXReplicaTypeServer)
setTypeNameToCamelCase(mxJob, MXReplicaTypeWorker)
}

// setTypeNameToCamelCase sets the name of the replica type from any case to correct case.
// E.g. from server to Server; from WORKER to Worker.
func setTypeNameToCamelCase(mxJob *MXJob, typ commonv1.ReplicaType) {
for t := range mxJob.Spec.MXReplicaSpecs {
if strings.EqualFold(string(t), string(typ)) && t != typ {
spec := mxJob.Spec.MXReplicaSpecs[t]
delete(mxJob.Spec.MXReplicaSpecs, t)
mxJob.Spec.MXReplicaSpecs[typ] = spec
return
}
}
}

// SetDefaults_MXJob sets any unspecified values to defaults.
func SetDefaults_MXJob(mxjob *MXJob) {
// Set default cleanpod policy to All.
if mxjob.Spec.RunPolicy.CleanPodPolicy == nil {
all := commonv1.CleanPodPolicyAll
mxjob.Spec.RunPolicy.CleanPodPolicy = &all
}

// Update the key of MXReplicaSpecs to camel case.
setTypeNamesToCamelCase(mxjob)

for _, spec := range mxjob.Spec.MXReplicaSpecs {
// Set default replicas to 1.
setDefaultReplicas(spec)
// Set default port to mxnet container.
setDefaultPort(&spec.Template.Spec)
}
}
Loading

0 comments on commit ae7ffd6

Please sign in to comment.