Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specify service account for kaniko jobs #352

Merged
merged 19 commits into from
Feb 24, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions api/cmd/api/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB
ContextSubPath: cfg.ImageBuilderConfig.ContextSubPath,
BuildTimeoutDuration: timeout,
KanikoImage: cfg.ImageBuilderConfig.KanikoImage,
KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount,
Tolerations: cfg.ImageBuilderConfig.Tolerations,
NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors,
MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry,
Expand All @@ -157,6 +158,7 @@ func initImageBuilder(cfg *config.Config) (webserviceBuilder imagebuilder.ImageB
ContextSubPath: cfg.ImageBuilderConfig.PredictionJobContextSubPath,
BuildTimeoutDuration: timeout,
KanikoImage: cfg.ImageBuilderConfig.KanikoImage,
KanikoServiceAccount: cfg.ImageBuilderConfig.KanikoServiceAccount,
Tolerations: cfg.ImageBuilderConfig.Tolerations,
NodeSelectors: cfg.ImageBuilderConfig.NodeSelectors,
MaximumRetry: cfg.ImageBuilderConfig.MaximumRetry,
Expand Down
1 change: 1 addition & 0 deletions api/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ type ImageBuilderConfig struct {
DockerRegistry string `envconfig:"IMG_BUILDER_DOCKER_REGISTRY"`
BuildTimeout string `envconfig:"IMG_BUILDER_TIMEOUT" default:"10m"`
KanikoImage string `envconfig:"IMG_BUILDER_KANIKO_IMAGE" default:"gcr.io/kaniko-project/executor:v1.6.0"`
KanikoServiceAccount string `envconfig:"IMG_BUILDER_KANIKO_SERVICE_ACCOUNT"`
// How long to keep the image building job resource in the Kubernetes cluster. Default: 2 days (48 hours).
Retention time.Duration `envconfig:"IMG_BUILDER_RETENTION" default:"48h"`
Tolerations Tolerations `envconfig:"IMG_BUILDER_TOLERATIONS"`
Expand Down
2 changes: 2 additions & 0 deletions api/pkg/imagebuilder/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ type Config struct {
BuildTimeoutDuration time.Duration
// Kaniko docker image
KanikoImage string
// Kaniko kubernetes service account
KanikoServiceAccount string
// Tolerations for Jobs Specification
Tolerations []v1.Toleration
// Node Selectors for Jobs Specification
Expand Down
71 changes: 43 additions & 28 deletions api/pkg/imagebuilder/imagebuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ func (c *imageBuilder) createKanikoJobSpec(project mlp.Project, model *models.Mo
fmt.Sprintf("--context=%s", baseImageTag.BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", version.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", baseImageTag.ImageName),
fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath),
fmt.Sprintf("--destination=%s", imageRef),
"--cache=true",
"--single-snapshot",
Expand All @@ -364,8 +363,39 @@ func (c *imageBuilder) createKanikoJobSpec(project mlp.Project, model *models.Mo
}

activeDeadlineSeconds := int64(c.config.BuildTimeoutDuration / time.Second)
var volume []v1.Volume
var volumeMount []v1.VolumeMount
var envVar []v1.EnvVar

// If kaniko service account is not set, use kaniko secret
if c.config.KanikoServiceAccount == "" {
kanikoArgs = append(kanikoArgs,
fmt.Sprintf("--build-arg=%s=%s", gacEnvKey, saFilePath))
volume = []v1.Volume{
{
Name: kanikoSecretName,
VolumeSource: v1.VolumeSource{
Secret: &v1.SecretVolumeSource{
SecretName: kanikoSecretName,
},
},
},
}
volumeMount = []v1.VolumeMount{
{
Name: kanikoSecretName,
MountPath: "/secret",
},
}
envVar = []v1.EnvVar{
{
Name: gacEnvKey,
Value: saFilePath,
},
}
}

return &batchv1.Job{
job := &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: kanikoPodName,
Namespace: c.config.BuildNamespace,
Expand All @@ -385,41 +415,26 @@ func (c *imageBuilder) createKanikoJobSpec(project mlp.Project, model *models.Mo
RestartPolicy: v1.RestartPolicyNever,
Containers: []v1.Container{
{
Name: containerName,
Image: c.config.KanikoImage,
Args: kanikoArgs,
VolumeMounts: []v1.VolumeMount{
{
Name: kanikoSecretName,
MountPath: "/secret",
},
},
Env: []v1.EnvVar{
{
Name: gacEnvKey,
Value: saFilePath,
},
},
Name: containerName,
Image: c.config.KanikoImage,
Args: kanikoArgs,
VolumeMounts: volumeMount,
Env: envVar,
Resources: v1.ResourceRequirements{
Requests: defaultResourceRequests,
},
TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
},
},
Volumes: []v1.Volume{
{
Name: kanikoSecretName,
VolumeSource: v1.VolumeSource{
Secret: &v1.SecretVolumeSource{
SecretName: kanikoSecretName,
},
},
},
},
Volumes: volume,
Tolerations: c.config.Tolerations,
NodeSelector: c.config.NodeSelectors,
},
},
},
}, nil
}
if c.config.KanikoServiceAccount != "" {
job.Spec.Template.Spec.ServiceAccountName = c.config.KanikoServiceAccount
}
return job, nil
}
141 changes: 133 additions & 8 deletions api/pkg/imagebuilder/imagebuilder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,51 @@ var (
},
MaximumRetry: jobBackOffLimit,
}
configWithSa = Config{
BuildNamespace: buildNamespace,
ContextSubPath: "python/pyfunc-server",
BaseImages: cfg.BaseImageConfigs{
"3.7.*": cfg.BaseImageConfig{
ImageName: "gojek/base-image:1",
BuildContextURI: buildContextURL,
DockerfilePath: "./Dockerfile",
},
"3.8.*": cfg.BaseImageConfig{
ImageName: "gojek/base-image:2",
BuildContextURI: buildContextURL,
DockerfilePath: "./Dockerfile",
},
"3.9.*": cfg.BaseImageConfig{
ImageName: "gojek/base-image:3",
BuildContextURI: buildContextURL,
DockerfilePath: "./Dockerfile",
},
"3.10.*": cfg.BaseImageConfig{
ImageName: "gojek/base-image:4",
BuildContextURI: buildContextURL,
DockerfilePath: "./Dockerfile",
},
},
DockerRegistry: dockerRegistry,
BuildTimeoutDuration: timeout,
ClusterName: "my-cluster",
GcpProject: "test-project",
Environment: "dev",
KanikoImage: "gcr.io/kaniko-project/executor:v1.1.0",
Tolerations: []v1.Toleration{
{
Key: "image-build-job",
Value: "true",
Operator: v1.TolerationOpEqual,
Effect: v1.TaintEffectNoSchedule,
},
},
NodeSelectors: map[string]string{
"cloud.google.com/gke-nodepool": "image-building-job-node-pool",
},
MaximumRetry: jobBackOffLimit,
KanikoServiceAccount: "kaniko-sa",
}
)

func TestBuildImage(t *testing.T) {
Expand Down Expand Up @@ -192,11 +237,11 @@ func TestBuildImage(t *testing.T) {
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--context-sub-path=%s", config.ContextSubPath),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
},
VolumeMounts: []v1.VolumeMount{
{
Expand Down Expand Up @@ -246,6 +291,86 @@ func TestBuildImage(t *testing.T) {
wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID),
config: config,
},
{
name: "success: no existing job, use K8s Service account",
args: args{
project: project,
model: model,
version: modelVersion,
},
existingJob: nil,
wantCreateJob: &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s-%s", project.Name, model.Name, modelVersion.ID),
Namespace: config.BuildNamespace,
Labels: map[string]string{
"gojek.com/app": model.Name,
"gojek.com/orchestrator": "merlin",
"gojek.com/stream": project.Stream,
"gojek.com/team": project.Team,
"gojek.com/environment": config.Environment,
"gojek.com/component": "image-builder",
},
},
Spec: batchv1.JobSpec{
Completions: &jobCompletions,
BackoffLimit: &jobBackOffLimit,
TTLSecondsAfterFinished: &jobTTLSecondAfterComplete,
ActiveDeadlineSeconds: &timeoutInSecond,
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"gojek.com/app": model.Name,
"gojek.com/orchestrator": "merlin",
"gojek.com/stream": project.Stream,
"gojek.com/team": project.Team,
"gojek.com/environment": config.Environment,
"gojek.com/component": "image-builder",
},
},
Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyNever,
Containers: []v1.Container{
{
Name: containerName,
Image: "gcr.io/kaniko-project/executor:v1.1.0",
Args: []string{
fmt.Sprintf("--dockerfile=%s", config.BaseImages[modelVersion.PythonVersion].DockerfilePath),
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--context-sub-path=%s", config.ContextSubPath),
},
Resources: v1.ResourceRequirements{
Requests: defaultResourceRequests,
},
TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
},
},
Tolerations: []v1.Toleration{
{
Key: "image-build-job",
Operator: v1.TolerationOpEqual,
Value: "true",
Effect: v1.TaintEffectNoSchedule,
},
},
NodeSelector: map[string]string{
"cloud.google.com/gke-nodepool": "image-building-job-node-pool",
},
ServiceAccountName: "kaniko-sa",
},
},
},
Status: batchv1.JobStatus{},
},
wantDeleteJobName: "",
wantImageRef: fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID),
config: configWithSa,
},
{
name: "success: no existing job, tolerations is not set",
args: args{
Expand Down Expand Up @@ -294,11 +419,11 @@ func TestBuildImage(t *testing.T) {
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--context-sub-path=%s", config.ContextSubPath),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
},
VolumeMounts: []v1.VolumeMount{
{
Expand Down Expand Up @@ -423,11 +548,11 @@ func TestBuildImage(t *testing.T) {
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--context-sub-path=%s", config.ContextSubPath),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
},
VolumeMounts: []v1.VolumeMount{
{
Expand Down Expand Up @@ -562,10 +687,10 @@ func TestBuildImage(t *testing.T) {
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
},
VolumeMounts: []v1.VolumeMount{
{
Expand Down Expand Up @@ -695,11 +820,11 @@ func TestBuildImage(t *testing.T) {
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--context-sub-path=%s", config.ContextSubPath),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
},
VolumeMounts: []v1.VolumeMount{
{
Expand Down Expand Up @@ -796,11 +921,11 @@ func TestBuildImage(t *testing.T) {
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--context-sub-path=%s", config.ContextSubPath),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
},
VolumeMounts: []v1.VolumeMount{
{
Expand Down Expand Up @@ -900,11 +1025,11 @@ func TestBuildImage(t *testing.T) {
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--context-sub-path=%s", config.ContextSubPath),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
},
VolumeMounts: []v1.VolumeMount{
{
Expand Down Expand Up @@ -992,11 +1117,11 @@ func TestBuildImage(t *testing.T) {
fmt.Sprintf("--context=%s", config.BaseImages[modelVersion.PythonVersion].BuildContextURI),
fmt.Sprintf("--build-arg=MODEL_URL=%s/model", modelVersion.ArtifactURI),
fmt.Sprintf("--build-arg=BASE_IMAGE=%s", config.BaseImages[modelVersion.PythonVersion].ImageName),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
fmt.Sprintf("--destination=%s", fmt.Sprintf("%s/%s-%s:%s", config.DockerRegistry, project.Name, model.Name, modelVersion.ID)),
"--cache=true",
"--single-snapshot",
fmt.Sprintf("--context-sub-path=%s", config.ContextSubPath),
fmt.Sprintf("--build-arg=GOOGLE_APPLICATION_CREDENTIALS=%s", "/secret/kaniko-secret.json"),
},
VolumeMounts: []v1.VolumeMount{
{
Expand Down
21 changes: 21 additions & 0 deletions charts/merlin/templates/kaniko-service-account.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{{- if and .Values.merlin.imageBuilder.serviceAccount .Values.merlin.imageBuilder.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ .Values.merlin.imageBuilder.serviceAccount.name }}-{{ .Release.Name }}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Is there any reason not to use the full name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hang on, I think this is wrong; I will change it to just use

{{ .Values.merlin.imageBuilder.serviceAccount.name }}

without the {{ .Release.Name }}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But without the release name, couldn't there be a service account conflict when there are multiple merlin releases in the same namespace?

Copy link
Contributor Author

@shydefoo shydefoo Feb 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that was my initial thought, but when a service account is specified externally, it should respect the name provided. Pushing a fix for this.

namespace: {{ .Release.Namespace }}
{{- if .Values.merlin.imageBuilder.serviceAccount.annotations }}
annotations:
{{- toYaml .Values.merlin.imageBuilder.serviceAccount.annotations | nindent 4 }}
{{- end }}
labels:
app: {{ include "merlin.name" . }}
release: {{ .Release.Name }}
helm.sh/chart: {{ include "merlin.chart" . }}
app.kubernetes.io/name: {{ include "merlin.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- if .Values.merlin.imageBuilder.serviceAccount.labels }}
{{- toYaml .Values.merlin.imageBuilder.serviceAccount.labels | nindent 4 }}
{{- end }}
{{ end }}
Loading