diff --git a/Makefile b/Makefile index 67154c6d4..a5f2b4a86 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ lint-ui: .PHONY: lint-api lint-api: @echo "> Analyzing API source code..." - @cd ${API_PATH} && golangci-lint run + @cd ${API_PATH} && golangci-lint run # ============================================================ # Testing recipes diff --git a/api/api/router_test.go b/api/api/router_test.go index f51eb3e13..b888db828 100644 --- a/api/api/router_test.go +++ b/api/api/router_test.go @@ -16,7 +16,7 @@ package api import ( "bytes" - "io/ioutil" + "io" "net/http" "net/http/httptest" "strings" @@ -440,7 +440,7 @@ func Test_prometheusMiddleware_post(t *testing.T) { router := mux.NewRouter() router.HandleFunc("/foo", func(rw http.ResponseWriter, r *http.Request) { - body, err := ioutil.ReadAll(r.Body) + body, err := io.ReadAll(r.Body) assert.Nil(t, err) _, err = rw.Write(body) @@ -476,7 +476,7 @@ func Test_prometheusMiddleware_post_500(t *testing.T) { router := mux.NewRouter() router.HandleFunc("/foo-500", func(rw http.ResponseWriter, r *http.Request) { - body, err := ioutil.ReadAll(r.Body) + body, err := io.ReadAll(r.Body) assert.Nil(t, err) rw.WriteHeader(500) diff --git a/api/batch/resource_test.go b/api/batch/resource_test.go index acc1d15f9..347c813e6 100644 --- a/api/batch/resource_test.go +++ b/api/batch/resource_test.go @@ -15,7 +15,6 @@ package batch import ( - "fmt" "testing" "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta2" @@ -52,24 +51,26 @@ var ( modelID = models.ID(2) versionID = models.ID(3) - labelTeamName = "gojek.com/team" - labelStreamName = "gojek.com/stream" labelAppName = "gojek.com/app" - labelOrchestratorName = "gojek.com/orchestrator" + labelComponentName = "gojek.com/component" labelEnvironment = "gojek.com/environment" - labelUsersPrefix = "gojek.com/%s" + labelOrchestratorName = "gojek.com/orchestrator" + labelStreamName = "gojek.com/stream" + labelTeamName = "gojek.com/team" defaultLabels = map[string]string{ + labelModelID: modelID.String(), + labelModelVersionID: versionID.String(), + labelPredictionJobID: jobID.String(), + + labelAppName: modelName, + labelComponentName: models.ComponentBatchJob, + labelEnvironment: environementName, labelOrchestratorName: "merlin", - labelModelID: modelID.String(), - labelModelVersionID: versionID.String(), - labelPredictionJobID: jobID.String(), + labelStreamName: streamName, + labelTeamName: teamName, - labelTeamName: teamName, - labelStreamName: streamName, - labelAppName: modelName, - labelEnvironment: environementName, - fmt.Sprintf(labelUsersPrefix, "my-key"): "my-value", + "my-key": "my-value", } driverCore int32 = 1 @@ -118,6 +119,9 @@ var ( ) func TestCreateSparkApplicationResource(t *testing.T) { + models.InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer models.InitKubernetesLabeller("") //nolint:errcheck + tests := []struct { name string arg *models.PredictionJob @@ -131,10 +135,11 @@ func TestCreateSparkApplicationResource(t *testing.T) { Name: jobName, ID: jobID, Metadata: models.Metadata{ - Team: teamName, - Stream: streamName, App: modelName, + Component: models.ComponentBatchJob, Environment: environementName, + Stream: streamName, + Team: teamName, Labels: userLabels, }, VersionModelID: modelID, @@ -210,10 +215,11 @@ func TestCreateSparkApplicationResource(t *testing.T) { Name: jobName, ID: jobID, Metadata: models.Metadata{ - Team: teamName, - Stream: streamName, App: modelName, + Component: models.ComponentBatchJob, Environment: environementName, + Stream: streamName, + Team: teamName, Labels: userLabels, }, VersionModelID: modelID, @@ -289,10 +295,11 @@ func TestCreateSparkApplicationResource(t *testing.T) { Name: jobName, ID: jobID, Metadata: models.Metadata{ - Team: teamName, - Stream: streamName, App: modelName, + Component: models.ComponentBatchJob, Environment: environementName, + Stream: streamName, + Team: teamName, Labels: userLabels, }, VersionModelID: modelID, @@ -368,10 +375,11 @@ func TestCreateSparkApplicationResource(t *testing.T) { Name: jobName, ID: jobID, Metadata: models.Metadata{ - Team: teamName, - Stream: streamName, App: modelName, + Component: models.ComponentBatchJob, Environment: environementName, + Stream: streamName, + Team: teamName, Labels: userLabels, }, VersionModelID: modelID, @@ -447,10 +455,11 @@ func TestCreateSparkApplicationResource(t *testing.T) { Name: jobName, ID: jobID, Metadata: models.Metadata{ - Team: teamName, - Stream: streamName, App: modelName, + Component: models.ComponentBatchJob, Environment: environementName, + Stream: streamName, + Team: teamName, Labels: userLabels, }, VersionModelID: modelID, @@ -538,10 +547,11 @@ func TestCreateSparkApplicationResource(t *testing.T) { Name: jobName, ID: jobID, Metadata: models.Metadata{ - Team: teamName, - Stream: streamName, App: modelName, + Component: models.ComponentBatchJob, Environment: environementName, + Stream: streamName, + Team: teamName, Labels: userLabels, }, VersionModelID: modelID, @@ -568,10 +578,11 @@ func TestCreateSparkApplicationResource(t *testing.T) { Name: jobName, ID: jobID, Metadata: models.Metadata{ - Team: teamName, - Stream: streamName, App: modelName, + Component: models.ComponentBatchJob, Environment: environementName, + Stream: streamName, + Team: teamName, Labels: userLabels, }, VersionModelID: modelID, @@ -598,10 +609,11 @@ func TestCreateSparkApplicationResource(t *testing.T) { Name: jobName, ID: jobID, Metadata: models.Metadata{ - Team: teamName, - Stream: streamName, App: modelName, + Component: models.ComponentBatchJob, Environment: environementName, + Stream: streamName, + Team: teamName, Labels: userLabels, }, VersionModelID: modelID, diff --git a/api/cluster/controller.go b/api/cluster/controller.go index 004b891fd..e1cc35ec7 100644 --- a/api/cluster/controller.go +++ b/api/cluster/controller.go @@ -112,7 +112,8 @@ func newController(kfservingClient kservev1beta1client.ServingV1beta1Interface, batchV1Client batchv1client.BatchV1Interface, deploymentConfig config.DeploymentConfig, containerFetcher ContainerFetcher, - templater *resource.InferenceServiceTemplater) (Controller, error) { + templater *resource.InferenceServiceTemplater, +) (Controller, error) { return &controller{ servingClient: kfservingClient, clusterClient: coreV1Client, @@ -197,6 +198,7 @@ func (k *controller) Deploy(ctx context.Context, modelService *models.Service) ( Namespace: s.Namespace, ServiceName: s.Status.URL.Host, URL: inferenceURL, + Metadata: modelService.Metadata, }, nil } diff --git a/api/cluster/resource/templater_test.go b/api/cluster/resource/templater_test.go index 76a9be467..97b900909 100644 --- a/api/cluster/resource/templater_test.go +++ b/api/cluster/resource/templater_test.go @@ -125,6 +125,9 @@ var ( ) func TestCreateInferenceServiceSpec(t *testing.T) { + models.InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer models.InitKubernetesLabeller("") //nolint:errcheck + project := mlp.Project{ Name: "project", } @@ -135,10 +138,11 @@ func TestCreateInferenceServiceSpec(t *testing.T) { ModelVersion: "1", ArtifactURI: "gs://my-artifacet", Metadata: models.Metadata{ - Team: "dsp", - Stream: "dsp", App: "model", + Component: models.ComponentModelVersion, Environment: "dev", + Stream: "dsp", + Team: "dsp", Labels: mlp.Labels{ { Key: "sample", @@ -187,11 +191,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -247,11 +252,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -307,11 +313,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -360,11 +367,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -410,11 +418,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -462,11 +471,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -514,11 +524,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -565,11 +576,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, @@ -623,11 +635,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -680,11 +693,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -731,11 +745,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -794,11 +809,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -852,11 +868,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -918,11 +935,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -999,11 +1017,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1059,11 +1078,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1120,11 +1140,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1180,11 +1201,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1240,11 +1262,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1292,11 +1315,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1349,11 +1373,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1402,11 +1427,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1466,11 +1492,12 @@ func TestCreateInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1518,6 +1545,9 @@ func TestCreateInferenceServiceSpec(t *testing.T) { } func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { + models.InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer models.InitKubernetesLabeller("") //nolint:errcheck + project := mlp.Project{ Name: "project", } @@ -1530,10 +1560,11 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { Namespace: "project", ArtifactURI: "gs://my-artifacet", Metadata: models.Metadata{ - Team: "dsp", - Stream: "dsp", App: "model", + Component: models.ComponentModelVersion, Environment: "dev", + Stream: "dsp", + Team: "dsp", Labels: mlp.Labels{ { Key: "sample", @@ -1551,10 +1582,11 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { Namespace: "project", ArtifactURI: "gs://my-artifacet", Metadata: models.Metadata{ - Team: "dsp", - Stream: "dsp", App: "model", + Component: models.ComponentModelVersion, Environment: "dev", + Stream: "dsp", + Team: "dsp", Labels: mlp.Labels{ { Key: "sample", @@ -1615,11 +1647,12 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1699,11 +1732,12 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1780,11 +1814,12 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1869,11 +1904,12 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -1969,11 +2005,12 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2055,6 +2092,9 @@ func TestCreateInferenceServiceSpecWithTransformer(t *testing.T) { } func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { + models.InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer models.InitKubernetesLabeller("") //nolint:errcheck + project := mlp.Project{ Name: "project", } @@ -2067,10 +2107,11 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { ModelVersion: "1", ArtifactURI: "gs://my-artifacet", Metadata: models.Metadata{ - Team: "dsp", - Stream: "dsp", App: "model", + Component: models.ComponentModelVersion, Environment: "dev", + Stream: "dsp", + Team: "dsp", Labels: mlp.Labels{ { Key: "sample", @@ -2126,11 +2167,12 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2194,11 +2236,12 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2281,11 +2324,12 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2364,11 +2408,12 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2451,11 +2496,12 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2521,6 +2567,9 @@ func TestCreateInferenceServiceSpecWithLogger(t *testing.T) { } func TestPatchInferenceServiceSpec(t *testing.T) { + models.InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer models.InitKubernetesLabeller("") //nolint:errcheck + project := mlp.Project{ Name: "project", } @@ -2532,10 +2581,11 @@ func TestPatchInferenceServiceSpec(t *testing.T) { Namespace: project.Name, ArtifactURI: "gs://my-artifacet", Metadata: models.Metadata{ - Team: "dsp", - Stream: "dsp", App: "model", + Component: models.ComponentModelVersion, Environment: "dev", + Stream: "dsp", + Team: "dsp", Labels: mlp.Labels{ { Key: "sample", @@ -2635,11 +2685,12 @@ func TestPatchInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2727,11 +2778,12 @@ func TestPatchInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2868,11 +2920,12 @@ func TestPatchInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2927,11 +2980,12 @@ func TestPatchInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -2970,11 +3024,12 @@ func TestPatchInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -3062,11 +3117,12 @@ func TestPatchInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -3151,11 +3207,12 @@ func TestPatchInferenceServiceSpec(t *testing.T) { }, Labels: map[string]string{ "gojek.com/app": modelSvc.Metadata.App, + "gojek.com/component": models.ComponentModelVersion, + "gojek.com/environment": modelSvc.Metadata.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": modelSvc.Metadata.Stream, "gojek.com/team": modelSvc.Metadata.Team, - "gojek.com/sample": "true", - "gojek.com/environment": modelSvc.Metadata.Environment, + "sample": "true", }, }, Spec: kservev1beta1.InferenceServiceSpec{ @@ -3270,7 +3327,7 @@ func TestCreateTransformerSpec(t *testing.T) { MemoryRequest: memoryRequest, }, EnvVars: models.EnvVars{ - {Name: transformer.JaegerAgentHost, Value: "NEW_HOST"}, //test user overwrite + {Name: transformer.JaegerAgentHost, Value: "NEW_HOST"}, // test user overwrite }, }, &config.DeploymentConfig{}, diff --git a/api/cmd/api/main.go b/api/cmd/api/main.go index 1958c27b9..e3048c828 100644 --- a/api/cmd/api/main.go +++ b/api/cmd/api/main.go @@ -41,6 +41,7 @@ import ( "github.com/gojek/merlin/config" "github.com/gojek/merlin/log" "github.com/gojek/merlin/mlflow" + "github.com/gojek/merlin/models" "github.com/gojek/merlin/pkg/gitlab" "github.com/gojek/merlin/queue" "github.com/gojek/merlin/queue/work" @@ -206,6 +207,10 @@ func buildDependencies(ctx context.Context, cfg *config.Config, db *gorm.DB, dis mlpAPIClient := initMLPAPIClient(ctx, cfg.MlpAPIConfig) coreClient := initFeastCoreClient(cfg.StandardTransformerConfig.FeastCoreURL, cfg.StandardTransformerConfig.FeastCoreAuthAudience, cfg.StandardTransformerConfig.EnableAuth) + if err := models.InitKubernetesLabeller(cfg.DeploymentLabelPrefix); err != nil { + log.Panicf("invalid deployment label prefix (%s): %s", cfg.DeploymentLabelPrefix, err) + } + webServiceBuilder, predJobBuilder, imageBuilderJanitor := initImageBuilder(cfg) clusterControllers := initClusterControllers(cfg) diff --git a/api/config/config.go b/api/config/config.go index 93fdfcc3b..9ea35aca2 100644 --- a/api/config/config.go +++ b/api/config/config.go @@ -43,6 +43,8 @@ type Config struct { NumOfQueueWorkers int `envconfig:"NUM_OF_WORKERS" default:"2"` SwaggerPath string `envconfig:"SWAGGER_PATH" default:"./swagger.yaml"` + DeploymentLabelPrefix string `envconfig:"DEPLOYMENT_LABEL_PREFIX" default:"gojek.com/"` + DbConfig DatabaseConfig ImageBuilderConfig ImageBuilderConfig EnvironmentConfigs []EnvironmentConfig diff --git a/api/mlp/project.go b/api/mlp/project.go index d60d147cc..9233ca96d 100644 --- a/api/mlp/project.go +++ b/api/mlp/project.go @@ -71,6 +71,26 @@ type Projects []client.Project // Labels is a list of mlp-api's Label. type Labels []client.Label +func LabelsToMaps(labels []client.Label) map[string]string { + output := make(map[string]string, len(labels)) + + for _, label := range labels { + output[label.Key] = label.Value + } + + return output +} + +func MapsToLabels(maps map[string]string) Labels { + labels := Labels{} + + for k, v := range maps { + labels = append(labels, client.Label{Key: k, Value: v}) + } + + return labels +} + func (c *apiClient) ListProjects(ctx context.Context, projectName string) (Projects, error) { var opt *client.ProjectApiProjectsGetOpts if projectName != "" { diff --git a/api/mlp/project_test.go b/api/mlp/project_test.go index da1bafee9..3fd41fe8a 100644 --- a/api/mlp/project_test.go +++ b/api/mlp/project_test.go @@ -18,8 +18,10 @@ import ( "context" "net/http" "net/http/httptest" + "reflect" "testing" + "github.com/gojek/mlp/api/client" "github.com/stretchr/testify/assert" ) @@ -148,3 +150,23 @@ func TestProject_MlflowRunURL(t *testing.T) { }) } } + +func TestLabels(t *testing.T) { + labels := []client.Label{ + {Key: "key-1", Value: "value-1"}, + {Key: "key-2", Value: "value-2"}, + } + + maps := map[string]string{ + "key-1": "value-1", + "key-2": "value-2", + } + + gotMaps := LabelsToMaps(labels) + assert.Equal(t, maps, gotMaps) + + gotLabels := MapsToLabels(maps) + if !reflect.DeepEqual(gotLabels, Labels(labels)) { + t.Errorf("MapsToLabels() = %v, want %v", gotLabels, Labels(labels)) + } +} diff --git a/api/models/metadata.go b/api/models/metadata.go index 122d06e98..9c0ea00cc 100644 --- a/api/models/metadata.go +++ b/api/models/metadata.go @@ -19,33 +19,60 @@ import ( "encoding/json" "errors" "fmt" + "regexp" "github.com/gojek/merlin/mlp" "github.com/gojek/merlin/utils" ) const ( - labelTeamName = "gojek.com/team" - labelStreamName = "gojek.com/stream" - labelAppName = "gojek.com/app" - labelOrchestratorName = "gojek.com/orchestrator" - labelEnvironment = "gojek.com/environment" - labelUsersPrefix = "gojek.com/%s" + labelAppName = "app" + labelComponent = "component" + labelEnvironment = "environment" + LabelOrchestratorName = "orchestrator" + labelStreamName = "stream" + labelTeamName = "team" + + ComponentBatchJob = "batch-job" + ComponentImageBuilder = "image-builder" + ComponentModelEndpoint = "model-endpoint" + ComponentModelVersion = "model-version" ) var reservedKeys = map[string]bool{ - labelTeamName: true, - labelStreamName: true, labelAppName: true, - labelOrchestratorName: true, + labelComponent: true, labelEnvironment: true, + LabelOrchestratorName: true, + labelStreamName: true, + labelTeamName: true, +} + +var ( + prefix string + validPrefixRegex = regexp.MustCompile("^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?(/)$") +) + +// InitKubernetesLabeller builds a new KubernetesLabeller Singleton +func InitKubernetesLabeller(p string) error { + if len(p) > 253 { + return fmt.Errorf("length of prefix is greater than 253 characters") + } + + if isValidPrefix := validPrefixRegex.MatchString(p); !isValidPrefix { + return fmt.Errorf("name violates kubernetes label's prefix constraint") + } + + prefix = p + return nil } type Metadata struct { - Team string - Stream string App string + Component string Environment string + Stream string + Team string Labels mlp.Labels } @@ -64,28 +91,32 @@ func (metadata *Metadata) Scan(value interface{}) error { func (metadata *Metadata) ToLabel() map[string]string { labels := map[string]string{ - labelOrchestratorName: "merlin", - labelAppName: metadata.App, - labelEnvironment: metadata.Environment, - labelStreamName: metadata.Stream, - labelTeamName: metadata.Team, + prefix + labelAppName: metadata.App, + prefix + labelComponent: metadata.Component, + prefix + labelEnvironment: metadata.Environment, + prefix + LabelOrchestratorName: "merlin", + prefix + labelStreamName: metadata.Stream, + prefix + labelTeamName: metadata.Team, } for _, label := range metadata.Labels { - key := fmt.Sprintf(labelUsersPrefix, label.Key) // skip label that is trying to override reserved key - if _, usingReservedKeys := reservedKeys[key]; usingReservedKeys { + if _, usingReservedKeys := reservedKeys[prefix+label.Key]; usingReservedKeys { continue } + // skip label that has invalid key name if err := utils.IsValidLabel(label.Key); err != nil { continue } + // skip label that has invalid value name if err := utils.IsValidLabel(label.Value); err != nil { continue } - labels[key] = label.Value + + labels[label.Key] = label.Value } + return labels } diff --git a/api/models/metadata_test.go b/api/models/metadata_test.go index a0929715c..01026597f 100644 --- a/api/models/metadata_test.go +++ b/api/models/metadata_test.go @@ -8,6 +8,9 @@ import ( ) func TestToLabel(t *testing.T) { + InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer InitKubernetesLabeller("") //nolint:errcheck + testCases := []struct { desc string metadata Metadata @@ -16,10 +19,11 @@ func TestToLabel(t *testing.T) { { desc: "All keys and value is valid", metadata: Metadata{ - Team: "abc", - Stream: "abc", App: "app", + Component: "model-version", Environment: "staging", + Stream: "abc", + Team: "abc", Labels: mlp.Labels{ { Key: "key", @@ -28,21 +32,23 @@ func TestToLabel(t *testing.T) { }, }, expectedLabels: map[string]string{ - "gojek.com/team": "abc", - "gojek.com/stream": "abc", "gojek.com/app": "app", + "gojek.com/component": "model-version", "gojek.com/environment": "staging", "gojek.com/orchestrator": "merlin", - "gojek.com/key": "value", + "gojek.com/stream": "abc", + "gojek.com/team": "abc", + "key": "value", }, }, { - desc: "MLP labels has using reserved keys, should be ignored", + desc: "MLP labels has using reserved keys", metadata: Metadata{ - Team: "abc", - Stream: "abc", App: "app", + Component: "model-version", Environment: "staging", + Stream: "abc", + Team: "abc", Labels: mlp.Labels{ { Key: "app", @@ -67,20 +73,27 @@ func TestToLabel(t *testing.T) { }, }, expectedLabels: map[string]string{ - "gojek.com/team": "abc", - "gojek.com/stream": "abc", "gojek.com/app": "app", + "gojek.com/component": "model-version", "gojek.com/environment": "staging", "gojek.com/orchestrator": "merlin", + "gojek.com/stream": "abc", + "gojek.com/team": "abc", + + "app": "newApp", + "environment": "env", + "orchestrator": "clockwork", + "stream": "stream", }, }, { desc: "Should ignored invalid labels", metadata: Metadata{ - Team: "abc", - Stream: "abc", App: "app", + Component: "model-version", Environment: "staging", + Stream: "abc", + Team: "abc", Labels: mlp.Labels{ { Key: "key", @@ -97,12 +110,14 @@ func TestToLabel(t *testing.T) { }, }, expectedLabels: map[string]string{ - "gojek.com/team": "abc", - "gojek.com/stream": "abc", "gojek.com/app": "app", + "gojek.com/component": "model-version", "gojek.com/environment": "staging", "gojek.com/orchestrator": "merlin", - "gojek.com/key": "value", + "gojek.com/stream": "abc", + "gojek.com/team": "abc", + + "key": "value", }, }, } @@ -113,3 +128,53 @@ func TestToLabel(t *testing.T) { }) } } + +func TestInitKubernetesLabeller(t *testing.T) { + InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer InitKubernetesLabeller("") //nolint:errcheck + + tests := []struct { + prefix string + wantErr bool + }{ + { + "gojek.com/", + false, + }, + { + "model.caraml.dev/", + false, + }, + { + "goto/gojek", + true, + }, + { + "gojek", + true, + }, + { + "gojek.com/caraml", + true, + }, + { + "gojek//", + true, + }, + { + "gojek.com//", + true, + }, + { + "//gojek.com", + true, + }, + } + for _, tt := range tests { + t.Run(tt.prefix, func(t *testing.T) { + if err := InitKubernetesLabeller(tt.prefix); (err != nil) != tt.wantErr { + t.Errorf("InitKubernetesLabeller() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/api/models/service.go b/api/models/service.go index 6be4375d7..cbe84aa09 100644 --- a/api/models/service.go +++ b/api/models/service.go @@ -18,6 +18,9 @@ import ( "fmt" "strings" + mlpclient "github.com/gojek/mlp/api/client" + + "github.com/gojek/merlin/mlp" "github.com/gojek/merlin/pkg/autoscaling" "github.com/gojek/merlin/pkg/deployment" "github.com/gojek/merlin/pkg/protocol" @@ -56,11 +59,12 @@ func NewService(model *Model, version *Version, modelOpt *ModelOption, endpoint ResourceRequest: endpoint.ResourceRequest, EnvVars: endpoint.EnvVars, Metadata: Metadata{ - Team: model.Project.Team, - Stream: model.Project.Stream, App: model.Name, + Component: ComponentModelVersion, Environment: endpoint.EnvironmentName, - Labels: model.Project.Labels, + Stream: model.Project.Stream, + Team: model.Project.Team, + Labels: MergeProjectVersionLabels(model.Project.Labels, version.Labels), }, Transformer: endpoint.Transformer, Logger: endpoint.Logger, @@ -70,6 +74,28 @@ func NewService(model *Model, version *Version, modelOpt *ModelOption, endpoint } } +func MergeProjectVersionLabels(projectLabels mlp.Labels, versionLabels KV) mlp.Labels { + projectLabelsMap := map[string]int{} + for index, projectLabel := range projectLabels { + projectLabelsMap[projectLabel.Key] = index + } + + for versionLabelKey, versionLabelValue := range versionLabels { + if _, labelExists := projectLabelsMap[versionLabelKey]; labelExists { + index := projectLabelsMap[versionLabelKey] + projectLabels[index].Value = fmt.Sprint(versionLabelValue) + continue + } + + projectLabels = append(projectLabels, mlpclient.Label{ + Key: versionLabelKey, + Value: fmt.Sprint(versionLabelValue), + }) + } + + return projectLabels +} + func CreateInferenceServiceName(modelName string, versionID string) string { return fmt.Sprintf("%s-%s", modelName, versionID) } diff --git a/api/models/service_test.go b/api/models/service_test.go index f6d36c6af..aabacd266 100644 --- a/api/models/service_test.go +++ b/api/models/service_test.go @@ -1,8 +1,10 @@ package models import ( + "reflect" "testing" + "github.com/gojek/merlin/mlp" "github.com/gojek/merlin/pkg/protocol" "github.com/stretchr/testify/assert" "knative.dev/pkg/apis" @@ -44,3 +46,188 @@ func TestGetValidInferenceURL(t *testing.T) { }) } } + +func Test_mergeProjectVersionLabels(t *testing.T) { + InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer InitKubernetesLabeller("") //nolint:errcheck + + type args struct { + projectLabels mlp.Labels + versionLabels KV + } + tests := []struct { + name string + args args + want mlp.Labels + }{ + { + "both maps has different keys", + args{ + projectLabels: mlp.Labels{ + {Key: "gojek.com/key-1", Value: "value-1"}, + }, + versionLabels: KV{ + "key-2": "value-2", + }, + }, + mlp.Labels{ + {Key: "gojek.com/key-1", Value: "value-1"}, + {Key: "key-2", Value: "value-2"}, + }, + }, + { + "both maps has different keys", + args{ + projectLabels: mlp.Labels{ + {Key: "gojek.com/key-1", Value: "value-1"}, + {Key: "key-1", Value: "value-1"}, + }, + versionLabels: KV{ + "key-1": "value-11", + "key-2": "value-2", + }, + }, + mlp.Labels{ + {Key: "gojek.com/key-1", Value: "value-1"}, + {Key: "key-1", Value: "value-11"}, + {Key: "key-2", Value: "value-2"}, + }, + }, + { + "duplicate key name without prefix", + args{ + projectLabels: mlp.Labels{ + {Key: "gojek.com/key-1", Value: "value-1"}, + }, + versionLabels: KV{ + "key-1": "value-11", + "key-2": "value-2", + }, + }, + mlp.Labels{ + {Key: "gojek.com/key-1", Value: "value-1"}, + {Key: "key-1", Value: "value-11"}, + {Key: "key-2", Value: "value-2"}, + }, + }, + { + "only project labels", + args{ + projectLabels: mlp.Labels{ + {Key: "gojek.com/key-1", Value: "value-1"}, + }, + versionLabels: nil, + }, + mlp.Labels{ + {Key: "gojek.com/key-1", Value: "value-1"}, + }, + }, + { + "only version labels", + args{ + projectLabels: nil, + versionLabels: KV{ + "key-2": "value-2", + }, + }, + mlp.Labels{ + {Key: "key-2", Value: "value-2"}, + }, + }, + { + "both empty", + args{ + projectLabels: mlp.Labels{}, + versionLabels: KV{}, + }, + mlp.Labels{}, + }, + { + "both nil", + args{ + projectLabels: nil, + versionLabels: nil, + }, + nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := MergeProjectVersionLabels(tt.args.projectLabels, tt.args.versionLabels); !reflect.DeepEqual(got, tt.want) { + t.Errorf("mergeProjectVersionLabels() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestNewService(t *testing.T) { + mlpLabels := mlp.Labels{ + {Key: "key-1", Value: "value-1"}, + } + versionLabels := KV{ + "key-1": "value-11", + "key-2": "value-2", + } + serviceLabels := mlp.Labels{ + {Key: "key-1", Value: "value-11"}, + {Key: "key-2", Value: "value-2"}, + } + + project := mlp.Project{Name: "project", Labels: mlpLabels} + model := &Model{Name: "model", Project: project} + version := &Version{ID: 1, Labels: versionLabels} + endpoint := &VersionEndpoint{} + + type args struct { + model *Model + version *Version + modelOpt *ModelOption + endpoint *VersionEndpoint + } + tests := []struct { + name string + args args + want *Service + }{ + { + name: "No model option", + args: args{ + model: model, + version: version, + modelOpt: &ModelOption{}, + endpoint: endpoint, + }, + want: &Service{ + Name: CreateInferenceServiceName(model.Name, version.ID.String()), + ModelName: model.Name, + ModelVersion: version.ID.String(), + Namespace: model.Project.Name, + ArtifactURI: version.ArtifactURI, + Type: model.Type, + Options: &ModelOption{}, + ResourceRequest: endpoint.ResourceRequest, + EnvVars: endpoint.EnvVars, + Metadata: Metadata{ + App: model.Name, + Component: ComponentModelVersion, + Environment: endpoint.EnvironmentName, + Labels: serviceLabels, + Stream: model.Project.Stream, + Team: model.Project.Team, + }, + Transformer: endpoint.Transformer, + Logger: endpoint.Logger, + DeploymentMode: endpoint.DeploymentMode, + AutoscalingPolicy: endpoint.AutoscalingPolicy, + Protocol: endpoint.Protocol, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := NewService(tt.args.model, tt.args.version, tt.args.modelOpt, tt.args.endpoint); !reflect.DeepEqual(got, tt.want) { + t.Errorf("NewService() =\n\t%+v\n, want\n\t%+v", got, tt.want) + } + }) + } +} diff --git a/api/pkg/imagebuilder/imagebuilder.go b/api/pkg/imagebuilder/imagebuilder.go index ba0675c12..0731ae2bf 100644 --- a/api/pkg/imagebuilder/imagebuilder.go +++ b/api/pkg/imagebuilder/imagebuilder.go @@ -70,13 +70,6 @@ const ( kanikoSecretName = "kaniko-secret" tickDurationSecond = 5 - labelTeamName = "gojek.com/team" - labelStreamName = "gojek.com/stream" - labelAppName = "gojek.com/app" - labelEnvironment = "gojek.com/environment" - labelOrchestratorName = "gojek.com/orchestrator" - labelComponent = "gojek.com/component" - gacEnvKey = "GOOGLE_APPLICATION_CREDENTIALS" saFilePath = "/secret/kaniko-secret.json" ) @@ -342,13 +335,12 @@ func (c *imageBuilder) createKanikoJobSpec(project mlp.Project, model *models.Mo kanikoPodName := c.nameGenerator.generateBuilderJobName(project, model, version) imageRef := c.imageRef(project, model, version) - labels := map[string]string{ - labelTeamName: project.Team, - labelStreamName: project.Stream, - labelAppName: model.Name, - labelEnvironment: c.config.Environment, - labelOrchestratorName: "merlin", - labelComponent: "image-builder", + metadata := models.Metadata{ + App: model.Name, + Component: models.ComponentImageBuilder, + Environment: c.config.Environment, + Stream: project.Stream, + Team: project.Team, } baseImageTag, ok := c.config.BaseImages[version.PythonVersion] @@ -407,7 +399,7 @@ func (c *imageBuilder) createKanikoJobSpec(project mlp.Project, model *models.Mo ObjectMeta: metav1.ObjectMeta{ Name: kanikoPodName, Namespace: c.config.BuildNamespace, - Labels: labels, + Labels: metadata.ToLabel(), }, Spec: batchv1.JobSpec{ Completions: &jobCompletions, @@ -416,7 +408,7 @@ func (c *imageBuilder) createKanikoJobSpec(project mlp.Project, model *models.Mo ActiveDeadlineSeconds: &activeDeadlineSeconds, Template: v1.PodTemplateSpec{ ObjectMeta: metav1.ObjectMeta{ - Labels: labels, + Labels: metadata.ToLabel(), }, Spec: v1.PodSpec{ // https://stackoverflow.com/questions/54091659/kubernetes-pods-disappear-after-failed-jobs diff --git a/api/pkg/imagebuilder/imagebuilder_test.go b/api/pkg/imagebuilder/imagebuilder_test.go index 782bf77b3..506b2a050 100644 --- a/api/pkg/imagebuilder/imagebuilder_test.go +++ b/api/pkg/imagebuilder/imagebuilder_test.go @@ -171,6 +171,9 @@ var ( ) func TestBuildImage(t *testing.T) { + models.InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer models.InitKubernetesLabeller("") //nolint:errcheck + type args struct { project mlp.Project model *models.Model @@ -200,11 +203,11 @@ func TestBuildImage(t *testing.T) { Namespace: config.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: batchv1.JobSpec{ @@ -216,11 +219,11 @@ func TestBuildImage(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: v1.PodSpec{ @@ -382,11 +385,11 @@ func TestBuildImage(t *testing.T) { Namespace: config.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: batchv1.JobSpec{ @@ -398,11 +401,11 @@ func TestBuildImage(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: v1.PodSpec{ @@ -511,11 +514,11 @@ func TestBuildImage(t *testing.T) { Namespace: config.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: batchv1.JobSpec{ @@ -527,11 +530,11 @@ func TestBuildImage(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: v1.PodSpec{ @@ -650,11 +653,11 @@ func TestBuildImage(t *testing.T) { Namespace: config.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: batchv1.JobSpec{ @@ -666,11 +669,11 @@ func TestBuildImage(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: v1.PodSpec{ @@ -783,11 +786,11 @@ func TestBuildImage(t *testing.T) { Namespace: config.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: batchv1.JobSpec{ @@ -799,11 +802,11 @@ func TestBuildImage(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: v1.PodSpec{ @@ -884,11 +887,11 @@ func TestBuildImage(t *testing.T) { Namespace: config.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: batchv1.JobSpec{ @@ -900,11 +903,11 @@ func TestBuildImage(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: v1.PodSpec{ @@ -988,11 +991,11 @@ func TestBuildImage(t *testing.T) { Namespace: config.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: batchv1.JobSpec{ @@ -1004,11 +1007,11 @@ func TestBuildImage(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: v1.PodSpec{ @@ -1080,11 +1083,11 @@ func TestBuildImage(t *testing.T) { Namespace: config.BuildNamespace, Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: batchv1.JobSpec{ @@ -1096,11 +1099,11 @@ func TestBuildImage(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Labels: map[string]string{ "gojek.com/app": model.Name, + "gojek.com/component": models.ComponentImageBuilder, + "gojek.com/environment": config.Environment, "gojek.com/orchestrator": "merlin", "gojek.com/stream": project.Stream, "gojek.com/team": project.Team, - "gojek.com/environment": config.Environment, - "gojek.com/component": "image-builder", }, }, Spec: v1.PodSpec{ diff --git a/api/pkg/imagebuilder/janitor.go b/api/pkg/imagebuilder/janitor.go index fa3472657..44c28d133 100644 --- a/api/pkg/imagebuilder/janitor.go +++ b/api/pkg/imagebuilder/janitor.go @@ -13,6 +13,7 @@ import ( "github.com/gojek/merlin/cluster" "github.com/gojek/merlin/log" + "github.com/gojek/merlin/models" ) var ( @@ -71,7 +72,7 @@ func (j *Janitor) CleanJobs() { } func (j *Janitor) getExpiredJobs(ctx context.Context) ([]batchv1.Job, error) { - jobs, err := j.cc.ListJobs(ctx, j.cfg.BuildNamespace, labelOrchestratorName+"=merlin") + jobs, err := j.cc.ListJobs(ctx, j.cfg.BuildNamespace, models.LabelOrchestratorName+"=merlin") if err != nil { return nil, err } diff --git a/api/pkg/imagebuilder/janitor_test.go b/api/pkg/imagebuilder/janitor_test.go index 480390100..d3d7a8f21 100644 --- a/api/pkg/imagebuilder/janitor_test.go +++ b/api/pkg/imagebuilder/janitor_test.go @@ -14,6 +14,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/gojek/merlin/cluster/mocks" + "github.com/gojek/merlin/models" ) var ( @@ -120,7 +121,7 @@ func TestJanitor_CleanJobs(t *testing.T) { totalDelete := 0 - mc.On("ListJobs", context.Background(), namespace, labelOrchestratorName+"=merlin"). + mc.On("ListJobs", context.Background(), namespace, models.LabelOrchestratorName+"=merlin"). Return(&batchv1.JobList{Items: []batchv1.Job{completedJob1, completedJob2, activeJob1, failedJob1}}, nil) mc.On("DeleteJob", context.Background(), namespace, completedJob1.Name, mock.Anything). @@ -155,7 +156,7 @@ func TestJanitor_getExpiredJobs(t *testing.T) { cfg: JanitorConfig{BuildNamespace: namespace, Retention: retention}, }, mockFn: func(mc *mocks.Controller) { - mc.On("ListJobs", context.Background(), namespace, labelOrchestratorName+"=merlin"). + mc.On("ListJobs", context.Background(), namespace, models.LabelOrchestratorName+"=merlin"). Return(&batchv1.JobList{}, nil) }, want: []batchv1.Job{}, @@ -167,7 +168,7 @@ func TestJanitor_getExpiredJobs(t *testing.T) { cfg: JanitorConfig{BuildNamespace: namespace, Retention: retention}, }, mockFn: func(mc *mocks.Controller) { - mc.On("ListJobs", context.Background(), namespace, labelOrchestratorName+"=merlin"). + mc.On("ListJobs", context.Background(), namespace, models.LabelOrchestratorName+"=merlin"). Return(&batchv1.JobList{Items: []batchv1.Job{completedJob1}}, nil) }, want: []batchv1.Job{completedJob1}, @@ -179,7 +180,7 @@ func TestJanitor_getExpiredJobs(t *testing.T) { cfg: JanitorConfig{BuildNamespace: namespace, Retention: retention}, }, mockFn: func(mc *mocks.Controller) { - mc.On("ListJobs", context.Background(), namespace, labelOrchestratorName+"=merlin"). + mc.On("ListJobs", context.Background(), namespace, models.LabelOrchestratorName+"=merlin"). Return(&batchv1.JobList{Items: []batchv1.Job{completedJob1, activeJob1}}, nil) }, want: []batchv1.Job{completedJob1}, @@ -191,7 +192,7 @@ func TestJanitor_getExpiredJobs(t *testing.T) { cfg: JanitorConfig{BuildNamespace: namespace, Retention: retention}, }, mockFn: func(mc *mocks.Controller) { - mc.On("ListJobs", context.Background(), namespace, labelOrchestratorName+"=merlin"). + mc.On("ListJobs", context.Background(), namespace, models.LabelOrchestratorName+"=merlin"). Return(&batchv1.JobList{Items: []batchv1.Job{completedJob1, activeJob1, failedJob1}}, nil) }, want: []batchv1.Job{completedJob1}, diff --git a/api/queue/work/model_service_deployment_test.go b/api/queue/work/model_service_deployment_test.go index 8072c4172..35f099eb7 100644 --- a/api/queue/work/model_service_deployment_test.go +++ b/api/queue/work/model_service_deployment_test.go @@ -35,9 +35,26 @@ func TestExecuteDeployment(t *testing.T) { MemoryRequest: resource.MustParse("1Gi"), }, } - project := mlp.Project{Name: "project"} + + mlpLabels := mlp.Labels{ + {Key: "key-1", Value: "value-1"}, + } + + versionLabels := models.KV{ + "key-1": "value-11", + "key-2": "value-2", + } + + svcMetadata := models.Metadata{ + Labels: mlp.Labels{ + {Key: "key-1", Value: "value-11"}, + {Key: "key-2", Value: "value-2"}, + }, + } + + project := mlp.Project{Name: "project", Labels: mlpLabels} model := &models.Model{Name: "model", Project: project} - version := &models.Version{ID: 1} + version := &models.Version{ID: 1, Labels: versionLabels} iSvcName := fmt.Sprintf("%s-%d", model.Name, version.ID) svcName := fmt.Sprintf("%s-%d.project.svc.cluster.local", model.Name, version.ID) url := fmt.Sprintf("%s-%d.example.com", model.Name, version.ID) @@ -88,6 +105,7 @@ func TestExecuteDeployment(t *testing.T) { Namespace: project.Name, ServiceName: svcName, URL: url, + Metadata: svcMetadata, }, nil) return ctrl }, @@ -131,6 +149,7 @@ func TestExecuteDeployment(t *testing.T) { Namespace: project.Name, ServiceName: svcName, URL: url, + Metadata: svcMetadata, }, nil) return ctrl }, @@ -174,6 +193,7 @@ func TestExecuteDeployment(t *testing.T) { Namespace: project.Name, ServiceName: svcName, URL: url, + Metadata: svcMetadata, }, nil) return ctrl }, @@ -219,6 +239,7 @@ func TestExecuteDeployment(t *testing.T) { Namespace: project.Name, ServiceName: svcName, URL: url, + Metadata: svcMetadata, }, nil) return ctrl }, @@ -336,6 +357,11 @@ func TestExecuteDeployment(t *testing.T) { err := svc.Deploy(job) assert.Equal(t, tt.deployErr, err) + if len(ctrl.ExpectedCalls) > 0 && ctrl.ExpectedCalls[0].ReturnArguments[0] != nil { + deployedSvc := ctrl.ExpectedCalls[0].ReturnArguments[0].(*models.Service) + assert.Equal(t, svcMetadata, deployedSvc.Metadata) + } + mockStorage.AssertNumberOfCalls(t, "Save", 1) savedEndpoint := mockStorage.Calls[1].Arguments[0].(*models.VersionEndpoint) assert.Equal(t, tt.model.ID, savedEndpoint.VersionModelID) diff --git a/api/service/model_endpoint_service.go b/api/service/model_endpoint_service.go index 39c5c3700..c9309ba69 100644 --- a/api/service/model_endpoint_service.go +++ b/api/service/model_endpoint_service.go @@ -211,11 +211,12 @@ func (s *modelEndpointsService) UndeployEndpoint(ctx context.Context, model *mod func (s *modelEndpointsService) createVirtualService(model *models.Model, endpoint *models.ModelEndpoint) (*v1beta1.VirtualService, error) { metadata := models.Metadata{ - Team: model.Project.Team, - Stream: model.Project.Stream, App: model.Name, + Component: models.ComponentModelEndpoint, Environment: s.environment, Labels: model.Project.Labels, + Stream: model.Project.Stream, + Team: model.Project.Team, } labels := metadata.ToLabel() diff --git a/api/service/model_endpoint_service_test.go b/api/service/model_endpoint_service_test.go index f2002347a..8c32e8775 100644 --- a/api/service/model_endpoint_service_test.go +++ b/api/service/model_endpoint_service_test.go @@ -442,6 +442,9 @@ func Test_modelEndpointsService_UndeployEndpoint(t *testing.T) { } func TestModelEndpointService_createVirtualService(t *testing.T) { + models.InitKubernetesLabeller("gojek.com/") //nolint:errcheck + defer models.InitKubernetesLabeller("") //nolint:errcheck + type fields struct { environment string } @@ -473,11 +476,12 @@ func TestModelEndpointService_createVirtualService(t *testing.T) { Namespace: model1.Project.Name, Labels: map[string]string{ "gojek.com/app": model1.Name, + "gojek.com/component": models.ComponentModelEndpoint, + "gojek.com/environment": "staging", "gojek.com/orchestrator": "merlin", "gojek.com/stream": model1.Project.Stream, "gojek.com/team": model1.Project.Team, - "gojek.com/environment": "staging", - "gojek.com/sample": "true", + "sample": "true", }, }, Spec: networking.VirtualService{ @@ -552,11 +556,12 @@ func TestModelEndpointService_createVirtualService(t *testing.T) { Namespace: model1.Project.Name, Labels: map[string]string{ "gojek.com/app": model1.Name, + "gojek.com/component": models.ComponentModelEndpoint, + "gojek.com/environment": "staging", "gojek.com/orchestrator": "merlin", "gojek.com/stream": model1.Project.Stream, "gojek.com/team": model1.Project.Team, - "gojek.com/environment": "staging", - "gojek.com/sample": "true", + "sample": "true", }, }, Spec: networking.VirtualService{ diff --git a/api/service/prediction_job_service.go b/api/service/prediction_job_service.go index 47d3fdb1e..9553b353a 100644 --- a/api/service/prediction_job_service.go +++ b/api/service/prediction_job_service.go @@ -98,11 +98,12 @@ func (p *predictionJobService) CreatePredictionJob(ctx context.Context, env *mod predictionJob.Name = jobName predictionJob.Metadata = models.Metadata{ - Team: model.Project.Team, - Stream: model.Project.Stream, App: model.Name, + Component: models.ComponentBatchJob, Environment: p.environmentLabel, - Labels: model.Project.Labels, + Labels: models.MergeProjectVersionLabels(model.Project.Labels, version.Labels), + Stream: model.Project.Stream, + Team: model.Project.Team, } predictionJob.Status = models.JobPending predictionJob.VersionModelID = model.ID diff --git a/api/service/prediction_job_service_test.go b/api/service/prediction_job_service_test.go index e84f1757f..785d7520c 100644 --- a/api/service/prediction_job_service_test.go +++ b/api/service/prediction_job_service_test.go @@ -87,10 +87,11 @@ var ( ID: 0, Name: fmt.Sprintf("%s-%s-%s", model.Name, version.ID, strconv.FormatInt(now.UnixNano(), 10)[:13]), Metadata: models.Metadata{ - Team: project.Team, - Stream: project.Stream, App: model.Name, + Component: models.ComponentBatchJob, Environment: environmentLabel, + Stream: project.Stream, + Team: project.Team, Labels: project.Labels, }, VersionID: 3, diff --git a/charts/merlin/templates/merlin-deployment.yaml b/charts/merlin/templates/merlin-deployment.yaml index 3497e69b6..bef788ce1 100644 --- a/charts/merlin/templates/merlin-deployment.yaml +++ b/charts/merlin/templates/merlin-deployment.yaml @@ -65,6 +65,8 @@ spec: env: - name: ENVIRONMENT value: "{{ .Values.merlin.environment }}" + - name: DEPLOYMENT_LABEL_PREFIX + value: "{{ .Values.merlin.deploymentLabelPrefix }}" - name: LOGGER_DESTINATION_URL value: "{{ .Values.merlin.loggerDestinationURL }}" - name: NUM_OF_WORKERS diff --git a/charts/merlin/values.yaml b/charts/merlin/values.yaml index 99e194538..4eaed05ca 100644 --- a/charts/merlin/values.yaml +++ b/charts/merlin/values.yaml @@ -29,6 +29,7 @@ merlin: enabled: false environment: dev + deploymentLabelPrefix: "gojek.com/" loggerDestinationURL: "http://yourDestinationLogger" diff --git a/python/pyfunc-server/README.md b/python/pyfunc-server/README.md index 5a77655a6..713f004de 100644 --- a/python/pyfunc-server/README.md +++ b/python/pyfunc-server/README.md @@ -93,7 +93,10 @@ Pyfunc server can be configured via following environment variables | WORKERS | Number of Python processes that will be created to allow multi processing (default = 1) | | LOG_LEVEL | Log level, valid values are `INFO`, `ERROR`, `DEBUG`, `WARN`, `CRITICAL` (default='INFO') | | GRPC_OPTIONS | GRPC options to configure UPI server as json string. The possible options can be found in [grpc_types.h](https://github.com/grpc/grpc/blob/v1.46.x/include/grpc/impl/codegen/grpc_types.h). Example: '{"grpc.max_concurrent_streams":100}' | -| GRPC_CONCURRENCY | Size of grpc handler threadpool per worker (default = 10) | +| GRPC_CONCURRENCY | Size of grpc handler threadpool per worker (default = 10) | +| PUSHGATEWAY_ENABLED | Enable pushing metrics to prometheus push gateway, only available when `CARAML_PROTOCOL` is set to `UPI_V1` (default = false) | +| PUSHGATEWAY_URL | Url of the prometheus push gateway (default = localhost:9091) | +| PUSHGATEWAY_PUSH_INTERVAL_SEC | Interval in seconds for pushing metrics to prometheus push gateway (default = 30) | ## Directory Structure diff --git a/python/pyfunc-server/pyfuncserver/__main__.py b/python/pyfunc-server/pyfuncserver/__main__.py index ef740b1dc..68663bbc1 100644 --- a/python/pyfunc-server/pyfuncserver/__main__.py +++ b/python/pyfunc-server/pyfuncserver/__main__.py @@ -24,8 +24,6 @@ from pyfuncserver.server import PyFuncServer from pyfuncserver.utils.contants import ERR_DRY_RUN -DEFAULT_MODEL_NAME = "model" - parser = argparse.ArgumentParser() parser.add_argument('--model_dir', required=True, help='A URI pointer to the model binary') diff --git a/python/pyfunc-server/pyfuncserver/config.py b/python/pyfunc-server/pyfuncserver/config.py index e26b2bbb1..6bac43e68 100644 --- a/python/pyfunc-server/pyfuncserver/config.py +++ b/python/pyfunc-server/pyfuncserver/config.py @@ -2,30 +2,24 @@ import logging import os -# Following environment variables are expected to be populated by Merlin from merlin.protocol import Protocol -HTTP_PORT = "CARAML_HTTP_PORT" -MODEL_NAME = "CARAML_MODEL_NAME" -MODEL_VERSION = "CARAML_MODEL_VERSION" -MODEL_FULL_NAME = "CARAML_MODEL_FULL_NAME" -PROTOCOL = "CARAML_PROTOCOL" -WORKERS = "WORKERS" -GRPC_PORT = "CARAML_GRPC_PORT" -LOG_LEVEL = "LOG_LEVEL" -GRPC_OPTIONS = "GRPC_OPTIONS" -GRPC_CONCURRENCY = "GRPC_CONCURRENCY" - -DEFAULT_HTTP_PORT = 8080 -DEFAULT_GRPC_PORT = 9000 -DEFAULT_MODEL_NAME = "model" -DEFAULT_MODEL_VERSION = "1" -DEFAULT_FULL_NAME = f"{DEFAULT_MODEL_NAME}-{DEFAULT_MODEL_VERSION}" -DEFAULT_LOG_LEVEL = "INFO" -DEFAULT_PROTOCOL = "HTTP_JSON" -DEFAULT_GRPC_OPTIONS = "{}" -DEFAULT_GRPC_CONCURRENCY = "10" +# Following environment variables are expected to be populated by Merlin +HTTP_PORT = ("CARAML_HTTP_PORT", 8080) +MODEL_NAME = ("CARAML_MODEL_NAME", "model") +MODEL_VERSION = ("CARAML_MODEL_VERSION", "1") +MODEL_FULL_NAME = ("CARAML_MODEL_FULL_NAME", "model-1") +PROTOCOL = ("CARAML_PROTOCOL", "HTTP_JSON") + +WORKERS = ("WORKERS", 1) +GRPC_PORT = ("CARAML_GRPC_PORT", 9000) +LOG_LEVEL = ("LOG_LEVEL", "INFO") +GRPC_OPTIONS = ("GRPC_OPTIONS", "{}") +GRPC_CONCURRENCY = ("GRPC_CONCURRENCY", 10) +PUSHGATEWAY_ENABLED = ("PUSHGATEWAY_ENABLED", "false") +PUSHGATEWAY_URL = ("PUSHGATEWAY_URL", "localhost:9091") +PUSHGATEWAY_PUSH_INTERVAL_SEC = ("PUSHGATEWAY_PUSH_INTERVAL_SEC", 30) class ModelManifest: """ @@ -39,39 +33,57 @@ def __init__(self, model_name: str, model_version: str, model_full_name: str, mo self.model_dir = model_dir +class PushGateway: + def __init__(self, enabled, url, push_interval_sec): + self.url = url + self.enabled = enabled + self.push_interval_sec = push_interval_sec + + class Config: """ Server Configuration """ def __init__(self, model_dir: str): - self.protocol = Protocol(os.getenv(PROTOCOL, DEFAULT_PROTOCOL)) - self.http_port = int(os.getenv(HTTP_PORT, DEFAULT_HTTP_PORT)) - self.grpc_port = int(os.getenv(GRPC_PORT, DEFAULT_GRPC_PORT)) + self.protocol = Protocol(os.getenv(*PROTOCOL)) + self.http_port = int(os.getenv(*HTTP_PORT)) + self.grpc_port = int(os.getenv(*GRPC_PORT)) # Model manifest - model_name = os.getenv(MODEL_NAME, DEFAULT_MODEL_NAME) - model_version = os.getenv(MODEL_VERSION, DEFAULT_MODEL_VERSION) - model_full_name = os.getenv(MODEL_FULL_NAME, DEFAULT_FULL_NAME) + model_name = os.getenv(*MODEL_NAME) + model_version = os.getenv(*MODEL_VERSION) + model_full_name = os.getenv(*MODEL_FULL_NAME) self.model_manifest = ModelManifest(model_name, model_version, model_full_name, model_dir) - self.workers = int(os.getenv(WORKERS, 1)) + self.workers = int(os.getenv(*WORKERS)) self.log_level = self._log_level() - self.grpc_options = self._to_grpc_options(os.getenv(GRPC_OPTIONS, DEFAULT_GRPC_OPTIONS)) - self.grpc_concurrency = int(os.getenv(GRPC_CONCURRENCY, DEFAULT_GRPC_CONCURRENCY)) + self.grpc_options = self._grpc_options() + self.grpc_concurrency = int(os.getenv(*GRPC_CONCURRENCY)) + + push_enabled = str_to_bool(os.getenv(*PUSHGATEWAY_ENABLED)) + push_url = os.getenv(*PUSHGATEWAY_URL) + push_interval = os.getenv(*PUSHGATEWAY_PUSH_INTERVAL_SEC) + self.push_gateway = PushGateway(push_enabled, + push_url, + push_interval) def _log_level(self): - log_level = os.getenv(LOG_LEVEL, DEFAULT_LOG_LEVEL) + log_level = os.getenv(*LOG_LEVEL) numeric_level = getattr(logging, log_level.upper(), None) if not isinstance(numeric_level, int): logging.warning(f"invalid log level {log_level}") return logging.INFO return numeric_level - def _to_grpc_options(self, raw_options: str): + def _grpc_options(self): + raw_options = os.getenv(*GRPC_OPTIONS) options = json.loads(raw_options) grpc_options = [] for k, v in options.items(): grpc_options.append((k, v)) return grpc_options + +def str_to_bool(str: str)->bool: + return str.lower() in ("true", "1") \ No newline at end of file diff --git a/python/pyfunc-server/pyfuncserver/metrics/pusher.py b/python/pyfunc-server/pyfuncserver/metrics/pusher.py new file mode 100644 index 000000000..528c59a90 --- /dev/null +++ b/python/pyfunc-server/pyfuncserver/metrics/pusher.py @@ -0,0 +1,39 @@ +import logging +import socket +import time +from threading import Thread + +from prometheus_client import push_to_gateway + +from pyfuncserver.config import Config + + +def start_metrics_pusher(push_gateway, registry, target_info, interval_sec): + """ + Start periodic job to push metrics to prometheus push gatewat + """ + logging.info(f"starting metrics pusher, url: {push_gateway} with interval {interval_sec} s") + daemon = Thread(target=push_metrics, args=(push_gateway, registry, interval_sec, target_info), + daemon=True, name='metrics_push') + daemon.start() + + +def push_metrics(gateway_url, registry, interval_sec, grouping_keys): + """ + push metrics to prometheus push gateway every interval_sec + Should be called in separate thread + """ + while True: + push_to_gateway(gateway_url, "merlin_pyfunc_upi", registry=registry, grouping_key=grouping_keys) + time.sleep(interval_sec) + + +def labels(config: Config): + """ + Labels to be added to all metrics + """ + return { + "merlin_model_name": config.model_manifest.model_name, + "merlin_model_version": config.model_manifest.model_version, + "host": socket.getfqdn() + } diff --git a/python/pyfunc-server/pyfuncserver/server.py b/python/pyfunc-server/pyfuncserver/server.py index 6df2644ad..27a581e33 100644 --- a/python/pyfunc-server/pyfuncserver/server.py +++ b/python/pyfunc-server/pyfuncserver/server.py @@ -11,12 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging import prometheus_client from merlin.protocol import Protocol from prometheus_client import CollectorRegistry, multiprocess from pyfuncserver.config import Config +from pyfuncserver.metrics.pusher import labels, start_metrics_pusher from pyfuncserver.model.model import PyFuncModel from pyfuncserver.protocol.rest.server import HTTPServer from pyfuncserver.protocol.upi.server import UPIServer @@ -38,8 +40,18 @@ def start(self, model: PyFuncModel): http_server = HTTPServer(model=model, config=self._config, metrics_registry=registry) http_server.start() elif self._config.protocol == Protocol.UPI_V1: - # start prometheus metrics server and listen at http port - prometheus_client.start_http_server(self._config.http_port, registry=registry) + # Due to https://github.com/knative/serving/issues/8471, we have to resort to pushing metrics to + # prometheus push gateway. + if (self._config.push_gateway.enabled): + target_info = labels(self._config) + start_metrics_pusher(self._config.push_gateway.url, + registry, + target_info, + self._config.push_gateway.push_interval_sec) + else: + # start prometheus metrics server and listen at http port + logging.info(f"starting metrics server at {self._config.http_port}") + prometheus_client.start_http_server(self._config.http_port, registry=registry) # start grpc/upi server and listen at grpc port upi_server = UPIServer(model=model, config=self._config) diff --git a/python/pyfunc-server/test/test_backward_compatibility.py b/python/pyfunc-server/test/test_backward_compatibility.py index b9763b4c8..3adc452c9 100644 --- a/python/pyfunc-server/test/test_backward_compatibility.py +++ b/python/pyfunc-server/test/test_backward_compatibility.py @@ -230,8 +230,8 @@ def test_model_int(model): try: env["PROMETHEUS_MULTIPROC_DIR"] = "prometheus" - env[HTTP_PORT] = "8081" - env[WORKERS] = "1" + env[HTTP_PORT[0]] = "8081" + env[WORKERS[0]] = "1" c = subprocess.Popen(["python", "-m", "pyfuncserver", "--model_dir", model_path], env=env) # wait till the server is up @@ -265,8 +265,8 @@ def test_model_headers(model): try: env["PROMETHEUS_MULTIPROC_DIR"] = "prometheus" - env[HTTP_PORT] = "8081" - env[WORKERS] = "1" + env[HTTP_PORT[0]] = "8081" + env[WORKERS[0]] = "1" c = subprocess.Popen(["python", "-m", "pyfuncserver", "--model_dir", model_path], env=env) # wait till the server is up @@ -303,8 +303,8 @@ def test_error_model_int(error_core, message, model): try: env["PROMETHEUS_MULTIPROC_DIR"] = "prometheus" - env[HTTP_PORT] = "8081" - env[WORKERS] = "1" + env[HTTP_PORT[0]] = "8081" + env[WORKERS[0]] = "1" c = subprocess.Popen(["python", "-m", "pyfuncserver", "--model_dir", model_path], env=env) # wait till the server is up diff --git a/python/pyfunc-server/test/test_http.py b/python/pyfunc-server/test/test_http.py index f12ecc770..2392a9109 100644 --- a/python/pyfunc-server/test/test_http.py +++ b/python/pyfunc-server/test/test_http.py @@ -42,11 +42,11 @@ def test_http_protocol(): port = "8081" try: - env[HTTP_PORT] = port - env[MODEL_NAME] = model_name - env[MODEL_VERSION] = model_version - env[MODEL_FULL_NAME] = model_full_name - env[WORKERS] = "1" + env[HTTP_PORT[0]] = port + env[MODEL_NAME[0]] = model_name + env[MODEL_VERSION[0]] = model_version + env[MODEL_FULL_NAME[0]] = model_full_name + env[WORKERS[0]] = "1" env["PROMETHEUS_MULTIPROC_DIR"] = "prometheus" c = subprocess.Popen(["python", "-m", "pyfuncserver", "--model_dir", model_path], env=env) @@ -93,11 +93,11 @@ def test_metrics(): try: pathlib.Path(metrics_path).mkdir(exist_ok=True) - env[HTTP_PORT] = port - env[MODEL_NAME] = model_name - env[MODEL_VERSION] = model_version - env[MODEL_FULL_NAME] = model_full_name - env[WORKERS] = "4" + env[HTTP_PORT[0]] = port + env[MODEL_NAME[0]] = model_name + env[MODEL_VERSION[0]] = model_version + env[MODEL_FULL_NAME[0]] = model_full_name + env[WORKERS[0]] = "4" env["PROMETHEUS_MULTIPROC_DIR"] = metrics_path c = subprocess.Popen(["python", "-m", "pyfuncserver", "--model_dir", model_path], env=env, start_new_session=True) diff --git a/python/pyfunc-server/test/test_upi.py b/python/pyfunc-server/test/test_upi.py index 9d5d639f2..a61e86cf0 100644 --- a/python/pyfunc-server/test/test_upi.py +++ b/python/pyfunc-server/test/test_upi.py @@ -1,3 +1,4 @@ +import logging import os import pathlib import re @@ -184,13 +185,13 @@ def start_upi_server(model_name="my-model", model_version="1", http_port=8080, g mlflow.end_run() pathlib.Path(metrics_path).mkdir(exist_ok=True) - env[PROTOCOL] = Protocol.UPI_V1.value - env[HTTP_PORT] = str(http_port) - env[GRPC_PORT] = str(grpc_port) - env[MODEL_NAME] = model_name - env[MODEL_VERSION] = model_version - env[MODEL_FULL_NAME] = model_full_name - env[WORKERS] = str(workers) + env[PROTOCOL[0]] = Protocol.UPI_V1.value + env[HTTP_PORT[0]] = str(http_port) + env[GRPC_PORT[0]] = str(grpc_port) + env[MODEL_NAME[0]] = model_name + env[MODEL_VERSION[0]] = model_version + env[MODEL_FULL_NAME[0]] = model_full_name + env[WORKERS[0]] = str(workers) env["PROMETHEUS_MULTIPROC_DIR"] = metrics_path pid = subprocess.Popen(["python", "-m", "pyfuncserver", "--model_dir", model_path], env=env, start_new_session=True) diff --git a/scripts/e2e/config/kserve/overlay.yaml b/scripts/e2e/config/kserve/overlay.yaml index 81ef5d55c..9dcf7850f 100644 --- a/scripts/e2e/config/kserve/overlay.yaml +++ b/scripts/e2e/config/kserve/overlay.yaml @@ -14,7 +14,7 @@ spec: cpu: 1 memory: 300Mi requests: - cpu: 80m + cpu: 100m memory: 64Mi --- @@ -206,7 +206,7 @@ data: "image" : "ghcr.io/ariefrahmansyah/kfserving-storage-init:latest", "memoryRequest": "50Mi", "memoryLimit": "1Gi", - "cpuRequest": "20m", + "cpuRequest": "10m", "cpuLimit": "1" } transformers: |- diff --git a/scripts/e2e/deploy-merlin.sh b/scripts/e2e/deploy-merlin.sh index a42fcca72..b2e0e80ce 100755 --- a/scripts/e2e/deploy-merlin.sh +++ b/scripts/e2e/deploy-merlin.sh @@ -29,9 +29,9 @@ install_mlp() { --set ingress.host=mlp.mlp.${INGRESS_HOST} \ --wait --timeout=${TIMEOUT} - kubectl apply -f config/mock/message-dumper.yaml - kubectl rollout status deployment/mlp -n mlp -w --timeout=${TIMEOUT} + + kubectl apply -f config/mock/message-dumper.yaml } install_merlin() {