From 6d22c9f9fb851ceb002e079ded3658d7d8be59f4 Mon Sep 17 00:00:00 2001 From: Julian Date: Fri, 22 Jul 2022 02:19:30 +1000 Subject: [PATCH] enh: Add support to configure PrepackedTriton with no storage initialiser (#4216) * Adding support to configure PrepackTriton with no storage initializer, and pass down arguments to load models #4203 * Update to compare args with suffix instead of index * Update to set secret before spec copied. Added tests to create and validate envFrom secret set --- .../v1/seldondeployment_types.go | 1 + operator/constants/constants.go | 10 +- .../controllers/model_initializer_injector.go | 18 +- .../model_initializer_injector_test.go | 3 +- .../seldondeployment_prepackaged_servers.go | 57 ++++-- ...ldondeployment_prepackaged_servers_test.go | 165 +++++++++++++++++- 6 files changed, 229 insertions(+), 25 deletions(-) diff --git a/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go b/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go index 12661965a8..0a51a4f55e 100644 --- a/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go +++ b/operator/apis/machinelearning.seldon.io/v1/seldondeployment_types.go @@ -71,6 +71,7 @@ const ( ANNOTATION_SEPARATE_ENGINE = "seldon.io/engine-separate-pod" ANNOTATION_HEADLESS_SVC = "seldon.io/headless-svc" ANNOTATION_NO_ENGINE = "seldon.io/no-engine" + ANNOTATION_NO_STOARGE_INITIALIZER = "seldon.io/no-storage-initializer" ANNOTATION_CUSTOM_SVC_NAME = "seldon.io/svc-name" ANNOTATION_LOGGER_WORK_QUEUE_SIZE = "seldon.io/executor-logger-queue-size" ANNOTATION_LOGGER_WRITE_TIMEOUT_MS = "seldon.io/executor-logger-write-timeout-ms" diff --git a/operator/constants/constants.go b/operator/constants/constants.go index d9ceab8a46..37ea89e2bd 100644 --- a/operator/constants/constants.go +++ b/operator/constants/constants.go @@ -24,8 +24,14 @@ const ( GrpcPortName = "grpc" HttpPortName = "http" - TritonDefaultGrpcPort = 2001 - TritonDefaultHttpPort = 2000 + TritonDefaultGrpcPort = 2001 + TritonDefaultHttpPort = 2000 + TritonArgGrpcPort = "--grpc-port=" + TritonArgHttpPort = "--http-port=" + TritonArgModelRepository = "--model-repository=" + TritonArgModelControlMode = "--model-control-mode=" + TritonArgLoadModel = "--load-model=" + TritonArgStrictModelConfig = "--strict-model-config=" KFServingProbeLivePath = "/v2/health/live" KFServingProbeReadyPath = "/v2/health/ready" diff --git a/operator/controllers/model_initializer_injector.go b/operator/controllers/model_initializer_injector.go index 92677c7c40..5c3f446c9f 100644 --- a/operator/controllers/model_initializer_injector.go +++ b/operator/controllers/model_initializer_injector.go @@ -17,6 +17,7 @@ import ( "context" "encoding/json" "fmt" + "os" "strings" "github.com/seldonio/seldon-core/operator/controllers/resources/credentials" @@ -26,7 +27,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" - "os" ) // TODO: change image to seldon? is at least configurable by configmap now (with fixed version there) @@ -264,8 +264,17 @@ func (mi *ModelInitialiser) InjectModelInitializer(deployment *appsv1.Deployment } // Inject credentials using secretRef + addEnvFromSecret(initContainer, envSecretRefName) + + // Add init container to the spec + podSpec.InitContainers = append(podSpec.InitContainers, *initContainer) + + return deployment, nil +} + +func addEnvFromSecret(userContainer *corev1.Container, envSecretRefName string) { if envSecretRefName != "" { - initContainer.EnvFrom = append(initContainer.EnvFrom, + userContainer.EnvFrom = append(userContainer.EnvFrom, corev1.EnvFromSource{ SecretRef: &corev1.SecretEnvSource{ LocalObjectReference: corev1.LocalObjectReference{ @@ -274,11 +283,6 @@ func (mi *ModelInitialiser) InjectModelInitializer(deployment *appsv1.Deployment }, }) } - - // Add init container to the spec - podSpec.InitContainers = append(podSpec.InitContainers, *initContainer) - - return deployment, nil } func addVolumeMountToContainer(userContainer *corev1.Container, ModelInitializerVolumeName string, MountPath string) { diff --git a/operator/controllers/model_initializer_injector_test.go b/operator/controllers/model_initializer_injector_test.go index 674111781b..478df15213 100644 --- a/operator/controllers/model_initializer_injector_test.go +++ b/operator/controllers/model_initializer_injector_test.go @@ -2,12 +2,13 @@ package controllers import ( "context" + "testing" + . "github.com/onsi/gomega" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" v1meta "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" - "testing" ) func TestStorageInitalizerInjector(t *testing.T) { diff --git a/operator/controllers/seldondeployment_prepackaged_servers.go b/operator/controllers/seldondeployment_prepackaged_servers.go index 96ca97d577..815c780df4 100644 --- a/operator/controllers/seldondeployment_prepackaged_servers.go +++ b/operator/controllers/seldondeployment_prepackaged_servers.go @@ -135,15 +135,37 @@ func (pi *PrePackedInitialiser) addTritonServer(mlDepSpec *machinelearningv1.Sel c := utils.GetContainerForDeployment(deploy, pu.Name) existing := c != nil + // Define the default arguments + args := []string{ + "/opt/tritonserver/bin/tritonserver", + constants.TritonArgGrpcPort + strconv.Itoa(int(pu.Endpoint.GrpcPort)), + constants.TritonArgHttpPort + strconv.Itoa(int(pu.Endpoint.HttpPort)), + } + + // Triton can support loading models directory from cloud storage modelURI, enabled with "no-storage-initializer" annotation + // see: https://github.com/triton-inference-server/server/blob/main/docs/model_repository.md + noStorage := strings.ToLower(mlDepSpec.Annotations[machinelearningv1.ANNOTATION_NO_STOARGE_INITIALIZER]) == "true" + if !noStorage { + args = append(args, constants.TritonArgModelRepository+DefaultModelLocalMountPath) + args = append(args, constants.TritonArgStrictModelConfig+"false") + } else { + args = append(args, constants.TritonArgModelRepository+pu.ModelURI) + // Optionally allow "model_control_mode=explicit" and one or more "load_model=model_name" parameters + // see: https://github.com/triton-inference-server/server/blob/main/docs/model_management.md + for _, paramElement := range pu.Parameters { + if strings.ToLower(paramElement.Name) == "model_control_mode" { + args = append(args, constants.TritonArgModelControlMode+paramElement.Value) + } else if strings.ToLower(paramElement.Name) == "load_model" { + args = append(args, constants.TritonArgLoadModel+paramElement.Value) + } else if strings.ToLower(paramElement.Name) == "strict_model_config" { + args = append(args, constants.TritonArgStrictModelConfig+paramElement.Value) + } + } + } + cServer := &v1.Container{ Name: pu.Name, - Args: []string{ - "/opt/tritonserver/bin/tritonserver", - "--grpc-port=" + strconv.Itoa(int(pu.Endpoint.GrpcPort)), - "--http-port=" + strconv.Itoa(int(pu.Endpoint.HttpPort)), - "--model-repository=" + DefaultModelLocalMountPath, - "--strict-model-config=false", - }, + Args: args, Ports: []v1.ContainerPort{ { Name: "grpc", @@ -187,6 +209,12 @@ func (pi *PrePackedInitialiser) addTritonServer(mlDepSpec *machinelearningv1.Sel } cServer.Image = serverConfig.PrepackImageName(mlDepSpec.Protocol, pu) + envSecretRefName := extractEnvSecretRefName(pu) + if noStorage { + // Add secrets directly to triton server if not using storage initializer + addEnvFromSecret(cServer, envSecretRefName) + } + if existing { // Overwrite core items if not existing or required if c.Image == "" { @@ -195,6 +223,9 @@ func (pi *PrePackedInitialiser) addTritonServer(mlDepSpec *machinelearningv1.Sel if c.Args == nil { c.Args = cServer.Args } + if c.EnvFrom == nil { + c.EnvFrom = cServer.EnvFrom + } if c.ReadinessProbe == nil { c.ReadinessProbe = cServer.ReadinessProbe } @@ -215,12 +246,14 @@ func (pi *PrePackedInitialiser) addTritonServer(mlDepSpec *machinelearningv1.Sel } } - envSecretRefName := extractEnvSecretRefName(pu) - mi := NewModelInitializer(pi.ctx, pi.clientset) - _, err := mi.InjectModelInitializer(deploy, c.Name, pu.ModelURI, pu.ServiceAccountName, envSecretRefName, pu.StorageInitializerImage) - if err != nil { - return err + if !noStorage { + mi := NewModelInitializer(pi.ctx, pi.clientset) + _, err := mi.InjectModelInitializer(deploy, c.Name, pu.ModelURI, pu.ServiceAccountName, envSecretRefName, pu.StorageInitializerImage) + if err != nil { + return err + } } + return nil } diff --git a/operator/controllers/seldondeployment_prepackaged_servers_test.go b/operator/controllers/seldondeployment_prepackaged_servers_test.go index da68a53d62..9352c41a19 100644 --- a/operator/controllers/seldondeployment_prepackaged_servers_test.go +++ b/operator/controllers/seldondeployment_prepackaged_servers_test.go @@ -2,6 +2,10 @@ package controllers import ( "context" + "strconv" + "strings" + "time" + . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" machinelearningv1 "github.com/seldonio/seldon-core/operator/apis/machinelearning.seldon.io/v1" @@ -12,9 +16,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "strconv" - "strings" - "time" ) var _ = Describe("Create a prepacked sklearn server", func() { @@ -1054,3 +1055,161 @@ var _ = Describe("Create a prepacked mlflow server with existing container", fun }) }) + +var _ = Describe("Create a prepacked triton server with seldon.io/no-storage-initializer annotation", func() { + const timeout = time.Second * 30 + const interval = time.Second * 1 + const name = "pp1" + const sdepName = "prepack12" + envExecutorUser = "2" + By("Creating a resource") + It("should create a resource with no storage initializer but triton args", func() { + Expect(k8sClient).NotTo(BeNil()) + secretName := "s3-credentials" + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: secretName, + Namespace: "default", + }, + Type: corev1.SecretTypeOpaque, + StringData: map[string]string{ + "AWS_DEFAULT_REGION": "us-east-1", + "AWS_ACCESS_KEY_ID": "mykey", + "AWS_SECRET_ACCESS_KEY": "mysecret", + }, + } + var modelType = machinelearningv1.MODEL + modelName := "classifier" + modelUri := "s3://mybucket/mymodel" + var impl = machinelearningv1.PredictiveUnitImplementation(constants.PrePackedServerTriton) + key := types.NamespacedName{ + Name: sdepName, + Namespace: "default", + } + instance := &machinelearningv1.SeldonDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: key.Name, + Namespace: key.Namespace, + }, + Spec: machinelearningv1.SeldonDeploymentSpec{ + Annotations: map[string]string{ + "seldon.io/no-storage-initializer": "true", + }, + Name: name, + Protocol: machinelearningv1.ProtocolV2, + Predictors: []machinelearningv1.PredictorSpec{ + { + Annotations: map[string]string{ + "seldon.io/no-engine": "true", + }, + Name: "p1", + ComponentSpecs: []*machinelearningv1.SeldonPodSpec{ + { + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: modelName, + }, + }, + }, + }, + }, + Graph: machinelearningv1.PredictiveUnit{ + Name: modelName, + ModelURI: modelUri, + Type: &modelType, + Implementation: &impl, + Endpoint: &machinelearningv1.Endpoint{Type: machinelearningv1.REST}, + EnvSecretRefName: secretName, + Parameters: []machinelearningv1.Parameter{ + { + Name: "model_control_mode", + Type: machinelearningv1.STRING, + Value: "explicit", + }, + { + Name: "load_model", + Type: machinelearningv1.STRING, + Value: "model1", + }, + { + Name: "strict_model_config", + Type: machinelearningv1.STRING, + Value: "true", + }, + }, + }, + }, + }, + }, + } + + configMapName := types.NamespacedName{Name: "seldon-config", + Namespace: "seldon-system"} + + configResult := &corev1.ConfigMap{} + const timeout = time.Second * 30 + Eventually(func() error { return k8sClient.Get(context.TODO(), configMapName, configResult) }, timeout). + Should(Succeed()) + + // Create secret + Expect(k8sClient.Create(context.Background(), secret)).Should(Succeed()) + + // Run Defaulter + instance.Default() + + Expect(k8sClient.Create(context.Background(), instance)).Should(Succeed()) + //time.Sleep(time.Second * 5) + + fetched := &machinelearningv1.SeldonDeployment{} + Eventually(func() error { + err := k8sClient.Get(context.Background(), key, fetched) + return err + }, timeout, interval).Should(BeNil()) + Expect(fetched.Name).Should(Equal(sdepName)) + + predictor := instance.Spec.Predictors[0] + sPodSpec, idx := utils.GetSeldonPodSpecForPredictiveUnit(&predictor, predictor.Graph.Name) + depName := machinelearningv1.GetDeploymentName(instance, predictor, sPodSpec, idx) + depKey := types.NamespacedName{ + Name: depName, + Namespace: "default", + } + depFetched := &appsv1.Deployment{} + Eventually(func() error { + err := k8sClient.Get(context.Background(), depKey, depFetched) + return err + }, timeout, interval).Should(BeNil()) + Expect(len(depFetched.Spec.Template.Spec.InitContainers)).To(Equal(0)) // no storage + Expect(len(depFetched.Spec.Template.Spec.Containers)).Should(Equal(1)) // no engine + + for _, c := range depFetched.Spec.Template.Spec.Containers { + if c.Name == modelName { + // Check we have 7 args total (server, http, grpc, model_uri, model_control_mode, load_model, strict_model_config) + Expect(len(c.Args)).Should(Equal(7)) + for _, arg := range c.Args { + if strings.Index(arg, constants.TritonArgModelRepository) == 0 { + Expect(arg).To(Equal(constants.TritonArgModelRepository + modelUri)) + } + if strings.Index(arg, constants.TritonArgModelControlMode) == 0 { + Expect(arg).To(Equal(constants.TritonArgModelControlMode + "explicit")) + } + if strings.Index(arg, constants.TritonArgLoadModel) == 0 { + Expect(arg).To(Equal(constants.TritonArgLoadModel + "model1")) + } + if strings.Index(arg, constants.TritonArgStrictModelConfig) == 0 { + Expect(arg).To(Equal(constants.TritonArgStrictModelConfig + "true")) + } + } + + // Check env is set from secretName + Expect(len(c.EnvFrom)).Should(Equal(1)) + Expect(c.EnvFrom[0].SecretRef.Name).Should(Equal(secretName)) + } + } + + //j, _ := json.Marshal(depFetched) + //fmt.Println(string(j)) + }) + +})