Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add storage-initializer uid handling for OpenShift with istio-cni #18

Merged
merged 5 commits into from
Jul 12, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/webhook/admission/pod/accelerator_injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ const (
NvidiaGPUTaintValue = "present"
)

func InjectGKEAcceleratorSelector(pod *v1.Pod) error {
func InjectGKEAcceleratorSelector(pod *v1.Pod, _ *v1.Namespace) error {
gpuEnabled := false
for _, container := range pod.Spec.Containers {
if _, ok := container.Resources.Limits[constants.NvidiaGPUResourceType]; ok {
Expand Down
2 changes: 1 addition & 1 deletion pkg/webhook/admission/pod/accelerator_injector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func TestAcceleratorInjector(t *testing.T) {
}

for name, scenario := range scenarios {
InjectGKEAcceleratorSelector(scenario.original)
InjectGKEAcceleratorSelector(scenario.original, nil)
// cmd.Diff complains on ResourceList when Nvidia is key. Objects are explicitly compared
if diff := cmp.Diff(
scenario.expected.Spec.NodeSelector,
Expand Down
2 changes: 1 addition & 1 deletion pkg/webhook/admission/pod/agent_injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func getLoggerConfigs(configMap *v1.ConfigMap) (*LoggerConfig, error) {
return loggerConfig, nil
}

func (ag *AgentInjector) InjectAgent(pod *v1.Pod) error {
func (ag *AgentInjector) InjectAgent(pod *v1.Pod, _ *v1.Namespace) error {
// Only inject the model agent sidecar if the required annotations are set
_, injectLogger := pod.ObjectMeta.Annotations[constants.LoggerInternalAnnotationKey]
_, injectPuller := pod.ObjectMeta.Annotations[constants.AgentShouldInjectAnnotationKey]
Expand Down
2 changes: 1 addition & 1 deletion pkg/webhook/admission/pod/agent_injector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1112,7 +1112,7 @@ func TestAgentInjector(t *testing.T) {
loggerConfig,
batcherTestConfig,
}
injector.InjectAgent(scenario.original)
injector.InjectAgent(scenario.original, nil)
if diff, _ := kmp.SafeDiff(scenario.expected.Spec, scenario.original.Spec); diff != "" {
t.Errorf("Test %q unexpected result (-want +got): %v", name, diff)
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/webhook/admission/pod/metrics_aggregate_injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package pod
import (
"encoding/json"
"fmt"

"github.com/kserve/kserve/pkg/constants"
v1 "k8s.io/api/core/v1"
)
Expand Down Expand Up @@ -75,7 +76,7 @@ func setMetricAggregationEnvVars(pod *v1.Pod) {

// InjectMetricsAggregator looks for the annotations to enable aggregate kserve-container and queue-proxy metrics and
// if specified, sets port-related EnvVars in queue-proxy and the aggregate prometheus annotation.
func (ma *MetricsAggregator) InjectMetricsAggregator(pod *v1.Pod) error {
func (ma *MetricsAggregator) InjectMetricsAggregator(pod *v1.Pod, _ *v1.Namespace) error {
//Only set metric configs if the required annotations are set
enableMetricAggregation, ok := pod.ObjectMeta.Annotations[constants.EnableMetricAggregation]
if !ok {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ func TestInjectMetricsAggregator(t *testing.T) {
}

for name, scenario := range scenarios {
ma.InjectMetricsAggregator(scenario.original)
ma.InjectMetricsAggregator(scenario.original, nil)
if diff, _ := kmp.SafeDiff(scenario.expected.Spec, scenario.original.Spec); diff != "" {
t.Errorf("Test %q unexpected result (-want +got): %v", name, diff)
}
Expand Down
15 changes: 11 additions & 4 deletions pkg/webhook/admission/pod/mutator.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,14 @@ func (mutator *Mutator) Handle(ctx context.Context, req admission.Request) admis
// For some reason pod namespace is always empty when coming to pod mutator, need to set from admission request
pod.Namespace = req.AdmissionRequest.Namespace

if err := mutator.mutate(pod, configMap); err != nil {
targetNs := &v1.Namespace{}
err = mutator.Client.Get(context.TODO(), k8types.NamespacedName{Name: pod.Namespace, Namespace: pod.Namespace}, targetNs)
danielezonca marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
log.Error(err, "Failed to get the target namespace", "name", pod.Namespace)
danielezonca marked this conversation as resolved.
Show resolved Hide resolved
return admission.Errored(http.StatusInternalServerError, err)
}

if err := mutator.mutate(pod, configMap, targetNs); err != nil {
log.Error(err, "Failed to mutate pod", "name", pod.Labels[constants.InferenceServicePodLabelKey])
return admission.Errored(http.StatusInternalServerError, err)
}
Expand All @@ -76,7 +83,7 @@ func (mutator *Mutator) Handle(ctx context.Context, req admission.Request) admis
return admission.PatchResponseFromRaw(req.AdmissionRequest.Object.Raw, patch)
}

func (mutator *Mutator) mutate(pod *v1.Pod, configMap *v1.ConfigMap) error {
func (mutator *Mutator) mutate(pod *v1.Pod, configMap *v1.ConfigMap, targetNs *v1.Namespace) error {
credentialBuilder := credentials.NewCredentialBulder(mutator.Client, configMap)

storageInitializerConfig, err := getStorageInitializerConfigs(configMap)
Expand Down Expand Up @@ -116,15 +123,15 @@ func (mutator *Mutator) mutate(pod *v1.Pod, configMap *v1.ConfigMap) error {
return err
}

mutators := []func(pod *v1.Pod) error{
mutators := []func(pod *v1.Pod, targetNs *v1.Namespace) error{
ReToCode marked this conversation as resolved.
Show resolved Hide resolved
InjectGKEAcceleratorSelector,
storageInitializer.InjectStorageInitializer,
agentInjector.InjectAgent,
metricsAggregator.InjectMetricsAggregator,
}

for _, mutator := range mutators {
if err := mutator(pod); err != nil {
if err := mutator(pod, targetNs); err != nil {
return err
}
}
Expand Down
29 changes: 22 additions & 7 deletions pkg/webhook/admission/pod/storage_initializer_injector.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ const (
PvcURIPrefix = "pvc://"
PvcSourceMountName = "kserve-pvc-source"
PvcSourceMountPath = "/mnt/pvc"
OpenShiftUidRangeAnnotationKey = "openshift.io/sa.scc.uid-range"
)

type StorageInitializerConfig struct {
Expand Down Expand Up @@ -85,7 +86,7 @@ func getStorageInitializerConfigs(configMap *v1.ConfigMap) (*StorageInitializerC
// for the serving container in a unified way across storage tech by injecting
// a provisioning INIT container. This is a work around because KNative does not
// support INIT containers: https://github.com/knative/serving/issues/4307
func (mi *StorageInitializerInjector) InjectStorageInitializer(pod *v1.Pod) error {
func (mi *StorageInitializerInjector) InjectStorageInitializer(pod *v1.Pod, targetNs *v1.Namespace) error {
// Only inject if the required annotations are set
srcURI, ok := pod.ObjectMeta.Annotations[constants.StorageInitializerSourceUriInternalAnnotationKey]
if !ok {
Expand Down Expand Up @@ -330,12 +331,26 @@ func (mi *StorageInitializerInjector) InjectStorageInitializer(pod *v1.Pod) erro
}
}

// Allow to override the uid for the case where ISTIO CNI with DNS proxy is enabled
// See for more: https://istio.io/latest/docs/setup/additional-setup/cni/#compatibility-with-application-init-containers.
if value, ok := pod.GetAnnotations()[constants.IstioSidecarUIDAnnotationKey]; ok {
if uid, err := strconv.ParseInt(value, 10, 64); err == nil {
initContainer.SecurityContext.RunAsUser = ptr.Int64(uid)
}
/*
OpenShift uses istio-cni which causes an issue with init-containers when calling external services
like S3 or similar. Setting the `uid` for the `storage-initializer` to the same `uid` as the
`uid` of the `istio-proxy` resolves the issue. In OpenShift the `istio-proxy` always gets assigned
the first `uid` from the namespaces `uid` range + 1 (The range is defined in an annotation on the namespace).
*/
if initContainer.SecurityContext == nil {
ReToCode marked this conversation as resolved.
Show resolved Hide resolved
initContainer.SecurityContext = &v1.SecurityContext{}
}
uidStr := targetNs.Annotations[OpenShiftUidRangeAnnotationKey]

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I see this mechanism is replacing the IstioSidecarUIDAnnotationKey alternative. Do you think it could make sense to keep it as fallback if OpenShiftUidRangeAnnotationKey is not available?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm not sure if it is better to fail or not (if the annotation is not there, something went pretty wrong on OCP). As the user in OCP probably will never know about the issue and how to resolve it (needing to know to add 1 for example), do we expect that he even could bring the correct user? I'm currently favouring erroring I think.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that this mechanism with "+1" would work as Istio itself would not find that range, right ? I'm not sure what Istio/Service Mesh would do as a fallback when the range is not available as an annotation. I would then probably do the same here, too.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I think it should be fine to assume it is there or error out.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'm more in agreement with @danielezonca here.

I think the code should honor the IstioSidecarUIDAnnotationKey if present. Otherwise, apply OpenShift logic. If both the OpenShift annotations and IstioSidecarUIDAnnotationKey aren't there, continue without doing any change and without error .

That logic ^ would make the code compatible with both upstream Istio-cni (which will want the 1337 annotation) and Maistra/OSSM (which will use OpenShift annotations). And the code would also remain compatible with an upstream non-cni-Istio installation (which presumably works well without UID hacking), in case community wants to try that.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...I'm also thinking that such logic ^ (despite being a workaround) is perhaps more feasible to push to upstream, as it is less invasive.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like your proposal and explanation. I think that makes sense. I updated the PR accordingly, please re-review.

if uidStr == "" {
return fmt.Errorf("could not find OpenShift internal annotation %s on namespace: %s", OpenShiftUidRangeAnnotationKey, targetNs.Name)
}
uidStrParts := strings.Split(uidStr, "/")
if uid, err := strconv.ParseInt(uidStrParts[0], 10, 64); err == nil {
// Set the uid to the first uid in the namespaces range + 1
uid++
initContainer.SecurityContext.RunAsUser = ptr.Int64(uid)
} else {
return fmt.Errorf("could not parse value %s in annotation %s on namespace: %s", uidStr, OpenShiftUidRangeAnnotationKey, targetNs.Name)
ReToCode marked this conversation as resolved.
Show resolved Hide resolved
}

// Add init container to the spec
Expand Down
56 changes: 49 additions & 7 deletions pkg/webhook/admission/pod/storage_initializer_injector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"k8s.io/apimachinery/pkg/api/resource"
"knative.dev/pkg/kmp"
"knative.dev/pkg/ptr"

"github.com/kserve/kserve/pkg/constants"
"github.com/kserve/kserve/pkg/credentials"
Expand Down Expand Up @@ -62,6 +63,17 @@ var (
v1.ResourceMemory: resource.MustParse(StorageInitializerDefaultMemoryRequest),
},
}

targetNS = &v1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Namespace: "my-ns",
Annotations: map[string]string{
OpenShiftUidRangeAnnotationKey: "1000740000/10000",
ReToCode marked this conversation as resolved.
Show resolved Hide resolved
},
},
}

expectedInitContainerUid = ptr.Int64(1000740001)
)

func TestStorageInitializerInjector(t *testing.T) {
Expand Down Expand Up @@ -182,6 +194,9 @@ func TestStorageInitializerInjector(t *testing.T) {
MountPath: constants.DefaultModelLocalMountPath,
},
},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
},
},
Volumes: []v1.Volume{
Expand Down Expand Up @@ -252,6 +267,9 @@ func TestStorageInitializerInjector(t *testing.T) {
MountPath: constants.DefaultModelLocalMountPath,
},
},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
},
},
Volumes: []v1.Volume{
Expand Down Expand Up @@ -332,6 +350,9 @@ func TestStorageInitializerInjector(t *testing.T) {
MountPath: constants.DefaultModelLocalMountPath,
},
},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
},
},
Volumes: []v1.Volume{
Expand All @@ -354,7 +375,7 @@ func TestStorageInitializerInjector(t *testing.T) {
}),
config: storageInitializerConfig,
}
if err := injector.InjectStorageInitializer(scenario.original); err != nil {
if err := injector.InjectStorageInitializer(scenario.original, targetNS); err != nil {
t.Errorf("Test %q unexpected result: %s", name, err)
}
if diff, _ := kmp.SafeDiff(scenario.expected.Spec, scenario.original.Spec); diff != "" {
Expand Down Expand Up @@ -394,7 +415,7 @@ func TestStorageInitializerFailureCases(t *testing.T) {
}),
config: storageInitializerConfig,
}
if err := injector.InjectStorageInitializer(scenario.original); err != nil {
if err := injector.InjectStorageInitializer(scenario.original, targetNS); err != nil {
if !strings.HasPrefix(err.Error(), scenario.expectedErrorPrefix) {
t.Errorf("Test %q unexpected failure [%s], expected: %s", name, err.Error(), scenario.expectedErrorPrefix)
}
Expand Down Expand Up @@ -493,7 +514,7 @@ func TestCustomSpecStorageUriInjection(t *testing.T) {
}),
config: storageInitializerConfig,
}
if err := injector.InjectStorageInitializer(scenario.original); err != nil {
if err := injector.InjectStorageInitializer(scenario.original, targetNS); err != nil {
t.Errorf("Test %q unexpected result: %s", name, err)
}

Expand Down Expand Up @@ -597,6 +618,9 @@ func TestCredentialInjection(t *testing.T) {
MountPath: constants.DefaultModelLocalMountPath,
},
},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
Env: []v1.EnvVar{
{
Name: s3.AWSAccessKeyId,
Expand Down Expand Up @@ -702,6 +726,9 @@ func TestCredentialInjection(t *testing.T) {
MountPath: gcs.GCSCredentialVolumeMountPath,
},
},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
Env: []v1.EnvVar{
{
Name: gcs.GCSCredentialEnvKey,
Expand Down Expand Up @@ -790,6 +817,9 @@ func TestCredentialInjection(t *testing.T) {
Name: "storage-initializer",
Image: StorageInitializerContainerImage + ":" + StorageInitializerContainerImageVersion,
Args: []string{"s3://my-bucket/foo/bar", constants.DefaultModelLocalMountPath},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
Env: []v1.EnvVar{
{
Name: credentials.StorageConfigEnvKey,
Expand Down Expand Up @@ -885,6 +915,9 @@ func TestCredentialInjection(t *testing.T) {
Name: "storage-initializer",
Image: StorageInitializerContainerImage + ":" + StorageInitializerContainerImageVersion,
Args: []string{"s3://my-bucket/foo/bar", constants.DefaultModelLocalMountPath},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
Env: []v1.EnvVar{
{
Name: credentials.StorageConfigEnvKey,
Expand Down Expand Up @@ -944,7 +977,7 @@ func TestCredentialInjection(t *testing.T) {
credentialBuilder: builder,
config: storageInitializerConfig,
}
if err := injector.InjectStorageInitializer(scenario.original); err != nil {
if err := injector.InjectStorageInitializer(scenario.original, targetNS); err != nil {
t.Errorf("Test %q unexpected failure [%s]", name, err.Error())
}
if diff, _ := kmp.SafeDiff(scenario.expected.Spec, scenario.original.Spec); diff != "" {
Expand Down Expand Up @@ -1008,6 +1041,9 @@ func TestStorageInitializerConfigmap(t *testing.T) {
MountPath: constants.DefaultModelLocalMountPath,
},
},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
},
},
Volumes: []v1.Volume{
Expand Down Expand Up @@ -1037,7 +1073,7 @@ func TestStorageInitializerConfigmap(t *testing.T) {
StorageSpecSecretName: StorageInitializerDefaultStorageSpecSecretName,
},
}
if err := injector.InjectStorageInitializer(scenario.original); err != nil {
if err := injector.InjectStorageInitializer(scenario.original, targetNS); err != nil {
t.Errorf("Test %q unexpected result: %s", name, err)
}
if diff, _ := kmp.SafeDiff(scenario.expected.Spec, scenario.original.Spec); diff != "" {
Expand Down Expand Up @@ -1272,7 +1308,7 @@ func TestDirectVolumeMountForPvc(t *testing.T) {
EnableDirectPvcVolumeMount: true, // enable direct volume mount for PVC
},
}
if err := injector.InjectStorageInitializer(scenario.original); err != nil {
if err := injector.InjectStorageInitializer(scenario.original, targetNS); err != nil {
t.Errorf("Test %q unexpected result: %s", name, err)
}
if diff, _ := kmp.SafeDiff(scenario.expected.Spec, scenario.original.Spec); diff != "" {
Expand Down Expand Up @@ -1385,6 +1421,9 @@ func TestTransformerCollocation(t *testing.T) {
MountPath: constants.DefaultModelLocalMountPath,
},
},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
},
},
Volumes: []v1.Volume{
Expand Down Expand Up @@ -1563,6 +1602,9 @@ func TestTransformerCollocation(t *testing.T) {
MountPath: constants.DefaultModelLocalMountPath,
},
},
SecurityContext: &v1.SecurityContext{
RunAsUser: expectedInitContainerUid,
},
},
},
Volumes: []v1.Volume{
Expand Down Expand Up @@ -1594,7 +1636,7 @@ func TestTransformerCollocation(t *testing.T) {
}),
config: scenario.storageConfig,
}
if err := injector.InjectStorageInitializer(scenario.original); err != nil {
if err := injector.InjectStorageInitializer(scenario.original, targetNS); err != nil {
t.Errorf("Test %q unexpected result: %s", name, err)
}
if diff, _ := kmp.SafeDiff(scenario.expected.Spec, scenario.original.Spec); diff != "" {
Expand Down