Skip to content

Commit da24e19

Browse files
fix: increase shm default size and make it configurable (#2616) (#2687)
Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>
1 parent c03f83b commit da24e19

File tree

11 files changed, with 132 additions and 58 deletions.

deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10241,6 +10241,18 @@ spec:
1024110241
serviceName:
1024210242
description: contains the name of the component
1024310243
type: string
10244+
sharedMemory:
10245+
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
10246+
properties:
10247+
disabled:
10248+
type: boolean
10249+
size:
10250+
anyOf:
10251+
- type: integer
10252+
- type: string
10253+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10254+
x-kubernetes-int-or-string: true
10255+
type: object
1024410256
type: object
1024510257
status:
1024610258
description: Status reflects the current observed state of the component deployment.

deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10340,6 +10340,18 @@ spec:
1034010340
serviceName:
1034110341
description: contains the name of the component
1034210342
type: string
10343+
sharedMemory:
10344+
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
10345+
properties:
10346+
disabled:
10347+
type: boolean
10348+
size:
10349+
anyOf:
10350+
- type: integer
10351+
- type: string
10352+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10353+
x-kubernetes-int-or-string: true
10354+
type: object
1034310355
type: object
1034410356
description: |-
1034510357
Services allows per-service overrides of the component deployment settings.

deploy/cloud/operator/api/v1alpha1/common.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,8 @@ type Autoscaling struct {
4444
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
4545
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
4646
}
47+
48+
type SharedMemorySpec struct {
49+
Disabled bool `json:"disabled,omitempty"`
50+
Size resource.Quantity `json:"size,omitempty"`
51+
}

deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ type DynamoComponentDeploymentSharedSpec struct {
9292
// Ingress config to expose the component outside the cluster (or through a service mesh).
9393
Ingress *IngressSpec `json:"ingress,omitempty"`
9494

95+
// SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
96+
SharedMemory *SharedMemorySpec `json:"sharedMemory,omitempty"`
97+
9598
// +optional
9699
// ExtraPodMetadata adds labels/annotations to the created Pods.
97100
ExtraPodMetadata *dynamoCommon.ExtraPodMetadata `json:"extraPodMetadata,omitempty"`

deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10241,6 +10241,18 @@ spec:
1024110241
serviceName:
1024210242
description: contains the name of the component
1024310243
type: string
10244+
sharedMemory:
10245+
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
10246+
properties:
10247+
disabled:
10248+
type: boolean
10249+
size:
10250+
anyOf:
10251+
- type: integer
10252+
- type: string
10253+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10254+
x-kubernetes-int-or-string: true
10255+
type: object
1024410256
type: object
1024510257
status:
1024610258
description: Status reflects the current observed state of the component deployment.

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10340,6 +10340,18 @@ spec:
1034010340
serviceName:
1034110341
description: contains the name of the component
1034210342
type: string
10343+
sharedMemory:
10344+
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
10345+
properties:
10346+
disabled:
10347+
type: boolean
10348+
size:
10349+
anyOf:
10350+
- type: integer
10351+
- type: string
10352+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10353+
x-kubernetes-int-or-string: true
10354+
type: object
1034310355
type: object
1034410356
description: |-
1034510357
Services allows per-service overrides of the component deployment settings.

deploy/cloud/operator/internal/consts/consts.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@ const (
4848
DefaultGroveTerminationDelay = 15 * time.Minute
4949

5050
// Metrics related constants
51-
KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics" // User-provided annotation to control metrics
52-
KubeLabelMetricsEnabled = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection
53-
KubeValueNameSharedMemory = "shared-memory"
51+
KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics" // User-provided annotation to control metrics
52+
KubeLabelMetricsEnabled = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection
53+
KubeValueNameSharedMemory = "shared-memory"
54+
DefaultSharedMemoryMountPath = "/dev/shm"
55+
DefaultSharedMemorySize = "8Gi"
5456

5557
// Grove multinode role suffixes
5658
GroveRoleSuffixLeader = "ldr"

deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"fmt"
2525
"testing"
2626

27-
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
2827
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
2928
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
3029
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
@@ -699,18 +698,18 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
699698
Multinode: &v1alpha1.MultinodeSpec{
700699
NodeCount: 2,
701700
},
702-
Resources: &common.Resources{
703-
Requests: &common.ResourceItem{
701+
Resources: &dynamoCommon.Resources{
702+
Requests: &dynamoCommon.ResourceItem{
704703
CPU: "300m",
705704
Memory: "500Mi",
706705
},
707-
Limits: &common.ResourceItem{
706+
Limits: &dynamoCommon.ResourceItem{
708707
GPU: "1",
709708
Memory: "20Gi",
710709
CPU: "10",
711710
},
712711
},
713-
ExtraPodMetadata: &common.ExtraPodMetadata{
712+
ExtraPodMetadata: &dynamoCommon.ExtraPodMetadata{
714713
Annotations: map[string]string{
715714
"nvidia.com/annotation1": "annotation1",
716715
},
@@ -793,7 +792,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
793792
VolumeSource: corev1.VolumeSource{
794793
EmptyDir: &corev1.EmptyDirVolumeSource{
795794
Medium: corev1.StorageMediumMemory,
796-
SizeLimit: resource.NewQuantity(5*1024*1024*1024, resource.BinarySI), // 5gi (calculated from memory limit / 4)
795+
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
797796
},
798797
},
799798
},
@@ -814,7 +813,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
814813
VolumeMounts: []corev1.VolumeMount{
815814
{
816815
Name: "shared-memory",
817-
MountPath: "/dev/shm",
816+
MountPath: commonconsts.DefaultSharedMemoryMountPath,
818817
},
819818
},
820819
Resources: corev1.ResourceRequirements{
@@ -893,7 +892,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
893892
VolumeSource: corev1.VolumeSource{
894893
EmptyDir: &corev1.EmptyDirVolumeSource{
895894
Medium: corev1.StorageMediumMemory,
896-
SizeLimit: resource.NewQuantity(5*1024*1024*1024, resource.BinarySI), // 5gi (calculated from memory limit / 4)
895+
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
897896
},
898897
},
899898
},
@@ -914,7 +913,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
914913
VolumeMounts: []corev1.VolumeMount{
915914
{
916915
Name: "shared-memory",
917-
MountPath: "/dev/shm",
916+
MountPath: commonconsts.DefaultSharedMemoryMountPath,
918917
},
919918
},
920919
Resources: corev1.ResourceRequirements{
@@ -956,8 +955,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
956955
Multinode: &v1alpha1.MultinodeSpec{
957956
NodeCount: 2,
958957
},
959-
Resources: &common.Resources{
960-
Limits: &common.ResourceItem{
958+
Resources: &dynamoCommon.Resources{
959+
Limits: &dynamoCommon.ResourceItem{
961960
GPU: "1",
962961
},
963962
},
@@ -1000,8 +999,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
1000999
Multinode: &v1alpha1.MultinodeSpec{
10011000
NodeCount: 2,
10021001
},
1003-
Resources: &common.Resources{
1004-
Limits: &common.ResourceItem{
1002+
Resources: &dynamoCommon.Resources{
1003+
Limits: &dynamoCommon.ResourceItem{
10051004
GPU: "1",
10061005
},
10071006
},

deploy/cloud/operator/internal/dynamo/graph.go

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,8 @@ func addStandardEnvVars(container *corev1.Container, controllerConfig controller
677677
// GenerateBasePodSpec creates a basic PodSpec with common logic shared between controller and grove
678678
// Includes standard environment variables (DYNAMO_PORT, NATS_SERVER, ETCD_ENDPOINTS)
679679
// Deployment-specific environment merging should be handled by the caller
680+
//
681+
//nolint:gocyclo
680682
func GenerateBasePodSpec(
681683
component *v1alpha1.DynamoComponentDeploymentOverridesSpec,
682684
backendFramework BackendFramework,
@@ -778,9 +780,10 @@ func GenerateBasePodSpec(
778780
MountPath: *component.PVC.MountPoint,
779781
})
780782
}
781-
shmVolume, shmVolumeMount := generateSharedMemoryVolumeAndMount(&container.Resources)
782-
volumes = append(volumes, shmVolume)
783-
container.VolumeMounts = append(container.VolumeMounts, shmVolumeMount)
783+
if shmVol, shmMount := generateSharedMemoryVolumeAndMount(component.SharedMemory); shmVol != nil && shmMount != nil {
784+
volumes = append(volumes, *shmVol)
785+
container.VolumeMounts = append(container.VolumeMounts, *shmMount)
786+
}
784787

785788
// Apply backend-specific container modifications
786789
multinodeDeployer := MultinodeDeployerFactory(multinodeDeploymentType)
@@ -1179,36 +1182,29 @@ func GenerateBasePodSpecForController(
11791182
return podSpec, nil
11801183
}
11811184

1182-
func generateSharedMemoryVolumeAndMount(resources *corev1.ResourceRequirements) (corev1.Volume, corev1.VolumeMount) {
1183-
sharedMemorySizeLimit := resource.MustParse("512Mi")
1184-
// Check if we have memory limits to work with
1185-
memoryLimit := resources.Limits[corev1.ResourceMemory]
1186-
if !memoryLimit.IsZero() {
1187-
// Use 1/4 of memory limit
1188-
calculatedSize := resource.NewQuantity(memoryLimit.Value()/4, resource.BinarySI)
1189-
// Apply bounds: minimum 512Mi, maximum 8Gi
1190-
minSize := resource.MustParse("512Mi")
1191-
maxSize := resource.MustParse("8Gi")
1192-
1193-
if calculatedSize.Cmp(minSize) > 0 && calculatedSize.Cmp(maxSize) < 0 {
1194-
sharedMemorySizeLimit = *calculatedSize
1195-
} else if calculatedSize.Cmp(maxSize) >= 0 {
1196-
sharedMemorySizeLimit = maxSize // Cap at maximum
1185+
func generateSharedMemoryVolumeAndMount(spec *v1alpha1.SharedMemorySpec) (*corev1.Volume, *corev1.VolumeMount) {
1186+
// default: enabled=true, size=8Gi
1187+
size := resource.MustParse(commonconsts.DefaultSharedMemorySize)
1188+
if spec != nil {
1189+
if spec.Disabled {
1190+
return nil, nil
1191+
}
1192+
if !spec.Size.IsZero() {
1193+
size = spec.Size
11971194
}
1198-
// If calculatedSize < minSize, keep the 512Mi base
11991195
}
12001196
volume := corev1.Volume{
12011197
Name: commonconsts.KubeValueNameSharedMemory,
12021198
VolumeSource: corev1.VolumeSource{
12031199
EmptyDir: &corev1.EmptyDirVolumeSource{
12041200
Medium: corev1.StorageMediumMemory,
1205-
SizeLimit: &sharedMemorySizeLimit,
1201+
SizeLimit: &size,
12061202
},
12071203
},
12081204
}
12091205
volumeMount := corev1.VolumeMount{
12101206
Name: commonconsts.KubeValueNameSharedMemory,
1211-
MountPath: "/dev/shm",
1207+
MountPath: commonconsts.DefaultSharedMemoryMountPath,
12121208
}
1213-
return volume, volumeMount
1209+
return &volume, &volumeMount
12141210
}

0 commit comments

Comments
 (0)