Skip to content

Commit 80279ad

Browse files
fix: increase shm default size and make it configurable (#2616)
1 parent 8ad558d commit 80279ad

File tree

11 files changed

+132
-58
lines changed

11 files changed

+132
-58
lines changed

deploy/cloud/helm/crds/templates/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10241,6 +10241,18 @@ spec:
1024110241
serviceName:
1024210242
description: contains the name of the component
1024310243
type: string
10244+
sharedMemory:
10245+
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
10246+
properties:
10247+
disabled:
10248+
type: boolean
10249+
size:
10250+
anyOf:
10251+
- type: integer
10252+
- type: string
10253+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10254+
x-kubernetes-int-or-string: true
10255+
type: object
1024410256
type: object
1024510257
status:
1024610258
description: Status reflects the current observed state of the component deployment.

deploy/cloud/helm/crds/templates/nvidia.com_dynamographdeployments.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10340,6 +10340,18 @@ spec:
1034010340
serviceName:
1034110341
description: contains the name of the component
1034210342
type: string
10343+
sharedMemory:
10344+
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
10345+
properties:
10346+
disabled:
10347+
type: boolean
10348+
size:
10349+
anyOf:
10350+
- type: integer
10351+
- type: string
10352+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10353+
x-kubernetes-int-or-string: true
10354+
type: object
1034310355
type: object
1034410356
description: |-
1034510357
Services allows per-service overrides of the component deployment settings.

deploy/cloud/operator/api/v1alpha1/common.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,3 +44,8 @@ type Autoscaling struct {
4444
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
4545
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
4646
}
47+
48+
type SharedMemorySpec struct {
49+
Disabled bool `json:"disabled,omitempty"`
50+
Size resource.Quantity `json:"size,omitempty"`
51+
}

deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ type DynamoComponentDeploymentSharedSpec struct {
9292
// Ingress config to expose the component outside the cluster (or through a service mesh).
9393
Ingress *IngressSpec `json:"ingress,omitempty"`
9494

95+
// SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
96+
SharedMemory *SharedMemorySpec `json:"sharedMemory,omitempty"`
97+
9598
// +optional
9699
// ExtraPodMetadata adds labels/annotations to the created Pods.
97100
ExtraPodMetadata *dynamoCommon.ExtraPodMetadata `json:"extraPodMetadata,omitempty"`

deploy/cloud/operator/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 21 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamocomponentdeployments.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10241,6 +10241,18 @@ spec:
1024110241
serviceName:
1024210242
description: contains the name of the component
1024310243
type: string
10244+
sharedMemory:
10245+
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
10246+
properties:
10247+
disabled:
10248+
type: boolean
10249+
size:
10250+
anyOf:
10251+
- type: integer
10252+
- type: string
10253+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10254+
x-kubernetes-int-or-string: true
10255+
type: object
1024410256
type: object
1024510257
status:
1024610258
description: Status reflects the current observed state of the component deployment.

deploy/cloud/operator/config/crd/bases/nvidia.com_dynamographdeployments.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10340,6 +10340,18 @@ spec:
1034010340
serviceName:
1034110341
description: contains the name of the component
1034210342
type: string
10343+
sharedMemory:
10344+
description: SharedMemory controls the tmpfs mounted at /dev/shm (enable/disable and size).
10345+
properties:
10346+
disabled:
10347+
type: boolean
10348+
size:
10349+
anyOf:
10350+
- type: integer
10351+
- type: string
10352+
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
10353+
x-kubernetes-int-or-string: true
10354+
type: object
1034310355
type: object
1034410356
description: |-
1034510357
Services allows per-service overrides of the component deployment settings.

deploy/cloud/operator/internal/consts/consts.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@ const (
4848
DefaultGroveTerminationDelay = 15 * time.Minute
4949

5050
// Metrics related constants
51-
KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics" // User-provided annotation to control metrics
52-
KubeLabelMetricsEnabled = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection
53-
KubeValueNameSharedMemory = "shared-memory"
51+
KubeAnnotationEnableMetrics = "nvidia.com/enable-metrics" // User-provided annotation to control metrics
52+
KubeLabelMetricsEnabled = "nvidia.com/metrics-enabled" // Controller-managed label for pod selection
53+
KubeValueNameSharedMemory = "shared-memory"
54+
DefaultSharedMemoryMountPath = "/dev/shm"
55+
DefaultSharedMemorySize = "8Gi"
5456

5557
// Grove multinode role suffixes
5658
GroveRoleSuffixLeader = "ldr"

deploy/cloud/operator/internal/controller/dynamocomponentdeployment_controller_test.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"fmt"
2525
"testing"
2626

27-
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
2827
dynamoCommon "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/dynamo/common"
2928
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
3029
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
@@ -705,18 +704,18 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
705704
Multinode: &v1alpha1.MultinodeSpec{
706705
NodeCount: 2,
707706
},
708-
Resources: &common.Resources{
709-
Requests: &common.ResourceItem{
707+
Resources: &dynamoCommon.Resources{
708+
Requests: &dynamoCommon.ResourceItem{
710709
CPU: "300m",
711710
Memory: "500Mi",
712711
},
713-
Limits: &common.ResourceItem{
712+
Limits: &dynamoCommon.ResourceItem{
714713
GPU: "1",
715714
Memory: "20Gi",
716715
CPU: "10",
717716
},
718717
},
719-
ExtraPodMetadata: &common.ExtraPodMetadata{
718+
ExtraPodMetadata: &dynamoCommon.ExtraPodMetadata{
720719
Annotations: map[string]string{
721720
"nvidia.com/annotation1": "annotation1",
722721
},
@@ -799,7 +798,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
799798
VolumeSource: corev1.VolumeSource{
800799
EmptyDir: &corev1.EmptyDirVolumeSource{
801800
Medium: corev1.StorageMediumMemory,
802-
SizeLimit: resource.NewQuantity(5*1024*1024*1024, resource.BinarySI), // 5gi (calculated from memory limit / 4)
801+
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
803802
},
804803
},
805804
},
@@ -829,7 +828,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
829828
VolumeMounts: []corev1.VolumeMount{
830829
{
831830
Name: "shared-memory",
832-
MountPath: "/dev/shm",
831+
MountPath: commonconsts.DefaultSharedMemoryMountPath,
833832
},
834833
},
835834
Resources: corev1.ResourceRequirements{
@@ -908,7 +907,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
908907
VolumeSource: corev1.VolumeSource{
909908
EmptyDir: &corev1.EmptyDirVolumeSource{
910909
Medium: corev1.StorageMediumMemory,
911-
SizeLimit: resource.NewQuantity(5*1024*1024*1024, resource.BinarySI), // 5gi (calculated from memory limit / 4)
910+
SizeLimit: func() *resource.Quantity { q := resource.MustParse(commonconsts.DefaultSharedMemorySize); return &q }(),
912911
},
913912
},
914913
},
@@ -938,7 +937,7 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
938937
VolumeMounts: []corev1.VolumeMount{
939938
{
940939
Name: "shared-memory",
941-
MountPath: "/dev/shm",
940+
MountPath: commonconsts.DefaultSharedMemoryMountPath,
942941
},
943942
},
944943
Resources: corev1.ResourceRequirements{
@@ -980,8 +979,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
980979
Multinode: &v1alpha1.MultinodeSpec{
981980
NodeCount: 2,
982981
},
983-
Resources: &common.Resources{
984-
Limits: &common.ResourceItem{
982+
Resources: &dynamoCommon.Resources{
983+
Limits: &dynamoCommon.ResourceItem{
985984
GPU: "1",
986985
},
987986
},
@@ -1024,8 +1023,8 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
10241023
Multinode: &v1alpha1.MultinodeSpec{
10251024
NodeCount: 2,
10261025
},
1027-
Resources: &common.Resources{
1028-
Limits: &common.ResourceItem{
1026+
Resources: &dynamoCommon.Resources{
1027+
Limits: &dynamoCommon.ResourceItem{
10291028
GPU: "1",
10301029
},
10311030
},

deploy/cloud/operator/internal/dynamo/graph.go

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,8 @@ func addStandardEnvVars(container *corev1.Container, controllerConfig controller
677677
// GenerateBasePodSpec creates a basic PodSpec with common logic shared between controller and grove
678678
// Includes standard environment variables (DYNAMO_PORT, NATS_SERVER, ETCD_ENDPOINTS)
679679
// Deployment-specific environment merging should be handled by the caller
680+
//
681+
//nolint:gocyclo
680682
func GenerateBasePodSpec(
681683
component *v1alpha1.DynamoComponentDeploymentOverridesSpec,
682684
backendFramework BackendFramework,
@@ -780,9 +782,10 @@ func GenerateBasePodSpec(
780782
MountPath: *component.PVC.MountPoint,
781783
})
782784
}
783-
shmVolume, shmVolumeMount := generateSharedMemoryVolumeAndMount(&container.Resources)
784-
volumes = append(volumes, shmVolume)
785-
container.VolumeMounts = append(container.VolumeMounts, shmVolumeMount)
785+
if shmVol, shmMount := generateSharedMemoryVolumeAndMount(component.SharedMemory); shmVol != nil && shmMount != nil {
786+
volumes = append(volumes, *shmVol)
787+
container.VolumeMounts = append(container.VolumeMounts, *shmMount)
788+
}
786789

787790
// Apply backend-specific container modifications
788791
multinodeDeployer := MultinodeDeployerFactory(multinodeDeploymentType)
@@ -1181,36 +1184,29 @@ func GenerateBasePodSpecForController(
11811184
return podSpec, nil
11821185
}
11831186

1184-
func generateSharedMemoryVolumeAndMount(resources *corev1.ResourceRequirements) (corev1.Volume, corev1.VolumeMount) {
1185-
sharedMemorySizeLimit := resource.MustParse("512Mi")
1186-
// Check if we have memory limits to work with
1187-
memoryLimit := resources.Limits[corev1.ResourceMemory]
1188-
if !memoryLimit.IsZero() {
1189-
// Use 1/4 of memory limit
1190-
calculatedSize := resource.NewQuantity(memoryLimit.Value()/4, resource.BinarySI)
1191-
// Apply bounds: minimum 512Mi, maximum 8Gi
1192-
minSize := resource.MustParse("512Mi")
1193-
maxSize := resource.MustParse("8Gi")
1194-
1195-
if calculatedSize.Cmp(minSize) > 0 && calculatedSize.Cmp(maxSize) < 0 {
1196-
sharedMemorySizeLimit = *calculatedSize
1197-
} else if calculatedSize.Cmp(maxSize) >= 0 {
1198-
sharedMemorySizeLimit = maxSize // Cap at maximum
1187+
func generateSharedMemoryVolumeAndMount(spec *v1alpha1.SharedMemorySpec) (*corev1.Volume, *corev1.VolumeMount) {
1188+
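// Defaults when spec is nil or leaves fields unset: volume is mounted (Disabled=false) and sized to commonconsts.DefaultSharedMemorySize (8Gi).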
1189+
size := resource.MustParse(commonconsts.DefaultSharedMemorySize)
1190+
if spec != nil {
1191+
if spec.Disabled {
1192+
return nil, nil
1193+
}
1194+
if !spec.Size.IsZero() {
1195+
size = spec.Size
11991196
}
1200-
// If calculatedSize < minSize, keep the 512Mi base
12011197
}
12021198
volume := corev1.Volume{
12031199
Name: commonconsts.KubeValueNameSharedMemory,
12041200
VolumeSource: corev1.VolumeSource{
12051201
EmptyDir: &corev1.EmptyDirVolumeSource{
12061202
Medium: corev1.StorageMediumMemory,
1207-
SizeLimit: &sharedMemorySizeLimit,
1203+
SizeLimit: &size,
12081204
},
12091205
},
12101206
}
12111207
volumeMount := corev1.VolumeMount{
12121208
Name: commonconsts.KubeValueNameSharedMemory,
1213-
MountPath: "/dev/shm",
1209+
MountPath: commonconsts.DefaultSharedMemoryMountPath,
12141210
}
1215-
return volume, volumeMount
1211+
return &volume, &volumeMount
12161212
}

0 commit comments

Comments
 (0)