Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add configurable cpu request and limit to node collector settings #1990

Merged
merged 7 commits into from
Dec 15, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions autoscaler/controllers/datacollection/daemonset.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,10 @@ func getDesiredDaemonSet(datacollection *odigosv1.CollectorsGroup,
rollingUpdate.MaxSurge = &maxSurge
}

requestMemoryRequestQuantity := resource.MustParse(fmt.Sprintf("%dMi", datacollection.Spec.ResourcesSettings.MemoryRequestMiB))
requestMemoryLimitQuantity := resource.MustParse(fmt.Sprintf("%dMi", datacollection.Spec.ResourcesSettings.MemoryLimitMiB))
resourceMemoryRequestQuantity := resource.MustParse(fmt.Sprintf("%dMi", datacollection.Spec.ResourcesSettings.MemoryRequestMiB))
resourceMemoryLimitQuantity := resource.MustParse(fmt.Sprintf("%dMi", datacollection.Spec.ResourcesSettings.MemoryLimitMiB))
resourceCpuRequestQuantity := resource.MustParse(fmt.Sprintf("%dm", datacollection.Spec.ResourcesSettings.CpuRequestMillicores))
resourceCpuLimitQuantity := resource.MustParse(fmt.Sprintf("%dm", datacollection.Spec.ResourcesSettings.CpuLimitMillicores))

desiredDs := &appsv1.DaemonSet{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -326,10 +328,12 @@ func getDesiredDaemonSet(datacollection *odigosv1.CollectorsGroup,
},
Resources: corev1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceMemory: requestMemoryRequestQuantity,
corev1.ResourceMemory: resourceMemoryRequestQuantity,
corev1.ResourceCPU: resourceCpuRequestQuantity,
},
Limits: corev1.ResourceList{
corev1.ResourceMemory: requestMemoryLimitQuantity,
corev1.ResourceMemory: resourceMemoryLimitQuantity,
corev1.ResourceCPU: resourceCpuLimitQuantity,
},
},
SecurityContext: &corev1.SecurityContext{
Expand Down
15 changes: 13 additions & 2 deletions cli/cmd/resources/odigosconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,15 @@ func (a *odigosConfigResourceManager) Name() string { return "OdigosConfig" }
func (a *odigosConfigResourceManager) InstallFromScratch(ctx context.Context) error {

sizingProfile := k8sprofiles.FilterSizeProfiles(a.config.Profiles)

collectorGatewayConfig := GetGatewayConfigBasedOnSize(sizingProfile)
collectorNodeConfig := GetNodeCollectorConfigBasedOnSize(sizingProfile)
a.config.CollectorGateway = collectorGatewayConfig
if a.config.CollectorNode != nil {

collectorNodeConfig := GetNodeCollectorConfigBasedOnSize(sizingProfile)
if a.config.CollectorNode != nil && a.config.CollectorNode.CollectorOwnMetricsPort != 0 {
if collectorNodeConfig == nil {
collectorNodeConfig = &common.CollectorNodeConfiguration{}
}
collectorNodeConfig.CollectorOwnMetricsPort = a.config.CollectorNode.CollectorOwnMetricsPort
}
a.config.CollectorNode = collectorNodeConfig
Expand All @@ -77,16 +82,22 @@ func GetNodeCollectorConfigBasedOnSize(profile common.ProfileName) *common.Colle
return &common.CollectorNodeConfiguration{
RequestMemoryMiB: 150,
LimitMemoryMiB: 300,
RequestCPUm: 150,
LimitCPUm: 300,
}
case k8sprofiles.SizeMProfile.ProfileName:
return &common.CollectorNodeConfiguration{
RequestMemoryMiB: 250,
LimitMemoryMiB: 500,
RequestCPUm: 250,
LimitCPUm: 500,
}
case k8sprofiles.SizeLProfile.ProfileName:
return &common.CollectorNodeConfiguration{
RequestMemoryMiB: 500,
LimitMemoryMiB: 750,
RequestCPUm: 500,
LimitCPUm: 750,
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions common/odigos_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ type CollectorNodeConfiguration struct {
// default value is 2x the memory request.
LimitMemoryMiB int `json:"limitMemoryMiB,omitempty"`

// RequestCPUm is the CPU request for the node collector daemonset.
// it will be embedded in the daemonset as a resource request of the form "cpu: <value>m"
// default value is 250m
RequestCPUm int `json:"requestCPUm,omitempty"`

// LimitCPUm is the CPU limit for the node collector daemonset.
// it will be embedded in the daemonset as a resource limit of the form "cpu: <value>m"
// default value is 500m
LimitCPUm int `json:"limitCPUm,omitempty"`

// this parameter sets the "limit_mib" parameter in the memory limiter configuration for the node collector.
// it is the hard limit after which a force garbage collection will be performed.
// if not set, it will be 50Mi below the memory request.
Expand Down
30 changes: 20 additions & 10 deletions docs/pipeline/configuration.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ Sizing Profiles `size_s`, `size_m`, `size_l` are pre-defined configurations desi

**Node Data Collection Collector**:

| Profile | Request Memory (Mi) | Limit Memory (Mi) |
|----------|----------------------|-------------------|
| `size_s` | **150Mi** | **300Mi** |
| `size_m` | **250Mi** | **500Mi** |
| `size_l` | **500Mi** | **750Mi** |
| Profile  | Request Memory (Mi)  | Limit Memory (Mi) | Request CPU (m)      | Limit CPU (m)     |
|----------|----------------------|-------------------|----------------------|-------------------|
| `size_s` | **150Mi** | **300Mi** | **150m** | **300m** |
| `size_m` | **250Mi** | **500Mi** | **250m** | **500m** |
| `size_l` | **500Mi** | **750Mi** | **500m** | **750m** |


To use profiles, you need to use the [Odigos CLI Command for Profiles](/cli/odigos_profile).
Expand Down Expand Up @@ -96,27 +96,37 @@ collectorNode:
# RequestMemoryMiB is the memory request for the node collector daemonset.
# it will be embedded in the daemonset as a resource request of the form "memory: <value>Mi"
# default value is 250Mi
RequestMemoryMiB int `json:"requestMemoryMiB,omitempty"`
requestMemoryMiB: 250

# LimitMemoryMiB is the memory limit for the node collector daemonset.
# it will be embedded in the daemonset as a resource limit of the form "memory: <value>Mi"
# default value is 2x the memory request.
LimitMemoryMiB int `json:"limitMemoryMiB,omitempty"`
limitMemoryMiB: 500

# RequestCPUm is the CPU request for the node collector daemonset.
# it will be embedded in the daemonset as a resource request of the form "cpu: <value>m"
# default value is 250m
requestCPUm: 250

# LimitCPUm is the CPU limit for the node collector daemonset.
# it will be embedded in the daemonset as a resource limit of the form "cpu: <value>m"
# default value is 500m
limitCPUm: 500

# this parameter sets the "limit_mib" parameter in the memory limiter configuration for the node collector.
# it is the hard limit after which a force garbage collection will be performed.
# if not set, it will be 50Mi below the memory request.
MemoryLimiterLimitMiB int `json:"memoryLimiterLimitMiB,omitempty"`
memoryLimiterLimitMiB:

# this parameter sets the "spike_limit_mib" parameter in the memory limiter configuration for the node collector.
# note that this is not the processor soft limit, but the diff in MiB between the hard limit and the soft limit.
# if not set, this will be set to 20% of the hard limit (so the soft limit will be 80% of the hard limit).
MemoryLimiterSpikeLimitMiB int `json:"memoryLimiterSpikeLimitMiB,omitempty"`
memoryLimiterSpikeLimitMiB:

# the GOMEMLIMIT environment variable value for the node collector daemonset.
# this is when go runtime will start garbage collection.
# if not specified, it will be set to 80% of the hard limit of the memory limiter.
GoMemLimitMib int `json:"goMemLimitMiB,omitempty"`
goMemLimitMiB:
```


Expand Down
6 changes: 6 additions & 0 deletions helm/odigos/templates/odigos-config-cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ data:
{{- with .Values.collectorNode.limitMemoryMiB }}
limitMemoryMiB: {{ . }}
{{- end }}
{{- with .Values.collectorNode.requestCPUm }}
requestCPUm: {{ . }}
{{- end }}
{{- with .Values.collectorNode.limitCPUm }}
limitCPUm: {{ . }}
{{- end }}
{{- with .Values.collectorNode.memoryLimiterLimitMiB }}
memoryLimiterLimitMiB: {{ . }}
{{- end }}
Expand Down
11 changes: 11 additions & 0 deletions helm/odigos/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,17 @@ collectorNode:
# default value is 2x the memory request.
limitMemoryMiB: 500

# the CPU request for the node collector daemonset.
# it will be embedded in the daemonset as a resource request
# of the form "cpu: <value>m".
# default value is 250m
requestCPUm: 250
# the CPU limit for the node collector daemonset.
# it will be embedded in the daemonset as a resource limit
# of the form "cpu: <value>m".
# default value is 500m
limitCPUm: 500
blumamir marked this conversation as resolved.
Show resolved Hide resolved

# this parameter sets the "limit_mib" parameter in the memory limiter configuration for the node collector.
# it is the hard limit after which a force garbage collection will be performed.
# if not set, it will be 50Mi below the memory limit.
Expand Down
20 changes: 18 additions & 2 deletions scheduler/controllers/nodecollectorsgroup/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,14 @@ const (
// allowing the memory limit to be slightly above the memory request can help in reducing the chances of OOMs in edge cases.
// instead of having the process killed, it can use extra memory available on the node without allocating it preemptively.
memoryLimitAboveRequestFactor = 2.0

// the default CPU request in millicores
defaultRequestCPUm = 250
// the default CPU limit in millicores
defaultLimitCPUm = 500
)

func getMemorySettings(odigosConfig common.OdigosConfiguration) odigosv1.CollectorsGroupResourcesSettings {
func getResourceSettings(odigosConfig common.OdigosConfiguration) odigosv1.CollectorsGroupResourcesSettings {
// memory request is expensive on daemonsets since it will consume this memory
// on each node in the cluster. setting to 256, but allowing memory to spike higher
// to consume more available memory on the node.
Expand Down Expand Up @@ -78,12 +83,23 @@ func getMemorySettings(odigosConfig common.OdigosConfiguration) odigosv1.Collect
gomemlimitMiB = nodeCollectorConfig.GoMemLimitMib
}

cpuRequestm := defaultRequestCPUm
if nodeCollectorConfig != nil && nodeCollectorConfig.RequestCPUm > 0 {
cpuRequestm = nodeCollectorConfig.RequestCPUm
}
cpuLimitm := defaultLimitCPUm
if nodeCollectorConfig != nil && nodeCollectorConfig.LimitCPUm > 0 {
cpuLimitm = nodeCollectorConfig.LimitCPUm
}
blumamir marked this conversation as resolved.
Show resolved Hide resolved

return odigosv1.CollectorsGroupResourcesSettings{
MemoryRequestMiB: memoryRequestMiB,
MemoryLimitMiB: memoryLimitMiB,
MemoryLimiterLimitMiB: memoryLimiterLimitMiB,
MemoryLimiterSpikeLimitMiB: memoryLimiterSpikeLimitMiB,
GomemlimitMiB: gomemlimitMiB,
CpuRequestMillicores: cpuRequestm,
CpuLimitMillicores: cpuLimitm,
}
}

Expand All @@ -106,7 +122,7 @@ func newNodeCollectorGroup(odigosConfig common.OdigosConfiguration) *odigosv1.Co
Spec: odigosv1.CollectorsGroupSpec{
Role: odigosv1.CollectorsGroupRoleNodeCollector,
CollectorOwnMetricsPort: ownMetricsPort,
ResourcesSettings: getMemorySettings(odigosConfig),
ResourcesSettings: getResourceSettings(odigosConfig),
},
}
}
Expand Down
Loading