Skip to content

Commit

Permalink
Use GOMAXPROCS in gateway based on limits.cpu (#1989)
Browse files Browse the repository at this point in the history
Relevant references on why this is important:
https://www.ardanlabs.com/blog/2024/02/kubernetes-cpu-limits-go.html
https://engineering.grab.com/performance-bottlenecks-go-apps

---------

Co-authored-by: Amir Blum <amirgiraffe@gmail.com>
  • Loading branch information
RonFed and blumamir authored Dec 15, 2024
1 parent 847059b commit b9b8b70
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 3 deletions.
13 changes: 13 additions & 0 deletions autoscaler/controllers/gateway/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,19 @@ func getDesiredDeployment(dests *odigosv1.DestinationList, configDataHash string
Name: "GOMEMLIMIT",
Value: fmt.Sprintf("%dMiB", gateway.Spec.ResourcesSettings.GomemlimitMiB),
},
{
// let the Go runtime know how many CPUs are available,
// without this, Go will assume all the cores are available.
Name: "GOMAXPROCS",
ValueFrom: &corev1.EnvVarSource{
ResourceFieldRef: &corev1.ResourceFieldSelector{
ContainerName: containerName,
// limitCPU, Kubernetes automatically rounds up the value to an integer
// (700m -> 1, 1200m -> 2)
Resource: "limits.cpu",
},
},
},
},
SecurityContext: &corev1.SecurityContext{
AllowPrivilegeEscalation: boolPtr(false),
Expand Down
8 changes: 8 additions & 0 deletions tests/common/assert/pipeline-ready.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,20 @@ spec:
fieldPath: metadata.name
- name: GOMEMLIMIT
(value != null): true
- name: GOMAXPROCS
valueFrom:
resourceFieldRef:
containerName: gateway
divisor: "0"
resource: limits.cpu
name: gateway
resources:
requests:
(memory != null): true
(cpu != null): true
limits:
(memory != null): true
(cpu != null): true
volumeMounts:
- mountPath: /conf
name: collector-conf
Expand Down
45 changes: 45 additions & 0 deletions tests/common/assert_pipeline_pods_ready.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash

DEFAULT_NAMESPACE="odigos-test"
DEFAULT_TIMEOUT="120s"
CHECK_INTERVAL=5

NAMESPACE=${1:-$DEFAULT_NAMESPACE}
TIMEOUT=${2:-$DEFAULT_TIMEOUT}

# Define expected labels for pods (adjust as needed)
EXPECTED_LABELS=(
"odigos.io/collector-role=NODE_COLLECTOR" # For odigos-data-collection pods
"odigos.io/collector-role=CLUSTER_GATEWAY" # For odigos-gateway pods
"app.kubernetes.io/name=odigos-autoscaler"
"app.kubernetes.io/name=odigos-instrumentor"
"app.kubernetes.io/name=odigos-scheduler"
"app=odigos-ui"
)

echo "Waiting for all expected pods to be ready in namespace '$NAMESPACE' with timeout of $TIMEOUT..."

for label in "${EXPECTED_LABELS[@]}"; do
echo "Waiting for pods with label: $label..."
# Wait for pods to exist before using kubectl wait
EXISTS=false
while [[ "$EXISTS" == "false" ]]; do
POD_COUNT=$(kubectl get pods -l "$label" -n "$NAMESPACE" --no-headers 2>/dev/null | wc -l)
if [[ "$POD_COUNT" -gt 0 ]]; then
EXISTS=true
echo "Found $POD_COUNT pod(s) with label '$label'. Proceeding to wait for readiness..."
else
echo "No pods found with label '$label'. Checking again in $CHECK_INTERVAL seconds..."
sleep $CHECK_INTERVAL
fi
done

# Use `kubectl wait` to check all pods matching the label
kubectl wait --for=condition=Ready pods -l "$label" -n "$NAMESPACE" --timeout="$TIMEOUT"
if [[ $? -ne 0 ]]; then
echo "Pods with label '$label' did not become ready within $TIMEOUT in namespace '$NAMESPACE'"
exit 1
fi
done

echo "All expected pods are ready!"
11 changes: 8 additions & 3 deletions tests/e2e/cli-upgrade/chainsaw-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,15 @@ spec:
try:
- apply:
file: ../../common/apply/add-tempo-traces-destination.yaml
- name: Odigos pipeline ready
- name: Odigos pipeline pods ready
# We make sure that the pipeline pods are ready before proceeding with the next steps
# This is intentionally different from pipeline-ready.yaml, which checks for the pipeline CRDs
# When adding a feature related to the pipeline, if we would use the same assert before the upgrade, the test would fail.
# Since the version installed here is latest official one.
try:
- assert:
file: ../../common/assert/pipeline-ready.yaml
- script:
content: ../../common/assert_pipeline_pods_ready.sh
timeout: 60s
- name: Simple-demo instrumented after destination added
try:
- assert:
Expand Down
6 changes: 6 additions & 0 deletions tests/e2e/workload-lifecycle/03-assert-action-created.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ spec:
fieldPath: metadata.name
- name: GOMEMLIMIT
(value != null): true
- name: GOMAXPROCS
valueFrom:
resourceFieldRef:
containerName: gateway
divisor: "0"
resource: limits.cpu
name: gateway
resources:
requests:
Expand Down

0 comments on commit b9b8b70

Please sign in to comment.