stackabletech · adwk67 · May 22, 2024 · May 21, 2024 · May 21, 2024 · May 22, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -8,7 +8,14 @@
 
 - Update Rust dependency versions, most notably operator-rs 0.67.1 ([#401])
 
+### Fixed
+
+- BREAKING (behaviour): Specified CPU resources are now applied as given (instead of being rounded up to the next whole number).
+ This might affect your jobs, as they now e.g. only request 200m CPU instead of the 1000m they requested so far,
+ meaning they might slow down significantly ([#408]).
+
 [#401]: https://github.com/stackabletech/spark-k8s-operator/pull/401
+[#408]: https://github.com/stackabletech/spark-k8s-operator/pull/408
 
 ## [24.3.0] - 2024-03-20
 

diff --git a/docs/modules/spark-k8s/pages/usage-guide/resources.adoc b/docs/modules/spark-k8s/pages/usage-guide/resources.adoc
@@ -58,26 +58,32 @@ To illustrate resource configuration consider the use-case where resources are d
 
 === CPU
 
-CPU request and limit will be rounded up to the next integer value, resulting in the following:
+CPU request and limit will be used as defined in the custom resource, resulting in the following:
 
 
 |===
-|CRD |Spark conf
+|CRD |spark.kubernetes.{driver/executor} cores|spark.{driver/executor} cores (rounded up)
 
+|1800m
 |1800m
 |2
 
+|100m
 |100m
 |1
 
+|1.5
 |1.5
 |2
 
+|2
 |2
 |2
 |===
 
-Spark allows CPU limits to be set for the driver and executor using Spark settings (`spark.{driver|executor}.cores}`) as well as Kubernetes-specific ones (`spark.kubernetes.{driver,executor}.{request|limit}.cores`). `spark.kubernetes.executor.request.cores` takes precedence over `spark.executor.cores` in determining the pod CPU request, but does not affect task parallelism (the number of tasks an executor can run concurrently), so for this reason `spark.executor.cores` is set to the value of `spark.kubernetes.executor.limit.cores`.
+`spark.kubernetes.{driver|executor}.{request|limit}.cores` determine the actual pod CPU request and limit and are taken directly from the manifest as defined by the user.
+`spark.{driver|executor}.cores` are set to the rounded-up value of the corresponding CPU limit (`max`) in the manifest.
+Task parallelism (the number of tasks an executor can run concurrently) is determined by `spark.executor.cores`.
 
 === Memory
 

diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs
@@ -915,15 +915,17 @@ fn resources_to_driver_props(
  ..
  } = &driver_config.resources
  {
- let min_cores = cores_from_quantity(min.0.clone())?;
- let max_cores = cores_from_quantity(max.0.clone())?;
- // will have default value from resources to apply if nothing set specifically
- props.insert("spark.driver.cores".to_string(), max_cores.clone());
+ let driver_cores = cores_from_quantity(max.0.clone())?;
+ // take rounded value for driver.cores but actual values for the pod
+ props.insert("spark.driver.cores".to_string(), driver_cores.clone());
  props.insert(
  "spark.kubernetes.driver.request.cores".to_string(),
- min_cores,
+ min.0.clone(),
+ );
+ props.insert(
+ "spark.kubernetes.driver.limit.cores".to_string(),
+ max.0.clone(),
  );
- props.insert("spark.kubernetes.driver.limit.cores".to_string(), max_cores);
  }
 
  if let Resources {
@@ -955,17 +957,16 @@ fn resources_to_executor_props(
  ..
  } = &executor_config.resources
  {
- let min_cores = cores_from_quantity(min.0.clone())?;
- let max_cores = cores_from_quantity(max.0.clone())?;
- // will have default value from resources to apply if nothing set specifically
- props.insert("spark.executor.cores".to_string(), max_cores.clone());
+ let executor_cores = cores_from_quantity(max.0.clone())?;
+ // take rounded value for executor.cores (to determine the parallelism) but actual values for the pod
+ props.insert("spark.executor.cores".to_string(), executor_cores.clone());
  props.insert(
  "spark.kubernetes.executor.request.cores".to_string(),
- min_cores,
+ min.0.clone(),
  );
  props.insert(
  "spark.kubernetes.executor.limit.cores".to_string(),
- max_cores,
+ max.0.clone(),
  );
  }
 
@@ -1154,7 +1155,7 @@ mod tests {
  ),
  (
  "spark.kubernetes.driver.request.cores".to_string(),
- "1".to_string(),
+ "250m".to_string(),
  ),
  ]
  .into_iter()
@@ -1194,7 +1195,7 @@ mod tests {
  ("spark.executor.memory".to_string(), "128m".to_string()), // 128 and not 512 because memory overhead is subtracted
  (
  "spark.kubernetes.executor.request.cores".to_string(),
- "1".to_string(),
+ "250m".to_string(),
  ),
  (
  "spark.kubernetes.executor.limit.cores".to_string(),

diff --git a/tests/templates/kuttl/resources/10-assert.yaml.j2 b/tests/templates/kuttl/resources/10-assert.yaml.j2
@@ -33,10 +33,10 @@ spec:
  resources:
  # these resources are set via Spark submit properties like "spark.driver.cores"
  limits:
- cpu: "2"
+ cpu: 1200m
  memory: 1Gi
  requests:
- cpu: "1"
+ cpu: 300m
  memory: 1Gi
 ---
 apiVersion: v1
@@ -55,5 +55,5 @@ spec:
  cpu: "2"
  memory: 1Gi
  requests:
- cpu: "2"
+ cpu: 1250m
  memory: 1Gi
diff --git a/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2
@@ -36,7 +36,7 @@ spec:
  enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
  resources:
  cpu:
- min: 200m
+ min: 300m
  max: 1200m
  memory:
  limit: 1024Mi