diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b9397a4..f3243125 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,14 @@ All notable changes to this project will be documented in this file. - Update Rust dependency versions, most notably operator-rs 0.67.1 ([#401]) +### Fixed + +- BREAKING (behaviour): Specified CPU resources are now applied correctly (instead of rounding them up to the next whole number). + This might affect your jobs, as they now e.g. only have 200m CPU resources requested instead of the 1000m they had so far, + meaning they might slow down significantly ([#408]). + [#401]: https://github.com/stackabletech/spark-k8s-operator/pull/401 +[#408]: https://github.com/stackabletech/spark-k8s-operator/pull/408 ## [24.3.0] - 2024-03-20 diff --git a/docs/modules/spark-k8s/pages/usage-guide/resources.adoc b/docs/modules/spark-k8s/pages/usage-guide/resources.adoc index 9224a2a2..a2daecb7 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/resources.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/resources.adoc @@ -58,26 +58,32 @@ To illustrate resource configuration consider the use-case where resources are d === CPU -CPU request and limit will be rounded up to the next integer value, resulting in the following: +CPU request and limit will be used as defined in the custom resource, resulting in the following: |=== -|CRD |Spark conf +|CRD |spark.kubernetes.{driver/executor} cores|spark.{driver/executor} cores (rounded up) +|1800m |1800m |2 +|100m |100m |1 +|1.5 |1.5 |2 +|2 |2 |2 |=== -Spark allows CPU limits to be set for the driver and executor using Spark settings (`spark.{driver|executor}.cores}`) as well as Kubernetes-specific ones (`spark.kubernetes.{driver,executor}.{request|limit}.cores`). 
`spark.kubernetes.executor.request.cores` takes precedence over `spark.executor.cores` in determining the pod CPU request, but does not affect task parallelism (the number of tasks an executor can run concurrently), so for this reason `spark.executor.cores` is set to the value of `spark.kubernetes.executor.limit.cores`. +`spark.kubernetes.{driver,executor}.{request|limit}.cores` determine the actual pod CPU request and are taken directly from the manifest as defined by the user. +`spark.{driver|executor}.cores` are set to the rounded(-up) value of the manifest settings. +Task parallelism (the number of tasks an executor can run concurrently) is determined by `spark.executor.cores`. === Memory diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 0b07b1fd..b526726e 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -915,15 +915,17 @@ fn resources_to_driver_props( .. } = &driver_config.resources { - let min_cores = cores_from_quantity(min.0.clone())?; - let max_cores = cores_from_quantity(max.0.clone())?; - // will have default value from resources to apply if nothing set specifically - props.insert("spark.driver.cores".to_string(), max_cores.clone()); + let driver_cores = cores_from_quantity(max.0.clone())?; + // take rounded value for driver.cores but actual values for the pod + props.insert("spark.driver.cores".to_string(), driver_cores.clone()); props.insert( "spark.kubernetes.driver.request.cores".to_string(), - min_cores, + min.0.clone(), + ); + props.insert( + "spark.kubernetes.driver.limit.cores".to_string(), + max.0.clone(), ); - props.insert("spark.kubernetes.driver.limit.cores".to_string(), max_cores); } if let Resources { @@ -955,17 +957,16 @@ fn resources_to_executor_props( .. 
} = &executor_config.resources { - let min_cores = cores_from_quantity(min.0.clone())?; - let max_cores = cores_from_quantity(max.0.clone())?; - // will have default value from resources to apply if nothing set specifically - props.insert("spark.executor.cores".to_string(), max_cores.clone()); + let executor_cores = cores_from_quantity(max.0.clone())?; + // take rounded value for executor.cores (to determine the parallelism) but actual values for the pod + props.insert("spark.executor.cores".to_string(), executor_cores.clone()); props.insert( "spark.kubernetes.executor.request.cores".to_string(), - min_cores, + min.0.clone(), ); props.insert( "spark.kubernetes.executor.limit.cores".to_string(), - max_cores, + max.0.clone(), ); } @@ -1154,7 +1155,7 @@ mod tests { ), ( "spark.kubernetes.driver.request.cores".to_string(), - "1".to_string(), + "250m".to_string(), ), ] .into_iter() @@ -1194,7 +1195,7 @@ mod tests { ("spark.executor.memory".to_string(), "128m".to_string()), // 128 and not 512 because memory overhead is subtracted ( "spark.kubernetes.executor.request.cores".to_string(), - "1".to_string(), + "250m".to_string(), ), ( "spark.kubernetes.executor.limit.cores".to_string(), diff --git a/tests/templates/kuttl/resources/10-assert.yaml.j2 b/tests/templates/kuttl/resources/10-assert.yaml.j2 index eb4e706b..55c9e63c 100644 --- a/tests/templates/kuttl/resources/10-assert.yaml.j2 +++ b/tests/templates/kuttl/resources/10-assert.yaml.j2 @@ -33,10 +33,10 @@ spec: resources: # these resources are set via Spark submit properties like "spark.driver.cores" limits: - cpu: "2" + cpu: 1200m memory: 1Gi requests: - cpu: "1" + cpu: 300m memory: 1Gi --- apiVersion: v1 @@ -55,5 +55,5 @@ spec: cpu: "2" memory: 1Gi requests: - cpu: "2" + cpu: 1250m memory: 1Gi diff --git a/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 index cc8b9e5a..f56bef9b 100644 --- 
a/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 @@ -36,7 +36,7 @@ spec: enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} resources: cpu: - min: 200m + min: 300m max: 1200m memory: limit: 1024Mi