[SPARK-36462][K8S] Add the ability to selectively disable watching or polling #34264
Changes from all commits
Config.scala
@@ -395,6 +395,23 @@ private[spark] object Config extends Logging {
       .checkValue(interval => interval > 0, s"Logging interval must be a positive time value.")
       .createWithDefaultString("1s")

+  val KUBERNETES_EXECUTOR_ENABLE_API_POLLING =
+    ConfigBuilder("spark.kubernetes.executor.enableApiPolling")
+      .doc("If Spark should poll Kubernetes for executor pod status. " +
+        "You should leave this enabled unless you're encountering performance issues with your etcd.")
+      .version("3.3.0")
+      .booleanConf
+      .createWithDefault(true)
+
+  val KUBERNETES_EXECUTOR_ENABLE_API_WATCHER =
+    ConfigBuilder("spark.kubernetes.executor.enableApiWatcher")
+      .doc("If Spark should create watchers for executor pod status. " +
+        "You should leave this enabled unless you're encountering performance issues with your etcd.")
+      .version("3.3.0")
+      .booleanConf
+      .createWithDefault(true)
Member
Ditto. Can we make it as an `internal` conf?
Member
nit. extra empty line.
   val KUBERNETES_EXECUTOR_API_POLLING_INTERVAL =
     ConfigBuilder("spark.kubernetes.executor.apiPollingInterval")
       .doc("Interval between polls against the Kubernetes API server to inspect the " +
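Taken together, the two new flags let a job keep one executor status-tracking path and drop the other. A minimal sketch of disabling the watcher while keeping polling, assuming a plain SparkSession submission; only the two property names come from this diff, the app name and workload are made up:

import org.apache.spark.sql.SparkSession

object WatcherlessExample {
  def main(args: Array[String]): Unit = {
    // Keep the default polling path but skip creating executor pod watchers,
    // e.g. to reduce watch load on a struggling etcd.
    val spark = SparkSession.builder()
      .appName("watcherless-example")                                // illustrative name
      .config("spark.kubernetes.executor.enableApiWatcher", "false") // added by this PR
      .config("spark.kubernetes.executor.enableApiPolling", "true")  // default, shown for clarity
      .getOrCreate()

    spark.range(10).count() // placeholder workload
    spark.stop()
  }
}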
ExecutorPodsWatchSnapshotSource.scala
@@ -22,24 +22,30 @@ import io.fabric8.kubernetes.api.model.Pod
 import io.fabric8.kubernetes.client.{KubernetesClient, Watcher, WatcherException}
 import io.fabric8.kubernetes.client.Watcher.Action

+import org.apache.spark.SparkConf
+import org.apache.spark.deploy.k8s.Config.KUBERNETES_EXECUTOR_ENABLE_API_WATCHER
 import org.apache.spark.deploy.k8s.Constants._
 import org.apache.spark.internal.Logging
 import org.apache.spark.util.Utils

 private[spark] class ExecutorPodsWatchSnapshotSource(
     snapshotsStore: ExecutorPodsSnapshotsStore,
-    kubernetesClient: KubernetesClient) extends Logging {
+    kubernetesClient: KubernetesClient,
+    conf: SparkConf) extends Logging {
|
Member
Although this is a `private[spark]` class, cc @shrutig since this is a breaking change in your context.
Member
For this one, I made a PR to provide better backward compatibility and to help downstream `ExternalClusterManager` implementations. Since these classes are unchanged since 2.4.0, I believe we can declare it `@DeveloperApi`.
   private var watchConnection: Closeable = _
+  private val enableWatching = conf.get(KUBERNETES_EXECUTOR_ENABLE_API_WATCHER)

   def start(applicationId: String): Unit = {
-    require(watchConnection == null, "Cannot start the watcher twice.")
-    logDebug(s"Starting watch for pods with labels $SPARK_APP_ID_LABEL=$applicationId," +
-      s" $SPARK_ROLE_LABEL=$SPARK_POD_EXECUTOR_ROLE.")
-    watchConnection = kubernetesClient.pods()
-      .withLabel(SPARK_APP_ID_LABEL, applicationId)
-      .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)
-      .watch(new ExecutorPodsWatcher())
+    if (enableWatching) {
+      require(watchConnection == null, "Cannot start the watcher twice.")
+      logDebug(s"Starting watch for pods with labels $SPARK_APP_ID_LABEL=$applicationId," +
+        s" $SPARK_ROLE_LABEL=$SPARK_POD_EXECUTOR_ROLE.")
+      watchConnection = kubernetesClient.pods()
+        .withLabel(SPARK_APP_ID_LABEL, applicationId)
+        .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)
+        .watch(new ExecutorPodsWatcher())
+    }
   }

   def stop(): Unit = {
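To make the compatibility concern above concrete, here is a rough sketch of what a downstream call site looks like after this change. The helper name is made up, and it assumes code living under the org.apache.spark packages so the private[spark] constructor and companion classes are visible:

import io.fabric8.kubernetes.client.KubernetesClient

import org.apache.spark.SparkConf

// Hypothetical downstream wiring: the constructor now takes a SparkConf as a third
// argument, so the old two-argument call sites no longer compile.
def buildWatchSource(
    snapshotsStore: ExecutorPodsSnapshotsStore,
    kubernetesClient: KubernetesClient,
    conf: SparkConf): ExecutorPodsWatchSnapshotSource = {
  // With spark.kubernetes.executor.enableApiWatcher=false, start() registers no watch.
  new ExecutorPodsWatchSnapshotSource(snapshotsStore, kubernetesClient, conf)
}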
ExecutorPodsPollingSnapshotSourceSuite.scala
@@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.{ListOptionsBuilder, PodListBuilder}
 import io.fabric8.kubernetes.client.KubernetesClient
 import org.jmock.lib.concurrent.DeterministicScheduler
 import org.mockito.{Mock, MockitoAnnotations}
-import org.mockito.Mockito.{verify, when}
+import org.mockito.Mockito.{never, verify, when}
 import org.scalatest.BeforeAndAfter

 import org.apache.spark.{SparkConf, SparkFunSuite}
@@ -33,9 +33,9 @@ import org.apache.spark.scheduler.cluster.k8s.ExecutorLifecycleTestUtils._

 class ExecutorPodsPollingSnapshotSourceSuite extends SparkFunSuite with BeforeAndAfter {

-  private val sparkConf = new SparkConf
+  private val defaultConf = new SparkConf()

-  private val pollingInterval = sparkConf.get(KUBERNETES_EXECUTOR_API_POLLING_INTERVAL)
+  private val pollingInterval = defaultConf.get(KUBERNETES_EXECUTOR_API_POLLING_INTERVAL)

   @Mock
   private var kubernetesClient: KubernetesClient = _

@@ -61,12 +61,6 @@ class ExecutorPodsPollingSnapshotSourceSuite extends SparkFunSuite with BeforeAndAfter {
   before {
     MockitoAnnotations.openMocks(this).close()
     pollingExecutor = new DeterministicScheduler()
-    pollingSourceUnderTest = new ExecutorPodsPollingSnapshotSource(
-      sparkConf,
-      kubernetesClient,
-      eventQueue,
-      pollingExecutor)
-    pollingSourceUnderTest.start(TEST_SPARK_APP_ID)
     when(kubernetesClient.pods()).thenReturn(podOperations)
     when(podOperations.withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID))
       .thenReturn(appIdLabeledPods)

@@ -77,6 +71,13 @@ class ExecutorPodsPollingSnapshotSourceSuite extends SparkFunSuite with BeforeAndAfter {
   }

   test("Items returned by the API should be pushed to the event queue") {
+    val sparkConf = new SparkConf()
+    pollingSourceUnderTest = new ExecutorPodsPollingSnapshotSource(
+      sparkConf,
+      kubernetesClient,
+      eventQueue,
+      pollingExecutor)
+    pollingSourceUnderTest.start(TEST_SPARK_APP_ID)
     val exec1 = runningExecutor(1)
     val exec2 = runningExecutor(2)
     when(activeExecutorPods.list())

@@ -89,13 +90,27 @@ class ExecutorPodsPollingSnapshotSourceSuite extends SparkFunSuite with BeforeAndAfter {
     verify(eventQueue).replaceSnapshot(Seq(exec1, exec2))
   }

+  test("If polling is disabled we don't call pods() on the client") {
|
Member
nit. If you don't mind, could you add a test prefix, `SPARK-36462: `?
+    val sparkConf = new SparkConf()
+    val source = new ExecutorPodsPollingSnapshotSource(
+      sparkConf.set(KUBERNETES_EXECUTOR_ENABLE_API_POLLING, false),
+      kubernetesClient,
+      eventQueue,
+      pollingExecutor)
+    source.start(TEST_SPARK_APP_ID)
+    pollingExecutor.tick(pollingInterval, TimeUnit.MILLISECONDS)
+    verify(kubernetesClient, never()).pods()
+  }
+
   test("SPARK-36334: Support pod listing with resource version") {
     Seq(true, false).foreach { value =>
+      val sparkConf = new SparkConf()
       val source = new ExecutorPodsPollingSnapshotSource(
         sparkConf.set(KUBERNETES_EXECUTOR_API_POLLING_WITH_RESOURCE_VERSION, value),
         kubernetesClient,
         eventQueue,
         pollingExecutor)
       source.start(TEST_SPARK_APP_ID)
       pollingExecutor.tick(pollingInterval, TimeUnit.MILLISECONDS)
       if (value) {
         verify(activeExecutorPods).list(new ListOptionsBuilder().withResourceVersion("0").build())
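A natural companion check for the watcher path would mirror the polling test above. This is only a sketch, not part of the posted diff: the suite and test names are invented, and it assumes the file sits in the same package as the existing suites so the private[spark] constructor and config setter are accessible:

import io.fabric8.kubernetes.client.KubernetesClient
import org.mockito.Mockito.{mock, never, verify}

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.deploy.k8s.Config.KUBERNETES_EXECUTOR_ENABLE_API_WATCHER

class ExecutorPodsWatchSnapshotSourceDisabledSuite extends SparkFunSuite {

  test("SPARK-36462: If watching is disabled we don't call pods() on the client") {
    val kubernetesClient = mock(classOf[KubernetesClient])
    val snapshotsStore = mock(classOf[ExecutorPodsSnapshotsStore])
    val conf = new SparkConf().set(KUBERNETES_EXECUTOR_ENABLE_API_WATCHER, false)

    val source = new ExecutorPodsWatchSnapshotSource(snapshotsStore, kubernetesClient, conf)
    source.start("test-app-id")

    // With the watcher disabled, start() should be a no-op against the API server.
    verify(kubernetesClient, never()).pods()
  }
}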
Member
I feel the warning, `You should leave`, is insufficient. Can we make this `internal` conf?
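For reference, ConfigBuilder has an internal() marker that keeps an entry out of the user-facing configuration docs. A sketch of the change the reviewer is asking for, applied to the polling flag (this is the suggestion, not what the PR currently does):

  val KUBERNETES_EXECUTOR_ENABLE_API_POLLING =
    ConfigBuilder("spark.kubernetes.executor.enableApiPolling")
      .internal()  // hide from user-facing docs, as the review suggests
      .doc("If Spark should poll Kubernetes for executor pod status. " +
        "You should leave this enabled unless you're encountering performance issues with your etcd.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)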