KAFKA-14296; Partition leaders are not demoted during kraft controlled shutdown #12741
Changes from all commits:
ReplicaManager.scala

```diff
@@ -2142,7 +2142,6 @@ class ReplicaManager(val config: KafkaConfig,
   ): Unit = {
     stateChangeLogger.info(s"Transitioning ${localFollowers.size} partition(s) to " +
       "local followers.")
-    val shuttingDown = isShuttingDown.get()
     val partitionsToStartFetching = new mutable.HashMap[TopicPartition, Partition]
     val partitionsToStopFetching = new mutable.HashMap[TopicPartition, Boolean]
     val followerTopicSet = new mutable.HashSet[String]
```
```diff
@@ -2151,28 +2150,24 @@ class ReplicaManager(val config: KafkaConfig,
       try {
         followerTopicSet.add(tp.topic)

-        if (shuttingDown) {
-          stateChangeLogger.trace(s"Unable to start fetching $tp with topic " +
-            s"ID ${info.topicId} because the replica manager is shutting down.")
-        } else {
-          // We always update the follower state.
-          // - This ensure that a replica with no leader can step down;
-          // - This also ensures that the local replica is created even if the leader
-          //   is unavailable. This is required to ensure that we include the partition's
-          //   high watermark in the checkpoint file (see KAFKA-1647).
-          val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
-          val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))
-
-          if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
-              !info.partition.isr.contains(config.brokerId))) {
-            // During controlled shutdown, replica with no leaders and replica
-            // where this broker is not in the ISR are stopped.
-            partitionsToStopFetching.put(tp, false)
-          } else if (isNewLeaderEpoch) {
-            // Otherwise, fetcher is restarted if the leader epoch has changed.
-            partitionsToStartFetching.put(tp, partition)
-          }
-        }
+        // We always update the follower state.
+        // - This ensure that a replica with no leader can step down;
+        // - This also ensures that the local replica is created even if the leader
+        //   is unavailable. This is required to ensure that we include the partition's
+        //   high watermark in the checkpoint file (see KAFKA-1647).
+        val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
+        val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))
+
+        if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
+            !info.partition.isr.contains(config.brokerId))) {
+          // During controlled shutdown, replica with no leaders and replica
+          // where this broker is not in the ISR are stopped.
+          partitionsToStopFetching.put(tp, false)
+        } else if (isNewLeaderEpoch) {
+          // Otherwise, fetcher is restarted if the leader epoch has changed.
+          partitionsToStartFetching.put(tp, partition)
+        }
+
         changedPartitions.add(partition)
       } catch {
         case e: KafkaStorageException =>
```

On the new `isInControlledShutdown` check:

Member: Similarly, why do we need this special …

Member (PR author): We want to do this only when we are in controlled shutdown and it could be disabled. If you look at the caller side, we aligned this on how we do it for the …
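The heart of the fix is that `makeFollower` now runs unconditionally, so follower state is always updated (letting a leader step down even while the broker is shutting down), and only the fetcher decision is special-cased for controlled shutdown. A minimal Scala restatement of that decision, using simplified stand-in types rather than Kafka's actual `Partition`/`ReplicaManager` classes:

```scala
// A self-contained sketch of the decision rule from the patch above.
// The types here are invented stand-ins for illustration only.
object FollowerTransitionSketch {
  val NoLeader: Int = -1 // stand-in for Kafka's NO_LEADER constant

  sealed trait FetcherAction
  case object StopFetcher extends FetcherAction  // demoted: stop fetching
  case object StartFetcher extends FetcherAction // leader epoch bumped: restart fetching
  case object KeepCurrent extends FetcherAction  // nothing to do

  def followerAction(
    isInControlledShutdown: Boolean,
    leaderId: Int,
    isr: Set[Int],
    localBrokerId: Int,
    isNewLeaderEpoch: Boolean
  ): FetcherAction = {
    if (isInControlledShutdown && (leaderId == NoLeader || !isr.contains(localBrokerId)))
      StopFetcher   // controlled shutdown: leaderless or out-of-ISR replicas stop fetching
    else if (isNewLeaderEpoch)
      StartFetcher  // leader epoch changed: restart the fetcher
    else
      KeepCurrent
  }
}
```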
ReplicaManagerTest.scala

```diff
@@ -21,7 +21,7 @@ import java.io.File
 import java.net.InetAddress
 import java.nio.file.Files
 import java.util
-import java.util.concurrent.atomic.{AtomicLong, AtomicReference}
+import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong, AtomicReference}
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 import java.util.stream.IntStream
 import java.util.{Collections, Optional, Properties}
```

```diff
@@ -2210,7 +2210,8 @@ class ReplicaManagerTest {
     aliveBrokerIds: Seq[Int] = Seq(0, 1),
     propsModifier: Properties => Unit = _ => {},
     mockReplicaFetcherManager: Option[ReplicaFetcherManager] = None,
-    mockReplicaAlterLogDirsManager: Option[ReplicaAlterLogDirsManager] = None
+    mockReplicaAlterLogDirsManager: Option[ReplicaAlterLogDirsManager] = None,
+    isShuttingDown: AtomicBoolean = new AtomicBoolean(false)
   ): ReplicaManager = {
     val props = TestUtils.createBrokerConfig(brokerId, TestUtils.MockZkConnect)
     props.put("log.dirs", TestUtils.tempRelativeDir("data").getAbsolutePath + "," + TestUtils.tempRelativeDir("data2").getAbsolutePath)
```

On the new `isShuttingDown` parameter:

Contributor: I found the use of the field a bit confusing. We don't necessarily have to fix it here, but what do you think about letting …
```diff
@@ -2245,6 +2246,7 @@ class ReplicaManagerTest {
       metadataCache = metadataCache,
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
       alterPartitionManager = alterPartitionManager,
+      isShuttingDown = isShuttingDown,
       delayedProducePurgatoryParam = Some(mockProducePurgatory),
       delayedFetchPurgatoryParam = Some(mockFetchPurgatory),
       delayedDeleteRecordsPurgatoryParam = Some(mockDeleteRecordsPurgatory),
```
```diff
@@ -3868,10 +3870,12 @@ class ReplicaManagerTest {
     val foo2 = new TopicPartition("foo", 2)

     val mockReplicaFetcherManager = mock(classOf[ReplicaFetcherManager])
+    val isShuttingDown = new AtomicBoolean(false)
     val replicaManager = setupReplicaManagerWithMockedPurgatories(
       timer = new MockTimer(time),
       brokerId = localId,
-      mockReplicaFetcherManager = Some(mockReplicaFetcherManager)
+      mockReplicaFetcherManager = Some(mockReplicaFetcherManager),
+      isShuttingDown = isShuttingDown
     )

     try {
```
```diff
@@ -3940,6 +3944,10 @@ class ReplicaManagerTest {

       reset(mockReplicaFetcherManager)

+      // The broker transitions to SHUTTING_DOWN state. This should not have
+      // any impact in KRaft mode.
+      isShuttingDown.set(true)
+
       // The replica begins the controlled shutdown.
       replicaManager.beginControlledShutdown()

```
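The excerpt ends before the test's assertions. For context, a hedged sketch of the sort of check such a test could make with the mocked fetcher manager; the partition and arguments here are illustrative, not taken from the PR:

```scala
// Illustrative only: after beginControlledShutdown() and a metadata delta that
// leaves this broker leaderless or out of the ISR for foo2, the test would
// expect the fetcher for that partition to be removed.
verify(mockReplicaFetcherManager).removeFetcherForPartitions(Set(foo2))
```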
Reviewer: This code is a bit brittle, can we have something like the following as the field instead? Then we don't have to make sure to update `isShuttingDown` when the logic changes in `KafkaServer`.

Author: I suppose that you are saying that relying on `isShuttingDown` is brittle, right? If that is the case, I do agree that relying on the broker state is better. I think that we can refactor the remaining usages as a follow-up.
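The reviewer's suggested snippet was not captured in this excerpt. A plausible shape of the idea, as a sketch under assumptions (the `LifecycleState` type and wiring below are invented for illustration; the real enum would be Kafka's `BrokerState` with its `SHUTTING_DOWN` value): instead of a mutable `AtomicBoolean` that `KafkaServer` must remember to flip, pass a function that derives the answer from the broker's lifecycle state.

```scala
import java.util.concurrent.atomic.AtomicReference

// Invented stand-ins for illustration only.
sealed trait LifecycleState
case object Running extends LifecycleState
case object ShuttingDown extends LifecycleState

// Sketch: ReplicaManager takes a () => Boolean, so "are we shutting down?"
// is always derived from the single source of truth rather than from a
// second flag that can drift out of sync with it.
class ReplicaManagerSketch(isShuttingDown: () => Boolean) {
  def canStartFetcher: Boolean = !isShuttingDown()
}

object KafkaServerSketch {
  private val state = new AtomicReference[LifecycleState](Running)

  val replicaManager = new ReplicaManagerSketch(
    () => state.get() == ShuttingDown
  )
}
```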